Changeset 6a490b2


Timestamp: May 11, 2020, 1:53:29 PM (5 years ago)
Author: Thierry Delisle <tdelisle@…>
Branches: ADT, arm-eh, ast-experimental, enum, forall-pointer-decay, jacob/cs343-translation, master, new-ast, new-ast-unique-expr, pthread-emulation, qualifiedEnum
Children: 504a7dc
Parents: b7d6a36 (diff), a7b486b (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself. Use the (diff) links above to see all the changes relative to each parent.
Message: Merge branch 'master' into relaxed_ready
Files: 94 added, 9 deleted, 137 edited, 2 moved

  • Jenkinsfile

    rb7d6a36 r6a490b2  
    126126                        }
    127127
    128                         sh "${SrcDir}/configure CXX=${Settings.Compiler.CXX} CC=${Settings.Compiler.CC} ${Settings.Architecture.flags} ${targets} --quiet"
     128                        sh "${SrcDir}/configure CXX=${Settings.Compiler.CXX} CC=${Settings.Compiler.CC} ${Settings.Architecture.flags} AR=gcc-ar RANLIB=gcc-ranlib ${targets} --quiet --prefix=${BuildDir}"
    129129
    130130                        // Configure libcfa
     
    155155                dir (BuildDir) {
    156156                        sh "make -j 8 --no-print-directory -C libcfa/${Settings.Architecture.name}-nodebug"
     157                }
     158        }
     159
     160        build_stage('Build : install', true) {
     161                // Build outside of the src tree to ease cleaning
     162                dir (BuildDir) {
     163                        sh "make -j 8 --no-print-directory install"
    157164                }
    158165        }
     
    179186                echo "Archiving core dumps"
    180187                dir (BuildDir) {
    181                         archiveArtifacts artifacts: "tests/crashes/**/*", fingerprint: true
     188                        archiveArtifacts artifacts: "tests/crashes/**/*,lib/**/lib*.so*", fingerprint: true
    182189                }
    183190                throw err
     
    325332        public String CXX
    326333        public String CC
    327 
    328         CC_Desc(String name, String CXX, String CC) {
     334        public String lto
     335
     336        CC_Desc(String name, String CXX, String CC, String lto) {
    329337                this.name = name
    330338                this.CXX = CXX
    331                 this.CC = CC
     339                this.CC  = CC
     340                this.lto = lto
    332341        }
    333342}
     
    364373                switch( param.Compiler ) {
    365374                        case 'gcc-9':
    366                                 this.Compiler = new CC_Desc('gcc-9', 'g++-9', 'gcc-9')
     375                                this.Compiler = new CC_Desc('gcc-9', 'g++-9', 'gcc-9', '-flto=auto')
    367376                        break
    368377                        case 'gcc-8':
    369                                 this.Compiler = new CC_Desc('gcc-8', 'g++-8', 'gcc-8')
     378                                this.Compiler = new CC_Desc('gcc-8', 'g++-8', 'gcc-8', '-flto=auto')
    370379                        break
    371380                        case 'gcc-7':
    372                                 this.Compiler = new CC_Desc('gcc-7', 'g++-7', 'gcc-7')
     381                                this.Compiler = new CC_Desc('gcc-7', 'g++-7', 'gcc-7', '-flto=auto')
    373382                        break
    374383                        case 'gcc-6':
    375                                 this.Compiler = new CC_Desc('gcc-6', 'g++-6', 'gcc-6')
     384                                this.Compiler = new CC_Desc('gcc-6', 'g++-6', 'gcc-6', '-flto=auto')
    376385                        break
    377386                        case 'gcc-5':
    378                                 this.Compiler = new CC_Desc('gcc-5', 'g++-5', 'gcc-5')
     387                                this.Compiler = new CC_Desc('gcc-5', 'g++-5', 'gcc-5', '-flto=auto')
    379388                        break
    380389                        case 'gcc-4.9':
    381                                 this.Compiler = new CC_Desc('gcc-4.9', 'g++-4.9', 'gcc-4.9')
     390                                this.Compiler = new CC_Desc('gcc-4.9', 'g++-4.9', 'gcc-4.9', '-flto=auto')
    382391                        break
    383392                        case 'clang':
    384                                 this.Compiler = new CC_Desc('clang', 'clang++-6.0', 'gcc-6')
     393                                this.Compiler = new CC_Desc('clang', 'clang++-6.0', 'gcc-6', '-flto=thin -flto-jobs=0')
    385394                        break
    386395                        default :
     
    439448        // prepare the properties
    440449        properties ([                                                                                                   \
     450                buildDiscarder(logRotator(                                                                              \
     451                        artifactDaysToKeepStr: '',                                                                      \
     452                        artifactNumToKeepStr: '',                                                                       \
     453                        daysToKeepStr: '730',                                                                           \
     454                        numToKeepStr: '1000'                                                                            \
     455                )),                                                                                                             \
    441456                [$class: 'ParametersDefinitionProperty',                                                                \
    442457                        parameterDefinitions: [                                                                         \
  • benchmark/Makefile.am

    rb7d6a36 r6a490b2  
    1111## Created On       : Sun May 31 09:08:15 2015
    1212## Last Modified By : Peter A. Buhr
    13 ## Last Modified On : Sat Jan 25 09:20:44 2020
    14 ## Update Count     : 255
     13## Last Modified On : Tue Mar 10 11:41:18 2020
     14## Update Count     : 258
    1515###############################################################################
    1616
     
    108108creation_cfa_coroutine_DURATION = 100000000
    109109creation_cfa_coroutine_eager_DURATION = 10000000
     110creation_cfa_generator_DURATION = 1000000000
    110111creation_upp_coroutine_DURATION = ${creation_cfa_coroutine_eager_DURATION}
    111112creation_cfa_thread_DURATION = 10000000
     
    513514compile-typeof$(EXEEXT):
    514515        $(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
     516
     517## =========================================================================================================
     518
     519size$(EXEEXT) : size-cfa.runquiet
     520
     521size-cfa$(EXEEXT):
     522        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/size/size.cfa
  • benchmark/Makefile.in

    rb7d6a36 r6a490b2  
    447447creation_cfa_coroutine_DURATION = 100000000
    448448creation_cfa_coroutine_eager_DURATION = 10000000
     449creation_cfa_generator_DURATION = 1000000000
    449450creation_upp_coroutine_DURATION = ${creation_cfa_coroutine_eager_DURATION}
    450451creation_cfa_thread_DURATION = 10000000
     
    11471148        $(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
    11481149
     1150size$(EXEEXT) : size-cfa.runquiet
     1151
     1152size-cfa$(EXEEXT):
     1153        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/size/size.cfa
     1154
    11491155# Tell versions [3.59,3.63) of GNU make to not export all variables.
    11501156# Otherwise a system limit (for SysV at least) may be exceeded.
  • benchmark/creation/cfa_gen.cfa

    rb7d6a36 r6a490b2  
    1 #include "bench.h"
     1#include "../bench.h"
    22
    3 struct C {
     3generator G {
    44        volatile int restart; // ensure compiler does not optimize away all the code
    55};
    6 void ?{}( C & c ) { c.restart = 0; }
    7 void main( C & ) {}
     6void ?{}( G & g ) { g.restart = 0; }
     7void main( G & ) {}
    88
    99int main( int argc, char * argv[] ) {
     
    1111        BENCH(
    1212                for ( times ) {
    13                          C c;
     13                         G g;
    1414                },
    1515                result
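
Note: the hunk above turns the creation benchmark's plain struct into a language-level CFA generator. As a minimal standalone sketch only (not taken from the changeset; the bench.h harness is omitted and the iteration count is a placeholder), the same pattern reads:

    generator G {
        volatile int restart;               // field keeps the compiler from optimizing everything away
    };
    void ?{}( G & g ) { g.restart = 0; }    // CFA constructor, runs at each "G g;"
    void main( G & ) {}                     // empty generator body; never resumed in this benchmark

    int main() {
        for ( 10000000 ) {                  // placeholder count: construct/destruct one generator per iteration
            G g;
        }
    }
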
  • benchmark/ctxswitch/cfa_cor.cfa

    rb7d6a36 r6a490b2  
    22#include <thread.hfa>
    33
    4 #include "bench.h"
     4#include "../bench.h"
    55
    6 coroutine C {} c;
     6coroutine C {};
    77void main( __attribute__((unused)) C & ) {
    8         while () {
    9                 suspend();
     8        for () {
     9                suspend;
    1010        }
    1111}
    1212int main( int argc, char * argv[] ) {
     13        C c;
    1314        BENCH_START()
    1415        BENCH(
  • benchmark/ctxswitch/cfa_gen.cfa

    rb7d6a36 r6a490b2  
    11#include "../bench.h"
    22
    3 typedef struct {
    4         void * next;
    5 } C;
    6 
    7 void comain( C * c ) {
    8         if ( __builtin_expect(c->next != 0, 1) ) goto *(c->next);
    9         c->next = &&s1;
     3generator G {};
     4void main( G & ) {
    105        for () {
    11                 return;
    12           s1: ;
     6                suspend;
    137        }
    148}
    159
    1610int main( int argc, char * argv[] ) {
     11        G g;
    1712        BENCH_START()
    18         C c = { 0 };
    1913        BENCH(
    2014                for ( times ) {
    21                         comain( &c );
     15                        resume( g );
    2216                },
    2317                result
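
Note: the hunk above replaces the hand-rolled computed-goto generator with CFA's language-level generator, where suspend is a statement inside the generator's main and resume( g ) drives it from the caller. A minimal standalone sketch of that round trip (not taken from the changeset; the bench.h harness is omitted and the loop counts are placeholders):

    generator G {
        int i;                              // state retained across suspends
    };
    void main( G & g ) {
        for () {                            // CFA infinite loop
            g.i += 1;
            suspend;                        // return control to the resumer, keeping execution state
        }
    }

    int main() {
        G g;
        g.i = 0;
        for ( 5 ) {                         // five suspend/resume round trips
            resume( g );                    // run g until its next suspend
        }
        // here g.i == 5
    }
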
  • configure

    rb7d6a36 r6a490b2  
    25572557# don't use the default CFLAGS as they unconditonnaly add -O2
    25582558: ${CFLAGS=""}
     2559: ${CXXFLAGS=""}
    25592560
    25602561am__api_version='1.15'
  • configure.ac

    rb7d6a36 r6a490b2  
    1414# don't use the default CFLAGS as they unconditonnaly add -O2
    1515: ${CFLAGS=""}
     16: ${CXXFLAGS=""}
    1617
    1718AM_INIT_AUTOMAKE([subdir-objects])
  • doc/bibliography/pl.bib

    rb7d6a36 r6a490b2  
    99%    Predefined journal names:
    1010%  acmcs: Computing Surveys             acta: Acta Infomatica
    11 @string{acta="Acta Infomatica"}
    1211%  cacm: Communications of the ACM
    1312%  ibmjrd: IBM J. Research & Development ibmsj: IBM Systems Journal
     
    2221%  tcs: Theoretical Computer Science
    2322
     23@string{acta="Acta Infomatica"}
    2424string{ieeepds="IEEE Transactions on Parallel and Distributed Systems"}
    2525@string{ieeepds="IEEE Trans. Parallel Distrib. Syst."}
     
    124124    series      = {ACM Distinguished Dissertations},
    125125    year        = 1983,
     126}
     127
     128@article{Zhang19,
     129    keywords    = {Algebraic effects, dynamic scoping, exceptions, parametricity, type systems},
     130    author      = {Zhang, Yizhou and Myers, Andrew C.},
     131    title       = {Abstraction-safe Effect Handlers via Tunneling},
     132    journal     = {Proc. ACM Program. Lang.},
     133    issue_date  = {January 2019},
     134    volume      = {3},
     135    number      = {POPL},
     136    month       = jan,
     137    year        = {2019},
     138    issn        = {2475-1421},
     139    pages       = {5:1--5:29},
     140    articleno   = {5},
     141    publisher   = {ACM},
     142    address     = {New York, NY, USA},
     143}
     144
     145@inproceedings{Zhang16,
     146    keywords    = {Exception tunneling, Genus, exception handling},
     147    author      = {Zhang, Yizhou and Salvaneschi, Guido and Beightol, Quinn and Liskov, Barbara and Myers, Andrew C.},
     148    title       = {Accepting Blame for Safe Tunneled Exceptions},
     149    booktitle   = {Proceedings of the 37th ACM SIGPLAN Conference on Programming Language Design and Implementation},
     150    series      = {PLDI'16},
     151    year        = {2016},
     152    location    = {Santa Barbara, CA, USA},
     153    pages       = {281--295},
     154    publisher   = {ACM},
     155    address     = {New York, NY, USA},
    126156}
    127157
     
    398428    journal     = sigplan,
    399429    year        = 1981,
    400     month       = feb, volume = 16, number = 2, pages = {48-52},
     430    month       = feb,
     431    volume      = 16,
     432    number      = 2,
     433    pages       = {48-52},
    401434    comment     = {
    402435        A one-pass, top-down algorithm for overload resolution.  Input is a
     
    477510    title       = {An Alternative to Subclassing},
    478511    journal     = sigplan,
    479     volume      = {21},    number = {11},
     512    volume      = {21},
     513    number      = {11},
    480514    pages       = {424-428},
    481     month       = nov, year = 1986,
     515    month       = nov,
     516    year        = 1986,
    482517    comment     = {
    483518        The Smalltalk class hierarchy has three uses: factoring out code;
     
    533568    isbn        = {3-540-66538-2},
    534569    location    = {Toulouse, France},
    535     doi         = {http://doi.acm.org/10.1145/318773.319251},
    536570    publisher   = {Springer},
    537571    address     = {London, UK},
     
    631665    year        = 2010,
    632666    pages       = {39--50},
    633     numpages    = {12},
    634667    publisher   = {IEEE Computer Society},
    635668    address     = {Washington, DC, USA},
     
    922955}
    923956
     957@manual{C99,
     958    keywords    = {ISO/IEC C 9899},
     959    contributer = {pabuhr@plg},
     960    key         = {C99},
     961    title       = {C Programming Language {ISO/IEC} 9899:1999(E)},
     962    edition     = {2nd},
     963    publisher   = {International Standard Organization},
     964    address     = {\href{https://webstore.ansi.org/Standards/INCITS/INCITSISOIEC98991999R2005}{https://webstore.ansi.org/\-Standards/\-INCITS/\-INCITSISOIEC98991999R2005}},
     965    year        = 1999,
     966}
     967
    924968@manual{C11,
    925969    keywords    = {ISO/IEC C 11},
     
    13051349    location    = {London, United Kingdom},
    13061350    pages       = {41--53},
    1307     numpages    = {13},
    1308     url         = {http://doi.acm.org/10.1145/360204.360207},
    1309     doi         = {10.1145/360204.360207},
    1310     acmid       = {360207},
    13111351    publisher   = {ACM},
    13121352    address     = {New York, NY, USA},
     
    24082448    year        = 1993,
    24092449    pages       = {201--208},
    2410     url         = {http://doi.acm.org/10.1145/155360.155580},
    24112450    publisher   = {ACM},
    24122451    address     = {New York, NY, USA},
     
    26062645    location    = {Boulder, Colorado, USA},
    26072646    pages       = {91--97},
    2608     numpages    = {7},
    26092647    publisher   = {ACM},
    26102648    address     = {New York, NY, USA},
     
    26372675    issn        = {0004-5411},
    26382676    pages       = {215--225},
    2639     numpages    = {11},
    2640     url         = {http://doi.acm.org/10.1145/321879.321884},
    2641     doi         = {10.1145/321879.321884},
    2642     acmid       = {321884},
    26432677    publisher   = {ACM},
    26442678    address     = {New York, NY, USA},
     
    27082742}
    27092743
     2744@misc{Drepper13,
     2745    keywords    = {thread-local storage},
     2746    contributer = {pabuhr@plg},
     2747    author      = {Ulrich Drepper},
     2748    title       = {{ELF} Handling For Thread-Local Storage},
     2749    year        = 2013,
     2750    month       = aug,
     2751    note        = {WikipediA},
     2752    howpublished= {\href{http://www.akkadia.org/drepper/tls.pdf}
     2753                  {http://\-www.akkadia.org/\-drepper/\-tls.pdf}},
     2754}
     2755
    27102756@misc{Turley99,
    27112757    keywords    = {embedded system, micrprocessor},
     
    27182764    howpublished= {\href{https://www.eetimes.com/author.asp?sectionid=36&doc_id=1287712}
    27192765                  {https://\-www.eetimes.com/\-author.asp?sectionid=\-36&doc_id=1287712}},
     2766}
     2767
     2768@article{Xiao19,
     2769    keywords    = {bug classification, fault trigger, Linux operating system, regression bug},
     2770    contributer = {pabuhr@plg},
     2771    author      = {Guanping Xiao and Zheng Zheng and Beibei Yin and Kishor S. Trivedi and Xiaoting Du and Kai-Yuan Cai},
     2772    title       = {An Empirical Study of Fault Triggers in the Linux Operating System: An Evolutionary Perspective},
     2773    journal     = {IEEE Transactions on Reliability},
     2774    month       = dec,
     2775    year        = 2019,
     2776    volume      = 68,
     2777    number      = 4,
     2778    pages       = {1356-1383},
    27202779}
    27212780
     
    31373196}
    31383197
     3198@inproceedings{Palix11,
     3199    keywords    = {Linux, fault-finding tools},
     3200    contributer = {pabuhr@plg},
     3201    author      = {Nicolas Palix and Ga\"el Thomas and Suman Saha and Christophe Calv\`es and Julia Lawall and Gilles Muller},
     3202    title       = {Faults in Linux: Ten Years Later},
     3203    booktitle   = {Proc. of the 16 International Conf. on Arch. Support for Prog. Lang. and Oper. Sys.},
     3204    series      = {ASPLOS'11},
     3205    month       = mar,
     3206    year        = 2011,
     3207    location    = {Newport Beach, California, USA},
     3208    pages       = {305-318},
     3209    publisher   = {ACM},
     3210    address     = {New York, NY, USA},
     3211}
     3212
    31393213@article{Lamport87,
    31403214    keywords    = {software solutions, mutual exclusion, fast},
     
    32583332    issn        = {0001-0782},
    32593333    pages       = {107--115},
    3260     numpages    = {9},
    3261     url         = {http://doi.acm.org/10.1145/1538788.1538814},
    3262     doi         = {10.1145/1538788.1538814},
    3263     acmid       = {1538814},
    32643334    publisher   = {ACM},
    32653335    address     = {New York, NY, USA},
     
    36643734}
    36653735
     3736@mastersthesis{Radhakrishnan19,
     3737    author      = {Srihari Radhakrishnan},
     3738    title       = {High Performance Web Servers: A Study In Concurrent Programming Models},
     3739    school      = {School of Computer Sc., University of Waterloo},
     3740    year        = 2019,
     3741    optaddress  = {Waterloo, Ontario, Canada, N2L 3G1},
     3742    note        = {\href{https://uwspace.uwaterloo.ca/handle/10012/14706}{https://\-uwspace.uwaterloo.ca/\-handle/\-10012/\-14706}},
     3743}
     3744
    36663745@article{katzenelson83b,
    36673746    contributer = {gjditchfield@plg},
     
    36973776    pages       = {115-138},
    36983777    year        = 1971,
     3778}
     3779
     3780@inproceedings{Hagersten03,
     3781    keywords    = {cache storage, parallel architectures, performance evaluation, shared memory systems},
     3782    author      = {Zoran Radovi\'{c} and Erik Hagersten},
     3783    title       = {Hierarchical backoff locks for nonuniform communication architectures},
     3784    booktitle   = {Proceedings of the Ninth International Symposium on High-Performance Computer Architecture},
     3785    year        = {2003},
     3786    location    = {Anaheim, CA, USA},
     3787    pages       = {241-252},
     3788    publisher   = {IEEE},
    36993789}
    37003790
     
    43654455}
    43664456
     4457@misc{gccValueLabels,
     4458    keywords    = {gcc extension, value labels},
     4459    contributer = {pabuhr@plg},
     4460    key         = {Labels as Values},
     4461    author      = {{gcc Extension}},
     4462    title       = {Labels as Values},
     4463    year        = {since gcc-3},
     4464    howpublished= {\href{https://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html}
     4465                  {https:\-//gcc.gnu.org/\-onlinedocs/\-gcc/\-Labels-as-Values.html}},
     4466}
     4467
    43674468@mastersthesis{Clarke90,
    43684469    keywords    = {concurrency, postponing requests},
     
    44234524}
    44244525
     4526@misc{libfibre,
     4527    key         = {libfibre},
     4528    author      = {Martin Karsten},
     4529    title       = {{libfibre:~User-Level Threading Runtime}},
     4530    howpublished= {\href{https://git.uwaterloo.ca/mkarsten/libfibre}
     4531                  {https://\-git.uwaterloo.ca/\-mkarsten/\-libfibre}},
     4532    note        = {[Online; accessed 2020-04-15]},
     4533}
     4534
    44254535@article{Linda,
    44264536    keywords    = {Linda, concurrency},
     
    44564566}
    44574567
     4568@inproceedings{Fang06,
     4569    author      = {Fang, Yi and McMillan, Kenneth L. and Pnueli, Amir and Zuck, Lenore D.},
     4570    editor      = {Najm, Elie and Pradat-Peyre, Jean-Fran{\c{c}}ois and Donzeau-Gouge, V{\'e}ronique Vigui{\'e}},
     4571    title       = {Liveness by Invisible Invariants},
     4572    booktitle   = {Formal Techniques for Networked and Distributed Systems - FORTE 2006},
     4573    year        = 2006,
     4574    publisher   = {Springer Berlin Heidelberg},
     4575    address     = {Berlin, Heidelberg},
     4576    pages       = {356--371},
     4577}
     4578
    44584579@article{Pierce00,
    4459     keywords    = {Scala},
     4580    keywords    = {Scala, polymorphism, subtyping, type inference},
    44604581    contributer = {a3moss@uwaterloo.ca},
    44614582    author      = {Pierce, Benjamin C. and Turner, David N.},
     
    44694590    issn        = {0164-0925},
    44704591    pages       = {1--44},
    4471     numpages    = {44},
    4472     url         = {http://doi.acm.org/10.1145/345099.345100},
    4473     doi         = {10.1145/345099.345100},
    4474     acmid       = {345100},
    44754592    publisher   = {ACM},
    44764593    address     = {New York, NY, USA},
    4477     keywords    = {polymorphism, subtyping, type inference},
    44784594}
     4595
     4596@article{Dice15,
     4597    keywords    = {Concurrency, NUMA, hierarchical locks, locks, multicore, mutex, mutual exclusion, spin locks},
     4598    author      = {Dice, David and Marathe, Virendra J. and Shavit, Nir},
     4599    title       = {Lock Cohorting: A General Technique for Designing NUMA Locks},
     4600    journal     = {ACM Trans. Parallel Comput.},
     4601    issue_date  = {January 2015},
     4602    volume      = 1,
     4603    number      = 2,
     4604    month       = feb,
     4605    year        = 2015,
     4606    pages       = {13:1--13:42},
     4607    publisher   = {ACM},
     4608    address     = {New York, NY, USA},
     4609}
    44794610
    44804611@article{Sundell08,
     
    45544685    journal     = sigplan,
    45554686    year        = 1989,
    4556     month       = jun, volume = 24, number = 6, pages = {37-48},
     4687    month       = jun,
     4688    volume      = 24,
     4689    number      = 6,
     4690    pages       = {37-48},
    45574691    abstract    = {
    45584692        This paper describes a scheme we have used to manage a large
     
    46254759    contributer = {pabuhr@plg},
    46264760    author      = {Gregory R. Andrews},
    4627     title       = {A Method for Solving Synronization Problems},
     4761    title       = {A Method for Solving Synchronization Problems},
    46284762    journal     = scp,
    46294763    volume      = 13,
     
    49505084    title       = {Multiple Inheritance for {C}{\kern-.1em\hbox{\large\texttt{+\kern-.25em+}}}},
    49515085    booktitle   = {Proceedings of the Spring '87 EUUG Conference},
    4952     month       = may, year = 1987
     5086    month       = may,
     5087    year        = 1987,
    49535088}
    49545089
     
    49955130    year        = 1986,
    49965131    pages       = {313--326},
    4997     numpages    = {14},
    49985132    publisher   = {ACM},
    49995133    address     = {New York, NY, USA},
     
    50115145    year        = 1986,
    50125146    pages       = {327--348},
    5013     numpages    = {22},
    50145147    publisher   = {ACM},
    50155148    address     = {New York, NY, USA},
     
    52085341    year        = 2005,
    52095342    pages       = {146-196},
    5210     numpages    = {51},
    52115343    publisher   = {ACM},
    52125344    address     = {New York, NY, USA},
     
    53545486    year        = 2000,
    53555487    pages       = {29-46},
    5356     note        = {OOPSLA'00, Oct. 15--19, 2000, Minneapolis, Minnesota, U.S.A.},
     5488    note        = {OOPSLA'00, Oct. 15--19, 2000, Minneapolis, Minn., U.S.A.},
    53575489}
    53585490
     
    54685600    location    = {San Diego, California, USA},
    54695601    pages       = {101--112},
    5470     numpages    = {12},
    5471     url         = {http://doi.acm.org/10.1145/2535838.2535878},
    5472     doi         = {10.1145/2535838.2535878},
    5473     acmid       = {2535878},
    54745602    publisher   = {ACM},
    54755603    address     = {New York, NY, USA},
     
    55755703    issn        = {0362-1340},
    55765704    pages       = {30--42},
    5577     numpages    = {13},
    5578     url         = {http://doi.acm.org/10.1145/947586.947589},
    5579     doi         = {10.1145/947586.947589},
    55805705    publisher   = {ACM},
    55815706    address     = {New York, NY, USA}
     
    61126237    month       = 9,
    61136238    year        = 2005,
     6239}
     6240
     6241@article{Bauer15,
     6242    keywords    = {resumption exceptions, theory},
     6243    contributer = {pabuhr@plg},
     6244    author      = {Andrej Bauer and Matija Pretnar},
     6245    title       = {Programming with Algebraic Effects and Handlers},
     6246    journal     = {Journal of Logical and Algebraic Methods in Programming},
     6247    publisher   = {Elsevier BV},
     6248    volume      = 84,
     6249    number      = 1,
     6250    month       = jan,
     6251    year        = 2015,
     6252    pages       = {108-123},
    61146253}
    61156254
     
    64996638    issn        = {0164-0925},
    65006639    pages       = {429-475},
    6501     url         = {http://doi.acm.org/10.1145/1133651.1133653},
    6502     doi         = {10.1145/1133651.1133653},
    6503     acmid       = {1133653},
    65046640    publisher   = {ACM},
    65056641    address     = {New York, NY, USA},
     
    65296665    address     = {\href{http://docs.paralleluniverse.co/quasar}{http://\-docs.paralleluniverse.co/\-quasar}},
    65306666    year        = 2018,
     6667}
     6668
     6669@article{Aravind09,
     6670    author      = {Alex A. Aravind and Wim H. Hesselink},
     6671    title       = {A Queue Based Mutual Exclusion Algorithm},
     6672    journal     = acta,
     6673    volume      = 46,
     6674    pages       = {73--86},
     6675    year        = 2009,
    65316676}
    65326677
     
    68797024    issn        = {0001-0782},
    68807025    pages       = {565--569},
    6881     numpages    = {5},
    6882     url         = {http://doi.acm.org/10.1145/359545.359566},
    6883     doi         = {10.1145/359545.359566},
    6884     acmid       = {359566},
    68857026    publisher   = {ACM},
    68867027    address     = {New York, NY, USA}
     
    69007041    issn        = {0362-1340},
    69017042    pages       = {145--147},
    6902     numpages    = {3},
    6903     url         = {http://doi.acm.org/10.1145/122598.122614},
    6904     doi         = {10.1145/122598.122614},
    6905     acmid       = {122614},
    69067043    publisher   = {ACM},
    69077044    address     = {New York, NY, USA},
     
    70067143    issn        = {0362-1340},
    70077144    pages       = {82--87},
    7008     numpages    = {6},
    7009     url         = {http://doi.acm.org/10.1145/947680.947688},
    7010     doi         = {10.1145/947680.947688},
    70117145    publisher   = {ACM},
    70127146    address     = {New York, NY, USA},
     
    71537287}
    71547288
     7289@article{Cascaval08,
     7290    author      = {Cascaval, Calin and Blundell, Colin and Michael, Maged and Cain, Harold W. and Wu, Peng and Chiras, Stefanie and Chatterjee, Siddhartha},
     7291    title       = {Software Transactional Memory: Why Is It Only a Research Toy?},
     7292    journal     = {Queue},
     7293    volume      = {6},
     7294    number      = {5},
     7295    month       = sep,
     7296    year        = {2008},
     7297    pages       = {40:46--40:58},
     7298    publisher   = {ACM},
     7299    address     = {New York, NY, USA},
     7300}
     7301
    71557302@article{Dijkstra65a,
    71567303    keywords    = {N-thread software-solution mutual exclusion},
     
    73637510    year        = 1974,
    73647511    pages       = {261-301},
    7365     issn        = {0360-0300},
    7366     doi         = {http://doi.acm.org/10.1145/356635.356640},
    73677512    publisher   = {ACM},
    73687513    address     = {New York, NY, USA},
     
    74547599    publisher   = {ACM Press},
    74557600    address     = {New York, NY, USA},
    7456     doi         = {http://doi.acm.org/10.1145/356586.356588},
    74577601}
    74587602
     
    77557899    howpublished= {\href{https://projects.eclipse.org/proposals/trace-compass}{https://\-projects.eclipse.org/\-proposals/\-trace-compass}},
    77567900}
    7757  
     7901
     7902@inproceedings{Boehm09,
     7903    author      = {Boehm, Hans-J.},
     7904    title       = {Transactional Memory Should Be an Implementation Technique, Not a Programming Interface},
     7905    booktitle   = {Proceedings of the First USENIX Conference on Hot Topics in Parallelism},
     7906    series      = {HotPar'09},
     7907    year        = {2009},
     7908    location    = {Berkeley, California},
     7909    publisher   = {USENIX Association},
     7910    address     = {Berkeley, CA, USA},
     7911}
     7912
    77587913@article{Leroy00,
    77597914    keywords    = {type-systems, exceptions},
     
    78057960    number      = {2},
    78067961    pages       = {204-214},
    7807     month       = apr, year = 1988,
     7962    month       = apr,
     7963    year        = 1988,
    78087964    comment     = {
    78097965        Extended record types add fields to their base record.  Assignment
     
    79048060}
    79058061
     8062@article{Karsten20,
     8063    author      = {Karsten, Martin and Barghi, Saman},
     8064    title       = {{User-level Threading: Have Your Cake and Eat It Too}},
     8065    year        = {2020},
     8066    issue_date  = {March 2020},
     8067    publisher   = {Association for Computing Machinery},
     8068    address     = {New York, NY, USA},
     8069    volume      = {4},
     8070    number      = {1},
     8071    url         = {https://doi.org/10.1145/3379483},
     8072    doi         = {10.1145/3379483},
     8073    journal     = {Proc. ACM Meas. Anal. Comput. Syst.},
     8074    month       = mar,
     8075    numpages    = {30},
     8076}
     8077
    79068078@techreport{Harmony,
    79078079    keywords    = {messages, concurrency},
     
    79198091    contributer = {gjditchfield@plg},
    79208092    author      = {Henry Lieverman},
    7921     title       = {Using Prototypical Objects to Implement Shared Behavior in
    7922                   Object Oriented Systems},
     8093    title       = {Using Prototypical Objects to Implement Shared Behavior in Object Oriented Systems},
    79238094    journal     = sigplan,
    7924     month       = nov, year = 1986,
    7925     volume      = 21, number = 11, pages = {214-223}
     8095    month       = nov,
     8096    year        = 1986,
     8097    volume      = 21,
     8098    number      = 11,
     8099    pages       = {214-223}
    79268100}
    79278101
     
    81108284    issn        = {0004-5411},
    81118285    pages       = {245--281},
    8112     numpages    = {37},
    8113     url         = {http://doi.acm.org/10.1145/62.2160},
    8114     doi         = {10.1145/62.2160},
    8115     acmid       = {2160},
    81168286    publisher   = {ACM},
    81178287    address     = {New York, NY, USA},
     
    81268296    contributer = {pabuhr@plg},
    81278297    author      = {Boehm, Hans-J. and Adve, Sarita V.},
    8128     title       = {You Don'T Know Jack About Shared Variables or Memory Models},
     8298    title       = {You Don't Know Jack About Shared Variables or Memory Models},
    81298299    journal     = cacm,
    81308300    volume      = 55,
  • doc/papers/concurrency/Paper.tex

    rb7d6a36 r6a490b2  
    6161\newcommand{\CCseventeen}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}17\xspace} % C++17 symbolic name
    6262\newcommand{\CCtwenty}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}20\xspace} % C++20 symbolic name
    63 \newcommand{\Csharp}{C\raisebox{-0.7ex}{\Large$^\sharp$}\xspace} % C# symbolic name
     63\newcommand{\Csharp}{C\raisebox{-0.7ex}{\large$^\sharp$}\xspace} % C# symbolic name
    6464
    6565%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     
    127127\newcommand*{\etc}{%
    128128        \@ifnextchar{.}{\ETC}%
    129         {\ETC.\xspace}%
     129                {\ETC.\xspace}%
    130130}}{}%
    131131\@ifundefined{etal}{
    132132\newcommand{\ETAL}{\abbrevFont{et}~\abbrevFont{al}}
    133133\newcommand*{\etal}{%
    134         \@ifnextchar{.}{\protect\ETAL}%
    135                 {\protect\ETAL.\xspace}%
     134        \@ifnextchar{.}{\ETAL}%
     135                {\ETAL.\xspace}%
    136136}}{}%
    137137\@ifundefined{viz}{
     
    163163                __float80, float80, __float128, float128, forall, ftype, generator, _Generic, _Imaginary, __imag, __imag__,
    164164                inline, __inline, __inline__, __int128, int128, __label__, monitor, mutex, _Noreturn, one_t, or,
    165                 otype, restrict, __restrict, __restrict__, __signed, __signed__, _Static_assert, thread,
     165                otype, restrict, resume, __restrict, __restrict__, __signed, __signed__, _Static_assert, suspend, thread,
    166166                _Thread_local, throw, throwResume, timeout, trait, try, ttype, typeof, __typeof, __typeof__,
    167167                virtual, __volatile, __volatile__, waitfor, when, with, zero_t},
    168168        moredirectives={defined,include_next},
    169169        % replace/adjust listing characters that look bad in sanserif
    170         literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptstyle\land\,$}}1
     170        literate={-}{\makebox[1ex][c]{\raisebox{0.5ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptstyle\land\,$}}1
    171171                {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 % {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
    172172                {<}{\textrm{\textless}}1 {>}{\textrm{\textgreater}}1
     
    197197                _Else, _Enable, _Event, _Finally, _Monitor, _Mutex, _Nomutex, _PeriodicTask, _RealTimeTask,
    198198                _Resume, _Select, _SporadicTask, _Task, _Timeout, _When, _With, _Throw},
    199 }
    200 \lstdefinelanguage{Golang}{
    201         morekeywords=[1]{package,import,func,type,struct,return,defer,panic,recover,select,var,const,iota,},
    202         morekeywords=[2]{string,uint,uint8,uint16,uint32,uint64,int,int8,int16,int32,int64,
    203                 bool,float32,float64,complex64,complex128,byte,rune,uintptr, error,interface},
    204         morekeywords=[3]{map,slice,make,new,nil,len,cap,copy,close,true,false,delete,append,real,imag,complex,chan,},
    205         morekeywords=[4]{for,break,continue,range,goto,switch,case,fallthrough,if,else,default,},
    206         morekeywords=[5]{Println,Printf,Error,},
    207         sensitive=true,
    208         morecomment=[l]{//},
    209         morecomment=[s]{/*}{*/},
    210         morestring=[b]',
    211         morestring=[b]",
    212         morestring=[s]{`}{`},
    213199}
    214200
     
    241227{}
    242228\lstnewenvironment{uC++}[1][]
    243 {\lstset{#1}}
     229{\lstset{language=uC++,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
    244230{}
    245231\lstnewenvironment{Go}[1][]
     
    262248}
    263249
    264 \newbox\myboxA
    265 \newbox\myboxB
    266 \newbox\myboxC
    267 \newbox\myboxD
     250\newsavebox{\myboxA}
     251\newsavebox{\myboxB}
     252\newsavebox{\myboxC}
     253\newsavebox{\myboxD}
    268254
    269255\title{\texorpdfstring{Advanced Control-flow and Concurrency in \protect\CFA}{Advanced Control-flow in Cforall}}
     
    282268\CFA is a polymorphic, non-object-oriented, concurrent, backwards-compatible extension of the C programming language.
    283269This paper discusses the design philosophy and implementation of its advanced control-flow and concurrent/parallel features, along with the supporting runtime written in \CFA.
    284 These features are created from scratch as ISO C has only low-level and/or unimplemented concurrency, so C programmers continue to rely on library features like pthreads.
     270These features are created from scratch as ISO C has only low-level and/or unimplemented concurrency, so C programmers continue to rely on library approaches like pthreads.
    285271\CFA introduces modern language-level control-flow mechanisms, like generators, coroutines, user-level threading, and monitors for mutual exclusion and synchronization.
    286272% Library extension for executors, futures, and actors are built on these basic mechanisms.
     
    295281
    296282\begin{document}
    297 \linenumbers                                            % comment out to turn off line numbering
     283\linenumbers                            % comment out to turn off line numbering
    298284
    299285\maketitle
     
    302288\section{Introduction}
    303289
    304 This paper discusses the design philosophy and implementation of advanced language-level control-flow and concurrent/parallel features in \CFA~\cite{Moss18,Cforall} and its runtime, which is written entirely in \CFA.
    305 \CFA is a modern, polymorphic, non-object-oriented\footnote{
    306 \CFA has features often associated with object-oriented programming languages, such as constructors, destructors, virtuals and simple inheritance.
     290\CFA~\cite{Moss18,Cforall} is a modern, polymorphic, non-object-oriented\footnote{
     291\CFA has object-oriented features, such as constructors, destructors, virtuals and simple trait/interface inheritance.
     292% Go interfaces, Rust traits, Swift Protocols, Haskell Type Classes and Java Interfaces.
     293% "Trait inheritance" works for me. "Interface inheritance" might also be a good choice, and distinguish clearly from implementation inheritance.
     294% You'll want to be a little bit careful with terms like "structural" and "nominal" inheritance as well. CFA has structural inheritance (I think Go as well) -- it's inferred based on the structure of the code. Java, Rust, and Haskell (not sure about Swift) have nominal inheritance, where there needs to be a specific statement that "this type inherits from this type".
    307295However, functions \emph{cannot} be nested in structures, so there is no lexical binding between a structure and set of functions (member/method) implemented by an implicit \lstinline@this@ (receiver) parameter.},
    308296backwards-compatible extension of the C programming language.
    309 In many ways, \CFA is to C as Scala~\cite{Scala} is to Java, providing a \emph{research vehicle} for new typing and control-flow capabilities on top of a highly popular programming language allowing immediate dissemination.
    310 Within the \CFA framework, new control-flow features are created from scratch because ISO \Celeven defines only a subset of the \CFA extensions, where the overlapping features are concurrency~\cite[\S~7.26]{C11}.
    311 However, \Celeven concurrency is largely wrappers for a subset of the pthreads library~\cite{Butenhof97,Pthreads}, and \Celeven and pthreads concurrency is simple, based on thread fork/join in a function and mutex/condition locks, which is low-level and error-prone;
    312 no high-level language concurrency features are defined.
    313 Interestingly, almost a decade after publication of the \Celeven standard, neither gcc-8, clang-9 nor msvc-19 (most recent versions) support the \Celeven include @threads.h@, indicating little interest in the C11 concurrency approach (possibly because the effort to add concurrency to \CC).
    314 Finally, while the \Celeven standard does not state a threading model, the historical association with pthreads suggests implementations would adopt kernel-level threading (1:1)~\cite{ThreadModel}.
    315 
     297In many ways, \CFA is to C as Scala~\cite{Scala} is to Java, providing a \emph{research vehicle} for new typing and control-flow capabilities on top of a highly popular programming language\footnote{
     298The TIOBE index~\cite{TIOBE} for December 2019 ranks the top five \emph{popular} programming languages as Java 17\%, C 16\%, Python 10\%, and \CC 6\%, \Csharp 5\% = 54\%, and over the past 30 years, C has always ranked either first or second in popularity.}
     299allowing immediate dissemination.
     300This paper discusses the design philosophy and implementation of advanced language-level control-flow and concurrent/parallel features in \CFA and its runtime, which is written entirely in \CFA.
     301The \CFA control-flow framework extends ISO \Celeven~\cite{C11} with new call/return and concurrent/parallel control-flow.
     302
     303% The call/return extensions retain state between callee and caller versus losing the callee's state on return;
     304% the concurrency extensions allow high-level management of threads.
     305
     306Call/return control-flow with argument/parameter passing appeared in the first programming languages.
     307Over the past 50 years, call/return has been augmented with features like static/dynamic call, exceptions (multi-level return) and generators/coroutines (retain state between calls).
     308While \CFA has mechanisms for dynamic call (algebraic effects) and exceptions\footnote{
     309\CFA exception handling will be presented in a separate paper.
     310The key feature that dovetails with this paper is nonlocal exceptions allowing exceptions to be raised across stacks, with synchronous exceptions raised among coroutines and asynchronous exceptions raised among threads, similar to that in \uC~\cite[\S~5]{uC++}}, this work only discusses retaining state between calls via generators/coroutines.
     311\newterm{Coroutining} was introduced by Conway~\cite{Conway63} (1963), discussed by Knuth~\cite[\S~1.4.2]{Knuth73V1}, implemented in Simula67~\cite{Simula67}, formalized by Marlin~\cite{Marlin80}, and is now popular and appears in old and new programming languages: CLU~\cite{CLU}, \Csharp~\cite{Csharp}, Ruby~\cite{Ruby}, Python~\cite{Python}, JavaScript~\cite{JavaScript}, Lua~\cite{Lua}, \CCtwenty~\cite{C++20Coroutine19}.
     312Coroutining is sequential execution requiring direct handoff among coroutines, \ie only the programmer is controlling execution order.
     313If coroutines transfer to an internal event-engine for scheduling the next coroutines, the program transitions into the realm of concurrency~\cite[\S~3]{Buhr05a}.
     314Coroutines are only a stepping stone towards concurrency where the commonality is that coroutines and threads retain state between calls.
     315
     316\Celeven/\CCeleven define concurrency~\cite[\S~7.26]{C11}, but it is largely wrappers for a subset of the pthreads library~\cite{Pthreads}.\footnote{Pthreads concurrency is based on simple thread fork/join in a function and mutex/condition locks, which is low-level and error-prone}
     317Interestingly, almost a decade after the \Celeven standard, neither gcc-9, clang-9 nor msvc-19 (most recent versions) support the \Celeven include @threads.h@, indicating no interest in the C11 concurrency approach (possibly because of the recent effort to add concurrency to \CC).
     318While the \Celeven standard does not state a threading model, the historical association with pthreads suggests implementations would adopt kernel-level threading (1:1)~\cite{ThreadModel}, as for \CC.
    316319In contrast, there has been a renewed interest during the past decade in user-level (M:N, green) threading in old and new programming languages.
    317320As multi-core hardware became available in the 1980/90s, both user and kernel threading were examined.
    318321Kernel threading was chosen, largely because of its simplicity and fit with the simpler operating systems and hardware architectures at the time, which gave it a performance advantage~\cite{Drepper03}.
    319322Libraries like pthreads were developed for C, and the Solaris operating-system switched from user (JDK 1.1~\cite{JDK1.1}) to kernel threads.
    320 As a result, languages like Java, Scala, Objective-C~\cite{obj-c-book}, \CCeleven~\cite{C11}, and C\#~\cite{Csharp} adopt the 1:1 kernel-threading model, with a variety of presentation mechanisms.
    321 From 2000 onwards, languages like Go~\cite{Go}, Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, D~\cite{D}, and \uC~\cite{uC++,uC++book} have championed the M:N user-threading model, and many user-threading libraries have appeared~\cite{Qthreads,MPC,Marcel}, including putting green threads back into Java~\cite{Quasar}.
    322 The main argument for user-level threading is that it is lighter weight than kernel threading (locking and context switching do not cross the kernel boundary), so there is less restriction on programming styles that encourage large numbers of threads performing medium work units to facilitate load balancing by the runtime~\cite{Verch12}.
     323As a result, many current languages implementations adopt the 1:1 kernel-threading model, like Java (Scala), Objective-C~\cite{obj-c-book}, \CCeleven~\cite{C11}, C\#~\cite{Csharp} and Rust~\cite{Rust}, with a variety of presentation mechanisms.
     324From 2000 onwards, several language implementations have championed the M:N user-threading model, like Go~\cite{Go}, Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, D~\cite{D}, and \uC~\cite{uC++,uC++book}, including putting green threads back into Java~\cite{Quasar}, and many user-threading libraries have appeared~\cite{Qthreads,MPC,Marcel}.
     325The main argument for user-level threading is that it is lighter weight than kernel threading (locking and context switching do not cross the kernel boundary), so there is less restriction on programming styles that encourages large numbers of threads performing medium-sized work to facilitate load balancing by the runtime~\cite{Verch12}.
    323326As well, user-threading facilitates a simpler concurrency approach using thread objects that leverage sequential patterns versus events with call-backs~\cite{Adya02,vonBehren03}.
    324327Finally, performant user-threading implementations (both time and space) meet or exceed direct kernel-threading implementations, while achieving the programming advantages of high concurrency levels and safety.
    325328
    326 A further effort over the past two decades is the development of language memory models to deal with the conflict between language features and compiler/hardware optimizations, \ie some language features are unsafe in the presence of aggressive sequential optimizations~\cite{Buhr95a,Boehm05}.
     329A further effort over the past two decades is the development of language memory models to deal with the conflict between language features and compiler/hardware optimizations, \eg some language features are unsafe in the presence of aggressive sequential optimizations~\cite{Buhr95a,Boehm05}.
    327330The consequence is that a language must provide sufficient tools to program around safety issues, as inline and library code is all sequential to the compiler.
    328331One solution is low-level qualifiers and functions (\eg @volatile@ and atomics) allowing \emph{programmers} to explicitly write safe (race-free~\cite{Boehm12}) programs.
    329 A safer solution is high-level language constructs so the \emph{compiler} knows the optimization boundaries, and hence, provides implicit safety.
    330 This problem is best known with respect to concurrency, but applies to other complex control-flow, like exceptions\footnote{
    331 \CFA exception handling will be presented in a separate paper.
    332 The key feature that dovetails with this paper is nonlocal exceptions allowing exceptions to be raised across stacks, with synchronous exceptions raised among coroutines and asynchronous exceptions raised among threads, similar to that in \uC~\cite[\S~5]{uC++}
    333 } and coroutines.
    334 Finally, language solutions allow matching constructs with language paradigm, \ie imperative and functional languages often have different presentations of the same concept to fit their programming model.
    335 
    336 Finally, it is important for a language to provide safety over performance \emph{as the default}, allowing careful reduction of safety for performance when necessary.
    337 Two concurrency violations of this philosophy are \emph{spurious wakeup} (random wakeup~\cite[\S~8]{Buhr05a}) and \emph{barging}\footnote{
    338 The notion of competitive succession instead of direct handoff, \ie a lock owner releases the lock and an arriving thread acquires it ahead of preexisting waiter threads.
     332A safer solution is high-level language constructs so the \emph{compiler} knows the concurrency boundaries (where mutual exclusion and synchronization are acquired/released) and provide implicit safety at and across these boundaries.
     333While the optimization problem is best known with respect to concurrency, it applies to other complex control-flow, like exceptions and coroutines.
     334As well, language solutions allow matching the language paradigm with the approach, \eg matching the functional paradigm with data-flow programming or the imperative paradigm with thread programming.
     335
     336Finally, it is important for a language to provide safety over performance \emph{as the default}, allowing careful reduction of safety (unsafe code) for performance when necessary.
     337Two concurrency violations of this philosophy are \emph{spurious wakeup} (random wakeup~\cite[\S~9]{Buhr05a}) and \emph{barging}\footnote{
     338Barging is competitive succession instead of direct handoff, \ie after a lock is released both arriving and preexisting waiter threads compete to acquire the lock.
     339Hence, an arriving thread can temporally \emph{barge} ahead of threads already waiting for an event, which can repeat indefinitely leading to starvation of waiter threads.
    339340} (signals-as-hints~\cite[\S~8]{Buhr05a}), where one is a consequence of the other, \ie once there is spurious wakeup, signals-as-hints follow.
    340 However, spurious wakeup is \emph{not} a foundational concurrency property~\cite[\S~8]{Buhr05a}, it is a performance design choice.
    341 Similarly, signals-as-hints are often a performance decision.
    342 We argue removing spurious wakeup and signals-as-hints make concurrent programming significantly safer because it removes local non-determinism and matches with programmer expectation.
    343 (Author experience teaching concurrency is that students are highly confused by these semantics.)
    344 Clawing back performance, when local non-determinism is unimportant, should be an option not the default.
    345 
    346 \begin{comment}
    347 Most augmented traditional (Fortran 18~\cite{Fortran18}, Cobol 14~\cite{Cobol14}, Ada 12~\cite{Ada12}, Java 11~\cite{Java11}) and new languages (Go~\cite{Go}, Rust~\cite{Rust}, and D~\cite{D}), except \CC, diverge from C with different syntax and semantics, only interoperate indirectly with C, and are not systems languages, for those with managed memory.
    348 As a result, there is a significant learning curve to move to these languages, and C legacy-code must be rewritten.
    349 While \CC, like \CFA, takes an evolutionary approach to extend C, \CC's constantly growing complex and interdependent features-set (\eg objects, inheritance, templates, etc.) mean idiomatic \CC code is difficult to use from C, and C programmers must expend significant effort learning \CC.
    350 Hence, rewriting and retraining costs for these languages, even \CC, are prohibitive for companies with a large C software-base.
    351 \CFA with its orthogonal feature-set, its high-performance runtime, and direct access to all existing C libraries circumvents these problems.
    352 \end{comment}
    353 
    354 \CFA embraces user-level threading, language extensions for advanced control-flow, and safety as the default.
    355 We present comparative examples so the reader can judge if the \CFA control-flow extensions are better and safer than those in other concurrent, imperative programming languages, and perform experiments to show the \CFA runtime is competitive with other similar mechanisms.
     341(Author experience teaching concurrency is that students are confused by these semantics.)
     342However, spurious wakeup is \emph{not} a foundational concurrency property~\cite[\S~9]{Buhr05a};
     343it is a performance design choice.
     344We argue removing spurious wakeup and signals-as-hints make concurrent programming simpler and safer as there is less local non-determinism to manage.
     345If barging acquisition is allowed, its specialized performance advantage should be available as an option not the default.
     346
     347\CFA embraces language extensions for advanced control-flow, user-level threading, and safety as the default.
     348We present comparative examples to support our argument that the \CFA control-flow extensions are as expressive and safe as those in other concurrent imperative programming languages, and perform experiments to show the \CFA runtime is competitive with other similar mechanisms.
    356349The main contributions of this work are:
    357 \begin{itemize}[topsep=3pt,itemsep=1pt]
     350\begin{itemize}[topsep=3pt,itemsep=0pt]
    358351\item
    359 language-level generators, coroutines and user-level threading, which respect the expectations of C programmers.
     352a set of fundamental execution properties that dictate which language-level control-flow features need to be supported,
     353
    360354\item
    361 monitor synchronization without barging, and the ability to safely acquiring multiple monitors \emph{simultaneously} (deadlock free), while seamlessly integrating these capabilities with all monitor synchronization mechanisms.
     355integration of these language-level control-flow features, while respecting the style and expectations of C programmers,
     356
    362357\item
    363 providing statically type-safe interfaces that integrate with the \CFA polymorphic type-system and other language features.
     358monitor synchronization without barging, and the ability to safely acquiring multiple monitors \emph{simultaneously} (deadlock free), while seamlessly integrating these capabilities with all monitor synchronization mechanisms,
     359
     360\item
     361providing statically type-safe interfaces that integrate with the \CFA polymorphic type-system and other language features,
     362
    364363% \item
    365364% library extensions for executors, futures, and actors built on the basic mechanisms.
     365
    366366\item
    367 a runtime system with no spurious wakeup.
     367a runtime system without spurious wake-up and no performance loss,
     368
    368369\item
    369 a dynamic partitioning mechanism to segregate the execution environment for specialized requirements.
     370a dynamic partitioning mechanism to segregate groups of executing user and kernel threads performing specialized work (\eg web-server or compute engine) or requiring different scheduling (\eg NUMA or real-time).
     371
    370372% \item
    371373% a non-blocking I/O library
     374
    372375\item
    373 experimental results showing comparable performance of the new features with similar mechanisms in other programming languages.
     376experimental results showing comparable performance of the \CFA features with similar mechanisms in other languages.
    374377\end{itemize}
    375378
    376 Section~\ref{s:StatefulFunction} begins advanced control by introducing sequential functions that retain data and execution state between calls, which produces constructs @generator@ and @coroutine@.
    377 Section~\ref{s:Concurrency} begins concurrency, or how to create (fork) and destroy (join) a thread, which produces the @thread@ construct.
     379Section~\ref{s:FundamentalExecutionProperties} presents the compositional hierarchy of execution properties directing the design of control-flow features in \CFA.
     380Section~\ref{s:StatefulFunction} begins advanced control by introducing sequential functions that retain data and execution state between calls producing constructs @generator@ and @coroutine@.
     381Section~\ref{s:Concurrency} begins concurrency, or how to create (fork) and destroy (join) a thread producing the @thread@ construct.
     378382Section~\ref{s:MutualExclusionSynchronization} discusses the two mechanisms for restricting nondeterminism when controlling shared access to resources (mutual exclusion) and timing relationships among threads (synchronization).
    379383Section~\ref{s:Monitor} shows how both mutual exclusion and synchronization are safely embedded in the @monitor@ and @thread@ constructs.
    380384Section~\ref{s:CFARuntimeStructure} describes the large-scale mechanism to structure (cluster) threads and virtual processors (kernel threads).
    381 Section~\ref{s:Performance} uses a series of microbenchmarks to compare \CFA threading with pthreads, Java OpenJDK-9, Go 1.12.6 and \uC 7.0.0.
     385Section~\ref{s:Performance} uses a series of microbenchmarks to compare \CFA threading with pthreads, Java 11.0.6, Go 1.12.6, Rust 1.37.0, Python 3.7.6, Node.js 12.14.1, and \uC 7.0.0.
     386
     387
     388\section{Fundamental Execution Properties}
     389\label{s:FundamentalExecutionProperties}
     390
     391The features in a programming language should be composed from a set of fundamental properties rather than an ad hoc collection chosen by the designers.
     392To this end, the control-flow features created for \CFA are based on the fundamental properties of any language with function-stack control-flow (see also \uC~\cite[pp.~140-142]{uC++}).
     393The fundamental properties are execution state, thread, and mutual-exclusion/synchronization (MES).
     394These independent properties can be used alone, in pairs, or in triplets to compose different language features, forming a compositional hierarchy where the most advanced feature has all the properties (state/thread/MES).
      395While it is possible for a language to only support the most advanced feature~\cite{Hermes90}, doing so unnecessarily complicates, and makes inefficient, solutions to certain classes of problems.
     396As is shown, each of the (non-rejected) composed features solves a particular set of problems, and hence, has a defensible position in a programming language.
      397If a compositional feature is missing, a programmer has too few or too many fundamental properties, resulting in a complex and/or inefficient solution.
     398
     399In detail, the fundamental properties are:
     400\begin{description}[leftmargin=\parindent,topsep=3pt,parsep=0pt]
     401\item[\newterm{execution state}:]
      402is the state information needed by a control-flow feature to initialize, to manage computation data and execution location(s), and to de-initialize.
     403State is retained in fixed-sized aggregate structures and dynamic-sized stack(s), often allocated in the heap(s) managed by the runtime system.
     404The lifetime of the state varies with the control-flow feature, where longer life-time and dynamic size provide greater power but also increase usage complexity and cost.
      405Control-flow transfers among execution states occur in multiple ways, such as function call, context switch, asynchronous await, etc.
     406Because the programming language determines what constitutes an execution state, implicitly manages this state, and defines movement mechanisms among states, execution state is an elementary property of the semantics of a programming language.
     407% An execution-state is related to the notion of a process continuation \cite{Hieb90}.
     408
     409\item[\newterm{threading}:]
     410is execution of code that occurs independently of other execution, \ie the execution resulting from a thread is sequential.
     411Multiple threads provide \emph{concurrent execution};
     412concurrent execution becomes parallel when run on multiple processing units (hyper-threading, cores, sockets).
     413There must be language mechanisms to create, block/unblock, and join with a thread.
     414
     415\item[\newterm{MES}:]
      416is the set of concurrency mechanisms to perform an action without interruption and to establish timing relationships among multiple threads.
     417These two properties are independent, \ie mutual exclusion cannot provide synchronization and vice versa without introducing additional threads~\cite[\S~4]{Buhr05a}.
      418Limiting MES, \eg no access to shared data, results in contrived solutions and inefficiency on multi-core von Neumann computers, where shared memory is a foundational aspect of their design.
     419\end{description}
     420These properties are fundamental because they cannot be built from existing language features, \eg a basic programming language like C99~\cite{C99} cannot create new control-flow features, concurrency, or provide MES using atomic hardware mechanisms.
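To make the MES point concrete, the following minimal sketch (an assumed pthreads example, not \CFA code; the function names are illustrative) shows that C obtains both properties from a library rather than the language, and that they are separate mechanisms: the mutex alone provides mutual exclusion for the update, while the condition variable adds the timing relationship between producer and consumer.
\begin{cfa}
#include <pthread.h>
#include <stdbool.h>
int shared = 0;                                 // shared data
bool ready = false;
pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;  // mutual exclusion
pthread_cond_t cond = PTHREAD_COND_INITIALIZER; // synchronization
void produce( int v ) {                         // atomic update, then signal consumer
	pthread_mutex_lock( &m );
	shared = v;  ready = true;
	pthread_cond_signal( &cond );
	pthread_mutex_unlock( &m );
}
int consume( void ) {                           // wait for producer before reading
	pthread_mutex_lock( &m );
	while ( ! ready ) pthread_cond_wait( &cond, &m );
	int v = shared;  ready = false;
	pthread_mutex_unlock( &m );
	return v;
}
\end{cfa}
Neither mechanism can substitute for the other: dropping the condition variable loses the timing relationship, and dropping the mutex loses the uninterrupted update.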
     421
     422
     423\subsection{Execution Properties}
     424
      425Table~\ref{t:ExecutionPropertyComposition} shows how the three fundamental execution properties, state, thread, and mutual exclusion, compose a hierarchy of control-flow features needed in a programming language.
     426(When doing case analysis, not all combinations are meaningful.)
     427Note, basic von Neumann execution requires at least one thread and an execution state providing some form of call stack.
     428For table entries missing these minimal components, the property is borrowed from the invoker (caller).
     429
     430Case 1 is a function that borrows storage for its state (stack frame/activation) and a thread from its invoker and retains this state across \emph{callees}, \ie function local-variables are retained on the stack across calls.
     431Case 2 is case 1 with access to shared state so callers are restricted during update (mutual exclusion) and scheduling for other threads (synchronization).
     432Case 3 is a stateful function supporting resume/suspend along with call/return to retain state across \emph{callers}, but has some restrictions because the function's state is stackless.
      433Note, stackless functions still borrow the caller's stack and thread, where the stack is used to preserve state across their callees.
     434Case 4 is cases 2 and 3 with protection to shared state for stackless functions.
     435Cases 5 and 6 are the same as 3 and 4 but only the thread is borrowed as the function state is stackful, so resume/suspend is a context switch from the caller's to the function's stack.
     436Cases 7 and 8 are rejected because a function that is given a new thread must have its own stack where the thread begins and stack frames are stored for calls, \ie there is no stack to borrow.
     437Cases 9 and 10 are rejected because a thread with a fixed state (no stack) cannot accept calls, make calls, block, or be preempted, all of which require an unknown amount of additional dynamic state.
     438Hence, once started, this kind of thread must execute to completion, \ie computation only, which severely restricts runtime management.
     439Cases 11 and 12 have a stackful thread with and without safe access to shared state.
     440Execution properties increase the cost of creation and execution along with complexity of usage.
     441
     442\begin{table}
     443\caption{Execution property composition}
     444\centering
     445\label{t:ExecutionPropertyComposition}
     446\renewcommand{\arraystretch}{1.25}
     447%\setlength{\tabcolsep}{5pt}
     448\begin{tabular}{c|c||l|l}
     449\multicolumn{2}{c||}{execution properties} & \multicolumn{2}{c}{mutual exclusion / synchronization} \\
     450\hline
     451stateful                        & thread        & \multicolumn{1}{c|}{No} & \multicolumn{1}{c}{Yes} \\
     452\hline   
     453\hline   
     454No                                      & No            & \textbf{1}\ \ \ function                              & \textbf{2}\ \ \ @monitor@ function    \\
     455\hline   
     456Yes (stackless)         & No            & \textbf{3}\ \ \ @generator@                   & \textbf{4}\ \ \ @monitor@ @generator@ \\
     457\hline   
     458Yes (stackful)          & No            & \textbf{5}\ \ \ @coroutine@                   & \textbf{6}\ \ \ @monitor@ @coroutine@ \\
     459\hline   
     460No                                      & Yes           & \textbf{7}\ \ \ {\color{red}rejected} & \textbf{8}\ \ \ {\color{red}rejected} \\
     461\hline   
     462Yes (stackless)         & Yes           & \textbf{9}\ \ \ {\color{red}rejected} & \textbf{10}\ \ \ {\color{red}rejected} \\
     463\hline   
     464Yes (stackful)          & Yes           & \textbf{11}\ \ \ @thread@                             & \textbf{12}\ \ @monitor@ @thread@             \\
     465\end{tabular}
     466\end{table}
     467
      468Given the execution-properties taxonomy, programmers can now answer three basic questions: is state necessary across calls and, if so, how much; is a separate thread necessary; and is access to shared state necessary?
      469The answers define the optimal language feature needed for implementing a programming problem.
      470The following sections discuss how \CFA fills in the table with language features, while other programming languages may only provide a subset of the table.
     471
     472
     473\subsection{Design Requirements}
     474
     475The following design requirements largely stem from building \CFA on top of C.
     476\begin{itemize}[topsep=3pt,parsep=0pt]
     477\item
     478All communication must be statically type checkable for early detection of errors and efficient code generation.
     479This requirement is consistent with the fact that C is a statically-typed programming-language.
     480
     481\item
      482Direct interaction among language features must be possible, allowing any feature to be selected without restricting comm\-unication.
     483For example, many concurrent languages do not provide direct communication (calls) among threads, \ie threads only communicate indirectly through monitors, channels, messages, and/or futures.
      484Indirect communication increases the number of objects, consuming more resources, and requires additional synchronization and possibly data transfer.
     485
     486\item
     487All communication is performed using function calls, \ie data is transmitted from argument to parameter and results are returned from function calls.
     488Alternative forms of communication, such as call-backs, message passing, channels, or communication ports, step outside of C's normal form of communication.
     489
     490\item
     491All stateful features must follow the same declaration scopes and lifetimes as other language data.
     492For C that means at program startup, during block and function activation, and on demand using dynamic allocation.
     493
     494\item
     495MES must be available implicitly in language constructs as well as explicitly for specialized requirements, because requiring programmers to build MES using low-level locks often leads to incorrect programs.
     496Furthermore, reducing synchronization scope by encapsulating it within language constructs further reduces errors in concurrent programs.
     497
     498\item
     499Both synchronous and asynchronous communication are needed.
     500However, we believe the best way to provide asynchrony, such as call-buffering/chaining and/or returning futures~\cite{multilisp}, is building it from expressive synchronous features.
     501
     502\item
     503Synchronization must be able to control the service order of requests including prioritizing selection from different kinds of outstanding requests, and postponing a request for an unspecified time while continuing to accept new requests.
      504Otherwise, certain concurrency problems are difficult, \eg web server and disk scheduling, and the amount of concurrency is inhibited~\cite{Gentleman81}.
     505\end{itemize}
     506We have satisfied these requirements in \CFA while maintaining backwards compatibility with the huge body of legacy C programs.
     507% In contrast, other new programming languages must still access C programs (\eg operating-system service routines), but do so through fragile C interfaces.
     508
     509
     510\subsection{Asynchronous Await / Call}
     511
     512Asynchronous await/call is a caller mechanism for structuring programs and/or increasing concurrency, where the caller (client) postpones an action into the future, which is subsequently executed by a callee (server).
     513The caller detects the action's completion through a \newterm{future}/\newterm{promise}.
     514The benefit is asynchronous caller execution with respect to the callee until future resolution.
     515For single-threaded languages like JavaScript, an asynchronous call passes a callee action, which is queued in the event-engine, and continues execution with a promise.
     516When the caller needs the promise to be fulfilled, it executes @await@.
      517A promise-completion call-back can be part of the callee action, or the caller is rescheduled;
      518in either case, the call-back is executed after the promise is fulfilled.
      519While asynchronous calls generate new callee (server) events, we contend this mechanism is insufficient for advanced control-flow mechanisms like generators or coroutines (which are discussed next).
      520Specifically, control between caller and callee occurs indirectly through the event-engine, precluding direct handoff and cycling among events, and requires complex resolution of a control promise and data.
      521Note, @async-await@ is just syntactic sugar over the event engine, so it does not solve these deficiencies.
      522For multi-threaded languages like Java, the asynchronous call queues a callee action with an executor (server), which subsequently executes the work using a thread in the executor thread-pool.
      523The problem arises when concurrent work-units need to interact and/or block, as this affects the executor, \eg stopping its threads.
      524While it is possible to extend this approach to support the necessary mechanisms, \eg message passing in Actors, we show monitors and threads provide an equally competitive approach that does not deviate from normal call communication and can be used to build asynchronous calls, as is done in Java.
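For reference, the following minimal sketch (an assumed pthreads example, not the \CFA runtime or the Java library; the @Future@ type and helper names are illustrative) shows the caller/callee split for a one-shot future: the callee fulfills the future, and the caller runs asynchronously until it blocks in the await.
\begin{cfa}
#include <pthread.h>
#include <stdbool.h>
typedef struct {                                // one-shot future
	pthread_mutex_t m;  pthread_cond_t c;
	bool fulfilled;  int value;
} Future;
void future_init( Future * f ) {
	pthread_mutex_init( &f->m, NULL );  pthread_cond_init( &f->c, NULL );
	f->fulfilled = false;
}
void future_fulfill( Future * f, int v ) {      // callee (server) side
	pthread_mutex_lock( &f->m );
	f->value = v;  f->fulfilled = true;
	pthread_cond_signal( &f->c );
	pthread_mutex_unlock( &f->m );
}
int future_await( Future * f ) {                // caller (client) side: blocks only here
	pthread_mutex_lock( &f->m );
	while ( ! f->fulfilled ) pthread_cond_wait( &f->c, &f->m );
	int v = f->value;
	pthread_mutex_unlock( &f->m );
	return v;
}
\end{cfa}
The asynchrony here is built entirely from synchronous MES primitives, which is the argument made above for constructing asynchronous features from expressive synchronous ones.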
    382525
    383526
     
    385528\label{s:StatefulFunction}
    386529
    387 The stateful function is an old idea~\cite{Conway63,Marlin80} that is new again~\cite{C++20Coroutine19}, where execution is temporarily suspended and later resumed, \eg plugin, device driver, finite-state machine.
    388 Hence, a stateful function may not end when it returns to its caller, allowing it to be restarted with the data and execution location present at the point of suspension.
    389 This capability is accomplished by retaining a data/execution \emph{closure} between invocations.
    390 If the closure is fixed size, we call it a \emph{generator} (or \emph{stackless}), and its control flow is restricted, \eg suspending outside the generator is prohibited.
    391 If the closure is variable size, we call it a \emph{coroutine} (or \emph{stackful}), and as the names implies, often implemented with a separate stack with no programming restrictions.
    392 Hence, refactoring a stackless coroutine may require changing it to stackful.
    393 A foundational property of all \emph{stateful functions} is that resume/suspend \emph{do not} cause incremental stack growth, \ie resume/suspend operations are remembered through the closure not the stack.
    394 As well, activating a stateful function is \emph{asymmetric} or \emph{symmetric}, identified by resume/suspend (no cycles) and resume/resume (cycles).
    395 A fixed closure activated by modified call/return is faster than a variable closure activated by context switching.
    396 Additionally, any storage management for the closure (especially in unmanaged languages, \ie no garbage collection) must also be factored into design and performance.
    397 Therefore, selecting between stackless and stackful semantics is a tradeoff between programming requirements and performance, where stackless is faster and stackful is more general.
    398 Note, creation cost is amortized across usage, so activation cost is usually the dominant factor.
     530A \emph{stateful function} has the ability to remember state between calls, where state can be either data or execution, \eg plugin, device driver, finite-state machine (FSM).
     531A simple technique to retain data state between calls is @static@ declarations within a function, which is often implemented by hoisting the declarations to the global scope but hiding the names within the function using name mangling.
      532However, each call starts the function at the top, making it difficult to determine the last point of execution in an algorithm, and requiring multiple flag variables and tests to reestablish the continuation point.
     533Hence, the next step of generalizing function state is implicitly remembering the return point between calls and reentering the function at this point rather than the top, called \emph{generators}\,/\,\emph{iterators} or \emph{stackless coroutines}.
     534For example, a Fibonacci generator retains data and execution state allowing it to remember prior values needed to generate the next value and the location in the algorithm to compute that value.
     535The next step of generalization is instantiating the function to allow multiple named instances, \eg multiple Fibonacci generators, where each instance has its own state, and hence, can generate an independent sequence of values.
     536Note, a subset of generator state is a function \emph{closure}, \ie the technique of capturing lexical references when returning a nested function.
     537A further generalization is adding a stack to a generator's state, called a \emph{coroutine}, so it can suspend outside of itself, \eg call helper functions to arbitrary depth before suspending back to its resumer without unwinding these calls.
     538For example, a coroutine iterator for a binary tree can stop the traversal at the visit point (pre, infix, post traversal), return the node value to the caller, and then continue the recursive traversal from the current node on the next call.
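As a minimal illustration of the @static@-declaration technique (a hypothetical example, not one of the paper's benchmarks), the following C function retains the Fibonacci data state between calls; however, every call still enters at the top of the function, there is only one hidden instance, and remembering an execution location would require additional flag variables and tests.
\begin{cfa}
int fib( void ) {                               // returns the next Fibonacci number on each call
	static int fn1 = 0, fn = 1;                 // data state retained between calls (single hidden instance)
	int ret = fn1;
	int next = fn1 + fn;                        // advance the sequence
	fn1 = fn;  fn = next;
	return ret;
}
\end{cfa}
A generator removes the remaining problems by also remembering the return point and by allowing multiple named instances.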
     539
     540There are two styles of activating a stateful function, \emph{asymmetric} or \emph{symmetric}, identified by resume/suspend (no cycles) and resume/resume (cycles).
     541These styles \emph{do not} cause incremental stack growth, \eg a million resume/suspend or resume/resume cycles do not remember each cycle just the last resumer for each cycle.
     542Selecting between stackless/stackful semantics and asymmetric/symmetric style is a tradeoff between programming requirements, performance, and design, where stackless is faster and smaller (modified call/return between closures), stackful is more general but slower and larger (context switching between distinct stacks), and asymmetric is simpler control-flow than symmetric.
     543Additionally, storage management for the closure/stack (especially in unmanaged languages, \ie no garbage collection) must be factored into design and performance.
     544Note, creation cost (closure/stack) is amortized across usage, so activation cost (resume/suspend) is usually the dominant factor.
     545
     546% The stateful function is an old idea~\cite{Conway63,Marlin80} that is new again~\cite{C++20Coroutine19}, where execution is temporarily suspended and later resumed, \eg plugin, device driver, finite-state machine.
     547% Hence, a stateful function may not end when it returns to its caller, allowing it to be restarted with the data and execution location present at the point of suspension.
     548% If the closure is fixed size, we call it a \emph{generator} (or \emph{stackless}), and its control flow is restricted, \eg suspending outside the generator is prohibited.
     549% If the closure is variable size, we call it a \emph{coroutine} (or \emph{stackful}), and as the names implies, often implemented with a separate stack with no programming restrictions.
     550% Hence, refactoring a stackless coroutine may require changing it to stackful.
     551% A foundational property of all \emph{stateful functions} is that resume/suspend \emph{do not} cause incremental stack growth, \ie resume/suspend operations are remembered through the closure not the stack.
     552% As well, activating a stateful function is \emph{asymmetric} or \emph{symmetric}, identified by resume/suspend (no cycles) and resume/resume (cycles).
     553% A fixed closure activated by modified call/return is faster than a variable closure activated by context switching.
     554% Additionally, any storage management for the closure (especially in unmanaged languages, \ie no garbage collection) must also be factored into design and performance.
     555% Therefore, selecting between stackless and stackful semantics is a tradeoff between programming requirements and performance, where stackless is faster and stackful is more general.
      556% Note, creation cost is amortized across usage, so activation cost is usually the dominant factor.
     557
     558For example, Python presents asymmetric generators as a function object, \uC presents symmetric coroutines as a \lstinline[language=C++]|class|-like object, and many languages present threading using function pointers, @pthreads@~\cite{Butenhof97}, \Csharp~\cite{Csharp}, Go~\cite{Go}, and Scala~\cite{Scala}.
     559\begin{center}
     560\begin{tabular}{@{}l|l|l@{}}
     561\multicolumn{1}{@{}c|}{Python asymmetric generator} & \multicolumn{1}{c|}{\uC symmetric coroutine} & \multicolumn{1}{c@{}}{Pthreads thread} \\
     562\hline
     563\begin{python}
     564`def Gen():` $\LstCommentStyle{\color{red}// function}$
     565        ... yield val ...
     566gen = Gen()
     567for i in range( 10 ):
     568        print( next( gen ) )
     569\end{python}
     570&
     571\begin{uC++}
     572`_Coroutine Cycle {` $\LstCommentStyle{\color{red}// class}$
     573        Cycle * p;
     574        void main() { p->cycle(); }
     575        void cycle() { resume(); }  `};`
     576Cycle c1, c2; c1.p=&c2; c2.p=&c1; c1.cycle();
     577\end{uC++}
     578&
     579\begin{cfa}
     580void * rtn( void * arg ) { ... }
     581int i = 3, rc;
     582pthread_t t; $\C{// thread id}$
     583$\LstCommentStyle{\color{red}// function pointer}$
     584rc=pthread_create(&t, `rtn`, (void *)i);
     585\end{cfa}
     586\end{tabular}
     587\end{center}
     588\CFA's preferred presentation model for generators/coroutines/threads is a hybrid of functions and classes, giving an object-oriented flavour.
     589Essentially, the generator/coroutine/thread function is semantically coupled with a generator/coroutine/thread custom type via the type's name.
     590The custom type solves several issues, while accessing the underlying mechanisms used by the custom types is still allowed for flexibility reasons.
     591Each custom type is discussed in detail in the following sections.
     592
     593
     594\subsection{Generator}
     595
     596Stackless generators (Table~\ref{t:ExecutionPropertyComposition} case 3) have the potential to be very small and fast, \ie as small and fast as function call/return for both creation and execution.
     597The \CFA goal is to achieve this performance target, possibly at the cost of some semantic complexity.
     598A series of different kinds of generators and their implementation demonstrate how this goal is accomplished.\footnote{
     599The \CFA operator syntax uses \lstinline|?| to denote operands, which allows precise definitions for pre, post, and infix operators, \eg \lstinline|?++|, \lstinline|++?|, and \lstinline|?+?|, in addition \lstinline|?\{\}| denotes a constructor, as in \lstinline|foo `f` = `\{`...`\}`|, \lstinline|^?\{\}| denotes a destructor, and \lstinline|?()| is \CC function call \lstinline|operator()|.
     600Operator \lstinline+|+ is overloaded for printing, like bit-shift \lstinline|<<| in \CC.
     601The \CFA \lstinline|with| clause opens an aggregate scope making its fields directly accessible, like Pascal \lstinline|with|, but using parallel semantics;
     602multiple aggregates may be opened.
     603\CFA has rebindable references \lstinline|int i, & ip = i, j; `&ip = &j;`| and non-rebindable references \lstinline|int i, & `const` ip = i, j; `&ip = &j;` // disallowed|.
     604}%
    399605
    400606\begin{figure}
     
    410616
    411617
     618
     619
    412620        int fn = f->fn; f->fn = f->fn1;
    413621                f->fn1 = f->fn + fn;
    414622        return fn;
    415 
    416623}
    417624int main() {
     
    432639void `main(Fib & fib)` with(fib) {
    433640
     641
    434642        [fn1, fn] = [1, 0];
    435643        for () {
     
    451659\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    452660typedef struct {
    453         int fn1, fn;  void * `next`;
     661        int `restart`, fn1, fn;
    454662} Fib;
    455 #define FibCtor { 1, 0, NULL }
     663#define FibCtor { `0`, 1, 0 }
    456664Fib * comain( Fib * f ) {
    457         if ( f->next ) goto *f->next;
    458         f->next = &&s1;
     665        `static void * states[] = {&&s0, &&s1};`
     666        `goto *states[f->restart];`
     667  s0: f->`restart` = 1;
    459668        for ( ;; ) {
    460669                return f;
    461670          s1:; int fn = f->fn + f->fn1;
    462                         f->fn1 = f->fn; f->fn = fn;
     671                f->fn1 = f->fn; f->fn = fn;
    463672        }
    464673}
     
    472681\end{lrbox}
    473682
    474 \subfloat[C asymmetric generator]{\label{f:CFibonacci}\usebox\myboxA}
     683\subfloat[C]{\label{f:CFibonacci}\usebox\myboxA}
    475684\hspace{3pt}
    476685\vrule
    477686\hspace{3pt}
    478 \subfloat[\CFA asymmetric generator]{\label{f:CFAFibonacciGen}\usebox\myboxB}
     687\subfloat[\CFA]{\label{f:CFAFibonacciGen}\usebox\myboxB}
    479688\hspace{3pt}
    480689\vrule
    481690\hspace{3pt}
    482 \subfloat[C generator implementation]{\label{f:CFibonacciSim}\usebox\myboxC}
     691\subfloat[C generated code for \CFA version]{\label{f:CFibonacciSim}\usebox\myboxC}
    483692\caption{Fibonacci (output) asymmetric generator}
    484693\label{f:FibonacciAsymmetricGenerator}
     
    493702};
    494703void ?{}( Fmt & fmt ) { `resume(fmt);` } // constructor
    495 void ^?{}( Fmt & f ) with(f) { $\C[1.75in]{// destructor}$
     704void ^?{}( Fmt & f ) with(f) { $\C[2.25in]{// destructor}$
    496705        if ( g != 0 || b != 0 ) sout | nl; }
    497706void `main( Fmt & f )` with(f) {
     
    499708                for ( ; g < 5; g += 1 ) { $\C{// groups}$
    500709                        for ( ; b < 4; b += 1 ) { $\C{// blocks}$
    501                                 `suspend;` $\C{// wait for character}$
    502                                 while ( ch == '\n' ) `suspend;` // ignore
    503                                 sout | ch;                                              // newline
    504                         } sout | " ";  // block spacer
    505                 } sout | nl; // group newline
     710                                do { `suspend;` $\C{// wait for character}$
      711                                } while ( ch == '\n' ); // ignore newline
     712                                sout | ch;                      $\C{// print character}$
     713                        } sout | " ";  $\C{// block separator}$
     714                } sout | nl; $\C{// group separator}$
    506715        }
    507716}
     
    521730\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    522731typedef struct {
    523         void * next;
     732        int `restart`, g, b;
    524733        char ch;
    525         int g, b;
    526734} Fmt;
    527735void comain( Fmt * f ) {
    528         if ( f->next ) goto *f->next;
    529         f->next = &&s1;
     736        `static void * states[] = {&&s0, &&s1};`
     737        `goto *states[f->restart];`
     738  s0: f->`restart` = 1;
    530739        for ( ;; ) {
    531740                for ( f->g = 0; f->g < 5; f->g += 1 ) {
    532741                        for ( f->b = 0; f->b < 4; f->b += 1 ) {
    533                                 return;
    534                           s1:;  while ( f->ch == '\n' ) return;
     742                                do { return;  s1: ;
     743                                } while ( f->ch == '\n' );
    535744                                printf( "%c", f->ch );
    536745                        } printf( " " );
     
    539748}
    540749int main() {
    541         Fmt fmt = { NULL };  comain( &fmt ); // prime
     750        Fmt fmt = { `0` };  comain( &fmt ); // prime
    542751        for ( ;; ) {
    543752                scanf( "%c", &fmt.ch );
     
    550759\end{lrbox}
    551760
    552 \subfloat[\CFA asymmetric generator]{\label{f:CFAFormatGen}\usebox\myboxA}
    553 \hspace{3pt}
     761\subfloat[\CFA]{\label{f:CFAFormatGen}\usebox\myboxA}
     762\hspace{35pt}
    554763\vrule
    555764\hspace{3pt}
    556 \subfloat[C generator simulation]{\label{f:CFormatSim}\usebox\myboxB}
     765\subfloat[C generated code for \CFA version]{\label{f:CFormatGenImpl}\usebox\myboxB}
    557766\hspace{3pt}
    558767\caption{Formatter (input) asymmetric generator}
     
    560769\end{figure}
    561770
    562 Stateful functions appear as generators, coroutines, and threads, where presentations are based on function objects or pointers~\cite{Butenhof97, C++14, MS:VisualC++, BoostCoroutines15}.
    563 For example, Python presents generators as a function object:
    564 \begin{python}
    565 def Gen():
    566         ... `yield val` ...
    567 gen = Gen()
    568 for i in range( 10 ):
    569         print( next( gen ) )
    570 \end{python}
    571 Boost presents coroutines in terms of four functor object-types:
    572 \begin{cfa}
    573 asymmetric_coroutine<>::pull_type
    574 asymmetric_coroutine<>::push_type
    575 symmetric_coroutine<>::call_type
    576 symmetric_coroutine<>::yield_type
    577 \end{cfa}
    578 and many languages present threading using function pointers, @pthreads@~\cite{Butenhof97}, \Csharp~\cite{Csharp}, Go~\cite{Go}, and Scala~\cite{Scala}, \eg pthreads:
    579 \begin{cfa}
    580 void * rtn( void * arg ) { ... }
    581 int i = 3, rc;
    582 pthread_t t; $\C{// thread id}$
    583 `rc = pthread_create( &t, rtn, (void *)i );` $\C{// create and initialized task, type-unsafe input parameter}$
    584 \end{cfa}
    585 % void mycor( pthread_t cid, void * arg ) {
    586 %       int * value = (int *)arg;                               $\C{// type unsafe, pointer-size only}$
    587 %       // thread body
    588 % }
    589 % int main() {
    590 %       int input = 0, output;
    591 %       coroutine_t cid = coroutine_create( &mycor, (void *)&input ); $\C{// type unsafe, pointer-size only}$
    592 %       coroutine_resume( cid, (void *)input, (void **)&output ); $\C{// type unsafe, pointer-size only}$
    593 % }
    594 \CFA's preferred presentation model for generators/coroutines/threads is a hybrid of objects and functions, with an object-oriented flavour.
    595 Essentially, the generator/coroutine/thread function is semantically coupled with a generator/coroutine/thread custom type.
    596 The custom type solves several issues, while accessing the underlying mechanisms used by the custom types is still allowed.
    597 
    598 
    599 \subsection{Generator}
    600 
    601 Stackless generators have the potential to be very small and fast, \ie as small and fast as function call/return for both creation and execution.
    602 The \CFA goal is to achieve this performance target, possibly at the cost of some semantic complexity.
    603 A series of different kinds of generators and their implementation demonstrate how this goal is accomplished.
    604 
    605 Figure~\ref{f:FibonacciAsymmetricGenerator} shows an unbounded asymmetric generator for an infinite sequence of Fibonacci numbers written in C and \CFA, with a simple C implementation for the \CFA version.
      771Figure~\ref{f:FibonacciAsymmetricGenerator} shows an unbounded asymmetric generator for an infinite sequence of Fibonacci numbers written (left to right) in C and \CFA, along with the underlying C implementation for the \CFA version.
    606772This generator is an \emph{output generator}, producing a new result on each resumption.
    607773To compute Fibonacci, the previous two values in the sequence are retained to generate the next value, \ie @fn1@ and @fn@, plus the execution location where control restarts when the generator is resumed, \ie top or middle.
     
    611777The C version only has the middle execution state because the top execution state is declaration initialization.
    612778Figure~\ref{f:CFAFibonacciGen} shows the \CFA approach, which also has a manual closure, but replaces the structure with a custom \CFA @generator@ type.
    613 This generator type is then connected to a function that \emph{must be named \lstinline|main|},\footnote{
    614 The name \lstinline|main| has special meaning in C, specifically the function where a program starts execution.
    615 Hence, overloading this name for other starting points (generator/coroutine/thread) is a logical extension.}
    616 called a \emph{generator main},which takes as its only parameter a reference to the generator type.
     779Each generator type must have a function named \lstinline|main|,
     780% \footnote{
     781% The name \lstinline|main| has special meaning in C, specifically the function where a program starts execution.
     782% Leveraging starting semantics to this name for generator/coroutine/thread is a logical extension.}
     783called a \emph{generator main} (leveraging the starting semantics for program @main@ in C), which is connected to the generator type via its single reference parameter.
     617784The generator main contains @suspend@ statements that suspend execution without ending the generator, versus @return@, which ends it.
    618 For the Fibonacci generator-main,\footnote{
    619 The \CFA \lstinline|with| opens an aggregate scope making its fields directly accessible, like Pascal \lstinline|with|, but using parallel semantics.
    620 Multiple aggregates may be opened.}
     785For the Fibonacci generator-main,
    621786the top initialization state appears at the start and the middle execution state is denoted by statement @suspend@.
    622787Any local variables in @main@ \emph{are not retained} between calls;
     
    627792Resuming an ended (returned) generator is undefined.
    628793Function @resume@ returns its argument generator so it can be cascaded in an expression, in this case to print the next Fibonacci value @fn@ computed in the generator instance.
    629 Figure~\ref{f:CFibonacciSim} shows the C implementation of the \CFA generator only needs one additional field, @next@, to handle retention of execution state.
    630 The computed @goto@ at the start of the generator main, which branches after the previous suspend, adds very little cost to the resume call.
    631 Finally, an explicit generator type provides both design and performance benefits, such as multiple type-safe interface functions taking and returning arbitrary types.\footnote{
    632 The \CFA operator syntax uses \lstinline|?| to denote operands, which allows precise definitions for pre, post, and infix operators, \eg \lstinline|++?|, \lstinline|?++|, and \lstinline|?+?|, in addition \lstinline|?\{\}| denotes a constructor, as in \lstinline|foo `f` = `\{`...`\}`|, \lstinline|^?\{\}| denotes a destructor, and \lstinline|?()| is \CC function call \lstinline|operator()|.
    633 }%
     794Figure~\ref{f:CFibonacciSim} shows the C implementation of the \CFA asymmetric generator.
     795Only one execution-state field, @restart@, is needed to subscript the suspension points in the generator.
     796At the start of the generator main, the @static@ declaration, @states@, is initialized to the N suspend points in the generator (where operator @&&@ dereferences/references a label~\cite{gccValueLabels}).
     797Next, the computed @goto@ selects the last suspend point and branches to it.
      798Setting @restart@ and branching via the computed @goto@ add very little cost to the suspend/resume calls.
     799
     800An advantage of the \CFA explicit generator type is the ability to allow multiple type-safe interface functions taking and returning arbitrary types.
    634801\begin{cfa}
    635802int ?()( Fib & fib ) { return `resume( fib )`.fn; } $\C[3.9in]{// function-call interface}$
    636 int ?()( Fib & fib, int N ) { for ( N - 1 ) `fib()`; return `fib()`; } $\C{// use function-call interface to skip N values}$
    637 double ?()( Fib & fib ) { return (int)`fib()` / 3.14159; } $\C{// different return type, cast prevents recursive call}\CRT$
    638 sout | (int)f1() | (double)f1() | f2( 2 ); // alternative interface, cast selects call based on return type, step 2 values
     803int ?()( Fib & fib, int N ) { for ( N - 1 ) `fib()`; return `fib()`; } $\C{// add parameter to skip N values}$
     804double ?()( Fib & fib ) { return (int)`fib()` / 3.14159; } $\C{// different return type, cast prevents recursive call}$
     805Fib f;  int i;  double d;
     806i = f();  i = f( 2 );  d = f();                                         $\C{// alternative interfaces}\CRT$
    639807\end{cfa}
    640808Now, the generator can be a separately compiled opaque-type only accessed through its interface functions.
    641809For contrast, Figure~\ref{f:PythonFibonacci} shows the equivalent Python Fibonacci generator, which does not use a generator type, and hence only has a single interface, but an implicit closure.
    642810
    643 Having to manually create the generator closure by moving local-state variables into the generator type is an additional programmer burden.
    644 (This restriction is removed by the coroutine in Section~\ref{s:Coroutine}.)
    645 This requirement follows from the generality of variable-size local-state, \eg local state with a variable-length array requires dynamic allocation because the array size is unknown at compile time.
     811\begin{figure}
     812%\centering
     813\newbox\myboxA
     814\begin{lrbox}{\myboxA}
     815\begin{python}[aboveskip=0pt,belowskip=0pt]
     816def Fib():
     817        fn1, fn = 0, 1
     818        while True:
     819                `yield fn1`
     820                fn1, fn = fn, fn1 + fn
     821f1 = Fib()
     822f2 = Fib()
     823for i in range( 10 ):
     824        print( next( f1 ), next( f2 ) )
     825
     826
     827
     828
     829
     830
     831
     832
     833
     834
     835\end{python}
     836\end{lrbox}
     837
     838\newbox\myboxB
     839\begin{lrbox}{\myboxB}
     840\begin{python}[aboveskip=0pt,belowskip=0pt]
     841def Fmt():
     842        try:
     843                while True:                                             $\C[2.5in]{\# until destructor call}$
     844                        for g in range( 5 ):            $\C{\# groups}$
     845                                for b in range( 4 ):    $\C{\# blocks}$
     846                                        while True:
     847                                                ch = (yield)    $\C{\# receive from send}$
     848                                                if '\n' not in ch: $\C{\# ignore newline}$
     849                                                        break
     850                                        print( ch, end='' )     $\C{\# print character}$
     851                                print( '  ', end='' )   $\C{\# block separator}$
     852                        print()                                         $\C{\# group separator}$
     853        except GeneratorExit:                           $\C{\# destructor}$
      854                if g != 0 or b != 0:                            $\C{\# special case}$
     855                        print()
     856fmt = Fmt()
     857`next( fmt )`                                                   $\C{\# prime, next prewritten}$
     858for i in range( 41 ):
     859        `fmt.send( 'a' );`                                      $\C{\# send to yield}$
     860\end{python}
     861\end{lrbox}
     862
     863\hspace{30pt}
     864\subfloat[Fibonacci]{\label{f:PythonFibonacci}\usebox\myboxA}
     865\hspace{3pt}
     866\vrule
     867\hspace{3pt}
     868\subfloat[Formatter]{\label{f:PythonFormatter}\usebox\myboxB}
     869\caption{Python generator}
     870\label{f:PythonGenerator}
     871\end{figure}
     872
     873Having to manually create the generator closure by moving local-state variables into the generator type is an additional programmer burden (removed by the coroutine in Section~\ref{s:Coroutine}).
     874This manual requirement follows from the generality of allowing variable-size local-state, \eg local state with a variable-length array requires dynamic allocation as the array size is unknown at compile time.
    646875However, dynamic allocation significantly increases the cost of generator creation/destruction and is a showstopper for embedded real-time programming.
    647876But more importantly, the size of the generator type is tied to the local state in the generator main, which precludes separate compilation of the generator main, \ie a generator must be inlined or local state must be dynamically allocated.
    648 With respect to safety, we believe static analysis can discriminate local state from temporary variables in a generator, \ie variable usage spanning @suspend@, and generate a compile-time error.
    649 Finally, our current experience is that most generator problems have simple data state, including local state, but complex execution state, so the burden of creating the generator type is small.
     877With respect to safety, we believe static analysis can discriminate persistent generator state from temporary generator-main state and raise a compile-time error for temporary usage spanning suspend points.
     878Our experience using generators is that the problems have simple data state, including local state, but complex execution state, so the burden of creating the generator type is small.
    650879As well, C programmers are not afraid of this kind of semantic programming requirement, if it results in very small, fast generators.
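The following sketch (a hypothetical example) shows why the variable-size local state mentioned above forces dynamic allocation: the generator closure must be a fixed-size type, so an array whose length is only known at run time cannot be embedded in the closure and must be heap allocated in the constructor and freed in the destructor, adding to creation/destruction cost.
\begin{cfa}
#include <stdlib.h>
typedef struct {                                // closure must be a fixed-size type
	int restart, n, i;                          // execution and loop state
	int * window;                               // variable-length local state
} Avg;
void Avg_ctor( Avg * a, int n ) {               // creation now requires heap allocation
	a->restart = 0;  a->n = n;  a->i = 0;
	a->window = malloc( n * sizeof( int ) );
}
void Avg_dtor( Avg * a ) { free( a->window ); } // destruction requires deallocation
\end{cfa}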
    651880
     
    669898The example takes advantage of resuming a generator in the constructor to prime the loops so the first character sent for formatting appears inside the nested loops.
     670899The destructor provides a final newline if the formatted text does not end with a complete line.
    671 Figure~\ref{f:CFormatSim} shows the C implementation of the \CFA input generator with one additional field and the computed @goto@.
    672 For contrast, Figure~\ref{f:PythonFormatter} shows the equivalent Python format generator with the same properties as the Fibonacci generator.
    673 
    674 Figure~\ref{f:DeviceDriverGen} shows a \emph{killer} asymmetric generator, a device-driver, because device drivers caused 70\%-85\% of failures in Windows/Linux~\cite{Swift05}.
    675 Device drives follow the pattern of simple data state but complex execution state, \ie finite state-machine (FSM) parsing a protocol.
    676 For example, the following protocol:
     900Figure~\ref{f:CFormatGenImpl} shows the C implementation of the \CFA input generator with one additional field and the computed @goto@.
      901For contrast, Figure~\ref{f:PythonFormatter} shows the equivalent Python format generator, which has the same properties as the Python Fibonacci generator.
     902
     903% https://dl-acm-org.proxy.lib.uwaterloo.ca/
     904
      905Figure~\ref{f:DeviceDriverGen} shows an important application for an asymmetric generator, a device driver, because device drivers are a significant source of operating-system errors: 85\% in Windows XP~\cite[p.~78]{Swift05} and 51.6\% in Linux~\cite[p.~1358]{Xiao19}. %\cite{Palix11}
      906Swift \etal~\cite[p.~86]{Swift05} restructure device drivers using the Extension Procedure Call (XPC) within the kernel via functions @nooks_driver_call@ and @nooks_kernel_call@, which have coroutine properties, context switching to separate stacks with explicit hand-off calls;
      907however, the calls do not retain execution state, and hence always start from the top.
      908An alternative approach for implementing device drivers is stack ripping.
      909However, Adya \etal~\cite{Adya02} argue against stack ripping in Section 3.2 and suggest a hybrid approach in Section 4 using cooperatively scheduled \emph{fibers}, which is a form of coroutining.
     910
     911As an example, the following protocol:
    677912\begin{center}
    678913\ldots\, STX \ldots\, message \ldots\, ESC ETX \ldots\, message \ldots\, ETX 2-byte crc \ldots
    679914\end{center}
    680 is a network message beginning with the control character STX, ending with an ETX, and followed by a 2-byte cyclic-redundancy check.
     915is for a simple network message beginning with the control character STX, ending with an ETX, and followed by a 2-byte cyclic-redundancy check.
    681916Control characters may appear in a message if preceded by an ESC.
    682917When a message byte arrives, it triggers an interrupt, and the operating system services the interrupt by calling the device driver with the byte read from a hardware register.
    683 The device driver returns a status code of its current state, and when a complete message is obtained, the operating system knows the message is in the message buffer.
    684 Hence, the device driver is an input/output generator.
    685 
    686 Note, the cost of creating and resuming the device-driver generator, @Driver@, is virtually identical to call/return, so performance in an operating-system kernel is excellent.
    687 As well, the data state is small, where variables @byte@ and @msg@ are communication variables for passing in message bytes and returning the message, and variables @lnth@, @crc@, and @sum@ are local variable that must be retained between calls and are manually hoisted into the generator type.
    688 % Manually, detecting and hoisting local-state variables is easy when the number is small.
    689 In contrast, the execution state is large, with one @resume@ and seven @suspend@s.
    690 Hence, the key benefits of the generator are correctness, safety, and maintenance because the execution states are transcribed directly into the programming language rather than using a table-driven approach.
    691 Because FSMs can be complex and frequently occur in important domains, direct generator support is important in a system programming language.
      918The device driver returns a status code of its current state, and when a complete message is obtained, the operating system reads the message accumulated in the supplied buffer.
     919Hence, the device driver is an input/output generator, where the cost of resuming the device-driver generator is the same as call/return, so performance in an operating-system kernel is excellent.
     920The key benefits of using a generator are correctness, safety, and maintenance because the execution states are transcribed directly into the programming language rather than table lookup or stack ripping.
     921The conclusion is that FSMs are complex and occur in important domains, so direct generator support is important in a system programming language.
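To make the contrast concrete, an explicit-state encoding of the same protocol might look like the following sketch (hypothetical; the control-character constants STX, ESC, and ETX, the buffer size, and the CRC handling are assumed or omitted): the execution state is flattened into an enumerated variable and a switch, which is exactly the bookkeeping the generator's @suspend@ points eliminate.
\begin{cfa}
enum DrvState { WAIT, MSG, ESCAPE, CRC1, CRC2 };  // execution state made explicit
typedef struct { enum DrvState state; char msg[64]; int lnth; } Drv;
int drive( Drv * d, char byte ) {                 // returns 1 when a message is complete
	switch ( d->state ) {
	  case WAIT:   if ( byte == STX ) { d->lnth = 0; d->state = MSG; }  break;
	  case MSG:
		if ( byte == ESC ) d->state = ESCAPE;             // next byte is data
		else if ( byte == ETX ) d->state = CRC1;          // message done, expect checksum
		else if ( d->lnth < 64 ) d->msg[d->lnth++] = byte;
		break;
	  case ESCAPE: if ( d->lnth < 64 ) d->msg[d->lnth++] = byte;  d->state = MSG;  break;
	  case CRC1:   d->state = CRC2;  break;               // checksum validation omitted
	  case CRC2:   d->state = WAIT;  return 1;
	}
	return 0;
}
\end{cfa}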
    692922
    693923\begin{figure}
    694924\centering
    695 \newbox\myboxA
    696 \begin{lrbox}{\myboxA}
    697 \begin{python}[aboveskip=0pt,belowskip=0pt]
    698 def Fib():
    699         fn1, fn = 0, 1
    700         while True:
    701                 `yield fn1`
    702                 fn1, fn = fn, fn1 + fn
    703 f1 = Fib()
    704 f2 = Fib()
    705 for i in range( 10 ):
    706         print( next( f1 ), next( f2 ) )
    707 
    708 
    709 
    710 
    711 
    712 
    713 \end{python}
    714 \end{lrbox}
    715 
    716 \newbox\myboxB
    717 \begin{lrbox}{\myboxB}
    718 \begin{python}[aboveskip=0pt,belowskip=0pt]
    719 def Fmt():
    720         try:
    721                 while True:
    722                         for g in range( 5 ):
    723                                 for b in range( 4 ):
    724                                         print( `(yield)`, end='' )
    725                                 print( '  ', end='' )
    726                         print()
    727         except GeneratorExit:
    728                 if g != 0 | b != 0:
    729                         print()
    730 fmt = Fmt()
    731 `next( fmt )`                    # prime, next prewritten
    732 for i in range( 41 ):
    733         `fmt.send( 'a' );`      # send to yield
    734 \end{python}
    735 \end{lrbox}
    736 \subfloat[Fibonacci]{\label{f:PythonFibonacci}\usebox\myboxA}
    737 \hspace{3pt}
    738 \vrule
    739 \hspace{3pt}
    740 \subfloat[Formatter]{\label{f:PythonFormatter}\usebox\myboxB}
    741 \caption{Python generator}
    742 \label{f:PythonGenerator}
    743 
    744 \bigskip
    745 
    746925\begin{tabular}{@{}l|l@{}}
    747926\begin{cfa}[aboveskip=0pt,belowskip=0pt]
     
    750929`generator` Driver {
    751930        Status status;
    752         unsigned char byte, * msg; // communication
    753         unsigned int lnth, sum;      // local state
    754         unsigned short int crc;
     931        char byte, * msg; // communication
     932        int lnth, sum;      // local state
     933        short int crc;
    755934};
    756935void ?{}( Driver & d, char * m ) { d.msg = m; }
     
    800979(The trivial cycle is a generator resuming itself.)
    801980This control flow is similar to recursion for functions but without stack growth.
    802 The steps for symmetric control-flow are creating, executing, and terminating the cycle.
      981Figure~\ref{f:PingPongFullCoroutineSteps} shows the steps for symmetric control-flow: creating, executing, and terminating the cycle.
    803982Constructing the cycle must deal with definition-before-use to close the cycle, \ie, the first generator must know about the last generator, which is not within scope.
    804983(This issue occurs for any cyclic data structure.)
    805 % The example creates all the generators and then assigns the partners that form the cycle.
    806 % Alternatively, the constructor can assign the partners as they are declared, except the first, and the first-generator partner is set after the last generator declaration to close the cycle.
    807 Once the cycle is formed, the program main resumes one of the generators, and the generators can then traverse an arbitrary cycle using @resume@ to activate partner generator(s).
     984The example creates the generators, @ping@/@pong@, and then assigns the partners that form the cycle.
     985% (Alternatively, the constructor can assign the partners as they are declared, except the first, and the first-generator partner is set after the last generator declaration to close the cycle.)
     986Once the cycle is formed, the program main resumes one of the generators, @ping@, and the generators can then traverse an arbitrary cycle using @resume@ to activate partner generator(s).
    808987Terminating the cycle is accomplished by @suspend@ or @return@, both of which go back to the stack frame that started the cycle (program main in the example).
     988Note, the creator and starter may be different, \eg if the creator calls another function that starts the cycle.
    809989The starting stack-frame is below the last active generator because the resume/resume cycle does not grow the stack.
    810 Also, since local variables are not retained in the generator function, it does not contain any objects with destructors that must be called, so the  cost is the same as a function return.
    811 Destructor cost occurs when the generator instance is deallocated, which is easily controlled by the programmer.
    812 
    813 Figure~\ref{f:CPingPongSim} shows the implementation of the symmetric generator, where the complexity is the @resume@, which needs an extension to the calling convention to perform a forward rather than backward jump.
    814 This jump-starts at the top of the next generator main to re-execute the normal calling convention to make space on the stack for its local variables.
    815 However, before the jump, the caller must reset its stack (and any registers) equivalent to a @return@, but subsequently jump forward.
    816 This semantics is basically a tail-call optimization, which compilers already perform.
    817 The example shows the assembly code to undo the generator's entry code before the direct jump.
    818 This assembly code depends on what entry code is generated, specifically if there are local variables and the level of optimization.
    819 To provide this new calling convention requires a mechanism built into the compiler, which is beyond the scope of \CFA at this time.
    820 Nevertheless, it is possible to hand generate any symmetric generators for proof of concept and performance testing.
    821 A compiler could also eliminate other artifacts in the generator simulation to further increase performance, \eg LLVM has various coroutine support~\cite{CoroutineTS}, and \CFA can leverage this support should it fork @clang@.
     990Also, since local variables are not retained in the generator function, there are no objects with destructors to be called, so the cost is the same as a function return.
     991Destructor cost occurs when the generator instance is deallocated by the creator.
    822992
    823993\begin{figure}
     
    826996\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    827997`generator PingPong` {
     998        int N, i;                               // local state
    828999        const char * name;
    829         int N;
    830         int i;                          // local state
    8311000        PingPong & partner; // rebindable reference
    8321001};
    8331002
    8341003void `main( PingPong & pp )` with(pp) {
     1004
     1005
    8351006        for ( ; i < N; i += 1 ) {
    8361007                sout | name | i;
     
    8501021\begin{cfa}[escapechar={},aboveskip=0pt,belowskip=0pt]
    8511022typedef struct PingPong {
     1023        int restart, N, i;
    8521024        const char * name;
    853         int N, i;
    8541025        struct PingPong * partner;
    855         void * next;
    8561026} PingPong;
    857 #define PPCtor(name, N) {name,N,0,NULL,NULL}
     1027#define PPCtor(name, N) {0, N, 0, name, NULL}
    8581028void comain( PingPong * pp ) {
    859         if ( pp->next ) goto *pp->next;
    860         pp->next = &&cycle;
     1029        static void * states[] = {&&s0, &&s1};
     1030        goto *states[pp->restart];
     1031  s0: pp->restart = 1;
    8611032        for ( ; pp->i < pp->N; pp->i += 1 ) {
    8621033                printf( "%s %d\n", pp->name, pp->i );
    8631034                asm( "mov  %0,%%rdi" : "=m" (pp->partner) );
    8641035                asm( "mov  %rdi,%rax" );
    865                 asm( "popq %rbx" );
     1036                asm( "add  $16, %rsp" );
     1037                asm( "popq %rbp" );
    8661038                asm( "jmp  comain" );
    867           cycle: ;
     1039          s1: ;
    8681040        }
    8691041}
     
    8811053\end{figure}
    8821054
    883 Finally, part of this generator work was inspired by the recent \CCtwenty generator proposal~\cite{C++20Coroutine19} (which they call coroutines).
     1055\begin{figure}
     1056\centering
     1057\input{FullCoroutinePhases.pstex_t}
     1058\vspace*{-10pt}
     1059\caption{Symmetric coroutine steps: Ping / Pong}
     1060\label{f:PingPongFullCoroutineSteps}
     1061\end{figure}
     1062
     1063Figure~\ref{f:CPingPongSim} shows the C implementation of the \CFA symmetric generator, where there is still only one additional field, @restart@, but @resume@ is more complex because it does a forward rather than backward jump.
     1064Before the jump, the parameter for the next call @partner@ is placed into the register used for the first parameter, @rdi@, and the remaining registers are reset for a return.
      1065The @jmp comain@ restarts the function but with a different parameter, so the new call's behaviour depends on the state of the coroutine type, \ie the branch to the restart location is taken with different data state.
      1066While the semantics of the forward call is a tail-call optimization, which compilers perform, the generator state differs on each call rather than being the common state of a tail-recursive function (\ie the parameter to the function never changes during the forward calls).
      1067However, this assembly code depends on what entry code is generated, specifically whether there are local variables and on the level of optimization.
     1068Hence, internal compiler support is necessary for any forward call (or backwards return), \eg LLVM has various coroutine support~\cite{CoroutineTS}, and \CFA can leverage this support should it eventually fork @clang@.
      1069For this reason, \CFA does not support general symmetric generators at this time, but it is possible to hand-generate any symmetric generator (as in Figure~\ref{f:CPingPongSim}) for proof of concept and performance testing.
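To make the tail-call analogy concrete, the following plain-C sketch (illustrative only, not \CFA runtime code) shows a self tail-call that optimizing compilers such as gcc and clang typically turn into a direct jump reusing the current stack frame, which is the effect the hand-coded assembly in Figure~\ref{f:CPingPongSim} reproduces for a call whose first argument changes.
\begin{cfa}
// illustrative sketch: under optimization (e.g., -O2), the tail call below is
// normally compiled to a jump that reuses the caller's frame, analogous to the
// hand-coded "jmp comain", except comain is restarted with different state
long countdown( long n ) {
	if ( n == 0 ) return 0;
	return countdown( n - 1 );		// tail call: compiled to a jump, not a call
}
\end{cfa}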
     1070
     1071Finally, part of this generator work was inspired by the recent \CCtwenty coroutine proposal~\cite{C++20Coroutine19}, which uses the general term coroutine to mean generator.
    8841072Our work provides the same high-performance asymmetric generators as \CCtwenty, and extends their work with symmetric generators.
    8851073An additional \CCtwenty generator feature allows @suspend@ and @resume@ to be followed by a restricted compound statement that is executed after the current generator has reset its stack but before calling the next generator, specified with \CFA syntax:
     
    8961084\label{s:Coroutine}
    8971085
    898 Stackful coroutines extend generator semantics, \ie there is an implicit closure and @suspend@ may appear in a helper function called from the coroutine main.
     1086Stackful coroutines (Table~\ref{t:ExecutionPropertyComposition} case 5) extend generator semantics, \ie there is an implicit closure and @suspend@ may appear in a helper function called from the coroutine main.
    8991087A coroutine is specified by replacing @generator@ with @coroutine@ for the type.
    900 Coroutine generality results in higher cost for creation, due to dynamic stack allocation, execution, due to context switching among stacks, and terminating, due to possible stack unwinding and dynamic stack deallocation.
      1088Coroutine generality results in higher cost for creation, due to dynamic stack allocation, for execution, due to context switching among stacks, and for termination, due to possible stack unwinding and dynamic stack deallocation.
    9011089A series of different kinds of coroutines and their implementations demonstrate how coroutines extend generators.
    9021090
    9031091First, the previous generator examples are converted to their coroutine counterparts, allowing local-state variables to be moved from the generator type into the coroutine main.
    904 \begin{description}
    905 \item[Fibonacci]
    906 Move the declaration of @fn1@ to the start of coroutine main.
     1092\begin{center}
     1093\begin{tabular}{@{}l|l|l|l@{}}
     1094\multicolumn{1}{c|}{Fibonacci} & \multicolumn{1}{c|}{Formatter} & \multicolumn{1}{c|}{Device Driver} & \multicolumn{1}{c}{PingPong} \\
     1095\hline
    9071096\begin{cfa}[xleftmargin=0pt]
    908 void main( Fib & fib ) with(fib) {
     1097void main( Fib & fib ) ...
    9091098        `int fn1;`
    910 \end{cfa}
    911 \item[Formatter]
    912 Move the declaration of @g@ and @b@ to the for loops in the coroutine main.
     1099
     1100
     1101\end{cfa}
     1102&
    9131103\begin{cfa}[xleftmargin=0pt]
    9141104for ( `g`; 5 ) {
    9151105        for ( `b`; 4 ) {
    916 \end{cfa}
    917 \item[Device Driver]
    918 Move the declaration of @lnth@ and @sum@ to their points of initialization.
     1106
     1107
     1108\end{cfa}
     1109&
    9191110\begin{cfa}[xleftmargin=0pt]
    920         status = CONT;
    921         `unsigned int lnth = 0, sum = 0;`
    922         ...
    923         `unsigned short int crc = byte << 8;`
    924 \end{cfa}
    925 \item[PingPong]
    926 Move the declaration of @i@ to the for loop in the coroutine main.
     1111status = CONT;
     1112`int lnth = 0, sum = 0;`
     1113...
     1114`short int crc = byte << 8;`
     1115\end{cfa}
     1116&
    9271117\begin{cfa}[xleftmargin=0pt]
    928 void main( PingPong & pp ) with(pp) {
     1118void main( PingPong & pp ) ...
    9291119        for ( `i`; N ) {
    930 \end{cfa}
    931 \end{description}
     1120
     1121
     1122\end{cfa}
     1123\end{tabular}
     1124\end{center}
    9321125It is also possible to refactor code containing local-state and @suspend@ statements into a helper function, like the computation of the CRC for the device driver.
    9331126\begin{cfa}
    934 unsigned int Crc() {
     1127int Crc() {
    9351128        `suspend;`
    936         unsigned short int crc = byte << 8;
     1129        short int crc = byte << 8;
    9371130        `suspend;`
    9381131        status = (crc | byte) == sum ? MSG : ECRC;
     
    9451138
    9461139\begin{comment}
    947 Figure~\ref{f:Coroutine3States} creates a @coroutine@ type, @`coroutine` Fib { int fn; }@, which provides communication, @fn@, for the \newterm{coroutine main}, @main@, which runs on the coroutine stack, and possibly multiple interface functions, \eg @next@.
     1140Figure~\ref{f:Coroutine3States} creates a @coroutine@ type, @`coroutine` Fib { int fn; }@, which provides communication, @fn@, for the \newterm{coroutine main}, @main@, which runs on the coroutine stack, and possibly multiple interface functions, \eg @restart@.
    9481141Like the structure in Figure~\ref{f:ExternalState}, the coroutine type allows multiple instances, where instances of this type are passed to the (overloaded) coroutine main.
    9491142The coroutine main's stack holds the state for the next generation, @f1@ and @f2@, and the code represents the three states in the Fibonacci formula via the three suspend points, to context switch back to the caller's @resume@.
    950 The interface function @next@, takes a Fibonacci instance and context switches to it using @resume@;
     1143The interface function @restart@, takes a Fibonacci instance and context switches to it using @resume@;
    9511144on restart, the Fibonacci field, @fn@, contains the next value in the sequence, which is returned.
    9521145The first @resume@ is special because it allocates the coroutine stack and cocalls its coroutine main on that stack;
     
    11141307\begin{figure}
    11151308\centering
    1116 \lstset{language=CFA,escapechar={},moredelim=**[is][\protect\color{red}]{`}{`}}% allow $
    11171309\begin{tabular}{@{}l@{\hspace{2\parindentlnth}}l@{}}
    11181310\begin{cfa}
    11191311`coroutine` Prod {
    1120         Cons & c;                       // communication
     1312        Cons & c;                       $\C[1.5in]{// communication}$
    11211313        int N, money, receipt;
    11221314};
    11231315void main( Prod & prod ) with( prod ) {
    1124         // 1st resume starts here
    1125         for ( i; N ) {
     1316        for ( i; N ) {          $\C{// 1st resume}\CRT$
    11261317                int p1 = random( 100 ), p2 = random( 100 );
    1127                 sout | p1 | " " | p2;
    11281318                int status = delivery( c, p1, p2 );
    1129                 sout | " $" | money | nl | status;
    11301319                receipt += 1;
    11311320        }
    11321321        stop( c );
    1133         sout | "prod stops";
    11341322}
    11351323int payment( Prod & prod, int money ) {
     
    11521340\begin{cfa}
    11531341`coroutine` Cons {
    1154         Prod & p;                       // communication
     1342        Prod & p;                       $\C[1.5in]{// communication}$
    11551343        int p1, p2, status;
    11561344        bool done;
    11571345};
    11581346void ?{}( Cons & cons, Prod & p ) {
    1159         &cons.p = &p; // reassignable reference
     1347        &cons.p = &p;           $\C{// reassignable reference}$
    11601348        cons.[status, done ] = [0, false];
    11611349}
    11621350void main( Cons & cons ) with( cons ) {
    1163         // 1st resume starts here
    1164         int money = 1, receipt;
     1351        int money = 1, receipt; $\C{// 1st resume}\CRT$
    11651352        for ( ; ! done; ) {
    1166                 sout | p1 | " " | p2 | nl | " $" | money;
    11671353                status += 1;
    11681354                receipt = payment( p, money );
    1169                 sout | " #" | receipt;
    11701355                money += 1;
    11711356        }
    1172         sout | "cons stops";
    11731357}
    11741358int delivery( Cons & cons, int p1, int p2 ) {
     
    11911375This example is illustrative because both producer/consumer have two interface functions with @resume@s that suspend execution in these interface (helper) functions.
    11921376The program main creates the producer coroutine, passes it to the consumer coroutine in its initialization, and closes the cycle at the call to @start@ along with the number of items to be produced.
    1193 The first @resume@ of @prod@ creates @prod@'s stack with a frame for @prod@'s coroutine main at the top, and context switches to it.
    1194 @prod@'s coroutine main starts, creates local-state variables that are retained between coroutine activations, and executes $N$ iterations, each generating two random values, calling the consumer to deliver the values, and printing the status returned from the consumer.
    1195 
     1377The call to @start@ is the first @resume@ of @prod@, which remembers the program main as the starter and creates @prod@'s stack with a frame for @prod@'s coroutine main at the top, and context switches to it.
      1378@prod@'s coroutine main starts, creates local-state variables that are retained between coroutine activations, and executes $N$ iterations, each generating two random values, calling the consumer's @delivery@ function to transfer the values, and printing the status returned from the consumer.
    11961379The producer call to @delivery@ transfers values into the consumer's communication variables, resumes the consumer, and returns the consumer status.
    1197 On the first resume, @cons@'s stack is created and initialized, holding local-state variables retained between subsequent activations of the coroutine.
    1198 The consumer iterates until the @done@ flag is set, prints the values delivered by the producer, increments status, and calls back to the producer via @payment@, and on return from @payment@, prints the receipt from the producer and increments @money@ (inflation).
    1199 The call from the consumer to @payment@ introduces the cycle between producer and consumer.
    1200 When @payment@ is called, the consumer copies values into the producer's communication variable and a resume is executed.
    1201 The context switch restarts the producer at the point where it last context switched, so it continues in @delivery@ after the resume.
    1202 @delivery@ returns the status value in @prod@'s coroutine main, where the status is printed.
    1203 The loop then repeats calling @delivery@, where each call resumes the consumer coroutine.
    1204 The context switch to the consumer continues in @payment@.
    1205 The consumer increments and returns the receipt to the call in @cons@'s coroutine main.
    1206 The loop then repeats calling @payment@, where each call resumes the producer coroutine.
      1380Similarly, on the first resume, @cons@'s stack is created and initialized, holding local-state variables retained between subsequent activations of the coroutine.
      1381The symmetric coroutine cycle forms when the consumer calls the producer's @payment@ function, which resumes the producer inside the consumer's @delivery@ function.
      1382When the producer calls @delivery@ again, it resumes the consumer inside the @payment@ function.
      1383Both interface functions then return to their corresponding coroutine mains for the next cycle.
    12071384Figure~\ref{f:ProdConsRuntimeStacks} shows the runtime stacks of the program main, and the coroutine mains for @prod@ and @cons@ during the cycling.
     1385As a consequence of a coroutine retaining its last resumer for suspending back, these reverse pointers allow @suspend@ to cycle \emph{backwards} around a symmetric coroutine cycle.
    12081386
    12091387\begin{figure}
     
    12141392\caption{Producer / consumer runtime stacks}
    12151393\label{f:ProdConsRuntimeStacks}
    1216 
    1217 \medskip
    1218 
    1219 \begin{center}
    1220 \input{FullCoroutinePhases.pstex_t}
    1221 \end{center}
    1222 \vspace*{-10pt}
    1223 \caption{Ping / Pong coroutine steps}
    1224 \label{f:PingPongFullCoroutineSteps}
    12251394\end{figure}
    12261395
     12271396Terminating a coroutine cycle is more complex than a generator cycle, because it requires context switching to the program main's \emph{stack} to shut down the program, whereas generators started by the program main run on its stack.
    1228 Furthermore, each deallocated coroutine must guarantee all destructors are run for object allocated in the coroutine type \emph{and} allocated on the coroutine's stack at the point of suspension, which can be arbitrarily deep.
    1229 When a coroutine's main ends, its stack is already unwound so any stack allocated objects with destructors have been finalized.
      1397Furthermore, each deallocated coroutine must execute all destructors for objects allocated in the coroutine type \emph{and} on the coroutine's stack at the point of suspension, which can be arbitrarily deep.
      1398In the example, termination begins with the producer's loop stopping after $N$ iterations and calling the consumer's @stop@ function, which sets the @done@ flag and resumes the consumer in function @payment@; the @payment@ call then returns, terminating the consumer's loop in its coroutine main.
     1399% (Not shown is having @prod@ raise a nonlocal @stop@ exception at @cons@ after it finishes generating values and suspend back to @cons@, which catches the @stop@ exception to terminate its loop.)
      1400When the consumer's main ends, its stack is already unwound so any stack-allocated objects with destructors are finalized.
     1401The question now is where does control continue?
     1402
    12301403The na\"{i}ve semantics for coroutine-cycle termination is to context switch to the last resumer, like executing a @suspend@/@return@ in a generator.
    12311404However, for coroutines, the last resumer is \emph{not} implicitly below the current stack frame, as for generators, because each coroutine's stack is independent.
    12321405Unfortunately, it is impossible to determine statically if a coroutine is in a cycle and unrealistic to check dynamically (graph-cycle problem).
    12331406Hence, a compromise solution is necessary that works for asymmetric (acyclic) and symmetric (cyclic) coroutines.
    1234 
    1235 Our solution is to context switch back to the first resumer (starter) once the coroutine ends.
     1407Our solution is to retain a coroutine's starter (first resumer), and context switch back to the starter when the coroutine ends.
     1408Hence, the consumer restarts its first resumer, @prod@, in @stop@, and when the producer ends, it restarts its first resumer, program main, in @start@ (see dashed lines from the end of the coroutine mains in Figure~\ref{f:ProdConsRuntimeStacks}).
    12361409This semantics works well for the most common asymmetric and symmetric coroutine usage patterns.
    1237 For asymmetric coroutines, it is common for the first resumer (starter) coroutine to be the only resumer.
    1238 All previous generators converted to coroutines have this property.
    1239 For symmetric coroutines, it is common for the cycle creator to persist for the lifetime of the cycle.
    1240 Hence, the starter coroutine is remembered on the first resume and ending the coroutine resumes the starter.
    1241 Figure~\ref{f:ProdConsRuntimeStacks} shows this semantic by the dashed lines from the end of the coroutine mains: @prod@ starts @cons@ so @cons@ resumes @prod@ at the end, and the program main starts @prod@ so @prod@ resumes the program main at the end.
     1410For asymmetric coroutines, it is common for the first resumer (starter) coroutine to be the only resumer;
     1411for symmetric coroutines, it is common for the cycle creator to persist for the lifetime of the cycle.
    12421412For other scenarios, it is always possible to devise a solution with additional programming effort, such as forcing the cycle forward (backward) to a safe point before starting termination.
    12431413
    1244 The producer/consumer example does not illustrate the full power of the starter semantics because @cons@ always ends first.
    1245 Assume generator @PingPong@ is converted to a coroutine.
    1246 Figure~\ref{f:PingPongFullCoroutineSteps} shows the creation, starter, and cyclic execution steps of the coroutine version.
    1247 The program main creates (declares) coroutine instances @ping@ and @pong@.
    1248 Next, program main resumes @ping@, making it @ping@'s starter, and @ping@'s main resumes @pong@'s main, making it @pong@'s starter.
    1249 Execution forms a cycle when @pong@ resumes @ping@, and cycles $N$ times.
    1250 By adjusting $N$ for either @ping@/@pong@, it is possible to have either one finish first, instead of @pong@ always ending first.
    1251 If @pong@ ends first, it resumes its starter @ping@ in its coroutine main, then @ping@ ends and resumes its starter the program main in function @start@.
    1252 If @ping@ ends first, it resumes its starter the program main in function @start@.
    1253 Regardless of the cycle complexity, the starter stack always leads back to the program main, but the stack can be entered at an arbitrary point.
    1254 Once back at the program main, coroutines @ping@ and @pong@ are deallocated.
    1255 For generators, deallocation runs the destructors for all objects in the generator type.
    1256 For coroutines, deallocation deals with objects in the coroutine type and must also run the destructors for any objects pending on the coroutine's stack for any unterminated coroutine.
    1257 Hence, if a coroutine's destructor detects the coroutine is not ended, it implicitly raises a cancellation exception (uncatchable exception) at the coroutine and resumes it so the cancellation exception can propagate to the root of the coroutine's stack destroying all local variable on the stack.
    1258 So the \CFA semantics for the generator and coroutine, ensure both can be safely deallocated at any time, regardless of their current state, like any other aggregate object.
    1259 Explicitly raising normal exceptions at another coroutine can replace flag variables, like @stop@, \eg @prod@ raises a @stop@ exception at @cons@ after it finishes generating values and resumes @cons@, which catches the @stop@ exception to terminate its loop.
    1260 
    1261 Finally, there is an interesting effect for @suspend@ with symmetric coroutines.
    1262 A coroutine must retain its last resumer to suspend back because the resumer is on a different stack.
    1263 These reverse pointers allow @suspend@ to cycle \emph{backwards}, which may be useful in certain cases.
    1264 However, there is an anomaly if a coroutine resumes itself, because it overwrites its last resumer with itself, losing the ability to resume the last external resumer.
    1265 To prevent losing this information, a self-resume does not overwrite the last resumer.
     1414Note, the producer/consumer example does not illustrate the full power of the starter semantics because @cons@ always ends first.
     1415Assume generator @PingPong@ in Figure~\ref{f:PingPongSymmetricGenerator} is converted to a coroutine.
     1416Unlike generators, coroutines have a starter structure with multiple levels, where the program main starts @ping@ and @ping@ starts @pong@.
     1417By adjusting $N$ for either @ping@/@pong@, it is possible to have either finish first.
     1418If @pong@ ends first, it resumes its starter @ping@ in its coroutine main, then @ping@ ends and resumes its starter the program main on return;
     1419if @ping@ ends first, it resumes its starter the program main on return.
     1420Regardless of the cycle complexity, the starter structure always leads back to the program main, but the path can be entered at an arbitrary point.
      1421Once back at the program main (creator), coroutines @ping@ and @pong@ are deallocated, running any destructors for objects within the coroutine and possibly deallocating the coroutine stacks of non-terminated coroutines, where stack deallocation implies stack unwinding to find destructors for allocated objects on the stack.
      1422Hence, the \CFA termination semantics for the generator and coroutine ensure correct deallocation, regardless of the coroutine's state (terminated or active), like any other aggregate object.
    12661423
    12671424
     
    12941451Users wanting to extend custom types or build their own can only do so in ways offered by the language.
    12951452Furthermore, implementing custom types without language support may display the power of a programming language.
    1296 \CFA blends the two approaches, providing custom type for idiomatic \CFA code, while extending and building new custom types is still possible, similar to Java concurrency with builtin and library.
      1453\CFA blends the two approaches, providing custom types for idiomatic \CFA code, while extending and building new custom types is still possible, similar to Java concurrency with builtin and library (@java.util.concurrent@) monitors.
    12971454
    12981455Part of the mechanism to generalize custom types is the \CFA trait~\cite[\S~2.3]{Moss18}, \eg the definition for custom-type @coroutine@ is anything satisfying the trait @is_coroutine@, and this trait both enforces and restricts the coroutine-interface functions.
     
    13041461forall( `dtype` T | is_coroutine(T) ) void $suspend$( T & ), resume( T & );
    13051462\end{cfa}
    1306 Note, copying generators/coroutines/threads is not meaningful.
    1307 For example, both the resumer and suspender descriptors can have bidirectional pointers;
    1308 copying these coroutines does not update the internal pointers so behaviour of both copies would be difficult to understand.
    1309 Furthermore, two coroutines cannot logically execute on the same stack.
    1310 A deep coroutine copy, which copies the stack, is also meaningless in an unmanaged language (no garbage collection), like C, because the stack may contain pointers to object within it that require updating for the copy.
      1463Note, copying generators/coroutines/threads is undefined because multiple objects cannot execute on a shared stack and stack copying does not work in unmanaged languages (no garbage collection), like C, because the stack may contain pointers to objects within it that require updating for the copy.
    13111464The \CFA @dtype@ property provides no \emph{implicit} copying operations and the @is_coroutine@ trait provides no \emph{explicit} copying operations, so all coroutines must be passed by reference (pointer).
    13121465The function definitions ensure there is a statically typed @main@ function that is the starting point (first stack frame) of a coroutine, and a mechanism to get (read) the coroutine descriptor from its handle.
     
    13521505The combination of custom types and fundamental @trait@ description of these types allows a concise specification for programmers and tools, while more advanced programmers can have tighter control over memory layout and initialization.
    13531506
    1354 Figure~\ref{f:CoroutineMemoryLayout} shows different memory-layout options for a coroutine (where a task is similar).
     1507Figure~\ref{f:CoroutineMemoryLayout} shows different memory-layout options for a coroutine (where a thread is similar).
     13551508The coroutine handle is the @coroutine@ instance containing programmer-specified type-global/communication variables across interface functions.
    13561509The coroutine descriptor contains all implicit declarations needed by the runtime, \eg @suspend@/@resume@, and can be part of the coroutine handle or separate.
    13571510The coroutine stack can appear in a number of locations and be fixed or variable sized.
    1358 Hence, the coroutine's stack could be a VLS\footnote{
    1359 We are examining variable-sized structures (VLS), where fields can be variable-sized structures or arrays.
     1511Hence, the coroutine's stack could be a variable-length structure (VLS)\footnote{
     1512We are examining VLSs, where fields can be variable-sized structures or arrays.
    13601513Once allocated, a VLS is fixed sized.}
    13611514on the allocating stack, provided the allocating stack is large enough.
     13621515For a VLS stack, allocation/deallocation is an inexpensive adjustment of the stack pointer, modulo any stack constructor costs (\eg initial frame setup).
    1363 For heap stack allocation, allocation/deallocation is an expensive heap allocation (where the heap can be a shared resource), modulo any stack constructor costs.
    1364 With heap stack allocation, it is also possible to use a split (segmented) stack calling convention, available with gcc and clang, so the stack is variable sized.
     1516For stack allocation in the heap, allocation/deallocation is an expensive allocation, where the heap can be a shared resource, modulo any stack constructor costs.
     1517It is also possible to use a split (segmented) stack calling convention, available with gcc and clang, allowing a variable-sized stack via a set of connected blocks in the heap.
    13651518Currently, \CFA supports stack/heap allocated descriptors but only fixed-sized heap allocated stacks.
    13661519In \CFA debug-mode, the fixed-sized stack is terminated with a write-only page, which catches most stack overflows.
    13671520Experience teaching concurrency with \uC~\cite{CS343} shows fixed-sized stacks are rarely an issue for students.
    1368 Split-stack allocation is under development but requires recompilation of legacy code, which may be impossible.
     1521Split-stack allocation is under development but requires recompilation of legacy code, which is not always possible.
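For illustration, the following C sketch shows one way to heap-allocate a fixed-sized stack with a protected guard page so most overflows fault immediately; it assumes POSIX @mmap@/@mprotect@ and a downward-growing stack, and is not the \CFA runtime implementation (the function name @alloc_stack@ is hypothetical).
\begin{cfa}
#include <sys/mman.h>			// mmap, mprotect (POSIX)
#include <unistd.h>				// sysconf

// illustrative sketch: allocate a fixed-sized stack plus one inaccessible
// guard page; an overflow that touches the guard page raises SIGSEGV
void * alloc_stack( size_t size ) {
	size_t page = (size_t)sysconf( _SC_PAGESIZE );
	void * base = mmap( NULL, size + page, PROT_READ | PROT_WRITE,
						MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 );
	if ( base == MAP_FAILED ) return NULL;
	mprotect( base, page, PROT_NONE );		// guard page at low end (stack grows down)
	return (char *)base + page + size;		// initial stack pointer at high end
}
\end{cfa}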
    13691522
    13701523\begin{figure}
     
    13801533
    13811534Concurrency is nondeterministic scheduling of independent sequential execution paths (threads), where each thread has its own stack.
    1382 A single thread with multiple call stacks, \newterm{coroutining}~\cite{Conway63,Marlin80}, does \emph{not} imply concurrency~\cite[\S~2]{Buhr05a}.
    1383 In coroutining, coroutines self-schedule the thread across stacks so execution is deterministic.
     1535A single thread with multiple stacks, \ie coroutining, does \emph{not} imply concurrency~\cite[\S~3]{Buhr05a}.
      1536Coroutining self-schedules the thread across stacks so execution is deterministic.
    13841537(It is \emph{impossible} to generate a concurrency error when coroutining.)
    1385 However, coroutines are a stepping stone towards concurrency.
    1386 
    1387 The transition to concurrency, even for a single thread with multiple stacks, occurs when coroutines context switch to a \newterm{scheduling coroutine}, introducing non-determinism from the coroutine perspective~\cite[\S~3,]{Buhr05a}.
     1538
     1539The transition to concurrency, even for a single thread with multiple stacks, occurs when coroutines context switch to a \newterm{scheduling coroutine}, introducing non-determinism from the coroutine perspective~\cite[\S~3]{Buhr05a}.
    13881540Therefore, a minimal concurrency system requires coroutines \emph{in conjunction with a nondeterministic scheduler}.
    1389 The resulting execution system now follows a cooperative threading model~\cite{Adya02,libdill}, called \newterm{non-preemptive scheduling}.
    1390 Adding \newterm{preemption} introduces non-cooperative scheduling, where context switching occurs randomly between any two instructions often based on a timer interrupt, called \newterm{preemptive scheduling}.
    1391 While a scheduler introduces uncertain execution among explicit context switches, preemption introduces uncertainty by introducing implicit context switches.
     1541The resulting execution system now follows a cooperative threading-model~\cite{Adya02,libdill} because context-switching points to the scheduler (blocking) are known, but the next unblocking point is unknown due to the scheduler.
      1542Adding \newterm{preemption} introduces \newterm{non-cooperative} or \newterm{preemptive} scheduling, where context-switching points to the scheduler are unknown because they can occur randomly between any two instructions, often triggered by a timer interrupt.
    13921543Uncertainty gives the illusion of parallelism on a single processor and provides a mechanism to access and increase performance on multiple processors.
     13931544The reason is that the scheduler/runtime has complete knowledge about resources and how to best utilize them.
    1394 However, the introduction of unrestricted nondeterminism results in the need for \newterm{mutual exclusion} and \newterm{synchronization}, which restrict nondeterminism for correctness;
     1545However, the introduction of unrestricted nondeterminism results in the need for \newterm{mutual exclusion} and \newterm{synchronization}~\cite[\S~4]{Buhr05a}, which restrict nondeterminism for correctness;
    13951546otherwise, it is impossible to write meaningful concurrent programs.
    13961547Optimal concurrent performance is often obtained by having as much nondeterminism as mutual exclusion and synchronization correctness allow.
    13971548
    1398 A scheduler can either be a stackless or stackful.
     1549A scheduler can also be stackless or stackful.
    13991550For stackless, the scheduler performs scheduling on the stack of the current coroutine and switches directly to the next coroutine, so there is one context switch.
    14001551For stackful, the current coroutine switches to the scheduler, which performs scheduling, and it then switches to the next coroutine, so there are two context switches.
     
    14051556\label{s:threads}
    14061557
    1407 Threading needs the ability to start a thread and wait for its completion.
     1558Threading (Table~\ref{t:ExecutionPropertyComposition} case 11) needs the ability to start a thread and wait for its completion.
    14081559A common API for this ability is @fork@ and @join@.
    1409 \begin{cquote}
    1410 \begin{tabular}{@{}lll@{}}
    1411 \multicolumn{1}{c}{\textbf{Java}} & \multicolumn{1}{c}{\textbf{\Celeven}} & \multicolumn{1}{c}{\textbf{pthreads}} \\
    1412 \begin{cfa}
    1413 class MyTask extends Thread {...}
    1414 mytask t = new MyTask(...);
     1560\vspace{4pt}
     1561\par\noindent
     1562\begin{tabular}{@{}l|l|l@{}}
     1563\multicolumn{1}{c|}{\textbf{Java}} & \multicolumn{1}{c|}{\textbf{\Celeven}} & \multicolumn{1}{c}{\textbf{pthreads}} \\
     1564\hline
     1565\begin{cfa}
     1566class MyThread extends Thread {...}
     1567mythread t = new MyThread(...);
    14151568`t.start();` // start
    14161569// concurrency
     
    14191572&
    14201573\begin{cfa}
    1421 class MyTask { ... } // functor
    1422 MyTask mytask;
    1423 `thread t( mytask, ... );` // start
     1574class MyThread { ... } // functor
     1575MyThread mythread;
     1576`thread t( mythread, ... );` // start
    14241577// concurrency
    14251578`t.join();` // wait
     
    14341587\end{cfa}
    14351588\end{tabular}
    1436 \end{cquote}
     1589\vspace{1pt}
     1590\par\noindent
    14371591\CFA has a simpler approach using a custom @thread@ type and leveraging declaration semantics (allocation/deallocation), where threads implicitly @fork@ after construction and @join@ before destruction.
    14381592\begin{cfa}
    1439 thread MyTask {};
    1440 void main( MyTask & this ) { ... }
     1593thread MyThread {};
     1594void main( MyThread & this ) { ... }
    14411595int main() {
    1442         MyTask team`[10]`; $\C[2.5in]{// allocate stack-based threads, implicit start after construction}$
     1596        MyThread team`[10]`; $\C[2.5in]{// allocate stack-based threads, implicit start after construction}$
    14431597        // concurrency
    14441598} $\C{// deallocate stack-based threads, implicit joins before destruction}$
     
    14481602Arbitrary topologies are possible using dynamic allocation, allowing threads to outlive their declaration scope, identical to normal dynamic allocation.
    14491603\begin{cfa}
    1450 MyTask * factory( int N ) { ... return `anew( N )`; } $\C{// allocate heap-based threads, implicit start after construction}$
     1604MyThread * factory( int N ) { ... return `anew( N )`; } $\C{// allocate heap-based threads, implicit start after construction}$
    14511605int main() {
    1452         MyTask * team = factory( 10 );
     1606        MyThread * team = factory( 10 );
    14531607        // concurrency
    14541608        `delete( team );` $\C{// deallocate heap-based threads, implicit joins before destruction}\CRT$
     
    14961650
    14971651Threads in \CFA are user level run by runtime kernel threads (see Section~\ref{s:CFARuntimeStructure}), where user threads provide concurrency and kernel threads provide parallelism.
    1498 Like coroutines, and for the same design reasons, \CFA provides a custom @thread@ type and a @trait@ to enforce and restrict the task-interface functions.
     1652Like coroutines, and for the same design reasons, \CFA provides a custom @thread@ type and a @trait@ to enforce and restrict the thread-interface functions.
    14991653\begin{cquote}
    15001654\begin{tabular}{@{}c@{\hspace{3\parindentlnth}}c@{}}
     
    15271681\label{s:MutualExclusionSynchronization}
    15281682
    1529 Unrestricted nondeterminism is meaningless as there is no way to know when the result is completed without synchronization.
     1683Unrestricted nondeterminism is meaningless as there is no way to know when a result is completed and safe to access.
    15301684To produce meaningful execution requires clawing back some determinism using mutual exclusion and synchronization, where mutual exclusion provides access control for threads using shared data, and synchronization is a timing relationship among threads~\cite[\S~4]{Buhr05a}.
    1531 Some concurrent systems eliminate mutable shared-state by switching to stateless communication like message passing~\cite{Thoth,Harmony,V-Kernel,MPI} (Erlang, MPI), channels~\cite{CSP} (CSP,Go), actors~\cite{Akka} (Akka, Scala), or functional techniques (Haskell).
      1685The shared data protected by mutual exclusion is called a \newterm{critical section}~\cite{Dijkstra65}, and the protection can be simple (only 1 thread) or complex (only N kinds of threads, \eg group~\cite{Joung00} or readers/writer~\cite{Courtois71}).
      1686Without synchronization control in a critical section, an arriving thread can barge ahead of preexisting waiter threads, resulting in short/long-term starvation, staleness/freshness problems, and/or incorrect transfer of data.
     1687Preventing or detecting barging is a challenge with low-level locks, but made easier through higher-level constructs.
     1688This challenge is often split into two different approaches: barging \emph{avoidance} and \emph{prevention}.
      1689Approaches that unconditionally release a lock for competing threads to acquire must use barging avoidance with flag/counter variable(s) to force barging threads to wait;
     1690approaches that conditionally hold locks during synchronization, \eg baton-passing~\cite{Andrews89}, prevent barging completely.
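As a concrete illustration of the flag/counter style of barging avoidance, the following C sketch (POSIX threads, not \CFA monitor code; the type and function names are hypothetical) uses ticket counters so an arriving thread must queue behind all earlier arrivals and cannot usurp the resource from threads already waiting:
\begin{cfa}
#include <pthread.h>

// hypothetical FIFO lock: the ticket counters prevent barging
// usage: FifoLock l = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0 };
typedef struct {
	pthread_mutex_t m;
	pthread_cond_t c;
	unsigned int next_ticket, now_serving;
} FifoLock;

void fifo_acquire( FifoLock * l ) {
	pthread_mutex_lock( &l->m );
	unsigned int my = l->next_ticket++;		// record arrival order
	while ( my != l->now_serving )			// later arrivals cannot barge ahead
		pthread_cond_wait( &l->c, &l->m );
	pthread_mutex_unlock( &l->m );
}

void fifo_release( FifoLock * l ) {
	pthread_mutex_lock( &l->m );
	l->now_serving += 1;					// hand the turn to the next waiter
	pthread_cond_broadcast( &l->c );		// wake waiters to recheck their ticket
	pthread_mutex_unlock( &l->m );
}
\end{cfa}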
     1691
     1692At the lowest level, concurrent control is provided by atomic operations, upon which different kinds of locking mechanisms are constructed, \eg spin locks, semaphores~\cite{Dijkstra68b}, barriers, and path expressions~\cite{Campbell74}.
     1693However, for productivity it is always desirable to use the highest-level construct that provides the necessary efficiency~\cite{Hochstein05}.
     1694A significant challenge with locks is composability because it takes careful organization for multiple locks to be used while preventing deadlock.
     1695Easing composability is another feature higher-level mutual-exclusion mechanisms can offer.
     1696Some concurrent systems eliminate mutable shared-state by switching to non-shared communication like message passing~\cite{Thoth,Harmony,V-Kernel,MPI} (Erlang, MPI), channels~\cite{CSP} (CSP,Go), actors~\cite{Akka} (Akka, Scala), or functional techniques (Haskell).
    15321697However, these approaches introduce a new communication mechanism for concurrency different from the standard communication using function call/return.
    15331698Hence, a programmer must learn and manipulate two sets of design/programming patterns.
    15341699While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.
    1535 In contrast, approaches based on stateful models more closely resemble the standard call/return programming model, resulting in a single programming paradigm.
    1536 
    1537 At the lowest level, concurrent control is implemented by atomic operations, upon which different kinds of locking mechanisms are constructed, \eg semaphores~\cite{Dijkstra68b}, barriers, and path expressions~\cite{Campbell74}.
    1538 However, for productivity it is always desirable to use the highest-level construct that provides the necessary efficiency~\cite{Hochstein05}.
    1539 A newer approach for restricting non-determinism is transactional memory~\cite{Herlihy93}.
    1540 While this approach is pursued in hardware~\cite{Nakaike15} and system languages, like \CC~\cite{Cpp-Transactions}, the performance and feature set is still too restrictive to be the main concurrency paradigm for system languages, which is why it is rejected as the core paradigm for concurrency in \CFA.
    1541 
    1542 One of the most natural, elegant, and efficient mechanisms for mutual exclusion and synchronization for shared-memory systems is the \emph{monitor}.
    1543 First proposed by Brinch Hansen~\cite{Hansen73} and later described and extended by C.A.R.~Hoare~\cite{Hoare74}, many concurrent programming languages provide monitors as an explicit language construct: \eg Concurrent Pascal~\cite{ConcurrentPascal}, Mesa~\cite{Mesa}, Modula~\cite{Modula-2}, Turing~\cite{Turing:old}, Modula-3~\cite{Modula-3}, NeWS~\cite{NeWS}, Emerald~\cite{Emerald}, \uC~\cite{Buhr92a} and Java~\cite{Java}.
    1544 In addition, operating-system kernels and device drivers have a monitor-like structure, although they often use lower-level primitives such as mutex locks or semaphores to simulate monitors.
    1545 For these reasons, \CFA selected monitors as the core high-level concurrency construct, upon which higher-level approaches can be easily constructed.
    1546 
    1547 
    1548 \subsection{Mutual Exclusion}
    1549 
    1550 A group of instructions manipulating a specific instance of shared data that must be performed atomically is called a \newterm{critical section}~\cite{Dijkstra65}, which is enforced by \newterm{simple mutual-exclusion}.
    1551 The generalization is called a \newterm{group critical-section}~\cite{Joung00}, where multiple tasks with the same session use the resource simultaneously and different sessions are segregated, which is enforced by \newterm{complex mutual-exclusion} providing the correct kind and number of threads using a group critical-section.
    1552 The readers/writer problem~\cite{Courtois71} is an instance of a group critical-section, where readers share a session but writers have a unique session.
    1553 
    1554 However, many solutions exist for mutual exclusion, which vary in terms of performance, flexibility and ease of use.
    1555 Methods range from low-level locks, which are fast and flexible but require significant attention for correctness, to higher-level concurrency techniques, which sacrifice some performance to improve ease of use.
    1556 Ease of use comes by either guaranteeing some problems cannot occur, \eg deadlock free, or by offering a more explicit coupling between shared data and critical section.
    1557 For example, the \CC @std::atomic<T>@ offers an easy way to express mutual-exclusion on a restricted set of operations, \eg reading/writing, for numerical types.
    1558 However, a significant challenge with locks is composability because it takes careful organization for multiple locks to be used while preventing deadlock.
    1559 Easing composability is another feature higher-level mutual-exclusion mechanisms can offer.
    1560 
    1561 
    1562 \subsection{Synchronization}
    1563 
    1564 Synchronization enforces relative ordering of execution, and synchronization tools provide numerous mechanisms to establish these timing relationships.
    1565 Low-level synchronization primitives offer good performance and flexibility at the cost of ease of use;
    1566 higher-level mechanisms often simplify usage by adding better coupling between synchronization and data, \eg receive-specific versus receive-any thread in message passing or offering specialized solutions, \eg barrier lock.
    1567 Often synchronization is used to order access to a critical section, \eg ensuring a waiting writer thread enters the critical section before a calling reader thread.
    1568 If the calling reader is scheduled before the waiting writer, the reader has barged.
    1569 Barging can result in staleness/freshness problems, where a reader barges ahead of a writer and reads temporally stale data, or a writer barges ahead of another writer overwriting data with a fresh value preventing the previous value from ever being read (lost computation).
    1570 Preventing or detecting barging is an involved challenge with low-level locks, which is made easier through higher-level constructs.
    1571 This challenge is often split into two different approaches: barging avoidance and prevention.
    1572 Algorithms that unconditionally releasing a lock for competing threads to acquire use barging avoidance during synchronization to force a barging thread to wait;
    1573 algorithms that conditionally hold locks during synchronization, \eg baton-passing~\cite{Andrews89}, prevent barging completely.
     1700In contrast, approaches based on shared-state models more closely resemble the standard call/return programming model, resulting in a single programming paradigm.
     1701Finally, a newer approach for restricting non-determinism is transactional memory~\cite{Herlihy93}.
     1702While this approach is pursued in hardware~\cite{Nakaike15} and system languages, like \CC~\cite{Cpp-Transactions}, the performance and feature set is still too restrictive~\cite{Cascaval08,Boehm09} to be the main concurrency paradigm for system languages.
    15741703
    15751704
     
    15771706\label{s:Monitor}
    15781707
    1579 A \textbf{monitor} is a set of functions that ensure mutual exclusion when accessing shared state.
    1580 More precisely, a monitor is a programming technique that implicitly binds mutual exclusion to static function scope, as opposed to locks, where mutual-exclusion is defined by acquire/release calls, independent of lexical context (analogous to block and heap storage allocation).
     1708One of the most natural, elegant, efficient, high-level mechanisms for mutual exclusion and synchronization for shared-memory systems is the \emph{monitor} (Table~\ref{t:ExecutionPropertyComposition} case 2).
     1709First proposed by Brinch Hansen~\cite{Hansen73} and later described and extended by C.A.R.~Hoare~\cite{Hoare74}, many concurrent programming languages provide monitors as an explicit language construct: \eg Concurrent Pascal~\cite{ConcurrentPascal}, Mesa~\cite{Mesa}, Modula~\cite{Modula-2}, Turing~\cite{Turing:old}, Modula-3~\cite{Modula-3}, NeWS~\cite{NeWS}, Emerald~\cite{Emerald}, \uC~\cite{Buhr92a} and Java~\cite{Java}.
     1710In addition, operating-system kernels and device drivers have a monitor-like structure, although they often use lower-level primitives such as mutex locks or semaphores to manually implement a monitor.
     1711For these reasons, \CFA selected monitors as the core high-level concurrency construct, upon which higher-level approaches can be easily constructed.
     1712
     1713Specifically, a \textbf{monitor} is a set of functions that ensure mutual exclusion when accessing shared state.
     1714More precisely, a monitor is a programming technique that implicitly binds mutual exclusion to static function scope by call/return, as opposed to locks, where mutual-exclusion is defined by acquire/release calls, independent of lexical context (analogous to block and heap storage allocation).
    15811715Restricting acquire/release points eases programming, comprehension, and maintenance, at a slight cost in flexibility and efficiency.
    15821716\CFA uses a custom @monitor@ type and leverages declaration semantics (deallocation) to protect active or waiting threads in a monitor.
    15831717
    15841718The following is a \CFA monitor implementation of an atomic counter.
    1585 \begin{cfa}[morekeywords=nomutex]
     1719\begin{cfa}
    15861720`monitor` Aint { int cnt; }; $\C[4.25in]{// atomic integer counter}$
    1587 int ++?( Aint & `mutex`$\(_{opt}\)$ this ) with( this ) { return ++cnt; } $\C{// increment}$
    1588 int ?=?( Aint & `mutex`$\(_{opt}\)$ lhs, int rhs ) with( lhs ) { cnt = rhs; } $\C{// conversions with int}\CRT$
    1589 int ?=?( int & lhs, Aint & `mutex`$\(_{opt}\)$ rhs ) with( rhs ) { lhs = cnt; }
    1590 \end{cfa}
    1591 % The @Aint@ constructor, @?{}@, uses the \lstinline[morekeywords=nomutex]@nomutex@ qualifier indicating mutual exclusion is unnecessary during construction because an object is inaccessible (private) until after it is initialized.
    1592 % (While a constructor may publish its address into a global variable, doing so generates a race-condition.)
    1593 The prefix increment operation, @++?@, is normally @mutex@, indicating mutual exclusion is necessary during function execution, to protect the incrementing from race conditions, unless there is an atomic increment instruction for the implementation type.
    1594 The assignment operators provide bidirectional conversion between an atomic and normal integer without accessing field @cnt@;
    1595 these operations only need @mutex@, if reading/writing the implementation type is not atomic.
    1596 The atomic counter is used without any explicit mutual-exclusion and provides thread-safe semantics, which is similar to the \CC template @std::atomic@.
     1721int ++?( Aint & `mutex` this ) with( this ) { return ++cnt; } $\C{// increment}$
     1722int ?=?( Aint & `mutex` lhs, int rhs ) with( lhs ) { cnt = rhs; } $\C{// conversions with int, mutex optional}\CRT$
     1723int ?=?( int & lhs, Aint & `mutex` rhs ) with( rhs ) { lhs = cnt; }
     1724\end{cfa}
     1725The operators use the parameter-only declaration type-qualifier @mutex@ to mark which parameters require locking during function execution to protect from race conditions.
     1726The assignment operators provide bidirectional conversion between an atomic and normal integer without accessing field @cnt@.
     1727(These operations only need @mutex@, if reading/writing the implementation type is not atomic.)
     1728The atomic counter is used without any explicit mutual-exclusion and provides thread-safe semantics.
    15971729\begin{cfa}
    15981730int i = 0, j = 0, k = 5;
     
    16021734i = x; j = y; k = z;
    16031735\end{cfa}
     1736Note, like other concurrent programming languages, \CFA has specializations for the basic types using atomic instructions for performance and a general trait similar to the \CC template @std::atomic@.
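For comparison only, a rough equivalent of such a specialization can be written in plain C11 with the standard @<stdatomic.h>@ operations (a sketch, not the \CFA implementation):
\begin{cfa}
#include <stdatomic.h>

_Atomic int cnt = 0;						// C11 atomic integer counter

int inc( void ) {
	return atomic_fetch_add( &cnt, 1 ) + 1;	// atomic increment, returns the new value
}
\end{cfa}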
    16041737
    16051738\CFA monitors have \newterm{multi-acquire} semantics so the thread in the monitor may acquire it multiple times without deadlock, allowing recursion and calling other interface functions.
     1739\newpage
    16061740\begin{cfa}
    16071741monitor M { ... } m;
     
    16121746\end{cfa}
    16131747\CFA monitors also ensure the monitor lock is released regardless of how an acquiring function ends (normal or exceptional), and returning a shared variable is safe via copying before the lock is released.
    1614 Similar safety is offered by \emph{explicit} mechanisms like \CC RAII;
    1615 monitor \emph{implicit} safety ensures no programmer usage errors.
      1748Similar safety is offered by \emph{explicit} opt-in disciplines like \CC RAII, versus the monitor's \emph{implicit} language-enforced guarantee, which precludes programmer usage errors.
    16161749Furthermore, RAII mechanisms cannot handle complex synchronization within a monitor, where the monitor lock may not be released on function exit because it is passed to an unblocking thread;
    16171750RAII is purely a mutual-exclusion mechanism (see Section~\ref{s:Scheduling}).
     
    16391772\end{cquote}
    16401773The @dtype@ property prevents \emph{implicit} copy operations and the @is_monitor@ trait provides no \emph{explicit} copy operations, so monitors must be passed by reference (pointer).
    1641 % Copying a lock is insecure because it is possible to copy an open lock and then use the open copy when the original lock is closed to simultaneously access the shared data.
    1642 % Copying a monitor is secure because both the lock and shared data are copies, but copying the shared data is meaningless because it no longer represents a unique entity.
     16431774Similarly, the function definitions ensure there is a mechanism to get (read) the monitor descriptor from its handle, and a special destructor to prevent deallocation while a thread is using the shared data.
    16441775The custom monitor type also inserts any locks needed to implement the mutual exclusion semantics.
     
    16521783For example, a monitor may be passed through multiple helper functions before it is necessary to acquire the monitor's mutual exclusion.
    16531784
    1654 The benefit of mandatory monitor qualifiers is self-documentation, but requiring both @mutex@ and \lstinline[morekeywords=nomutex]@nomutex@ for all monitor parameters is redundant.
    1655 Instead, the semantics has one qualifier as the default and the other required.
    1656 For example, make the safe @mutex@ qualifier the default because assuming \lstinline[morekeywords=nomutex]@nomutex@ may cause subtle errors.
    1657 Alternatively, make the unsafe \lstinline[morekeywords=nomutex]@nomutex@ qualifier the default because it is the \emph{normal} parameter semantics while @mutex@ parameters are rare.
    1658 Providing a default qualifier implies knowing whether a parameter is a monitor.
    1659 Since \CFA relies heavily on traits as an abstraction mechanism, types can coincidentally match the monitor trait but not be a monitor, similar to inheritance where a shape and playing card can both be drawable.
    1660 For this reason, \CFA requires programmers to identify the kind of parameter with the @mutex@ keyword and uses no keyword to mean \lstinline[morekeywords=nomutex]@nomutex@.
     1785\CFA requires programmers to identify the kind of parameter with the @mutex@ keyword and uses no keyword to mean \lstinline[morekeywords=nomutex]@nomutex@, because @mutex@ parameters are rare and no keyword is the \emph{normal} parameter semantics.
     1786Hence, @mutex@ parameters are documentation, at the function and its prototype, to both programmer and compiler, without other redundant keywords.
      1787Furthermore, \CFA relies heavily on traits as an abstraction mechanism, so the @mutex@ qualifier prevents coincidental matching of the monitor trait by a type that is not a monitor, similar to coincidental inheritance where a shape and a playing card can both be drawable.
    16611788
    16621789The next semantic decision is establishing which parameter \emph{types} may be qualified with @mutex@.
     
    16721799Function @f3@ has a multiple object matrix, and @f4@ a multiple object data structure.
    16731800While shown shortly, multiple object acquisition is possible, but the number of objects must be statically known.
      1801Therefore, \CFA only acquires one monitor per parameter with exactly one level of indirection, and excludes pointer types to unknown-sized arrays.
     1801Therefore, \CFA only acquires one monitor per parameter with exactly one level of indirection, and exclude pointer types to unknown sized arrays.
    16751802
    16761803For object-oriented monitors, \eg Java, calling a mutex member \emph{implicitly} acquires mutual exclusion of the receiver object, @`rec`.foo(...)@.
     
    16791806While object-oriented monitors can be extended with a mutex qualifier for multiple-monitor members, no prior example of this feature could be found.}
    16801807called \newterm{bulk acquire}.
    1681 \CFA guarantees acquisition order is consistent across calls to @mutex@ functions using the same monitors as arguments, so acquiring multiple monitors is safe from deadlock.
     1808\CFA guarantees bulk acquisition order is consistent across calls to @mutex@ functions using the same monitors as arguments, so acquiring multiple monitors in a bulk acquire is safe from deadlock.
    16821809Figure~\ref{f:BankTransfer} shows a trivial solution to the bank transfer problem~\cite{BankTransfer}, where two resources must be locked simultaneously, using \CFA monitors with implicit locking and \CC with explicit locking.
    16831810A \CFA programmer only has to manage when to acquire mutual exclusion;
     
    16991826void transfer( BankAccount & `mutex` my,
    17001827        BankAccount & `mutex` your, int me2you ) {
    1701 
     1828        // bulk acquire
    17021829        deposit( my, -me2you ); // debit
    17031830        deposit( your, me2you ); // credit
     
    17291856void transfer( BankAccount & my,
    17301857                        BankAccount & your, int me2you ) {
    1731         `scoped_lock lock( my.m, your.m );`
     1858        `scoped_lock lock( my.m, your.m );` // bulk acquire
    17321859        deposit( my, -me2you ); // debit
    17331860        deposit( your, me2you ); // credit
     
    17571884\end{figure}
    17581885
    1759 Users can still force the acquiring order by using @mutex@/\lstinline[morekeywords=nomutex]@nomutex@.
     1886Users can still force the acquiring order by using or not using @mutex@.
    17601887\begin{cfa}
    17611888void foo( M & mutex m1, M & mutex m2 ); $\C{// acquire m1 and m2}$
    1762 void bar( M & mutex m1, M & /* nomutex */ m2 ) { $\C{// acquire m1}$
     1889void bar( M & mutex m1, M & m2 ) { $\C{// only acquire m1}$
    17631890        ... foo( m1, m2 ); ... $\C{// acquire m2}$
    17641891}
    1765 void baz( M & /* nomutex */ m1, M & mutex m2 ) { $\C{// acquire m2}$
     1892void baz( M & m1, M & mutex m2 ) { $\C{// only acquire m2}$
    17661893        ... foo( m1, m2 ); ... $\C{// acquire m1}$
    17671894}
     
    18061933% There are many aspects of scheduling in a concurrency system, all related to resource utilization by waiting threads, \ie which thread gets the resource next.
    18071934% Different forms of scheduling include access to processors by threads (see Section~\ref{s:RuntimeStructureCluster}), another is access to a shared resource by a lock or monitor.
    1808 This section discusses monitor scheduling for waiting threads eligible for entry, \ie which thread gets the shared resource next. (See Section~\ref{s:RuntimeStructureCluster} for scheduling threads on virtual processors.)
    1809 While monitor mutual-exclusion provides safe access to shared data, the monitor data may indicate that a thread accessing it cannot proceed, \eg a bounded buffer may be full/empty so produce/consumer threads must block.
    1810 Leaving the monitor and trying again (busy waiting) is impractical for high-level programming.
    1811 Monitors eliminate busy waiting by providing synchronization to schedule threads needing access to the shared data, where threads block versus spinning.
     1935This section discusses scheduling for waiting threads eligible for monitor entry, \ie which user thread gets the shared resource next. (See Section~\ref{s:RuntimeStructureCluster} for scheduling kernel threads on virtual processors.)
      1936While monitor mutual-exclusion provides safe access to its shared data, the data may indicate a thread cannot proceed, \eg a bounded buffer may be full/\-empty so producer/consumer threads must block.
     1937Leaving the monitor and retrying (busy waiting) is impractical for high-level programming.
     1938
     1939Monitors eliminate busy waiting by providing synchronization within the monitor critical-section to schedule threads needing access to the shared data, where threads block versus spin.
    18121940Synchronization is generally achieved with internal~\cite{Hoare74} or external~\cite[\S~2.9.2]{uC++} scheduling.
    1813 \newterm{Internal scheduling} is characterized by each thread entering the monitor and making an individual decision about proceeding or blocking, while \newterm{external scheduling} is characterized by an entering thread making a decision about proceeding for itself and on behalf of other threads attempting entry.
    1814 Finally, \CFA monitors do not allow calling threads to barge ahead of signalled threads, which simplifies synchronization among threads in the monitor and increases correctness.
    1815 If barging is allowed, synchronization between a signaller and signallee is difficult, often requiring additional flags and multiple unblock/block cycles.
    1816 In fact, signals-as-hints is completely opposite from that proposed by Hoare in the seminal paper on monitors~\cite[p.~550]{Hoare74}.
     1941\newterm{Internal} (largely) schedules threads located \emph{inside} the monitor and is accomplished using condition variables with signal and wait.
     1942\newterm{External} (largely) schedules threads located \emph{outside} the monitor and is accomplished with the @waitfor@ statement.
      1943Note, internal scheduling has a small amount of external scheduling and vice versa, so the naming denotes where the majority of the blocked threads reside (inside or outside) for scheduling.
     1944For complex scheduling, the approaches can be combined, so there can be an equal number of threads waiting inside and outside.
     1945
     1946\CFA monitors do not allow calling threads to barge ahead of signalled threads (via barging prevention), which simplifies synchronization among threads in the monitor and increases correctness.
     1947A direct consequence of this semantics is that unblocked waiting threads are not required to recheck the waiting condition, \ie waits are not in a starvation-prone busy-loop as required by the signals-as-hints style with barging.
     1948Preventing barging comes directly from Hoare's semantics in the seminal paper on monitors~\cite[p.~550]{Hoare74}.
    18171949% \begin{cquote}
    18181950% However, we decree that a signal operation be followed immediately by resumption of a waiting program, without possibility of an intervening procedure call from yet a third program.
    18191951% It is only in this way that a waiting program has an absolute guarantee that it can acquire the resource just released by the signalling program without any danger that a third program will interpose a monitor entry and seize the resource instead.~\cite[p.~550]{Hoare74}
    18201952% \end{cquote}
    1821 Furthermore, \CFA concurrency has no spurious wakeup~\cite[\S~9]{Buhr05a}, which eliminates an implicit form of self barging.
    1822 Hence, a \CFA @wait@ statement is not enclosed in a @while@ loop retesting a blocking predicate, which can cause thread starvation due to barging.
    1823 
    1824 Figure~\ref{f:MonitorScheduling} shows general internal/external scheduling (for the bounded-buffer example in Figure~\ref{f:InternalExternalScheduling}).
    1825 External calling threads block on the calling queue, if the monitor is occupied, otherwise they enter in FIFO order.
    1826 Internal threads block on condition queues via @wait@ and reenter from the condition in FIFO order.
    1827 Alternatively, internal threads block on urgent from the @signal_block@ or @waitfor@, and reenter implicitly when the monitor becomes empty, \ie, the thread in the monitor exits or waits.
    1828 
    1829 There are three signalling mechanisms to unblock waiting threads to enter the monitor.
    1830 Note, signalling cannot have the signaller and signalled thread in the monitor simultaneously because of the mutual exclusion, so either the signaller or signallee can proceed.
    1831 For internal scheduling, threads are unblocked from condition queues using @signal@, where the signallee is moved to urgent and the signaller continues (solid line).
    1832 Multiple signals move multiple signallees to urgent until the condition is empty.
    1833 When the signaller exits or waits, a thread blocked on urgent is processed before calling threads to prevent barging.
      1953Furthermore, \CFA concurrency has no spurious wakeup~\cite[\S~9]{Buhr05a}, which eliminates an implicit form of self-barging.
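As a small illustrative sketch (mirroring the bounded buffer that appears in Figure~\ref{f:BBInt} below), barging prevention and the absence of spurious wakeup mean the awaited state is guaranteed when a waiter resumes, so a simple @if@ suffices before the @wait@:
\begin{cfa}
if ( count == 10 ) wait( empty );	$\C{// signals-as-hints would require: while ( count == 10 ) wait( empty )}$
\end{cfa}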
     1954
     1955Monitor mutual-exclusion means signalling cannot have the signaller and signalled thread in the monitor simultaneously, so only the signaller or signallee can proceed.
     1956Figure~\ref{f:MonitorScheduling} shows internal/external scheduling for the bounded-buffer examples in Figure~\ref{f:GenericBoundedBuffer}.
     1957For internal scheduling in Figure~\ref{f:BBInt}, the @signal@ moves the signallee (front thread of the specified condition queue) to urgent and the signaller continues (solid line).
     1958Multiple signals move multiple signallees to urgent until the condition queue is empty.
     1959When the signaller exits or waits, a thread is implicitly unblocked from urgent (if available) before unblocking a calling thread to prevent barging.
    18341960(Java conceptually moves the signalled thread to the calling queue, and hence, allows barging.)
    1835 The alternative unblock is in the opposite order using @signal_block@, where the signaller is moved to urgent and the signallee continues (dashed line), and is implicitly unblocked from urgent when the signallee exits or waits.
    1836 
    1837 For external scheduling, the condition queues are not used;
    1838 instead threads are unblocked directly from the calling queue using @waitfor@ based on function names requesting mutual exclusion.
    1839 (The linear search through the calling queue to locate a particular call can be reduced to $O(1)$.)
    1840 The @waitfor@ has the same semantics as @signal_block@, where the signalled thread executes before the signallee, which waits on urgent.
    1841 Executing multiple @waitfor@s from different signalled functions causes the calling threads to move to urgent.
    1842 External scheduling requires urgent to be a stack, because the signaller expects to execute immediately after the specified monitor call has exited or waited.
    1843 Internal scheduling behaves the same for an urgent stack or queue, except for multiple signalling, where the threads unblock from urgent in reverse order from signalling.
    1844 If the restart order is important, multiple signalling by a signal thread can be transformed into daisy-chain signalling among threads, where each thread signals the next thread.
    1845 We tried both a stack for @waitfor@ and queue for signalling, but that resulted in complex semantics about which thread enters next.
    1846 Hence, \CFA uses a single urgent stack to correctly handle @waitfor@ and adequately support both forms of signalling.
      1961Signal is used when the signaller is providing the cooperation needed by the signallee, \eg a consumer creating an empty buffer-slot for a waiting producer. The signaller immediately exits the monitor to run concurrently (consume the removed buffer element) and passes control of the monitor to the signalled thread, which can immediately take advantage of the state change.
     1962Specifically, the @wait@ function atomically blocks the calling thread and implicitly releases the monitor lock(s) for all monitors in the function's parameter list.
     1963Signalling is unconditional because signalling an empty condition queue does nothing.
     1964It is common to declare condition queues as monitor fields to prevent shared access, hence no locking is required for access as the queues are protected by the monitor lock.
     1965In \CFA, a condition queue can be created/stored independently.
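For example, a minimal sketch of both declaration styles (the helper function @doWait@ is illustrative):
\begin{cfa}
monitor M { condition c; ... };	$\C{// condition protected as a monitor field}$
condition c2;	$\C{// independently created/stored condition}$
void doWait( M & mutex m, condition & c ) { wait( c ); }	$\C{// wait atomically blocks and releases m}$
\end{cfa}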
    18471966
    18481967\begin{figure}
     
    18621981\end{figure}
    18631982
    1864 Figure~\ref{f:BBInt} shows a \CFA generic bounded-buffer with internal scheduling, where producers/consumers enter the monitor, detect the buffer is full/empty, and block on an appropriate condition variable, @full@/@empty@.
    1865 The @wait@ function atomically blocks the calling thread and implicitly releases the monitor lock(s) for all monitors in the function's parameter list.
    1866 The appropriate condition variable is signalled to unblock an opposite kind of thread after an element is inserted/removed from the buffer.
    1867 Signalling is unconditional, because signalling an empty condition variable does nothing.
    1868 It is common to declare condition variables as monitor fields to prevent shared access, hence no locking is required for access as the conditions are protected by the monitor lock.
    1869 In \CFA, a condition variable can be created/stored independently.
    1870 % To still prevent expensive locking on access, a condition variable is tied to a \emph{group} of monitors on first use, called \newterm{branding}, resulting in a low-cost boolean test to detect sharing from other monitors.
    1871 
    1872 % Signalling semantics cannot have the signaller and signalled thread in the monitor simultaneously, which means:
    1873 % \begin{enumerate}
    1874 % \item
    1875 % The signalling thread returns immediately and the signalled thread continues.
    1876 % \item
    1877 % The signalling thread continues and the signalled thread is marked for urgent unblocking at the next scheduling point (exit/wait).
    1878 % \item
    1879 % The signalling thread blocks but is marked for urgent unblocking at the next scheduling point and the signalled thread continues.
    1880 % \end{enumerate}
    1881 % The first approach is too restrictive, as it precludes solving a reasonable class of problems, \eg dating service (see Figure~\ref{f:DatingService}).
    1882 % \CFA supports the next two semantics as both are useful.
    1883 
    18841983\begin{figure}
    18851984\centering
     
    18931992                T elements[10];
    18941993        };
    1895         void ?{}( Buffer(T) & buffer ) with(buffer) {
     1994        void ?{}( Buffer(T) & buf ) with(buf) {
    18961995                front = back = count = 0;
    18971996        }
    1898         void insert( Buffer(T) & mutex buffer, T elem )
    1899                                 with(buffer) {
    1900                 if ( count == 10 ) `wait( empty )`;
    1901                 // insert elem into buffer
     1997
     1998        void insert(Buffer(T) & mutex buf, T elm) with(buf){
     1999                if ( count == 10 ) `wait( empty )`; // full ?
     2000                // insert elm into buf
    19022001                `signal( full )`;
    19032002        }
    1904         T remove( Buffer(T) & mutex buffer ) with(buffer) {
    1905                 if ( count == 0 ) `wait( full )`;
    1906                 // remove elem from buffer
     2003        T remove( Buffer(T) & mutex buf ) with(buf) {
     2004                if ( count == 0 ) `wait( full )`; // empty ?
     2005                // remove elm from buf
    19072006                `signal( empty )`;
    1908                 return elem;
     2007                return elm;
    19092008        }
    19102009}
    19112010\end{cfa}
    19122011\end{lrbox}
    1913 
    1914 % \newbox\myboxB
    1915 % \begin{lrbox}{\myboxB}
    1916 % \begin{cfa}[aboveskip=0pt,belowskip=0pt]
    1917 % forall( otype T ) { // distribute forall
    1918 %       monitor Buffer {
    1919 %
    1920 %               int front, back, count;
    1921 %               T elements[10];
    1922 %       };
    1923 %       void ?{}( Buffer(T) & buffer ) with(buffer) {
    1924 %               [front, back, count] = 0;
    1925 %       }
    1926 %       T remove( Buffer(T) & mutex buffer ); // forward
    1927 %       void insert( Buffer(T) & mutex buffer, T elem )
    1928 %                               with(buffer) {
    1929 %               if ( count == 10 ) `waitfor( remove, buffer )`;
    1930 %               // insert elem into buffer
    1931 %
    1932 %       }
    1933 %       T remove( Buffer(T) & mutex buffer ) with(buffer) {
    1934 %               if ( count == 0 ) `waitfor( insert, buffer )`;
    1935 %               // remove elem from buffer
    1936 %
    1937 %               return elem;
    1938 %       }
    1939 % }
    1940 % \end{cfa}
    1941 % \end{lrbox}
    19422012
    19432013\newbox\myboxB
    19442014\begin{lrbox}{\myboxB}
    19452015\begin{cfa}[aboveskip=0pt,belowskip=0pt]
     2016forall( otype T ) { // distribute forall
     2017        monitor Buffer {
     2018
     2019                int front, back, count;
     2020                T elements[10];
     2021        };
     2022        void ?{}( Buffer(T) & buf ) with(buf) {
     2023                front = back = count = 0;
     2024        }
     2025        T remove( Buffer(T) & mutex buf ); // forward
     2026        void insert(Buffer(T) & mutex buf, T elm) with(buf){
     2027                if ( count == 10 ) `waitfor( remove : buf )`;
     2028                // insert elm into buf
     2029
     2030        }
     2031        T remove( Buffer(T) & mutex buf ) with(buf) {
     2032                if ( count == 0 ) `waitfor( insert : buf )`;
     2033                // remove elm from buf
     2034
     2035                return elm;
     2036        }
     2037}
     2038\end{cfa}
     2039\end{lrbox}
     2040
     2041\subfloat[Internal scheduling]{\label{f:BBInt}\usebox\myboxA}
     2042\hspace{1pt}
     2043\vrule
     2044\hspace{3pt}
     2045\subfloat[External scheduling]{\label{f:BBExt}\usebox\myboxB}
     2046
     2047\caption{Generic bounded buffer}
     2048\label{f:GenericBoundedBuffer}
     2049\end{figure}
     2050
      2051The @signal_block@ provides the opposite unblocking order: the signaller is moved to urgent and the signallee continues, and a thread is implicitly unblocked from urgent when the signallee exits or waits (dashed line).
      2052Signal block is used when the signallee is providing the cooperation needed by the signaller (\eg if the buffer is removed and a producer hands an item directly to a consumer, as in Figure~\ref{f:DatingSignalBlock}), so the signaller must wait until the signallee unblocks, provides the cooperation, exits the monitor to run concurrently, and passes control of the monitor to the signaller, which can immediately take advantage of the state change.
     2053Using @signal@ or @signal_block@ can be a dynamic decision based on whether the thread providing the cooperation arrives before or after the thread needing the cooperation.
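For example, an illustrative sketch of this dynamic choice (the condition names are hypothetical; the complete pattern appears in the dating service of Figure~\ref{f:DatingSignalBlock}):
\begin{cfa}
if ( ! empty( partner ) ) signal_block( partner );	$\C{// cooperating thread already waiting: hand off now}$
else wait( mine );	$\C{// otherwise wait for the cooperation to arrive}$
\end{cfa}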
     2054
     2055External scheduling in Figure~\ref{f:BBExt} simplifies internal scheduling by eliminating condition queues and @signal@/@wait@ (cases where it cannot are discussed shortly), and has existed in the programming language Ada for almost 40 years with variants in other languages~\cite{SR,ConcurrentC++,uC++}.
     2056While prior languages use external scheduling solely for thread interaction, \CFA generalizes it to both monitors and threads.
      2057External scheduling allows waiting for events from other threads while restricting unrelated events that would otherwise have to wait on condition queues in the monitor.
     2058Scheduling is controlled by the @waitfor@ statement, which atomically blocks the calling thread, releases the monitor lock, and restricts the function calls that can next acquire mutual exclusion.
     2059Specifically, a thread calling the monitor is unblocked directly from the calling queue based on function names that can fulfill the cooperation required by the signaller.
     2060(The linear search through the calling queue to locate a particular call can be reduced to $O(1)$.)
     2061Hence, the @waitfor@ has the same semantics as @signal_block@, where the signallee thread from the calling queue executes before the signaller, which waits on urgent.
     2062Now when a producer/consumer detects a full/empty buffer, the necessary cooperation for continuation is specified by indicating the next function call that can occur.
     2063For example, a producer detecting a full buffer must have cooperation from a consumer to remove an item so function @remove@ is accepted, which prevents producers from entering the monitor, and after a consumer calls @remove@, the producer waiting on urgent is \emph{implicitly} unblocked because it can now continue its insert operation.
      2064Hence, this mechanism is expressed in terms of control flow (the next accepted call), versus in terms of data (channels), as in Go/Rust @select@.
     2065While both mechanisms have strengths and weaknesses, \CFA uses the control-flow mechanism to be consistent with other language features.
     2066
      2067Figure~\ref{f:ReadersWriterLock} shows internal/external scheduling for a readers/writer lock with no barging, where threads are serviced in FIFO order to eliminate staleness/freshness problems among the reader/writer threads.
     2068For internal scheduling in Figure~\ref{f:RWInt}, the readers and writers wait on the same condition queue in FIFO order, making it impossible to tell if a waiting thread is a reader or writer.
      2069To recover the kind of thread, a \CFA condition can store user data in the node for a blocking thread at the @wait@, \ie whether the thread is a @READER@ or @WRITER@.
      2070An unblocked reader thread checks if the thread at the front of the queue is a reader and unblocks it, \ie the readers daisy-chain signal the next group of readers, demarcated by the next writer or the end of the queue.
     2071For external scheduling in Figure~\ref{f:RWExt}, a waiting reader checks if a writer is using the resource, and if so, restricts further calls until the writer exits by calling @EndWrite@.
     2072The writer does a similar action for each reader or writer using the resource.
     2073Note, no new calls to @StartRead@/@StartWrite@ may occur when waiting for the call to @EndRead@/@EndWrite@.
     2074
     2075\begin{figure}
     2076\centering
     2077\newbox\myboxA
     2078\begin{lrbox}{\myboxA}
     2079\begin{cfa}[aboveskip=0pt,belowskip=0pt]
     2080enum RW { READER, WRITER };
    19462081monitor ReadersWriter {
    1947         int rcnt, wcnt; // readers/writer using resource
     2082        int rcnt, wcnt; // readers/writer using resource
     2083        `condition RWers;`
    19482084};
    19492085void ?{}( ReadersWriter & rw ) with(rw) {
     
    19522088void EndRead( ReadersWriter & mutex rw ) with(rw) {
    19532089        rcnt -= 1;
     2090        if ( rcnt == 0 ) `signal( RWers )`;
    19542091}
    19552092void EndWrite( ReadersWriter & mutex rw ) with(rw) {
    19562093        wcnt = 0;
     2094        `signal( RWers );`
    19572095}
    19582096void StartRead( ReadersWriter & mutex rw ) with(rw) {
    1959         if ( wcnt > 0 ) `waitfor( EndWrite, rw );`
     2097        if ( wcnt !=0 || ! empty( RWers ) )
     2098                `wait( RWers, READER )`;
    19602099        rcnt += 1;
     2100        if ( ! empty(RWers) && `front(RWers) == READER` )
     2101                `signal( RWers )`;  // daisy-chain signalling
    19612102}
    19622103void StartWrite( ReadersWriter & mutex rw ) with(rw) {
    1963         if ( wcnt > 0 ) `waitfor( EndWrite, rw );`
    1964         else while ( rcnt > 0 ) `waitfor( EndRead, rw );`
     2104        if ( wcnt != 0 || rcnt != 0 ) `wait( RWers, WRITER )`;
     2105
    19652106        wcnt = 1;
    19662107}
    1967 
    19682108\end{cfa}
    19692109\end{lrbox}
    19702110
    1971 \subfloat[Generic bounded buffer, internal scheduling]{\label{f:BBInt}\usebox\myboxA}
    1972 \hspace{3pt}
     2111\newbox\myboxB
     2112\begin{lrbox}{\myboxB}
     2113\begin{cfa}[aboveskip=0pt,belowskip=0pt]
     2114
     2115monitor ReadersWriter {
     2116        int rcnt, wcnt; // readers/writer using resource
     2117
     2118};
     2119void ?{}( ReadersWriter & rw ) with(rw) {
     2120        rcnt = wcnt = 0;
     2121}
     2122void EndRead( ReadersWriter & mutex rw ) with(rw) {
     2123        rcnt -= 1;
     2124
     2125}
     2126void EndWrite( ReadersWriter & mutex rw ) with(rw) {
     2127        wcnt = 0;
     2128
     2129}
     2130void StartRead( ReadersWriter & mutex rw ) with(rw) {
     2131        if ( wcnt > 0 ) `waitfor( EndWrite : rw );`
     2132
     2133        rcnt += 1;
     2134
     2135
     2136}
     2137void StartWrite( ReadersWriter & mutex rw ) with(rw) {
     2138        if ( wcnt > 0 ) `waitfor( EndWrite : rw );`
     2139        else while ( rcnt > 0 ) `waitfor( EndRead : rw );`
     2140        wcnt = 1;
     2141}
     2142\end{cfa}
     2143\end{lrbox}
     2144
     2145\subfloat[Internal scheduling]{\label{f:RWInt}\usebox\myboxA}
     2146\hspace{1pt}
    19732147\vrule
    19742148\hspace{3pt}
    1975 \subfloat[Readers / writer lock, external scheduling]{\label{f:RWExt}\usebox\myboxB}
    1976 
    1977 \caption{Internal / external scheduling}
    1978 \label{f:InternalExternalScheduling}
     2149\subfloat[External scheduling]{\label{f:RWExt}\usebox\myboxB}
     2150
     2151\caption{Readers / writer lock}
     2152\label{f:ReadersWriterLock}
    19792153\end{figure}
    19802154
    1981 Figure~\ref{f:BBInt} can be transformed into external scheduling by removing the condition variables and signals/waits, and adding the following lines at the locations of the current @wait@s in @insert@/@remove@, respectively.
    1982 \begin{cfa}[aboveskip=2pt,belowskip=1pt]
    1983 if ( count == 10 ) `waitfor( remove, buffer )`;       |      if ( count == 0 ) `waitfor( insert, buffer )`;
    1984 \end{cfa}
    1985 Here, the producers/consumers detects a full/\-empty buffer and prevents more producers/consumers from entering the monitor until there is a free/empty slot in the buffer.
    1986 External scheduling is controlled by the @waitfor@ statement, which atomically blocks the calling thread, releases the monitor lock, and restricts the function calls that can next acquire mutual exclusion.
    1987 If the buffer is full, only calls to @remove@ can acquire the buffer, and if the buffer is empty, only calls to @insert@ can acquire the buffer.
    1988 Threads calling excluded functions block outside of (external to) the monitor on the calling queue, versus blocking on condition queues inside of (internal to) the monitor.
    1989 Figure~\ref{f:RWExt} shows a readers/writer lock written using external scheduling, where a waiting reader detects a writer using the resource and restricts further calls until the writer exits by calling @EndWrite@.
    1990 The writer does a similar action for each reader or writer using the resource.
    1991 Note, no new calls to @StarRead@/@StartWrite@ may occur when waiting for the call to @EndRead@/@EndWrite@.
    1992 External scheduling allows waiting for events from other threads while restricting unrelated events, that would otherwise have to wait on conditions in the monitor.
    1993 The mechnaism can be done in terms of control flow, \eg Ada @accept@ or \uC @_Accept@, or in terms of data, \eg Go @select@ on channels.
    1994 While both mechanisms have strengths and weaknesses, this project uses the control-flow mechanism to be consistent with other language features.
    1995 % Two challenges specific to \CFA for external scheduling are loose object-definitions (see Section~\ref{s:LooseObjectDefinitions}) and multiple-monitor functions (see Section~\ref{s:Multi-MonitorScheduling}).
    1996 
    1997 Figure~\ref{f:DatingService} shows a dating service demonstrating non-blocking and blocking signalling.
    1998 The dating service matches girl and boy threads with matching compatibility codes so they can exchange phone numbers.
    1999 A thread blocks until an appropriate partner arrives.
    2000 The complexity is exchanging phone numbers in the monitor because of the mutual-exclusion property.
    2001 For signal scheduling, the @exchange@ condition is necessary to block the thread finding the match, while the matcher unblocks to take the opposite number, post its phone number, and unblock the partner.
    2002 For signal-block scheduling, the implicit urgent-queue replaces the explict @exchange@-condition and @signal_block@ puts the finding thread on the urgent condition and unblocks the matcher.
    2003 The dating service is an example of a monitor that cannot be written using external scheduling because it requires knowledge of calling parameters to make scheduling decisions, and parameters of waiting threads are unavailable;
    2004 as well, an arriving thread may not find a partner and must wait, which requires a condition variable, and condition variables imply internal scheduling.
    2005 Furthermore, barging corrupts the dating service during an exchange because a barger may also match and change the phone numbers, invalidating the previous exchange phone number.
    2006 Putting loops around the @wait@s does not correct the problem;
    2007 the simple solution must be restructured to account for barging.
     2155Finally, external scheduling requires urgent to be a stack, because the signaller expects to execute immediately after the specified monitor call has exited or waited.
      2156Internal scheduling performing multiple signalling results in unblocking from urgent in the reverse order from signalling.
     2157It is rare for the unblocking order to be important as an unblocked thread can be time-sliced immediately after leaving the monitor.
     2158If the unblocking order is important, multiple signalling can be restructured into daisy-chain signalling, where each thread signals the next thread.
     2159Hence, \CFA uses a single urgent stack to correctly handle @waitfor@ and adequately support both forms of signalling.
     2160(Advanced @waitfor@ features are discussed in Section~\ref{s:ExtendedWaitfor}.)
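For example, a hedged sketch of daisy-chain signalling using a simple one-shot gate (the type and names are illustrative), where each unblocked thread restarts the next waiter so the FIFO order of the condition queue is preserved:
\begin{cfa}
monitor Gate { condition waiters; bool opened; };
void ?{}( Gate & g ) { g.opened = false; }
void open( Gate & mutex g ) with(g) {
	opened = true;
	signal( waiters );	$\C{// wake only the first waiter}$
}
void pass( Gate & mutex g ) with(g) {
	if ( ! opened ) wait( waiters );
	signal( waiters );	$\C{// daisy-chain: each unblocked thread wakes the next in FIFO order}$
}
\end{cfa}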
    20082161
    20092162\begin{figure}
     
    20192172};
    20202173int girl( DS & mutex ds, int phNo, int ccode ) {
    2021         if ( is_empty( Boys[ccode] ) ) {
     2174        if ( empty( Boys[ccode] ) ) {
    20222175                wait( Girls[ccode] );
    20232176                GirlPhNo = phNo;
     
    20462199};
    20472200int girl( DS & mutex ds, int phNo, int ccode ) {
    2048         if ( is_empty( Boys[ccode] ) ) { // no compatible
     2201        if ( empty( Boys[ccode] ) ) { // no compatible
    20492202                wait( Girls[ccode] ); // wait for boy
    20502203                GirlPhNo = phNo; // make phone number available
     
    20662219\qquad
    20672220\subfloat[\lstinline@signal_block@]{\label{f:DatingSignalBlock}\usebox\myboxB}
    2068 \caption{Dating service}
    2069 \label{f:DatingService}
      2221\caption{Dating service monitor}
     2222\label{f:DatingServiceMonitor}
    20702223\end{figure}
    20712224
    2072 In summation, for internal scheduling, non-blocking signalling (as in the producer/consumer example) is used when the signaller is providing the cooperation for a waiting thread;
    2073 the signaller enters the monitor and changes state, detects a waiting threads that can use the state, performs a non-blocking signal on the condition queue for the waiting thread, and exits the monitor to run concurrently.
    2074 The waiter unblocks next from the urgent queue, uses/takes the state, and exits the monitor.
    2075 Blocking signal is the reverse, where the waiter is providing the cooperation for the signalling thread;
    2076 the signaller enters the monitor, detects a waiting thread providing the necessary state, performs a blocking signal to place it on the urgent queue and unblock the waiter.
    2077 The waiter changes state and exits the monitor, and the signaller unblocks next from the urgent queue to use/take the state.
     2225Figure~\ref{f:DatingServiceMonitor} shows a dating service demonstrating non-blocking and blocking signalling.
     2226The dating service matches girl and boy threads with matching compatibility codes so they can exchange phone numbers.
     2227A thread blocks until an appropriate partner arrives.
     2228The complexity is exchanging phone numbers in the monitor because of the mutual-exclusion property.
     2229For signal scheduling, the @exchange@ condition is necessary to block the thread finding the match, while the matcher unblocks to take the opposite number, post its phone number, and unblock the partner.
      2230For signal-block scheduling, the implicit urgent stack replaces the explicit @exchange@-condition, and @signal_block@ puts the finding thread on the urgent stack and unblocks the matcher.
     2231
     2232The dating service is an important example of a monitor that cannot be written using external scheduling.
     2233First, because scheduling requires knowledge of calling parameters to make matching decisions, and parameters of calling threads are unavailable within the monitor.
     2234For example, a girl thread within the monitor cannot examine the @ccode@ of boy threads waiting on the calling queue to determine if there is a matching partner.
     2235Second, because a scheduling decision may be delayed when there is no immediate match, which requires a condition queue for waiting, and condition queues imply internal scheduling.
     2236For example, if a girl thread could determine there is no calling boy with the same @ccode@, it must wait until a matching boy arrives.
     2237Finally, barging corrupts the dating service during an exchange because a barger may also match and change the phone numbers, invalidating the previous exchange phone number.
      2238This situation shows rechecking the waiting condition and waiting again (signals-as-hints) fails, requiring significant restructuring to account for barging.
    20782239
    20792240Both internal and external scheduling extend to multiple monitors in a natural way.
    20802241\begin{cquote}
    2081 \begin{tabular}{@{}l@{\hspace{3\parindentlnth}}l@{}}
     2242\begin{tabular}{@{}l@{\hspace{2\parindentlnth}}l@{}}
    20822243\begin{cfa}
    20832244monitor M { `condition e`; ... };
     
    20902251&
    20912252\begin{cfa}
    2092 void rtn$\(_1\)$( M & mutex m1, M & mutex m2 );
     2253void rtn$\(_1\)$( M & mutex m1, M & mutex m2 ); // overload rtn
    20932254void rtn$\(_2\)$( M & mutex m1 );
    20942255void bar( M & mutex m1, M & mutex m2 ) {
    2095         ... waitfor( `rtn` ); ...       // $\LstCommentStyle{waitfor( rtn\(_1\), m1, m2 )}$
    2096         ... waitfor( `rtn, m1` ); ... // $\LstCommentStyle{waitfor( rtn\(_2\), m1 )}$
     2256        ... waitfor( `rtn`${\color{red}\(_1\)}$ ); ...       // $\LstCommentStyle{waitfor( rtn\(_1\) : m1, m2 )}$
     2257        ... waitfor( `rtn${\color{red}\(_2\)}$ : m1` ); ...
    20972258}
    20982259\end{cfa}
     
     21012262For @wait( e )@, the default semantics is to atomically block the waiter and release all acquired mutex parameters, \ie @wait( e, m1, m2 )@.
    21022263To override the implicit multi-monitor wait, specific mutex parameter(s) can be specified, \eg @wait( e, m1 )@.
    2103 Wait cannot statically verifies the released monitors are the acquired mutex-parameters without disallowing separately compiled helper functions calling @wait@.
    2104 While \CC supports bulk locking, @wait@ only accepts a single lock for a condition variable, so bulk locking with condition variables is asymmetric.
     2264Wait cannot statically verify the released monitors are the acquired mutex-parameters without disallowing separately compiled helper functions calling @wait@.
     2265While \CC supports bulk locking, @wait@ only accepts a single lock for a condition queue, so bulk locking with condition queues is asymmetric.
    21052266Finally, a signaller,
    21062267\begin{cfa}
     
    21112272must have acquired at least the same locks as the waiting thread signalled from a condition queue to allow the locks to be passed, and hence, prevent barging.
    21122273
    2113 Similarly, for @waitfor( rtn )@, the default semantics is to atomically block the acceptor and release all acquired mutex parameters, \ie @waitfor( rtn, m1, m2 )@.
    2114 To override the implicit multi-monitor wait, specific mutex parameter(s) can be specified, \eg @waitfor( rtn, m1 )@.
     2274Similarly, for @waitfor( rtn )@, the default semantics is to atomically block the acceptor and release all acquired mutex parameters, \ie @waitfor( rtn : m1, m2 )@.
     2275To override the implicit multi-monitor wait, specific mutex parameter(s) can be specified, \eg @waitfor( rtn : m1 )@.
    21152276@waitfor@ does statically verify the monitor types passed are the same as the acquired mutex-parameters of the given function or function pointer, hence the function (pointer) prototype must be accessible.
    21162277% When an overloaded function appears in an @waitfor@ statement, calls to any function with that name are accepted.
     
    21202281void rtn( M & mutex m );
    21212282`int` rtn( M & mutex m );
    2122 waitfor( (`int` (*)( M & mutex ))rtn, m );
    2123 \end{cfa}
    2124 
    2125 The ability to release a subset of acquired monitors can result in a \newterm{nested monitor}~\cite{Lister77} deadlock.
     2283waitfor( (`int` (*)( M & mutex ))rtn : m );
     2284\end{cfa}
     2285
     2286The ability to release a subset of acquired monitors can result in a \newterm{nested monitor}~\cite{Lister77} deadlock (see Section~\ref{s:MutexAcquisition}).
     2287\newpage
    21262288\begin{cfa}
    21272289void foo( M & mutex m1, M & mutex m2 ) {
    2128         ... wait( `e, m1` ); ...                                $\C{// release m1, keeping m2 acquired )}$
    2129 void bar( M & mutex m1, M & mutex m2 ) {        $\C{// must acquire m1 and m2 )}$
     2290        ... wait( `e, m1` ); ...                                $\C{// release m1, keeping m2 acquired}$
     2291void bar( M & mutex m1, M & mutex m2 ) {        $\C{// must acquire m1 and m2}$
    21302292        ... signal( `e` ); ...
    21312293\end{cfa}
    21322294The @wait@ only releases @m1@ so the signalling thread cannot acquire @m1@ and @m2@ to enter @bar@ and @signal@ the condition.
    2133 While deadlock can occur with multiple/nesting acquisition, this is a consequence of locks, and by extension monitors, not being perfectly composable.
    2134 
      2295While deadlock can occur with multiple/nesting acquisition, this is a consequence of locks, and by extension monitor locking, not being perfectly composable.
    21352296
    21362297
    21372298\subsection{\texorpdfstring{Extended \protect\lstinline@waitfor@}{Extended waitfor}}
     2299\label{s:ExtendedWaitfor}
    21382300
    21392301Figure~\ref{f:ExtendedWaitfor} shows the extended form of the @waitfor@ statement to conditionally accept one of a group of mutex functions, with an optional statement to be performed \emph{after} the mutex function finishes.
     
    21462308Hence, the terminating @else@ clause allows a conditional attempt to accept a call without blocking.
     21472309If both @timeout@ and @else@ clauses are present, the @else@ must be conditional, or the @timeout@ is never triggered.
    2148 There is also a traditional future wait queue (not shown) (\eg Microsoft (@WaitForMultipleObjects@)), to wait for a specified number of future elements in the queue.
     2310There is also a traditional future wait queue (not shown) (\eg Microsoft @WaitForMultipleObjects@), to wait for a specified number of future elements in the queue.
     2311Finally, there is a shorthand for specifying multiple functions using the same set of monitors: @waitfor( f, g, h : m1, m2, m3 )@.
    21492312
    21502313\begin{figure}
     
    21732336The right example accepts either @mem1@ or @mem2@ if @C1@ and @C2@ are true.
    21742337
    2175 An interesting use of @waitfor@ is accepting the @mutex@ destructor to know when an object is deallocated, \eg assume the bounded buffer is restructred from a monitor to a thread with the following @main@.
     2338An interesting use of @waitfor@ is accepting the @mutex@ destructor to know when an object is deallocated, \eg assume the bounded buffer is restructured from a monitor to a thread with the following @main@.
    21762339\begin{cfa}
    21772340void main( Buffer(T) & buffer ) with(buffer) {
    21782341        for () {
    2179                 `waitfor( ^?{}, buffer )` break;
    2180                 or when ( count != 20 ) waitfor( insert, buffer ) { ... }
    2181                 or when ( count != 0 ) waitfor( remove, buffer ) { ... }
     2342                `waitfor( ^?{} : buffer )` break;
     2343                or when ( count != 20 ) waitfor( insert : buffer ) { ... }
     2344                or when ( count != 0 ) waitfor( remove : buffer ) { ... }
    21822345        }
    21832346        // clean up
     
    22712434To support this efficient semantics (and prevent barging), the implementation maintains a list of monitors acquired for each blocked thread.
    22722435When a signaller exits or waits in a monitor function/statement, the front waiter on urgent is unblocked if all its monitors are released.
    2273 Implementing a fast subset check for the necessary released monitors is important.
     2436Implementing a fast subset check for the necessary released monitors is important and discussed in the following sections.
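As a conceptual sketch of this check (the names are hypothetical, not the runtime's actual data structures), the front waiter on urgent can be unblocked only if the monitors it needs form a subset of the monitors being released:
\begin{cfa}
struct monitor_t;	$\C{// hypothetical monitor descriptor}$
bool subset( monitor_t * needed[], int nneeded, monitor_t * released[], int nreleased ) {
	for ( int i = 0; i < nneeded; i += 1 ) {	$\C{// every needed monitor must be released}$
		bool found = false;
		for ( int j = 0; j < nreleased; j += 1 )
			if ( needed[i] == released[j] ) { found = true; break; }
		if ( ! found ) return false;
	}
	return true;
}
\end{cfa}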
    22742437% The benefit is encapsulating complexity into only two actions: passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met.
    22752438
    22762439
    2277 \subsection{Loose Object Definitions}
    2278 \label{s:LooseObjectDefinitions}
    2279 
    2280 In an object-oriented programming language, a class includes an exhaustive list of operations.
    2281 A new class can add members via static inheritance but the subclass still has an exhaustive list of operations.
    2282 (Dynamic member adding, \eg JavaScript~\cite{JavaScript}, is not considered.)
    2283 In the object-oriented scenario, the type and all its operators are always present at compilation (even separate compilation), so it is possible to number the operations in a bit mask and use an $O(1)$ compare with a similar bit mask created for the operations specified in a @waitfor@.
    2284 
    2285 However, in \CFA, monitor functions can be statically added/removed in translation units, making a fast subset check difficult.
    2286 \begin{cfa}
    2287         monitor M { ... }; // common type, included in .h file
    2288 translation unit 1
    2289         void `f`( M & mutex m );
    2290         void g( M & mutex m ) { waitfor( `f`, m ); }
    2291 translation unit 2
    2292         void `f`( M & mutex m ); $\C{// replacing f and g for type M in this translation unit}$
    2293         void `g`( M & mutex m );
    2294         void h( M & mutex m ) { waitfor( `f`, m ) or waitfor( `g`, m ); } $\C{// extending type M in this translation unit}$
    2295 \end{cfa}
    2296 The @waitfor@ statements in each translation unit cannot form a unique bit-mask because the monitor type does not carry that information.
     2440\subsection{\texorpdfstring{\protect\lstinline@waitfor@ Implementation}{waitfor Implementation}}
     2441\label{s:waitforImplementation}
     2442
     2443In a statically-typed object-oriented programming language, a class has an exhaustive list of members, even when members are added via static inheritance (see Figure~\ref{f:uCinheritance}).
      2444Knowing all members at compilation (even separate compilation) allows them to be uniquely numbered, so the accept-statement implementation can use a fast/compact bit mask with $O(1)$ compare.
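As a conceptual sketch (the identifiers are illustrative), the numbering reduces an accept set to a bit mask tested in constant time:
\begin{cfa}
enum { F_ID, G_ID, H_ID };	$\C{// hypothetical numbering of the mutex members}$
unsigned int accept_set = (1 << F_ID) | (1 << G_ID);	$\C{// set built by the accept statement}$
bool is_accepted( unsigned int set, unsigned int id ) {
	return (set & (1 << id)) != 0;	$\C{// O(1) compare}$
}
\end{cfa}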
     2445
     2446\begin{figure}
     2447\centering
     2448\begin{lrbox}{\myboxA}
     2449\begin{uC++}[aboveskip=0pt,belowskip=0pt]
     2450$\emph{translation unit 1}$
     2451_Monitor B { // common type in .h file
     2452        _Mutex virtual void `f`( ... );
     2453        _Mutex virtual void `g`( ... );
     2454        _Mutex virtual void w1( ... ) { ... _Accept(`f`, `g`); ... }
     2455};
     2456$\emph{translation unit 2}$
     2457// include B
     2458_Monitor D : public B { // inherit
     2459        _Mutex void `h`( ... ); // add
     2460        _Mutex void w2( ... ) { ... _Accept(`f`, `h`); ... }
     2461};
     2462\end{uC++}
     2463\end{lrbox}
     2464
     2465\begin{lrbox}{\myboxB}
     2466\begin{cfa}[aboveskip=0pt,belowskip=0pt]
     2467$\emph{translation unit 1}$
     2468monitor M { ... }; // common type in .h file
     2469void `f`( M & mutex m, ... );
     2470void `g`( M & mutex m, ... );
     2471void w1( M & mutex m, ... ) { ... waitfor(`f`, `g` : m); ... }
     2472
     2473$\emph{translation unit 2}$
     2474// include M
     2475extern void `f`( M & mutex m, ... ); // import f but not g
     2476void `h`( M & mutex m ); // add
     2477void w2( M & mutex m, ... ) { ... waitfor(`f`, `h` : m); ... }
     2478
     2479\end{cfa}
     2480\end{lrbox}
     2481
     2482\subfloat[\uC]{\label{f:uCinheritance}\usebox\myboxA}
     2483\hspace{3pt}
     2484\vrule
     2485\hspace{3pt}
     2486\subfloat[\CFA]{\label{f:CFinheritance}\usebox\myboxB}
     2487\caption{Member / Function visibility}
     2488\label{f:MemberFunctionVisibility}
     2489\end{figure}
     2490
      2491However, the @waitfor@ statement in translation unit 2 (see Figure~\ref{f:CFinheritance}) cannot see function @g@ in translation unit 1, precluding a unique numbering for a bit mask, because the monitor type only carries the protected shared-data.
     2492(A possible way to construct a dense mapping is at link or load-time.)
    22972493Hence, function pointers are used to identify the functions listed in the @waitfor@ statement, stored in a variable-sized array.
    2298 Then, the same implementation approach used for the urgent stack is used for the calling queue.
    2299 Each caller has a list of monitors acquired, and the @waitfor@ statement performs a (usually short) linear search matching functions in the @waitfor@ list with called functions, and then verifying the associated mutex locks can be transfers.
    2300 (A possible way to construct a dense mapping is at link or load-time.)
     2494Then, the same implementation approach used for the urgent stack (see Section~\ref{s:Scheduling}) is used for the calling queue.
      2495Each caller has a list of monitors acquired, and the @waitfor@ statement performs a (short) linear search matching functions in the @waitfor@ list with called functions, and then verifies the associated mutex locks can be transferred.
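As a conceptual sketch of the matching step (a hypothetical representation), a @waitfor@ clause stores the listed function pointers and an arriving call is matched by a short linear search:
\begin{cfa}
bool accepts( void (* listed[])(), int n, void (* called)() ) {
	for ( int i = 0; i < n; i += 1 )	$\C{// usually a short list}$
		if ( listed[i] == called ) return true;	$\C{// then verify the mutex locks can be transferred}$
	return false;
}
\end{cfa}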
    23012496
    23022497
     
    23132508The solution is for the programmer to disambiguate:
    23142509\begin{cfa}
    2315 waitfor( f, `m2` ); $\C{// wait for call to f with argument m2}$
     2510waitfor( f : `m2` ); $\C{// wait for call to f with argument m2}$
    23162511\end{cfa}
    23172512Both locks are acquired by function @g@, so when function @f@ is called, the lock for monitor @m2@ is passed from @g@ to @f@, while @g@ still holds lock @m1@.
     
    23202515monitor M { ... };
    23212516void f( M & mutex m1, M & mutex m2 );
    2322 void g( M & mutex m1, M & mutex m2 ) { waitfor( f, `m1, m2` ); $\C{// wait for call to f with arguments m1 and m2}$
     2517void g( M & mutex m1, M & mutex m2 ) { waitfor( f : `m1, m2` ); $\C{// wait for call to f with arguments m1 and m2}$
    23232518\end{cfa}
    23242519Again, the set of monitors passed to the @waitfor@ statement must be entirely contained in the set of monitors already acquired by the accepting function.
    2325 Also, the order of the monitors in a @waitfor@ statement is unimportant.
    2326 
    2327 Figure~\ref{f:UnmatchedMutexSets} shows an example where, for internal and external scheduling with multiple monitors, a signalling or accepting thread must match exactly, \ie partial matching results in waiting.
    2328 For both examples, the set of monitors is disjoint so unblocking is impossible.
     2520% Also, the order of the monitors in a @waitfor@ statement must match the order of the mutex parameters.
     2521
     2522Figure~\ref{f:UnmatchedMutexSets} shows internal and external scheduling with multiple monitors that must match exactly with a signalling or accepting thread, \ie partial matching results in waiting.
     2523In both cases, the set of monitors is disjoint so unblocking is impossible.
    23292524
    23302525\begin{figure}
     
    23552550}
    23562551void g( M1 & mutex m1, M2 & mutex m2 ) {
    2357         waitfor( f, m1, m2 );
     2552        waitfor( f : m1, m2 );
    23582553}
    23592554g( `m11`, m2 ); // block on accept
     
    23702565\end{figure}
    23712566
    2372 
    2373 \subsection{\texorpdfstring{\protect\lstinline@mutex@ Threads}{mutex Threads}}
    2374 
    2375 Threads in \CFA can also be monitors to allow \emph{direct communication} among threads, \ie threads can have mutex functions that are called by other threads.
    2376 Hence, all monitor features are available when using threads.
    2377 Figure~\ref{f:DirectCommunication} shows a comparison of direct call communication in \CFA with direct channel communication in Go.
    2378 (Ada provides a similar mechanism to the \CFA direct communication.)
    2379 The program main in both programs communicates directly with the other thread versus indirect communication where two threads interact through a passive monitor.
    2380 Both direct and indirection thread communication are valuable tools in structuring concurrent programs.
    2381 
    23822567\begin{figure}
    23832568\centering
     
    23862571
    23872572struct Msg { int i, j; };
    2388 thread GoRtn { int i;  float f;  Msg m; };
     2573monitor thread GoRtn { int i;  float f;  Msg m; };
    23892574void mem1( GoRtn & mutex gortn, int i ) { gortn.i = i; }
    23902575void mem2( GoRtn & mutex gortn, float f ) { gortn.f = f; }
     
    23962581        for () {
    23972582
    2398                 `waitfor( mem1, gortn )` sout | i;  // wait for calls
    2399                 or `waitfor( mem2, gortn )` sout | f;
    2400                 or `waitfor( mem3, gortn )` sout | m.i | m.j;
    2401                 or `waitfor( ^?{}, gortn )` break;
     2583                `waitfor( mem1 : gortn )` sout | i;  // wait for calls
     2584                or `waitfor( mem2 : gortn )` sout | f;
     2585                or `waitfor( mem3 : gortn )` sout | m.i | m.j;
     2586                or `waitfor( ^?{} : gortn )` break; // low priority
    24022587
    24032588        }
     
    24532638\hspace{3pt}
    24542639\subfloat[Go]{\label{f:Gochannel}\usebox\myboxB}
    2455 \caption{Direct communication}
    2456 \label{f:DirectCommunication}
     2640\caption{Direct versus indirect communication}
     2641\label{f:DirectCommunicationComparison}
     2642
     2643\medskip
     2644
     2645\begin{cfa}
     2646monitor thread DatingService {
     2647        condition Girls[CompCodes], Boys[CompCodes];
     2648        int girlPhoneNo, boyPhoneNo, ccode;
     2649};
     2650int girl( DatingService & mutex ds, int phoneno, int code ) with( ds ) {
     2651        girlPhoneNo = phoneno;  ccode = code;
     2652        `wait( Girls[ccode] );`                                                         $\C{// wait for boy}$
     2653        girlPhoneNo = phoneno;  return boyPhoneNo;
     2654}
     2655int boy( DatingService & mutex ds, int phoneno, int code ) with( ds ) {
     2656        boyPhoneNo = phoneno;  ccode = code;
     2657        `wait( Boys[ccode] );`                                                          $\C{// wait for girl}$
     2658        boyPhoneNo = phoneno;  return girlPhoneNo;
     2659}
     2660void main( DatingService & ds ) with( ds ) {                    $\C{// thread starts, ds defaults to mutex}$
     2661        for () {
     2662                waitfor( ^?{} ) break;                                                  $\C{// high priority}$
     2663                or waitfor( girl )                                                              $\C{// girl called, compatible boy ? restart boy then girl}$
     2664                        if ( ! is_empty( Boys[ccode] ) ) { `signal_block( Boys[ccode] );  signal_block( Girls[ccode] );` }
     2665                or waitfor( boy ) {                                                             $\C{// boy called, compatible girl ? restart girl then boy}$
     2666                        if ( ! is_empty( Girls[ccode] ) ) { `signal_block( Girls[ccode] );  signal_block( Boys[ccode] );` }
     2667        }
     2668}
     2669\end{cfa}
     2670\caption{Direct communication dating service}
     2671\label{f:DirectCommunicationDatingService}
    24572672\end{figure}
    24582673
     
    24692684void main( Ping & pi ) {
    24702685        for ( 10 ) {
    2471                 `waitfor( ping, pi );`
     2686                `waitfor( ping : pi );`
    24722687                `pong( po );`
    24732688        }
     
    24822697        for ( 10 ) {
    24832698                `ping( pi );`
    2484                 `waitfor( pong, po );`
     2699                `waitfor( pong : po );`
    24852700        }
    24862701}
     
    24972712
    24982713
    2499 \subsection{Execution Properties}
    2500 
    2501 Table~\ref{t:ObjectPropertyComposition} shows how the \CFA high-level constructs cover 3 fundamental execution properties: thread, stateful function, and mutual exclusion.
    2502 Case 1 is a basic object, with none of the new execution properties.
    2503 Case 2 allows @mutex@ calls to Case 1 to protect shared data.
    2504 Case 3 allows stateful functions to suspend/resume but restricts operations because the state is stackless.
    2505 Case 4 allows @mutex@ calls to Case 3 to protect shared data.
    2506 Cases 5 and 6 are the same as 3 and 4 without restriction because the state is stackful.
    2507 Cases 7 and 8 are rejected because a thread cannot execute without a stackful state in a preemptive environment when context switching from the signal handler.
    2508 Cases 9 and 10 have a stackful thread without and with @mutex@ calls.
    2509 For situations where threads do not require direct communication, case 9 provides faster creation/destruction by eliminating @mutex@ setup.
    2510 
    2511 \begin{table}
    2512 \caption{Object property composition}
    2513 \centering
    2514 \label{t:ObjectPropertyComposition}
    2515 \renewcommand{\arraystretch}{1.25}
    2516 %\setlength{\tabcolsep}{5pt}
    2517 \begin{tabular}{c|c||l|l}
    2518 \multicolumn{2}{c||}{object properties} & \multicolumn{2}{c}{mutual exclusion} \\
    2519 \hline
    2520 thread  & stateful                              & \multicolumn{1}{c|}{No} & \multicolumn{1}{c}{Yes} \\
    2521 \hline
    2522 \hline
    2523 No              & No                                    & \textbf{1}\ \ \ aggregate type                & \textbf{2}\ \ \ @monitor@ aggregate type \\
    2524 \hline
    2525 No              & Yes (stackless)               & \textbf{3}\ \ \ @generator@                   & \textbf{4}\ \ \ @monitor@ @generator@ \\
    2526 \hline
    2527 No              & Yes (stackful)                & \textbf{5}\ \ \ @coroutine@                   & \textbf{6}\ \ \ @monitor@ @coroutine@ \\
    2528 \hline
    2529 Yes             & No / Yes (stackless)  & \textbf{7}\ \ \ {\color{red}rejected} & \textbf{8}\ \ \ {\color{red}rejected} \\
    2530 \hline
    2531 Yes             & Yes (stackful)                & \textbf{9}\ \ \ @thread@                              & \textbf{10}\ \ @monitor@ @thread@ \\
    2532 \end{tabular}
    2533 \end{table}
     2714\subsection{\texorpdfstring{\protect\lstinline@monitor@ Generators / Coroutines / Threads}{monitor Generators / Coroutines / Threads}}
     2715
     2716\CFA generators, coroutines, and threads can also be monitors (Table~\ref{t:ExecutionPropertyComposition} cases 4, 6, 12) allowing safe \emph{direct communication} with threads, \ie the custom types can have mutex functions that are called by other threads.
     2717All monitor features are available within these mutex functions.
     2718For example, if the formatter generator (or coroutine equivalent) in Figure~\ref{f:CFAFormatGen} is extended with the monitor property and this interface function is used to communicate with the formatter:
     2719\begin{cfa}
      2720void fmt( Fmt & mutex fmt, char ch ) { fmt.ch = ch; resume( fmt ); }
     2721\end{cfa}
     2722multiple threads can safely pass characters for formatting.
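For example, a hypothetical usage sketch (assuming the @Fmt@ type from Figure~\ref{f:CFAFormatGen} and the @fmt@ interface function above):
\begin{cfa}
Fmt shared;	$\C{// one formatter shared by all clients}$
thread Client {};
void main( Client & c ) {
	const char * msg = "hello";
	for ( int i = 0; msg[i] != '\0'; i += 1 )
		fmt( shared, msg[i] );	$\C{// mutex call serializes characters from multiple clients}$
}
int main() {
	Client clients[4];	$\C{// threads start here and join at block exit}$
}
\end{cfa}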
     2723
     2724Figure~\ref{f:DirectCommunicationComparison} shows a comparison of direct call-communication in \CFA versus indirect channel-communication in Go.
     2725(Ada has a similar mechanism to \CFA direct communication.)
     2726The program thread in \CFA @main@ uses the call/return paradigm to directly communicate with the @GoRtn main@, whereas Go switches to the channel paradigm to indirectly communicate with the goroutine.
     2727Communication by multiple threads is safe for the @gortn@ thread via mutex calls in \CFA or channel assignment in Go.
     2728
     2729Figure~\ref{f:DirectCommunicationDatingService} shows the dating-service problem in Figure~\ref{f:DatingServiceMonitor} extended from indirect monitor communication to direct thread communication.
      2730When converting a monitor to a thread (server), the coding pattern is to move as much code as possible from the accepted members into the thread main so it does as much work as possible.
     2731Notice, the dating server is postponing requests for an unspecified time while continuing to accept new requests.
     2732For complex servers (web-servers), there can be hundreds of lines of code in the thread main and safe interaction with clients can be complex.
    25342733
    25352734
     
    25372736
    25382737For completeness and efficiency, \CFA provides a standard set of low-level locks: recursive mutex, condition, semaphore, barrier, \etc, and atomic instructions: @fetchAssign@, @fetchAdd@, @testSet@, @compareSet@, \etc.
    2539 Some of these low-level mechanism are used in the \CFA runtime, but we strongly advocate using high-level mechanisms whenever possible.
      2738Some of these low-level mechanisms are used to build the \CFA runtime, but we always advocate using high-level mechanisms whenever possible.
    25402739
    25412740
     
    25802779\begin{cfa}
    25812780struct Adder {
    2582     int * row, cols;
     2781        int * row, cols;
    25832782};
    25842783int operator()() {
     
    26392838\label{s:RuntimeStructureCluster}
    26402839
    2641 A \newterm{cluster} is a collection of threads and virtual processors (abstract kernel-thread) that execute the (user) threads from its own ready queue (like an OS executing kernel threads).
     2840A \newterm{cluster} is a collection of user and kernel threads, where the kernel threads run the user threads from the cluster's ready queue, and the operating system runs the kernel threads on the processors from its ready queue.
     2841The term \newterm{virtual processor} is introduced as a synonym for kernel thread to disambiguate between user and kernel thread.
     2842From the language perspective, a virtual processor is an actual processor (core).
     2843
    26422844The purpose of a cluster is to control the amount of parallelism that is possible among threads, plus scheduling and other execution defaults.
    26432845The default cluster-scheduler is single-queue multi-server, which provides automatic load-balancing of threads on processors.
     
    26582860Programs may use more virtual processors than hardware processors.
    26592861On a multiprocessor, kernel threads are distributed across the hardware processors resulting in virtual processors executing in parallel.
    2660 (It is possible to use affinity to lock a virtual processor onto a particular hardware processor~\cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx}, which is used when caching issues occur or for heterogeneous hardware processors.)
     2862(It is possible to use affinity to lock a virtual processor onto a particular hardware processor~\cite{affinityLinux,affinityWindows}, which is used when caching issues occur or for heterogeneous hardware processors.) %, affinityFreebsd, affinityNetbsd, affinityMacosx
    26612863The \CFA runtime attempts to block unused processors and unblock processors as the system load increases;
    2662 balancing the workload with processors is difficult because it requires future knowledge, \ie what will the applicaton workload do next.
     2864balancing the workload with processors is difficult because it requires future knowledge, \ie what will the application workload do next.
    26632865Preemption occurs on virtual processors rather than user threads, via operating-system interrupts.
    26642866Thus virtual processors execute user threads, where preemption frequency applies to a virtual processor, so preemption occurs randomly across the executed user threads.
     
     26952897Nondeterministic preemption provides fairness in the presence of long-running threads, and forces concurrent programmers to write more robust programs, rather than relying on the code between cooperative scheduling points being atomic.
    26962898This atomic reliance can fail on multi-core machines, because execution across cores is nondeterministic.
    2697 A different reason for not supporting preemption is that it significantly complicates the runtime system, \eg Microsoft runtime does not support interrupts and on Linux systems, interrupts are complex (see below).
     2899A different reason for not supporting preemption is that it significantly complicates the runtime system, \eg Windows runtime does not support interrupts and on Linux systems, interrupts are complex (see below).
    26982900Preemption is normally handled by setting a countdown timer on each virtual processor.
    2699 When the timer expires, an interrupt is delivered, and the interrupt handler resets the countdown timer, and if the virtual processor is executing in user code, the signal handler performs a user-level context-switch, or if executing in the language runtime kernel, the preemption is ignored or rolled forward to the point where the runtime kernel context switches back to user code.
      2901When the timer expires, an interrupt is delivered and its signal handler resets the countdown timer. If the virtual processor is executing in user code, the signal handler performs a user-level context-switch; if it is executing in the language runtime kernel, the preemption is ignored or rolled forward to the point where the runtime kernel context switches back to user code.
    27002902Multiple signal handlers may be pending.
    27012903When control eventually switches back to the signal handler, it returns normally, and execution continues in the interrupted user thread, even though the return from the signal handler may be on a different kernel thread than the one where the signal is delivered.
    27022904The only issue with this approach is that signal masks from one kernel thread may be restored on another as part of returning from the signal handler;
    27032905therefore, the same signal mask is required for all virtual processors in a cluster.
    2704 Because preemption frequency is usually long (1 millisecond) performance cost is negligible.
    2705 
    2706 Linux switched a decade ago from specific to arbitrary process signal-delivery for applications with multiple kernel threads.
    2707 \begin{cquote}
    2708 A process-directed signal may be delivered to any one of the threads that does not currently have the signal blocked.
    2709 If more than one of the threads has the signal unblocked, then the kernel chooses an arbitrary thread to which it will deliver the signal.
    2710 SIGNAL(7) - Linux Programmer's Manual
    2711 \end{cquote}
      2906Because the preemption interval is usually long (1 millisecond), the performance cost is negligible.
     2907
     2908Linux switched a decade ago from specific to arbitrary virtual-processor signal-delivery for applications with multiple kernel threads.
     2909In the new semantics, a virtual-processor directed signal may be delivered to any virtual processor created by the application that does not have the signal blocked.
    27122910Hence, the timer-expiry signal, which is generated \emph{externally} by the Linux kernel to an application, is delivered to any of its Linux subprocesses (kernel threads).
    27132911To ensure each virtual processor receives a preemption signal, a discrete-event simulation is run on a special virtual processor, and only it sets and receives timer events.
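
The countdown-timer mechanism can be pictured with the following plain C/POSIX sketch (illustrative only, not the \CFA runtime code): an interval timer delivers a periodic signal, and its handler is where a user-level context switch would be triggered.
\begin{cfa}
#include <signal.h>
#include <sys/time.h>
#include <stdio.h>
static volatile long ticks = 0;
static void preempt( int sig ) { (void)sig; ticks += 1; }       // runtime would context switch here
int main() {
	struct sigaction sa;
	sa.sa_handler = preempt;
	sa.sa_flags = 0;
	sigemptyset( &sa.sa_mask );
	sigaction( SIGALRM, &sa, NULL );                            // install preemption handler
	struct itimerval it = { { 0, 1000 }, { 0, 1000 } };         // 1 ms interval and initial delay
	setitimer( ITIMER_REAL, &it, NULL );                        // countdown timer delivers SIGALRM
	for ( volatile long i = 0; i < 300000000; i += 1 );         // compute while being interrupted
	printf( "%ld preemption ticks\n", ticks );
}
\end{cfa}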
     
    27272925\label{s:Performance}
    27282926
    2729 To verify the implementation of the \CFA runtime, a series of microbenchmarks are performed comparing \CFA with pthreads, Java OpenJDK-9, Go 1.12.6 and \uC 7.0.0.
      2927To test the performance of the \CFA runtime, a series of microbenchmarks is used to compare \CFA with pthreads, Java 11.0.6, Go 1.12.6, Rust 1.37.0, Python 3.7.6, Node.js 12.14.1, and \uC 7.0.0.
     27302928For comparison, a package must be multi-processor (M:N), which excludes libdill/libmil~\cite{libdill} (M:1), and use a shared-memory programming model, \eg not message passing.
    2731 The benchmark computer is an AMD Opteron\texttrademark\ 6380 NUMA 64-core, 8 socket, 2.5 GHz processor, running Ubuntu 16.04.6 LTS, and \CFA/\uC are compiled with gcc 6.5.
     2929The benchmark computer is an AMD Opteron\texttrademark\ 6380 NUMA 64-core, 8 socket, 2.5 GHz processor, running Ubuntu 16.04.6 LTS, and pthreads/\CFA/\uC are compiled with gcc 9.2.1.
    27322930
    27332931All benchmarks are run using the following harness. (The Java harness is augmented to circumvent JIT issues.)
    27342932\begin{cfa}
    2735 unsigned int N = 10_000_000;
    2736 #define BENCH( `run` ) Time before = getTimeNsec();  `run;`  Duration result = (getTimeNsec() - before) / N;
    2737 \end{cfa}
    2738 The method used to get time is @clock_gettime( CLOCK_REALTIME )@.
    2739 Each benchmark is performed @N@ times, where @N@ varies depending on the benchmark;
    2740 the total time is divided by @N@ to obtain the average time for a benchmark.
    2741 Each benchmark experiment is run 31 times.
     2933#define BENCH( `run` ) uint64_t start = cputime_ns();  `run;`  double result = (double)(cputime_ns() - start) / N;
     2934\end{cfa}
     2935where CPU time in nanoseconds is from the appropriate language clock.
     2936Each benchmark is performed @N@ times, where @N@ is selected so the benchmark runs in the range of 2--20 seconds for the specific programming language.
     2937The total time is divided by @N@ to obtain the average time for a benchmark.
      2938Each benchmark experiment is run 13 times, and the median, average, and standard deviation appear in the tables.
    27422939All omitted tests for other languages are functionally identical to the \CFA tests and available online~\cite{CforallBenchMarks}.
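
For the C-family tests, the assumed @cputime_ns@ helper can be implemented with @clock_gettime@ and the per-process CPU clock (a sketch only; the clock actually used for each language is not specified here):
\begin{cfa}
#include <stdint.h>
#include <time.h>
static inline uint64_t cputime_ns( void ) {
	struct timespec ts;
	clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts );             // per-process CPU time
	return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
\end{cfa}
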
    2743 % tar --exclude=.deps --exclude=Makefile --exclude=Makefile.in --exclude=c.c --exclude=cxx.cpp --exclude=fetch_add.c -cvhf benchmark.tar benchmark
    2744 
    2745 \paragraph{Object Creation}
    2746 
    2747 Object creation is measured by creating/deleting the specific kind of concurrent object.
    2748 Figure~\ref{f:creation} shows the code for \CFA, with results in Table~\ref{tab:creation}.
    2749 The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine to force stack creation, the creation cost is artificially low.
    2750 
    2751 \begin{multicols}{2}
    2752 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    2753 \begin{cfa}
    2754 @thread@ MyThread {};
    2755 void @main@( MyThread & ) {}
    2756 int main() {
    2757         BENCH( for ( N ) { @MyThread m;@ } )
    2758         sout | result`ns;
    2759 }
    2760 \end{cfa}
    2761 \captionof{figure}{\CFA object-creation benchmark}
    2762 \label{f:creation}
    2763 
    2764 \columnbreak
    2765 
    2766 \vspace*{-16pt}
    2767 \captionof{table}{Object creation comparison (nanoseconds)}
    2768 \label{tab:creation}
    2769 
    2770 \begin{tabular}[t]{@{}r*{3}{D{.}{.}{5.2}}@{}}
    2771 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    2772 \CFA Coroutine Lazy             & 13.2          & 13.1          & 0.44          \\
    2773 \CFA Coroutine Eager    & 531.3         & 536.0         & 26.54         \\
    2774 \CFA Thread                             & 2074.9        & 2066.5        & 170.76        \\
    2775 \uC Coroutine                   & 89.6          & 90.5          & 1.83          \\
    2776 \uC Thread                              & 528.2         & 528.5         & 4.94          \\
    2777 Goroutine                               & 4068.0        & 4113.1        & 414.55        \\
    2778 Java Thread                             & 103848.5      & 104295.4      & 2637.57       \\
    2779 Pthreads                                & 33112.6       & 33127.1       & 165.90
    2780 \end{tabular}
    2781 \end{multicols}
    2782 
    2783 
    2784 \paragraph{Context-Switching}
     2940% tar --exclude-ignore=exclude -cvhf benchmark.tar benchmark
     2941
     2942\paragraph{Context Switching}
    27852943
    27862944In procedural programming, the cost of a function call is important as modularization (refactoring) increases.
    2787 (In many cases, a compiler inlines function calls to eliminate this cost.)
    2788 Similarly, when modularization extends to coroutines/tasks, the time for a context switch becomes a relevant factor.
      2945(In many cases, a compiler inlines function calls to increase the size and number of basic blocks available for optimization.)
     2946Similarly, when modularization extends to coroutines/threads, the time for a context switch becomes a relevant factor.
    27892947The coroutine test is from resumer to suspender and from suspender to resumer, which is two context switches.
     2948%For async-await systems, the test is scheduling and fulfilling @N@ empty promises, where all promises are allocated before versus interleaved with fulfillment to avoid garbage collection.
     2949For async-await systems, the test measures the cost of the @await@ expression entering the event engine by awaiting @N@ promises, where each created promise is resolved by an immediate event in the engine (using Node.js @setImmediate@).
    27902950The thread test is using yield to enter and return from the runtime kernel, which is two context switches.
    27912951The difference in performance between coroutine and thread context-switch is the cost of scheduling for threads, whereas coroutines are self-scheduling.
    2792 Figure~\ref{f:ctx-switch} only shows the \CFA code for coroutines/threads (other systems are similar) with all results in Table~\ref{tab:ctx-switch}.
     2952Figure~\ref{f:ctx-switch} shows the \CFA code for a coroutine/thread with results in Table~\ref{t:ctx-switch}.
     2953
     2954% From: Gregor Richards <gregor.richards@uwaterloo.ca>
     2955% To: "Peter A. Buhr" <pabuhr@plg2.cs.uwaterloo.ca>
     2956% Date: Fri, 24 Jan 2020 13:49:18 -0500
     2957%
     2958% I can also verify that the previous version, which just tied a bunch of promises together, *does not* go back to the
     2959% event loop at all in the current version of Node. Presumably they're taking advantage of the fact that the ordering of
     2960% events is intentionally undefined to just jump right to the next 'then' in the chain, bypassing event queueing
     2961% entirely. That's perfectly correct behavior insofar as its difference from the specified behavior isn't observable, but
     2962% it isn't typical or representative of much anything useful, because most programs wouldn't have whole chains of eager
     2963% promises. Also, it's not representative of *anything* you can do with async/await, as there's no way to encode such an
     2964% eager chain that way.
    27932965
    27942966\begin{multicols}{2}
    27952967\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    27962968\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    2797 @coroutine@ C {} c;
    2798 void main( C & ) { for ( ;; ) { @suspend;@ } }
     2969@coroutine@ C {};
     2970void main( C & ) { for () { @suspend;@ } }
    27992971int main() { // coroutine test
     2972        C c;
    28002973        BENCH( for ( N ) { @resume( c );@ } )
    2801         sout | result`ns;
    2802 }
    2803 int main() { // task test
     2974        sout | result;
     2975}
     2976int main() { // thread test
    28042977        BENCH( for ( N ) { @yield();@ } )
    2805         sout | result`ns;
     2978        sout | result;
    28062979}
    28072980\end{cfa}
     
    28132986\vspace*{-16pt}
    28142987\captionof{table}{Context switch comparison (nanoseconds)}
    2815 \label{tab:ctx-switch}
     2988\label{t:ctx-switch}
    28162989\begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
    28172990\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    2818 C function              & 1.8   & 1.8   & 0.01  \\
    2819 \CFA generator  & 2.4   & 2.2   & 0.25  \\
    2820 \CFA Coroutine  & 36.2  & 36.2  & 0.25  \\
    2821 \CFA Thread             & 93.2  & 93.5  & 2.09  \\
    2822 \uC Coroutine   & 52.0  & 52.1  & 0.51  \\
    2823 \uC Thread              & 96.2  & 96.3  & 0.58  \\
    2824 Goroutine               & 141.0 & 141.3 & 3.39  \\
    2825 Java Thread             & 374.0 & 375.8 & 10.38 \\
    2826 Pthreads Thread & 361.0 & 365.3 & 13.19
     2991C function                      & 1.8           & 1.8           & 0.0   \\
     2992\CFA generator          & 1.8           & 2.0           & 0.3   \\
     2993\CFA coroutine          & 32.5          & 32.9          & 0.8   \\
     2994\CFA thread                     & 93.8          & 93.6          & 2.2   \\
     2995\uC coroutine           & 50.3          & 50.3          & 0.2   \\
     2996\uC thread                      & 97.3          & 97.4          & 1.0   \\
     2997Python generator        & 40.9          & 41.3          & 1.5   \\
     2998Node.js generator       & 32.6          & 32.2          & 1.0   \\
     2999Node.js await           & 1852.2        & 1854.7        & 16.4  \\
     3000Goroutine thread        & 143.0         & 143.3         & 1.1   \\
     3001Rust thread                     & 332.0         & 331.4         & 2.4   \\
     3002Java thread                     & 405.0         & 415.0         & 17.6  \\
     3003Pthreads thread         & 334.3         & 335.2         & 3.9
    28273004\end{tabular}
    28283005\end{multicols}
    28293006
    2830 
    2831 \paragraph{Mutual-Exclusion}
    2832 
    2833 Uncontented mutual exclusion, which frequently occurs, is measured by entering/leaving a critical section.
    2834 For monitors, entering and leaving a monitor function is measured.
    2835 To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a @pthread_mutex@ lock is also measured.
    2836 Figure~\ref{f:mutex} shows the code for \CFA with all results in Table~\ref{tab:mutex}.
     3007\paragraph{Internal Scheduling}
     3008
     3009Internal scheduling is measured using a cycle of two threads signalling and waiting.
     3010Figure~\ref{f:schedint} shows the code for \CFA, with results in Table~\ref{t:schedint}.
    28373011Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
      3012Java scheduling is significantly greater because the benchmark explicitly creates multiple threads in order to prevent the JIT from making the program sequential, \ie removing all locking.
    28383013
    28393014\begin{multicols}{2}
    28403015\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    28413016\begin{cfa}
     3017volatile int go = 0;
     3018@condition c;@
    28423019@monitor@ M {} m1/*, m2, m3, m4*/;
    2843 void __attribute__((noinline))
    2844 do_call( M & @mutex m/*, m2, m3, m4*/@ ) {}
     3020void call( M & @mutex p1/*, p2, p3, p4*/@ ) {
     3021        @signal( c );@
     3022}
     3023void wait( M & @mutex p1/*, p2, p3, p4*/@ ) {
     3024        go = 1; // continue other thread
      3025        for ( N ) { @wait( c );@ }
     3026}
     3027thread T {};
     3028void main( T & ) {
     3029        while ( go == 0 ) { yield(); } // waiter must start first
     3030        BENCH( for ( N ) { call( m1/*, m2, m3, m4*/ ); } )
     3031        sout | result;
     3032}
    28453033int main() {
    2846         BENCH(
    2847                 for( N ) do_call( m1/*, m2, m3, m4*/ );
    2848         )
    2849         sout | result`ns;
    2850 }
    2851 \end{cfa}
    2852 \captionof{figure}{\CFA acquire/release mutex benchmark}
    2853 \label{f:mutex}
     3034        T t;
     3035        wait( m1/*, m2, m3, m4*/ );
     3036}
     3037\end{cfa}
     3038\captionof{figure}{\CFA Internal-scheduling benchmark}
     3039\label{f:schedint}
    28543040
    28553041\columnbreak
    28563042
    28573043\vspace*{-16pt}
    2858 \captionof{table}{Mutex comparison (nanoseconds)}
    2859 \label{tab:mutex}
    2860 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
    2861 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    2862 test and test-and-test lock             & 19.1  & 18.9  & 0.40  \\
    2863 \CFA @mutex@ function, 1 arg.   & 45.9  & 46.6  & 1.45  \\
    2864 \CFA @mutex@ function, 2 arg.   & 105.0 & 104.7 & 3.08  \\
    2865 \CFA @mutex@ function, 4 arg.   & 165.0 & 167.6 & 5.65  \\
    2866 \uC @monitor@ member rtn.               & 54.0  & 53.7  & 0.82  \\
    2867 Java synchronized method                & 31.0  & 31.1  & 0.50  \\
    2868 Pthreads Mutex Lock                             & 33.6  & 32.6  & 1.14
     3044\captionof{table}{Internal-scheduling comparison (nanoseconds)}
     3045\label{t:schedint}
     3046\bigskip
     3047
     3048\begin{tabular}{@{}r*{3}{D{.}{.}{5.2}}@{}}
     3049\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
     3050\CFA @signal@, 1 monitor        & 364.4         & 364.2         & 4.4           \\
     3051\CFA @signal@, 2 monitor        & 484.4         & 483.9         & 8.8           \\
     3052\CFA @signal@, 4 monitor        & 709.1         & 707.7         & 15.0          \\
     3053\uC @signal@ monitor            & 328.3         & 327.4         & 2.4           \\
     3054Rust cond. variable                     & 7514.0        & 7437.4        & 397.2         \\
     3055Java @notify@ monitor           & 9623.0        & 9654.6        & 236.2         \\
     3056Pthreads cond. variable         & 5553.7        & 5576.1        & 345.6
    28693057\end{tabular}
    28703058\end{multicols}
     
    28743062
    28753063External scheduling is measured using a cycle of two threads calling and accepting the call using the @waitfor@ statement.
    2876 Figure~\ref{f:ext-sched} shows the code for \CFA, with results in Table~\ref{tab:ext-sched}.
     3064Figure~\ref{f:schedext} shows the code for \CFA with results in Table~\ref{t:schedext}.
    28773065Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    28783066
     
    28813069\vspace*{-16pt}
    28823070\begin{cfa}
    2883 volatile int go = 0;
    2884 @monitor@ M {} m;
     3071@monitor@ M {} m1/*, m2, m3, m4*/;
     3072void call( M & @mutex p1/*, p2, p3, p4*/@ ) {}
     3073void wait( M & @mutex p1/*, p2, p3, p4*/@ ) {
     3074        for ( N ) { @waitfor( call : p1/*, p2, p3, p4*/ );@ }
     3075}
    28853076thread T {};
    2886 void __attribute__((noinline))
    2887 do_call( M & @mutex@ ) {}
    28883077void main( T & ) {
    2889         while ( go == 0 ) { yield(); }
    2890         while ( go == 1 ) { do_call( m ); }
    2891 }
    2892 int __attribute__((noinline))
    2893 do_wait( M & @mutex@ m ) {
    2894         go = 1; // continue other thread
    2895         BENCH( for ( N ) { @waitfor( do_call, m );@ } )
    2896         go = 0; // stop other thread
    2897         sout | result`ns;
     3078        BENCH( for ( N ) { call( m1/*, m2, m3, m4*/ ); } )
     3079        sout | result;
    28983080}
    28993081int main() {
    29003082        T t;
    2901         do_wait( m );
     3083        wait( m1/*, m2, m3, m4*/ );
    29023084}
    29033085\end{cfa}
    29043086\captionof{figure}{\CFA external-scheduling benchmark}
    2905 \label{f:ext-sched}
     3087\label{f:schedext}
    29063088
    29073089\columnbreak
     
    29093091\vspace*{-16pt}
    29103092\captionof{table}{External-scheduling comparison (nanoseconds)}
    2911 \label{tab:ext-sched}
     3093\label{t:schedext}
    29123094\begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
    29133095\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    2914 \CFA @waitfor@, 1 @monitor@     & 376.4 & 376.8 & 7.63  \\
    2915 \CFA @waitfor@, 2 @monitor@     & 491.4 & 492.0 & 13.31 \\
    2916 \CFA @waitfor@, 4 @monitor@     & 681.0 & 681.7 & 19.10 \\
    2917 \uC @_Accept@                           & 331.1 & 331.4 & 2.66
     3096\CFA @waitfor@, 1 monitor       & 367.1 & 365.3 & 5.0   \\
     3097\CFA @waitfor@, 2 monitor       & 463.0 & 464.6 & 7.1   \\
     3098\CFA @waitfor@, 4 monitor       & 689.6 & 696.2 & 21.5  \\
     3099\uC \lstinline[language=uC++]|_Accept| monitor  & 328.2 & 329.1 & 3.4   \\
     3100Go \lstinline[language=Golang]|select| channel  & 365.0 & 365.5 & 1.2
    29183101\end{tabular}
    29193102\end{multicols}
    29203103
    2921 
    2922 \paragraph{Internal Scheduling}
    2923 
    2924 Internal scheduling is measured using a cycle of two threads signalling and waiting.
    2925 Figure~\ref{f:int-sched} shows the code for \CFA, with results in Table~\ref{tab:int-sched}.
    2926 Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    2927 Java scheduling is significantly greater because the benchmark explicitly creates multiple thread in order to prevent the JIT from making the program sequential, \ie removing all locking.
     3104\paragraph{Mutual-Exclusion}
     3105
      3106Uncontended mutual exclusion, which frequently occurs, is measured by entering/leaving a critical section.
     3107For monitors, entering and leaving a monitor function is measured, otherwise the language-appropriate mutex-lock is measured.
     3108For comparison, a spinning (versus blocking) test-and-test-set lock is presented.
     3109Figure~\ref{f:mutex} shows the code for \CFA with results in Table~\ref{t:mutex}.
     3110Note the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    29283111
    29293112\begin{multicols}{2}
    29303113\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    29313114\begin{cfa}
    2932 volatile int go = 0;
    2933 @monitor@ M { @condition c;@ } m;
    2934 void __attribute__((noinline))
    2935 do_call( M & @mutex@ a1 ) { @signal( c );@ }
    2936 thread T {};
    2937 void main( T & this ) {
    2938         while ( go == 0 ) { yield(); }
    2939         while ( go == 1 ) { do_call( m ); }
    2940 }
    2941 int  __attribute__((noinline))
    2942 do_wait( M & mutex m ) with(m) {
    2943         go = 1; // continue other thread
    2944         BENCH( for ( N ) { @wait( c );@ } );
    2945         go = 0; // stop other thread
    2946         sout | result`ns;
    2947 }
     3115@monitor@ M {} m1/*, m2, m3, m4*/;
      3116void call( M & @mutex p1/*, p2, p3, p4*/@ ) {}
    29483117int main() {
    2949         T t;
    2950         do_wait( m );
    2951 }
    2952 \end{cfa}
    2953 \captionof{figure}{\CFA Internal-scheduling benchmark}
    2954 \label{f:int-sched}
     3118        BENCH( for( N ) call( m1/*, m2, m3, m4*/ ); )
     3119        sout | result;
     3120}
     3121\end{cfa}
     3122\captionof{figure}{\CFA acquire/release mutex benchmark}
     3123\label{f:mutex}
    29553124
    29563125\columnbreak
    29573126
    29583127\vspace*{-16pt}
    2959 \captionof{table}{Internal-scheduling comparison (nanoseconds)}
    2960 \label{tab:int-sched}
    2961 \bigskip
    2962 
    2963 \begin{tabular}{@{}r*{3}{D{.}{.}{5.2}}@{}}
    2964 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    2965 \CFA @signal@, 1 @monitor@      & 372.6         & 374.3         & 14.17         \\
    2966 \CFA @signal@, 2 @monitor@      & 492.7         & 494.1         & 12.99         \\
    2967 \CFA @signal@, 4 @monitor@      & 749.4         & 750.4         & 24.74         \\
    2968 \uC @signal@                            & 320.5         & 321.0         & 3.36          \\
    2969 Java @notify@                           & 10160.5       & 10169.4       & 267.71        \\
    2970 Pthreads Cond. Variable         & 4949.6        & 5065.2        & 363
     3128\captionof{table}{Mutex comparison (nanoseconds)}
     3129\label{t:mutex}
     3130\begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
     3131\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
     3132test-and-test-set lock                  & 19.1  & 18.9  & 0.4   \\
     3133\CFA @mutex@ function, 1 arg.   & 48.3  & 47.8  & 0.9   \\
     3134\CFA @mutex@ function, 2 arg.   & 86.7  & 87.6  & 1.9   \\
     3135\CFA @mutex@ function, 4 arg.   & 173.4 & 169.4 & 5.9   \\
     3136\uC @monitor@ member rtn.               & 54.8  & 54.8  & 0.1   \\
     3137Goroutine mutex lock                    & 34.0  & 34.0  & 0.0   \\
     3138Rust mutex lock                                 & 33.0  & 33.2  & 0.8   \\
     3139Java synchronized method                & 31.0  & 31.0  & 0.0   \\
     3140Pthreads mutex Lock                             & 31.0  & 31.1  & 0.4
    29713141\end{tabular}
    29723142\end{multicols}
    29733143
     3144\paragraph{Creation}
     3145
     3146Creation is measured by creating/deleting a specific kind of control-flow object.
     3147Figure~\ref{f:creation} shows the code for \CFA with results in Table~\ref{t:creation}.
      3148Note, the call stacks of \CFA coroutines are lazily created on the first resume, therefore the creation costs with and without a stack are both presented.
     3149
     3150\begin{multicols}{2}
     3151\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
     3152\begin{cfa}
     3153@coroutine@ MyCoroutine {};
     3154void ?{}( MyCoroutine & this ) {
     3155#ifdef EAGER
     3156        resume( this );
     3157#endif
     3158}
     3159void main( MyCoroutine & ) {}
     3160int main() {
     3161        BENCH( for ( N ) { @MyCoroutine c;@ } )
     3162        sout | result;
     3163}
     3164\end{cfa}
     3165\captionof{figure}{\CFA creation benchmark}
     3166\label{f:creation}
     3167
     3168\columnbreak
     3169
     3170\vspace*{-16pt}
     3171\captionof{table}{Creation comparison (nanoseconds)}
     3172\label{t:creation}
     3173
     3174\begin{tabular}[t]{@{}r*{3}{D{.}{.}{5.2}}@{}}
     3175\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
     3176\CFA generator                  & 0.6           & 0.6           & 0.0           \\
     3177\CFA coroutine lazy             & 13.4          & 13.1          & 0.5           \\
     3178\CFA coroutine eager    & 144.7         & 143.9         & 1.5           \\
     3179\CFA thread                             & 466.4         & 468.0         & 11.3          \\
     3180\uC coroutine                   & 155.6         & 155.7         & 1.7           \\
     3181\uC thread                              & 523.4         & 523.9         & 7.7           \\
     3182Python generator                & 123.2         & 124.3         & 4.1           \\
     3183Node.js generator               & 32.3          & 32.2          & 0.3           \\
     3184Goroutine thread                & 751.0         & 750.5         & 3.1           \\
     3185Rust thread                             & 53801.0       & 53896.8       & 274.9         \\
     3186Java thread                             & 120274.0      & 120722.9      & 2356.7        \\
     3187Pthreads thread                 & 31465.5       & 31419.5       & 140.4
     3188\end{tabular}
     3189\end{multicols}
     3190
     3191
     3192\subsection{Discussion}
     3193
      3194Languages using 1:1 threading based on pthreads can at best match the pthread results, and typically exceed them because of additional language overhead.
     3195Note, pthreads has a fast zero-contention mutex lock checked in user space.
      3196Languages with M:N threading have better performance than 1:1 threading because there are no operating-system interactions.
     3197Languages with stackful coroutines have higher cost than stackless coroutines because of stack allocation and context switching;
     3198however, stackful \uC and \CFA coroutines have approximately the same performance as stackless Python and Node.js generators.
      3199The \CFA stackless generator is approximately 20 times faster for suspend/resume than the stackless Python and Node.js generators, and approximately 200 and 50 times faster for creation, respectively.
     3200
    29743201
    29753202\section{Conclusion}
     
    29773204Advanced control-flow will always be difficult, especially when there is temporal ordering and nondeterminism.
    29783205However, many systems exacerbate the difficulty through their presentation mechanisms.
    2979 This paper shows it is possible to present a hierarchy of control-flow features, generator, coroutine, thread, and monitor, providing an integrated set of high-level, efficient, and maintainable control-flow features.
    2980 Eliminated from \CFA are spurious wakeup and barging, which are nonintuitive and lead to errors, and having to work with a bewildering set of low-level locks and acquisition techniques.
    2981 \CFA high-level race-free monitors and tasks provide the core mechanisms for mutual exclusion and synchronization, without having to resort to magic qualifiers like @volatile@/@atomic@.
      3206This paper shows it is possible to understand high-level control-flow using three properties: statefulness, thread, and mutual-exclusion/synchronization.
      3207Combining these properties creates a number of high-level, efficient, and maintainable control-flow types: generator, coroutine, and thread, each of which can also be a monitor.
     3208Eliminated from \CFA are barging and spurious wakeup, which are nonintuitive and lead to errors, and having to work with a bewildering set of low-level locks and acquisition techniques.
     3209\CFA high-level race-free monitors and threads provide the core mechanisms for mutual exclusion and synchronization, without having to resort to magic qualifiers like @volatile@/@atomic@.
    29823210Extending these mechanisms to handle high-level deadlock-free bulk acquire across both mutual exclusion and synchronization is a unique contribution.
    29833211The \CFA runtime provides concurrency based on a preemptive M:N user-level threading-system, executing in clusters, which encapsulate scheduling of work on multiple kernel threads providing parallelism.
    29843212The M:N model is judged to be efficient and provide greater flexibility than a 1:1 threading model.
    29853213These concepts and the \CFA runtime-system are written in the \CFA language, extensively leveraging the \CFA type-system, which demonstrates the expressiveness of the \CFA language.
    2986 Performance comparisons with other concurrent systems/languages show the \CFA approach is competitive across all low-level operations, which translates directly into good performance in well-written concurrent applications.
    2987 C programmers should feel comfortable using these mechanisms for developing complex control-flow in applications, with the ability to obtain maximum available performance by selecting mechanisms at the appropriate level of need.
     3214Performance comparisons with other concurrent systems/languages show the \CFA approach is competitive across all basic operations, which translates directly into good performance in well-written applications with advanced control-flow.
      3215C programmers should feel comfortable using these mechanisms for developing complex control-flow in applications, with the ability to obtain maximum available performance by selecting mechanisms at the appropriate level of need, using only calling communication.
    29883216
    29893217
     
    30053233\label{futur:nbio}
    30063234
    3007 Many modern workloads are not bound by computation but IO operations, a common case being web servers and XaaS~\cite{XaaS} (anything as a service).
      3235Many modern workloads are not bound by computation but by IO operations, common cases being web servers and XaaS~\cite{XaaS} (anything as a service).
     30083236These types of workloads require significant engineering to amortize the cost of blocking IO-operations.
     30093237At its core, non-blocking I/O is an operating-system-level feature for queuing IO operations, \eg network operations, and registering for notifications instead of waiting for requests to complete.
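
The operating-system building block is readiness notification; the following plain C sketch (Linux @epoll@, illustrative only and unrelated to any planned \CFA interface) registers a descriptor and waits for events instead of blocking in @read@:
\begin{cfa}
#include <sys/epoll.h>
#include <unistd.h>
int main() {
	int epfd = epoll_create1( 0 );                              // kernel event queue
	struct epoll_event ev = { .events = EPOLLIN, .data.fd = 0 };
	epoll_ctl( epfd, EPOLL_CTL_ADD, 0, &ev );                   // register stdin for read readiness
	struct epoll_event ready[8];
	int n = epoll_wait( epfd, ready, 8, 1000 );                 // wait up to 1 second for notifications
	for ( int i = 0; i < n; i += 1 ) {
		char buf[64];
		read( ready[i].data.fd, buf, sizeof(buf) );             // read from a ready descriptor
	}
	close( epfd );
}
\end{cfa}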
     
    30333261\section{Acknowledgements}
    30343262
    3035 The authors would like to recognize the design assistance of Aaron Moss, Rob Schluntz, Andrew Beach and Michael Brooks on the features described in this paper.
    3036 Funding for this project has been provided by Huawei Ltd.\ (\url{http://www.huawei.com}). %, and Peter Buhr is partially funded by the Natural Sciences and Engineering Research Council of Canada.
     3263The authors recognize the design assistance of Aaron Moss, Rob Schluntz, Andrew Beach, and Michael Brooks; David Dice for commenting and helping with the Java benchmarks; and Gregor Richards for helping with the Node.js benchmarks.
     3264This research is funded by a grant from Waterloo-Huawei (\url{http://www.huawei.com}) Joint Innovation Lab. %, and Peter Buhr is partially funded by the Natural Sciences and Engineering Research Council of Canada.
    30373265
    30383266{%
    3039 \fontsize{9bp}{12bp}\selectfont%
     3267\fontsize{9bp}{11.5bp}\selectfont%
    30403268\bibliography{pl,local}
    30413269}%
  • doc/papers/concurrency/examples/Fib.py

    rb7d6a36 r6a490b2  
    44        while True:
    55                fn = fn1 + fn2; fn2 = fn1; fn1 = fn; yield fn
    6 
    7 
    86
    97f1 = Fib()
     
    1412# Local Variables: #
    1513# tab-width: 4 #
    16 # compile-command: "python3.5 Fib.py" #
     14# compile-command: "python3.7 Fib.py" #
    1715# End: #
  • doc/papers/concurrency/examples/Fib2.c

    rb7d6a36 r6a490b2  
    11#include <stdio.h>
    22
    3 void mary() {
    4         printf( "MARY\n" );
    5 }
    6 
    73#define FIB_INIT { 0 }
    8 typedef struct { int next; int fn1, fn2; } Fib;
     4typedef struct { int restart; int fn1, fn2; } Fib;
    95int fib( Fib * f ) {
    10         static void * states[] = { &&s1, &&s2, &&s3 };
    11         goto *states[f->next];
     6        static void * states[] = { &&s0, &&s1, &&s2 };
     7        goto *states[f->restart];
     8  s0:
     9        f->fn1 = 0;
     10        f->restart = 1;
     11        return f->fn1;
    1212  s1:
    13         mary();
    14         f->fn1 = 0;
    15         f->next = 1;
    16         return f->fn1;
    17   s2:
    18         mary();
    1913        f->fn2 = f->fn1;
    2014        f->fn1 = 1;
    21         f->next = 2;
     15        f->restart = 2;
    2216        return f->fn1;
    23   s3:;
    24         mary();
     17  s2:;
    2518        int fn = f->fn1 + f->fn2;
    2619        f->fn2 = f->fn1;
  • doc/papers/concurrency/examples/Fib2.py

    rb7d6a36 r6a490b2  
    11def Fib():
    2     fn1, fn = 0, 1
     2    fn1, fn = 1, 0
    33    while True:
    4         yield fn1
     4        yield fn
    55        fn1, fn = fn, fn1 + fn
    66
     
    1212# Local Variables: #
    1313# tab-width: 4 #
    14 # compile-command: "python3.5 Fib2.py" #
     14# compile-command: "python3.7 Fib2.py" #
    1515# End: #
  • doc/papers/concurrency/examples/Fib3.c

    rb7d6a36 r6a490b2  
    22
    33typedef struct {
    4         int fn1, fn;
    5         void * next;
     4        int restart, fn1, fn;
    65} Fib;
    7 #define FibCtor { 1, 0, NULL }
     6#define FibCtor { 0, 1, 0 }
    87
    98Fib * comain( Fib * f ) {
    10         if ( __builtin_expect(f->next != 0, 1) ) goto *f->next;
    11         f->next = &&s1;
     9        static void * states[] = {&&s0, &&s1};
     10        goto *states[f->restart];
     11  s0: f->restart = 1;
    1212        for ( ;; ) {
    1313                return f;
  • doc/papers/concurrency/examples/FibRefactor.py

    rb7d6a36 r6a490b2  
    2222# Local Variables: #
    2323# tab-width: 4 #
    24 # compile-command: "python3.5 FibRefactor.py" #
     24# compile-command: "python3.7 FibRefactor.py" #
    2525# End: #
  • doc/papers/concurrency/examples/Format.c

    rb7d6a36 r6a490b2  
    22
    33typedef struct {
    4         void * next;
     4        int restart, g, b;
    55        char ch;
    6         int g, b;
    76} Fmt;
    87
    98void comain( Fmt * f ) {
    10         if ( __builtin_expect(f->next != 0, 1) ) goto *f->next;
    11         f->next = &&s1;
     9        static void * states[] = {&&s0, &&s1};
     10        goto *states[f->restart];
     11  s0: f->restart = 1;
    1212        for ( ;; ) {
    1313                for ( f->g = 0; f->g < 5; f->g += 1 ) {                 // groups
    1414                        for ( f->b = 0; f->b < 4; f->b += 1 ) {         // blocks
    15                                 return;
    16                           s1:;  while ( f->ch == '\n' ) return;         // ignore
     15                                do {
     16                                        return;  s1: ;
     17                                } while ( f->ch == '\n' );                              // ignore
    1718                                printf( "%c", f->ch );                                  // print character
    1819                        }
     
    2425
    2526int main() {
    26         Fmt fmt = { NULL };
     27        Fmt fmt = { 0 };
    2728        comain( &fmt );                                                                         // prime
    2829        for ( ;; ) {
  • doc/papers/concurrency/examples/Format.cc

    rb7d6a36 r6a490b2  
    66                        for ( g = 0; g < 5; g += 1 ) { // groups of 5 blocks
    77                                for ( b = 0; b < 4; b += 1 ) { // blocks of 4 characters
    8 //                                      for ( ;; ) { // for newline characters
     8                                        for ( ;; ) { // for newline characters
    99                                                suspend();
    10 //                                              if ( ch != '\n' ) break; // ignore newline
    11 //                                      }
     10                                                if ( ch != '\n' ) break; // ignore newline
     11                                        }
    1212//                                      cout << ch; // print character
    1313                                }
     
    3131// Local Variables: //
    3232// tab-width: 4 //
    33 // compile-command: "u++-work -O2 -nodebubg Format.cc" //
     33// compile-command: "u++-work -O2 -nodebug Format.cc" //
    3434// End: //
  • doc/papers/concurrency/examples/Format.cfa

    rb7d6a36 r6a490b2  
    1111                for ( g = 0; g < 5; g += 1 ) {          // groups of 5 blocks
    1212                        for ( b = 0; b < 4; b += 1 ) {  // blocks of 4 characters
    13 //                              do {
     13                                do {
    1414                                        suspend();
    15 //                              } while ( ch == '\n' || ch == '\t' );
     15                                } while ( ch == '\n' || ch == '\t' );
    1616                                sout | ch;                                      // print character
    1717                        }
  • doc/papers/concurrency/examples/Format.data

    rb7d6a36 r6a490b2  
    1 abcdefghijklmnopqrstuvwxyzxxxxxxxxxxxxxx
     1abcdefghijklmnop
     2qrstuvwxyzx
     3xxxxxxxxxxxxx
  • doc/papers/concurrency/examples/Format.py

    rb7d6a36 r6a490b2  
    44                        for g in range( 5 ):    # groups of 5 blocks
    55                                for b in range( 4 ): # blocks of 4 characters
    6                                         print( (yield), end='' ) # receive from send
     6                                        while True:
     7                                                ch = (yield) # receive from send
     8                                                if '\n' not in ch:
     9                                                        break
     10                                        print( ch, end='' ) # receive from send
    711                                print( '  ', end='' ) # block separator
    812                        print()                                 # group separator
     
    1115                        print()
    1216
     17input = "abcdefghijklmnop\nqrstuvwx\nyzxxxxxxxxxxxxxx\n"
     18
    1319fmt = Format()
    1420next( fmt )                                                     # prime generator
    15 for i in range( 41 ):
    16         fmt.send( 'a' );                                # send to yield
     21for i in input:
     22        fmt.send( i );                          # send to yield
    1723
    1824# Local Variables: #
    1925# tab-width: 4 #
    20 # compile-command: "python3.5 Format.py" #
     26# compile-command: "python3.7 Format.py" #
    2127# End: #
  • doc/papers/concurrency/examples/Format1.c

    rb7d6a36 r6a490b2  
    22
    33typedef struct {
    4         void * next;
     4        int restart, g, b;
    55        char ch;
    6         int g, b;
    76} Fmt;
    87
    98void format( Fmt * f ) {
    10         if ( __builtin_expect(f->next != 0, 1) ) goto *f->next;
    11         f->next = &&s1;
     9        static void * states[] = {&&s0, &&s1};
     10        goto *states[f->restart];
     11  s0: f->restart = 1;
    1212        for ( ;; ) {
    1313                for ( f->g = 0; f->g < 5; f->g += 1 ) {                 // groups
    1414                        for ( f->b = 0; f->b < 4; f->b += 1 ) {         // blocks
    1515                                return;
    16                           s1: ;
    17                                 if ( f->ch == '\0' ) goto fini;                 // EOF ?
     16                          s1: if ( f->ch == '\0' ) goto fini;           // EOF ?
    1817                                while ( f->ch == '\n' ) return;                 // ignore
    19                                 printf( "%c", f->ch );                                  // print character
     18//                              printf( "%c", f->ch );                                  // print character
    2019                        }
    21                         printf( " " );                                                          // block separator
     20//                      printf( " " );                                                          // block separator
    2221                }
    23                 printf( "\n" );                                                                 // group separator
     22//              printf( "\n" );                                                                 // group separator
    2423        }
    25   fini:
    26         if ( f->g != 0 || f->b != 0 ) printf( "\n" );
     24  fini:;
     25//      if ( f->g != 0 || f->b != 0 ) printf( "\n" );
    2726}
    2827
    2928int main() {
    30         Fmt fmt = { NULL };
     29        Fmt fmt = { 0 };
    3130        format( &fmt );                                                                         // prime
    32         for ( ;; ) {
    33                 scanf( "%c", &fmt.ch );                                                 // direct read into communication variable
    34           if ( feof( stdin ) ) break;
     31        fmt.ch = 'a';
     32        for ( long int i = 0; i < 1000000000; i += 1 ) {
     33//              scanf( "%c", &fmt.ch );                                                 // direct read into communication variable
     34//        if ( feof( stdin ) ) break;
    3535                format( &fmt );
    3636        }
    37         fmt.ch = '\0';
     37        fmt.ch = '\0';                                                                          // sentential (EOF)
    3838        format( &fmt );
    3939}
  • doc/papers/concurrency/examples/PingPong.c

    rb7d6a36 r6a490b2  
    22
    33typedef struct PingPong {
     4        int restart;                                                                            // style 1
     5        int N, i;
    46        const char * name;
    5         int N, i;
    67        struct PingPong * partner;
    7         void * next;
     8        void * next;                                                                            // style 2
    89} PingPong;
    9 #define PPCtor( name, N ) { name, N, 0, NULL, NULL }
     10#define PPCtor( name, N ) { 0, N, 0, name, NULL, NULL }
     11
    1012void comain( PingPong * pp ) __attribute__(( noinline ));
    1113void comain( PingPong * pp ) {
     14#if 0
    1215        if ( __builtin_expect(pp->next != 0, 1) ) goto *pp->next;
    13 #if 0
    14         pp->next = &&here;
    15                 asm( "mov  %0,%%rdi" : "=m" (pp) );
    16                 asm( "mov  %rdi,%rax" );
    17 #ifndef OPT
    18 #ifdef PRINT
    19                 asm( "add  $16, %rsp" );
    20 #endif // PRINT
    21                 asm( "popq %rbp" );
    22 #endif // ! OPT
    23 
    24 #ifdef OPT
    25 #ifdef PRINT
    26                 asm( "popq %rbx" );
    27 #endif // PRINT
    28 #endif // OPT
    29                 asm( "jmp  comain" );
    30   here: ;
    31 #endif // 0
    32 
    3316        pp->next = &&cycle;
    3417        for ( ; pp->i < pp->N; pp->i += 1 ) {
     
    5336          cycle: ;
    5437        } // for
     38#endif // 0
     39
     40#if 1
     41        static void * states[] = {&&s0, &&s1};
     42        goto *states[pp->restart];
     43  s0: pp->restart = 1;
     44        for ( ; pp->i < pp->N; pp->i += 1 ) {
     45#ifdef PRINT
     46                printf( "%s %d\n", pp->name, pp->i );
     47#endif // PRINT
     48                asm( "mov  %0,%%rdi" : "=m" (pp->partner) );
     49                asm( "mov  %rdi,%rax" );
     50#ifndef OPT
     51#ifdef PRINT
     52                asm( "add  $16, %rsp" );
     53#endif // PRINT
     54                asm( "popq %rbp" );
     55#endif // ! OPT
     56
     57#ifdef OPT
     58#ifdef PRINT
     59                asm( "popq %rbx" );
     60#endif // PRINT
     61#endif // OPT
     62                asm( "jmp  comain" );
     63          s1: ;
     64        } // for
     65#endif // 0
    5566}
    5667
     
    7081// Local Variables: //
    7182// tab-width: 4 //
    72 // compile-command: "gcc-8 -g -DPRINT PingPong.c" //
     83// compile-command: "gcc-9 -g -DPRINT PingPong.c" //
    7384// End: //
  • doc/papers/concurrency/examples/Pingpong.py

    rb7d6a36 r6a490b2  
    11def PingPong( name, N ):
    2         partner = (yield)           # get partner
    3         yield                       # resume scheduler
     2        partner = yield                         # get partner
     3        yield                                           # resume scheduler
    44        for i in range( N ):
    55                print( name )
    6                 yield partner           # execute next
     6                yield partner                   # execute next
    77        print( "end", name )
    88
    99def Scheduler():
    10         n = (yield)                 # starting coroutine
    11         while True:
    12                 n = next( n )           # schedule coroutine
     10        n = yield                                       # starting coroutine
     11        try:
     12                while True:
     13                        n = next( n )           # schedule coroutine
     14        except StopIteration:
     15                pass
    1316
    1417pi = PingPong( "ping", 5 )
    1518po = PingPong( "pong", 5 )
    16 next( pi )                      # prime
    17 pi.send( po )                   # send partner
    18 next( po )                      # prime
    19 po.send( pi )                   # send partner
     19next( pi )                                              # prime
     20pi.send( po )                                   # send partner
     21next( po )                                              # prime
     22po.send( pi )                                   # send partner
    2023
    2124s = Scheduler();
    22 next( s )                       # prime
     25next( s )                                               # prime
    2326try:
    2427        s.send( pi )                            # start cycle
    25 except StopIteration:
    26         print( "scheduler stop" )
     28except StopIteration:                   # scheduler stopped
     29        pass
    2730print( "stop" )
    2831
    2932# Local Variables: #
    3033# tab-width: 4 #
    31 # compile-command: "python3.5 Pingpong.py" #
     34# compile-command: "python3.7 Pingpong.py" #
    3235# End: #
  • doc/papers/concurrency/examples/ProdCons.py

    rb7d6a36 r6a490b2  
    11def Prod( N ):
    2         cons = (yield)              # get cons
    3         yield                       # resume scheduler
     2        cons = yield                            # get cons
     3        yield                                           # resume scheduler
    44        for i in range( N ):
    55                print( "prod" )
    6                 yield cons              # execute next
     6                yield cons                              # execute next
    77        print( "end", "prod" )
    88
    99def Cons( N ):
    10         prod = (yield)              # get prod
    11         yield                       # resume scheduler
     10        prod = yield                            # get prod
     11        yield                                           # resume scheduler
    1212        for i in range( N ):
    1313                print( "cons" )
    14                 yield prod              # execute next
     14                yield prod                              # execute next
    1515        print( "end", "cons" )
    1616
    1717def Scheduler():
    18         n = (yield)                 # starting coroutine
    19         while True:
    20                 n = next( n )           # schedule coroutine
     18        n = yield                                       # starting coroutine
     19        try:
     20                while True:
     21                        n = next( n )           # schedule coroutine
     22        except StopIteration:
     23                pass
    2124
    2225prod = Prod( 5 )
    2326cons = Cons( 5 )
    24 next( prod )                    # prime
    25 prod.send( cons )               # send cons
    26 next( cons )                    # prime
    27 cons.send( prod )               # send prod
     27next( prod )                                    # prime
     28prod.send( cons )                               # send cons
     29next( cons )                                    # prime
     30cons.send( prod )                               # send prod
    2831
    2932s = Scheduler();
    30 next( s )                       # prime
     33next( s )                                               # prime
    3134try:
    3235        s.send( prod )                          # start cycle
    33 except StopIteration:
    34         print( "scheduler stop" )
     36except StopIteration:                   # scheduler stopped
     37        pass
    3538print( "stop" )
    3639
    3740# Local Variables: #
    3841# tab-width: 4 #
    39 # compile-command: "python3.5 ProdCons.py" #
     42# compile-command: "python3.7 ProdCons.py" #
    4043# End: #
  • doc/papers/concurrency/examples/RWMonitorEXT.cfa

    rb7d6a36 r6a490b2  
    77        int rcnt, wcnt;                                                                         // number of readers/writer using resource
    88};
     9
     10void ?{}( ReadersWriter & rw ) with(rw) { rcnt = wcnt = 0; }
    911void EndRead( ReadersWriter & mutex rw ) with(rw) { rcnt -= 1; }
    1012void EndWrite( ReadersWriter & mutex rw ) with(rw) { wcnt = 0; }
    1113void StartRead( ReadersWriter & mutex rw ) with(rw) {
    12         if ( wcnt > 0 ) waitfor( EndWrite, rw );
     14        if ( wcnt > 0 ) waitfor( EndWrite : rw );
    1315        rcnt += 1;
    1416}
    1517void StartWrite( ReadersWriter & mutex rw ) with(rw) {
    16         if ( wcnt > 0 ) waitfor( EndWrite, rw );
    17         else while ( rcnt > 0 ) waitfor( EndRead, rw );
     18        if ( wcnt > 0 ) waitfor( EndWrite : rw );
     19        else while ( rcnt > 0 ) waitfor( EndRead : rw );
    1820        wcnt = 1;
    1921}
    20 void ?{}( ReadersWriter & rw ) with(rw) { rcnt = wcnt = 0; }
    2122int readers( ReadersWriter & rw ) { return rw.rcnt; }
     23
    2224void Read( ReadersWriter & rw ) {
    2325        StartRead( rw );
     
    3436        EndWrite( rw );
    3537}
     38
    3639thread Worker {
    3740        ReadersWriter &rw;
     
    4750        } // for
    4851}
     52
    4953int main() {
    5054        enum { MaxTask = 5 };
     
    5761} // main
    5862
    59 
    6063// Local Variables: //
    6164// tab-width: 4 //
    62 // compile-command: "cfa -O2 RWMonitor.cfa" //
     65// compile-command: "cfa -O2 RWMonitorEXT.cfa" //
    6366// End: //
  • doc/papers/concurrency/examples/Refactor.py

    rb7d6a36 r6a490b2  
    2626# Local Variables: #
    2727# tab-width: 4 #
    28 # compile-command: "python3.5 Refactor.py" #
     28# compile-command: "python3.7 Refactor.py" #
    2929# End: #
  • doc/papers/concurrency/figures/FullCoroutinePhases.fig

    rb7d6a36 r6a490b2  
    88-2
    991200 2
    10 5 1 0 1 0 7 100 0 -1 0.000 0 0 1 0 4575.000 2437.500 4275 1875 4575 1800 4875 1875
     105 1 0 1 0 7 100 0 -1 0.000 0 0 1 0 5175.000 2437.500 4875 1875 5175 1800 5475 1875
    1111        1 1 1.00 45.00 90.00
    12 5 1 0 1 0 7 100 0 -1 0.000 0 0 1 0 4575.000 1537.500 4875 2100 4575 2175 4275 2100
     125 1 0 1 0 7 100 0 -1 0.000 0 0 1 0 5175.000 1537.500 5475 2100 5175 2175 4875 2100
    1313        1 1 1.00 45.00 90.00
    14 5 1 0 1 0 7 50 -1 -1 0.000 0 1 1 0 4207.500 1642.500 4125 1425 3975 1650 4200 1875
     145 1 0 1 0 7 50 -1 -1 0.000 0 1 1 0 4807.500 1642.500 4725 1425 4575 1650 4800 1875
    1515        1 1 1.00 45.00 90.00
     166 1575 1575 2700 2025
    16172 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
    1718        1 1 1.00 45.00 90.00
     
    2021        1 1 1.00 45.00 90.00
    2122         2175 1575 2400 1800
     234 1 0 100 0 4 10 0.0000 2 165 300 1725 1950 ping\001
     244 1 0 100 0 4 10 0.0000 2 135 360 2475 1950 pong\001
     25-6
     266 3075 1575 4200 2025
     276 3075 1575 4200 2025
    22282 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
    2329        1 1 1.00 45.00 90.00
    24          3300 1575 3300 1800
     30         3525 1575 3300 1800
    25312 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
    2632        1 1 1.00 45.00 90.00
    27          3300 2025 3300 2250
    28 4 1 0 100 0 0 10 0.0000 2 105 555 2100 1200 creation\001
    29 4 1 0 100 0 4 10 0.0000 2 165 300 1725 1950 ping\001
    30 4 1 0 100 0 4 10 0.0000 2 135 360 2475 1950 pong\001
    31 4 1 0 100 0 4 10 0.0000 2 165 300 3300 1950 ping\001
    32 4 1 0 100 0 4 10 0.0000 2 135 360 3300 2400 pong\001
    33 4 1 0 100 0 0 10 0.0000 2 105 675 4575 1200 execution\001
    34 4 1 0 100 0 4 10 0.0000 2 165 300 4275 2025 ping\001
    35 4 1 0 100 0 4 10 0.0000 2 135 360 4875 2025 pong\001
    36 4 1 0 100 0 0 10 0.0000 2 90 420 3300 1200 starter\001
     33         3675 1575 3900 1800
     344 1 0 100 0 4 10 0.0000 2 165 300 3225 1950 ping\001
     354 1 0 100 0 4 10 0.0000 2 135 360 3975 1950 pong\001
     36-6
     37-6
    37384 1 0 100 0 4 10 0.0000 2 165 705 2100 1500 pgm main\001
    38 4 1 0 100 0 4 10 0.0000 2 165 705 3300 1500 pgm main\001
    39 4 1 0 100 0 4 10 0.0000 2 165 705 4500 1500 pgm main\001
     394 1 0 100 0 4 10 0.0000 2 165 705 3600 1500 pgm main\001
     404 1 0 100 0 4 10 0.0000 2 165 300 4875 2025 ping\001
     414 1 0 100 0 4 10 0.0000 2 135 360 5475 2025 pong\001
     424 1 0 100 0 4 10 0.0000 2 165 705 5100 1500 pgm main\001
     434 1 0 100 0 2 10 0.0000 2 105 540 2100 1275 creator\001
     444 1 0 100 0 2 10 0.0000 2 105 495 3600 1275 starter\001
     454 1 0 100 0 2 10 0.0000 2 105 690 5175 1275 execution\001
  • doc/papers/concurrency/figures/RunTimeStructure.fig

    rb7d6a36 r6a490b2  
    36361 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 4500 3600 15 15 4500 3600 4515 3615
    3737-6
    38 6 2175 4650 7050 4950
    39 1 3 0 1 0 0 0 0 0 0.000 1 0.0000 2250 4830 30 30 2250 4830 2280 4860
    40 1 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4200 4800 150 75 4200 4800 4350 4875
    41 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3275 4800 100 100 3275 4800 3375 4800
     386 3225 4125 4650 4425
     396 4350 4200 4650 4350
     401 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 4425 4275 15 15 4425 4275 4440 4290
     411 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 4500 4275 15 15 4500 4275 4515 4290
     421 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 4575 4275 15 15 4575 4275 4590 4290
     43-6
     441 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3450 4275 225 150 3450 4275 3675 4425
     451 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4050 4275 225 150 4050 4275 4275 4425
     46-6
     476 6675 4125 7500 4425
     486 7200 4200 7500 4350
     491 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 7275 4275 15 15 7275 4275 7290 4290
     501 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 7350 4275 15 15 7350 4275 7365 4290
     511 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 7425 4275 15 15 7425 4275 7440 4290
     52-6
     531 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6900 4275 225 150 6900 4275 7125 4425
     54-6
     556 6675 3525 8025 3975
     562 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
     57        1 1 1.00 45.00 90.00
     58         6675 3750 6975 3750
     592 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
     60        1 1 1.00 45.00 90.00
     61         7125 3750 7350 3750
    42622 2 0 1 -1 -1 0 0 -1 0.000 0 0 0 0 0 5
    43          5400 4950 5400 4725 5175 4725 5175 4950 5400 4950
    44 2 2 1 1 -1 -1 0 0 -1 3.000 0 0 0 0 0 5
    45          6525 4950 6300 4950 6300 4725 6525 4725 6525 4950
    46 4 0 -1 0 0 0 10 0.0000 2 105 450 6600 4875 cluster\001
    47 4 0 -1 0 0 0 10 0.0000 2 105 660 5475 4875 processor\001
    48 4 0 -1 0 0 0 10 0.0000 2 105 555 4425 4875 monitor\001
    49 4 0 -1 0 0 0 10 0.0000 2 120 270 3450 4875 task\001
    50 4 0 -1 0 0 0 10 0.0000 2 105 660 2325 4875 coroutine\001
    51 -6
    52 6 3450 1275 3750 1425
    53 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3525 1350 15 15 3525 1350 3540 1365
    54 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3600 1350 15 15 3600 1350 3615 1365
    55 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3675 1350 15 15 3675 1350 3690 1365
    56 -6
    57 6 5550 1275 5850 1425
    58 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 5625 1350 15 15 5625 1350 5640 1365
    59 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 5700 1350 15 15 5700 1350 5715 1365
    60 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 5775 1350 15 15 5775 1350 5790 1365
     63         7800 3975 7800 3525 7350 3525 7350 3975 7800 3975
     642 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
     65        1 1 1.00 45.00 90.00
     66         7800 3750 8025 3750
    6167-6
    62681 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5550 2625 150 150 5550 2625 5700 2625
     
    67731 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 2850 150 150 4425 2850 4575 2850
    68741 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4650 2475 150 150 4650 2475 4800 2475
    69 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3525 3600 150 150 3525 3600 3675 3600
    70751 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3975 3600 150 150 3975 3600 4125 3600
    71761 3 0 1 0 0 0 0 0 0.000 1 0.0000 3525 3600 30 30 3525 3600 3555 3630
     
    74791 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3975 2850 150 150 3975 2850 4125 2850
    75801 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 7200 2775 150 150 7200 2775 7350 2775
    76 1 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4650 1350 225 150 4650 1350 4875 1500
    77 1 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5250 1350 225 150 5250 1350 5475 1500
    78 1 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4050 1350 225 150 4050 1350 4275 1500
     811 3 0 1 0 0 0 0 0 0.000 1 0.0000 2250 4830 30 30 2250 4830 2280 4860
     821 3 0 1 0 0 0 0 0 0.000 1 0.0000 7200 2775 30 30 7200 2775 7230 2805
     831 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3525 3600 150 150 3525 3600 3675 3600
     841 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3875 4800 100 100 3875 4800 3975 4800
     851 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4650 4800 150 75 4650 4800 4800 4875
    79862 2 0 1 -1 -1 0 0 -1 0.000 0 0 0 0 0 5
    8087         2400 4200 2400 3750 1950 3750 1950 4200 2400 4200
     
    1401472 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
    141148        1 1 1.00 45.00 90.00
    142          6675 3975 6975 3975
    143 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
    144         1 1 1.00 45.00 90.00
    145149         7050 2775 6825 2775
    1461502 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    147          6825 2775 6825 3975
    148 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
    149         1 1 1.00 45.00 90.00
    150          7125 3975 7350 3975
    151 2 2 0 1 -1 -1 0 0 -1 0.000 0 0 0 0 0 5
    152          7800 4200 7800 3750 7350 3750 7350 4200 7800 4200
    153 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
    154         1 1 1.00 45.00 90.00
    155          7800 3975 8025 3975
     151         6825 2775 6825 3750
    1561522 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 4
    157153        1 1 1.00 45.00 90.00
    158          7875 3975 7875 2325 7200 2325 7200 2550
     154         7875 3750 7875 2325 7200 2325 7200 2550
     1552 2 0 1 -1 -1 0 0 -1 0.000 0 0 0 0 0 5
     156         5850 4950 5850 4725 5625 4725 5625 4950 5850 4950
     1572 2 1 1 -1 -1 0 0 -1 3.000 0 0 0 0 0 5
     158         6975 4950 6750 4950 6750 4725 6975 4725 6975 4950
    1591594 1 -1 0 0 0 10 0.0000 2 105 720 5550 4425 Processors\001
    1601604 1 -1 0 0 0 10 0.0000 2 120 1005 4200 3225 Blocked Tasks\001
     
    1651654 1 -1 0 0 0 10 0.0000 2 105 990 2175 3525 Discrete-event\001
    1661664 1 -1 0 0 0 10 0.0000 2 135 795 2175 4350 preemption\001
      1674 0 -1 0 0 0 10 0.0000 2 150 1290 2325 4875 generator/coroutine\001
     1684 0 -1 0 0 0 10 0.0000 2 120 270 4050 4875 task\001
     1694 0 -1 0 0 0 10 0.0000 2 105 450 7050 4875 cluster\001
     1704 0 -1 0 0 0 10 0.0000 2 105 660 5925 4875 processor\001
     1714 0 -1 0 0 0 10 0.0000 2 105 555 4875 4875 monitor\001
  • doc/papers/concurrency/mail2

    rb7d6a36 r6a490b2  
    2222Software: Practice and Experience Editorial Office
    2323
     24
     25
     26Date: Tue, 12 Nov 2019 22:25:17 +0000
     27From: Richard Jones <onbehalfof@manuscriptcentral.com>
     28Reply-To: R.E.Jones@kent.ac.uk
     29To: tdelisle@uwaterloo.ca, pabuhr@uwaterloo.ca
     30Subject: Software: Practice and Experience - Decision on Manuscript ID
     31 SPE-19-0219
     32
     3312-Nov-2019
     34
     35Dear Dr Buhr,
     36
     37Many thanks for submitting SPE-19-0219 entitled "Advanced Control-flow and Concurrency in Cforall" to Software: Practice and Experience. The paper has now been reviewed and the comments of the referees are included at the bottom of this letter.
     38
      39The decision on this paper is that it requires substantial further work. The referees have a number of substantial concerns. All the reviewers found the submission very hard to read; two of the reviewers state that it needs very substantial restructuring. These concerns must be addressed before your submission can be considered further.
     40
     41A revised version of your manuscript that takes into account the comments of the referees will be reconsidered for publication.
     42
     43Please note that submitting a revision of your manuscript does not guarantee eventual acceptance, and that your revision will be subject to re-review by the referees before a decision is rendered.
     44
     45You have 90 days from the date of this email to submit your revision. If you are unable to complete the revision within this time, please contact me to request an extension.
     46
     47You can upload your revised manuscript and submit it through your Author Center. Log into https://mc.manuscriptcentral.com/spe  and enter your Author Center, where you will find your manuscript title listed under "Manuscripts with Decisions".
     48
     49When submitting your revised manuscript, you will be able to respond to the comments made by the referee(s) in the space provided.  You can use this space to document any changes you make to the original manuscript.
     50
     51If you feel that your paper could benefit from English language polishing, you may wish to consider having your paper professionally edited for English language by a service such as Wiley's at http://wileyeditingservices.com. Please note that while this service will greatly improve the readability of your paper, it does not guarantee acceptance of your paper by the journal.
     52 
     53Once again, thank you for submitting your manuscript to Software: Practice and Experience and I look forward to receiving your revision.
     54
     55
     56Sincerely,
     57
     58Prof. Richard Jones
     59Software: Practice and Experience
     60R.E.Jones@kent.ac.uk
     61
     62
     63Referee(s)' Comments to Author:
     64
     65Reviewing: 1
     66
     67Comments to the Author
     68This article presents the design and rationale behind the various
     69threading and synchronization mechanisms of C-forall, a new low-level
     70programming language.  This paper is very similar to a companion paper
     71which I have also received: as the papers are similar, so will these
     72reviews be --- in particular any general comments from the other
     73review apply to this paper also.
     74
     75As far as I can tell, the article contains three main ideas: an
     76asynchronous execution / threading model; a model for monitors to
     77provide mutual exclusion; and an implementation.  The first two ideas
     78are drawn together in Table 1: unfortunately this is on page 25 of 30
     79pages of text. Implementation choices and descriptions are scattered
     80throughout the paper - and the sectioning of the paper seems almost
     81arbitrary.
     82
     83The article is about its contributions.  Simply adding feature X to
     84language Y isn't by itself a contribution, (when feature X isn't
     85already a contribution).  The contribution can be in the design: the
     86motivation, the space of potential design options, the particular
     87design chosen and the rationale for that choice, or the resulting
     88performance.  For example: why support two kinds of generators as well
     89as user-level threads?  Why support both low and high level
     90synchronization constructs?  Similarly I would have found the article
     91easier to follow if it was written top down, presenting the design
     92principles, present the space of language features, justify chosen
     93language features (and rationale) and those excluded, and then present
     94implementation, and performance.
     95
     96Then the writing of the article is often hard to follow, to say the
     97least. Two examples: section 3 "stateful functions" - I've some idea
     98what that is (a function with Algol's "own" or C's "static" variables?
     99but in fact the paper has a rather more specific idea than that. The
      100top of page 3 throws a whole lot of definitions at the reader
     101"generator" "coroutine" "stackful" "stackless" "symmetric"
     102"asymmetric" without every stopping to define each one --- but then in
     103footnote "C" takes the time to explain what C's "main" function is?  I
     104cannot imagine a reader of this paper who doesn't know what "main" is
     105in C; especially if they understand the other concepts already
     106presented in the paper.  The start of section 3 then does the same
     107thing: putting up a whole lot of definitions, making distinctions and
     108comparisons, even talking about some runtime details, but the critical
     109definition of a monitor doesn't appear until three pages later, at the
     110start of section 5 on p15, lines 29-34 are a good, clear, description
     111of what a monitor actually is.  That needs to come first, rather than
     112being buried again after two sections of comparisons, discussions,
     113implementations, and options that are ungrounded because they haven't
     114told the reader what they are actually talking about.  First tell the
     115reader what something is, then how they might use it (as programmers:
     116what are the rules and restrictions) and only then start comparison
     117with other things, other approaches, other languages, or
     118implementations.
     119
     120The description of the implementation is similarly lost in the trees
     121without ever really seeing the wood. Figure 19 is crucial here, but
     122it's pretty much at the end of the paper, and comments about
     123implementations are threaded throughout the paper without the context
     124(fig 19) to understand what's going on.   The protocol for performance
     125testing may just about suffice for C (although is N constantly ten
     126million, or does it vary for each benchmark) but such evaluation isn't
     127appropriate for garbage-collected or JITTed languages like Java or Go.
     128
     129other comments working through the paper - these are mostly low level
     130and are certainly not comprehensive.
     131
     132p1 only a subset of C-forall extensions?
     133
     134p1 "has features often associated with object-oriented programming
     135languages, such as constructors, destructors, virtuals and simple
     136inheritance."   There's no need to quibble about this. Once a language
     137has inheritance, it's hard to claim it's not object-oriented.
     138
     139
     140p2 barging? signals-as-hints?
     141
      142p3 start your discussion of generators with a simple example of a
     143C-forall generator.  Fig 1(b) might do: but put it inline instead of
     144the python example - and explain the key rules and restrictions on the
     145construct.  Then don't even start to compare with coroutines until
     146you've presented, described and explained your coroutines...
     147p3 I'd probably leave out the various "C" versions unless there are
     148key points to make you can't make in C-forall. All the alternatives
     149are just confusing.
     150
     151
     152p4 but what's that "with" in Fig 1(B)
     153
     154p5 start with the high level features of C-forall generators...
     155
     156p5 why is the paper explaining networking protocols?
     157
     158p7 lines 1-9 (transforming generator to coroutine - why would I do any
     159of this? Why would I want one instead of the other (do not use "stack"
     160in your answer!)
     161
     162p10 last para "A coroutine must retain its last resumer to suspend
     163back because the resumer is on a different stack. These reverse
     164pointers allow suspend to cycle backwards, "  I've no idea what is
     165going on here?  why should I care?  Shouldn't I just be using threads
     166instead?  why not?
     167
     168p16 for the same reasons - what reasons?
     169
     170p17 if the multiple-monitor entry procedure really is novel, write a
     171paper about that, and only about that.
     172
     173p23 "Loose Object Definitions" - no idea what that means.  in that
     174section: you can't leave out JS-style dynamic properties.  Even in
     175OOLs that (one way or another) allow separate definitions of methods
     176(like Objective-C, Swift, Ruby, C#) at any time a runtime class has a
     177fixed definition.  Quite why the detail about bit mask implementation
     178is here anyway, I've no idea.
     179
     180p25 this cluster isn't a CLU cluster then?
     181
     182* conclusion should conclude the paper, not the related.
     183
     184
     185Reviewing: 2
     186
     187Comments to the Author
     188This paper describes the concurrency features of an extension of C (whose name I will write as "C\/" here, for convenience), including much design-level discussion of the coroutine- and monitor-based features and some microbenchmarks exploring the current implementation's performance. The key message of the latter is that the system's concurrency abstractions are much lighter-weight than the threading found in mainstream C or Java implementations.
     189
     190There is much description of the system and its details, but nothing about (non-artificial) uses of it. Although the microbenchmark data is encouraging, arguably not enough practical experience with the system has been reported here to say much about either its usability advantages or its performance.
     191
     192As such, the main contribution of the paper seem to be to document the existence of the described system and to provide a detailed design rationale and (partial) tutorial. I believe that could be of interest to some readers, so an acceptable manuscript is lurking in here somewhere.
     193
     194Unfortunately, at present the writing style is somewhere between unclear and infuriating. It omits to define terms; it uses needlessly many terms for what are apparently (but not clearly) the same things; it interrupts itself rather than deliver the natural consequent of whatever it has just said; and so on. Section 5 is particularly bad in these regards -- see my detailed comments below. Fairly major additional efforts will be needed to turn the present text into a digestible design-and-tutorial document. I suspect that a shorter paper could do this job better than the present manuscript, which is overwrought in parts.
     195
     196p2: lines 4--9 are a little sloppy. It is not the languages but their popular implementations which "adopt" the 1:1 kernel threading model.
     197
     198line 10: "medium work" -- "medium-sized work"?
     199
     200line 18: "is all sequential to the compiler" -- not true in modern compilers, and in 2004 H-J Boehm wrote a tech report describing exactly why ("Threads cannot be implemented as a library", HP Labs).
     201
     202line 20: "knows the optimization boundaries" -- I found this vague. What's an example?
     203
     204line 31: this paragraph has made a lot of claims. Perhaps forward-reference to the parts of the paper that discuss each one.
     205
     206line 33: "so the reader can judge if" -- this reads rather passive-aggressively. Perhaps better: "... to support our argument that..."
     207
     208line 41: "a dynamic partitioning mechanism" -- I couldn't tell what this meant
     209
     210p3. Presenting concept of a "stateful function" as a new language feature seems odd. In C, functions often have local state thanks to static local variables (or globals, indeed). Of course, that has several limitations. Can you perhaps present your contributions by enumerating these limitations? See also my suggestion below about a possible framing centred on a strawman.
     211
     212line 2: "an old idea that is new again" -- this is too oblique
     213
     214lines 2--15: I found this to be a word/concept soup. Stacks, closures, generators, stackless stackful, coroutine, symmetric, asymmetric, resume/suspend versus resume/resume... there needs to be a more gradual and structured way to introduce all this, and ideally one that minimises redundancy. Maybe present it as a series of "definitions" each with its own heading, e.g. "A closure is stackless if its local state has statically known fixed size"; "A generator simply means a stackless closure." And so on. Perhaps also strongly introduce the word "activate" as a direct contrast with resume and suspend. These are just a flavour of the sort of changes that might make this paragraph into something readable.
     215
      216Continuing the thought: I found it confusing that by these definitions, a stackful closure is not a stack, even though logically the stack *is* a kind of closure (it is a representation of the current thread's continuation).
     217
     218lines 24--27: without explaining what the boost functor types mean, I don't think the point here comes across.
     219
      220line 34: "semantically coupled" -- I wasn't sure what this meant
     221
      222p4: the point of Figure 1 (C) was not immediately clear. It seems to be showing how one might "compile down" Figure 1 (B). Or is that Figure 1 (A)?
     223
     224It's right that the incidental language features of the system are not front-and-centre, but I'd appreciate some brief glossing of non-C languages features as they appear. Examples are the square bracket notation, the pipe notation and the constructor syntax. These explanations could go in the caption of the figure which first uses them, perhaps. Overall I found the figure captions to be terse, and a missed opportunity to explain clearly what was going on.
     225
     226p5 line 23: "This restriction is removed..." -- give us some up-front summary of your contributions and the elements of the language design that will be talked about, so that this isn't an aside. This will reduce the "twisty passages" feeling that characterises much of the paper.
     227
      228line 40: "a killer asymmetric generator" -- this is stylistically odd, and the sentence about failures doesn't convincingly argue that C\/ will help with them. Have you any experience writing device drivers using C\/? Or any argument that the kinds of failures can be traced to the "stack-ripping" style that one is forced to use without coroutines? Also, a typo on line 41: "device drives". And saying "Windows/Linux" is sloppy... what does the cited paper actually say?
     229
     230p6 lines 13--23: this paragraph is difficult to understand. It seems to be talking about a control-flow pattern roughly equivalent to tail recursion. What is the high-level point, other than that this is possible?
     231
     232line 34: "which they call coroutines" -- a better way to make this point is presumably that the C++20 proposal only provides a specialised kind of coroutine, namely generators, despite its use of the more general word.
     233
     234line 47: "... due to dynamic stack allocation, execution..." -- this sentence doesn't scan. I suggest adding "and for" in the relevant places where currently there are only commas.
     235
     236p8 / Figure 5 (B) -- the GNU C extension of unary "&&" needs to be explained. The whole figure needs a better explanation, in fact.
     237
     238p9, lines 1--10: I wasn't sure this stepping-through really added much value. What are the truly important points to note about this code?
     239
     240p10: similarly, lines 3--27 again are somewhere between tedious and confusing. I'm sure the motivation and details of "starter semantics" can both be stated much more pithily.
     241
     242line 32: "a self-resume does not overwrite the last resumer" -- is this a hack or a defensible principled decision?
     243
     244p11: "a common source of errors" -- among beginners or among production code? Presumably the former.
     245
     246line 23: "with builtin and library" -- not sure what this means
     247
     248lines 31--36: these can be much briefer. The only important point here seems to be that coroutines cannot be copied.
     249
     250p12: line 1: what is a "task"? Does it matter?
     251
     252line 7: calling it "heap stack" seems to be a recipe for confusion. "Stack-and-heap" might be better, and contrast with "stack-and-VLS" perhaps. When "VLS" is glossed, suggest actually expanding its initials: say "length" not "size".
     253
     254line 21: are you saying "cooperative threading" is the same as "non-preemptive scheduling", or that one is a special case (kind) of the other? Both are defensible, but be clear.
     255
     256line 27: "mutual exclusion and synchronization" -- the former is a kind of the latter, so I suggest "and other forms of synchronization".
     257
     258line 30: "can either be a stackless or stackful" -- stray "a", but also, this seems to be switching from generic/background terminology to C\/-specific terminology.
     259
     260An expositional idea occurs: start the paper with a strawman naive/limited realisation of coroutines -- say, Simon Tatham's popular "Coroutines in C" web page -- and identify point by point what the limitations are and how C\/ overcomes them. Currently the presentation is often flat (lacking motivating contrasts) and backwards (stating solutions before problems). The foregoing approach might fix both of these.
     261
     262page 13: line 23: it seems a distraction to mention the Python feature here.
     263
     264p14 line 5: it seems odd to describe these as "stateless" just because they lack shared mutable state. It means the code itself is even more stateful. Maybe the "stack ripping" argument could usefully be given here.
     265
     266line 16: "too restrictive" -- would be good to have a reference to justify this, or at least give a sense of what the state-of-the-art performance in transactional memory systems is (both software and hardware)
     267
     268line 22: "simulate monitors" -- what about just *implementing* monitors? isn't that what these systems do? or is the point more about refining them somehow into something more specialised?
     269
     270p15: sections 4.1 and 4.2 seem adrift and misplaced. Split them into basic parts (which go earlier) and more advanced parts (e.g. barging, which can be explained later).
     271
     272line 31: "acquire/release" -- misses an opportunity to contrast the monitor's "enter/exit" abstraction with the less structured acquire/release of locks.
     273
     274p16 line 12: the "implicit" versus "explicit" point is unclear. Is it perhaps about the contract between an opt-in *discipline* and a language-enforced *guarantee*?
     275
     276line 28: no need to spend ages dithering about which one is default and which one is the explicit qualifier. Tell us what you decided, briefly justify it, and move on.
     277
     278p17: Figure 11: since the main point seems to be to highlight bulk acquire, include a comment which identifies the line where this is happening.
     279
     280line 2: "impossible to statically..." -- or dynamically. Doing it dynamically would be perfectly acceptable (locking is a dynamic operation after all)
     281
     282"guarantees acquisition order is consistent" -- assuming it's done in a single bulk acquire.
     283
     284p18: section 5.3: the text here is a mess. The explanations of "internal" versus "external" scheduling are unclear, and "signals as hints" is not explained. "... can cause thread starvation" -- means including a while loop, or not doing so? "There are three signalling mechanisms.." but the text does not follow that by telling us what they are. My own scribbled attempt at unpicking the internal/external thing: "threads already in the monitor, albeit waiting, have priority over those trying to enter".
     285
     286p19: line 3: "empty condition" -- explain that condition variables don't store anything. So being "empty" means that the queue of waiting threads (threads waiting to be signalled that the condition has become true) is empty.
     287
     288line 6: "... can be transformed into external scheduling..." -- OK, but give some motivation.
     289
     290p20: line 6: "mechnaism"
     291
     292lines 16--20: this is dense and can probably only be made clear with an example
     293
     294p21 line 21: clarify that nested monitor deadlock was describe earlier (in 5.2). (Is the repetition necessary?)
     295
     296line 27: "locks, and by extension monitors" -- this is true but the "by extension" argument is faulty. It is perfectly possible to use locks as a primitive and build a compositional mechanism out of them, e.g. transactions.
     297
     298p22 line 2: should say "restructured"
     299
     300line 33: "Implementing a fast subset check..." -- make clear that the following section explains how to do this. Restructuring the sections themselves could do this, or noting in the text.
     301
     302p23: line 3: "dynamic member adding, eg, JavaScript" -- needs to say "as permitted in JavaScript", and "dynamically adding members" is stylistically better
     303
     304p23: line 18: "urgent stack" -- back-reference to where this was explained before
     305
     306p24 line 7: I did not understand what was more "direct" about "direct communication". Also, what is a "passive monitor" -- just a monitor, given that monitors are passive by design?
     307
     308line 14 / section 5.9: this table was useful and it (or something like it) could be used much earlier on to set the structure of the rest of the paper. The explanation at present is too brief, e.g. I did not really understand the point about cases 7 and 8.
     309
      310p25 line 2: instead of casually dropping in a terse explanation for the newly introduced term "virtual processor", introduce it properly. Presumably the point is to give a less ambiguous meaning to "thread" by reserving it only for C\/'s green threads.
     311
     312Table 1: what does "No / Yes" mean?
     313
     314p26 line 15: "transforms user threads into fibres" -- a reference is needed to explain what "fibres" means... guessing it's in the sense of Adya et al.
     315
     316line 20: "Microsoft runtime" -- means Windows?
     317
     318lines 21--26: don't say "interrupt" to mean "signal", especially not without clear introduction. You can use "POSIX signal" to disambiguate from condition variables' "signal".
     319
     320p27 line 3: "frequency is usually long" -- that's a "time period" or "interval", not a frequency
     321
     322line 5: the lengthy quotation is not really necessary; just paraphrase the first sentence and move on.
     323
     324line 20: "to verify the implementation" -- I don't think that means what is intended
     325
     326Tables in section 7 -- too many significant figures. How many overall runs are described? What is N in each case?
     327
     328p29 line 2: "to eliminate this cost" -- arguably confusing since nowadays on commodity CPUs most of the benefits of inlining are not to do with call overheads, but from later optimizations enabled as a consequence of the inlining
     329
     330line 41: "a hierarchy" -- are they a hierarchy? If so, this could be explained earlier. Also, to say these make up "an integrated set... of control-flow features" verges on the tautologous.
     331
     332p30 line 15: "a common case being web servers and XaaS" -- that's two cases
     333
     334
     335Reviewing: 3
     336
     337Comments to the Author
     338# Cforall review
     339
     340Overall, I quite enjoyed reading the paper. Cforall has some very interesting ideas. I did have some suggestions that I think would be helpful before final publication. I also left notes on various parts of the paper that I find confusing when reading, in hopes that it may be useful to you.
     341
     342## Summary
     343
      344* Expand on the motivations for including both generators and coroutines, vs trying to build one atop the other
      345* Expand on the motivations for having both symmetric and asymmetric coroutines
     346* Comparison to async-await model adopted by other languages
     347    * C#, JS
     348    * Rust and its async/await model
     349* Consider performance comparisons against node.js and Rust frameworks
     350* Discuss performance of monitors vs finer-grained memory models and atomic operations found in other languages
     351* Why both internal/external scheduling for synchronization?
     352
     353## Generator/coroutines
     354
     355In general, this section was clear, but I thought it would be useful to provide a somewhat deeper look into why Cforall opted for the particular combination of features that it offers. I see three main differences from other languages:
     356
     357* Generators are not exposed as a "function" that returns a generator object, but rather as a kind of struct, with communication happening via mutable state instead of "return values". That is, the generator must be manually resumed and (if I understood) it is expected to store values that can then later be read (perhaps via methods), instead of having a `yield <Expr>` statement that yields up a value explicitly.
     358* Both "symmetric" and "asymmetric" generators are supported, instead of only asymmetric.
     359* Coroutines (multi-frame generators) are an explicit mechanism.
     360
     361In most other languages, coroutines are rather built by layering single-frame generators atop one another (e.g., using a mechanism like async-await), and symmetric coroutines are basically not supported. I'd like to see a bit more justification for Cforall including all the above mechanisms -- it seemed like symmetric coroutines were a useful building block for some of the user-space threading and custom scheduler mechanisms that were briefly mentioned later in the paper.
     362
     363In the discussion of coroutines, I would have expected a bit more of a comparison to the async-await mechanism offered in other languages. Certainly the semantics of async-await in JavaScript implies significantly more overhead (because each async fn is a distinct heap object). [Rust's approach avoids this overhead][zc], however, and might be worthy of a comparison (see the Performance section).
     364
     365## Locks and threading
     366
     367### Comparison to atomics overlooks performance
     368
     369There are several sections in the paper that compare against atomics -- for example, on page 15, the paper shows a simple monitor that encapsulates an integer and compares that to C++ atomics. Later, the paper compares the simplicity of monitors against the `volatile` quantifier from Java. The conclusion in section 8 also revisits this point.
     370
     371While I agree that monitors are simpler, they are obviously also significantly different from a performance perspective -- the paper doesn't seem to address this at all. It's plausible that (e.g.) the `Aint` monitor type described in the paper can be compiled and mapped to the specialized instructions offered by hardware, but I didn't see any mention of how this would be done. There is also no mention of the more nuanced memory ordering relations offered by C++11 and how one might achieve similar performance characteristics in Cforall (perhaps the answer is that one simply doesn't need to; I think that's defensible, but worth stating explicitly).
     372
     373### Justification for external scheduling feels lacking
     374
     375Cforall includes both internal and external scheduling; I found the explanation for the external scheduling mechanism to be lacking in justification. Why include both mechanisms when most languages seem to make do with only internal scheduling? It would be useful to show some scenarios where external scheduling is truly more powerful.
     376
     377I would have liked to see some more discussion of external scheduling and how it  interacts with software engineering best practices. It seems somewhat similar to AOP in certain regards. It seems to add a bit of "extra semantics" to monitor methods, in that any method may now also become a kind of synchronization point. The "open-ended" nature of this feels like it could easily lead to subtle bugs, particularly when code refactoring occurs (which may e.g. split an existing method into two). This seems particularly true if external scheduling can occur across compilation units -- the paper suggested that this is true, but I wasn't entirely clear.
     378
     379I would have also appreciated a few more details on how external scheduling is implemented. It seems to me that there must be some sort of "hooks" on mutex methods so that they can detect whether some other function is waiting on them and awaken those blocked threads. I'm not sure how such hooks are inserted, particularly across compilation units. The material in Section 5.6 didn't quite clarify the matter for me. For example, it left me somewhat confused about whether the `f` and `g` functions declared were meant to be local to a translation unit, or shared with other unit.
     380
     381### Presentation of monitors is somewhat confusing
     382
     383I found myself confused fairly often in the section on monitors. I'm just going to leave some notes here on places that I got confused in how that it could be useful to you as feedback on writing that might want to be clarified.
     384
     385To start, I did not realize that the `mutex_opt` notation was a keyword, I thought it was a type annotation. I think this could be called out more explicitly.
     386
     387Later, in section 5.2, the paper discusses `nomutex` annotations, which initially threw me, as they had not been introduced (now I realize that this paragraph is there to justify why there is no such keyword). The paragraph might be rearranged to make that clearer, perhaps by leading with the choice that Cforall made.
     388
     389On page 17, the paper states that "acquiring multiple monitors is safe from deadlock", but this could be stated a bit more precisely: acquiring multiple monitors in a bulk-acquire is safe from deadlock (deadlock can still result from nested acquires).
     390
     391On page 18, the paper states that wait states do not have to be enclosed in loops, as there is no concern of barging. This seems true but there are also other reasons to use loops (e.g., if there are multiple reasons to notify on the same condition). Thus the statement initially surprised me, as barging is only one of many reasons that I typically employ loops around waits.
     392
     393I did not understand the diagram in Figure 12 for some time. Initially, I thought that it was generic to all monitors, and I could not understand the state space. It was only later that I realized it was specific to your example. Updating the caption from "Monitor scheduling to "Monitor scheduling in the example from Fig 13" might have helped me quite a bit.
     394
     395I spent quite some time reading the boy/girl dating example (\*) and I admit I found it somewhat confusing. For example, I couldn't tell whether there were supposed to be many "girl" threads executing at once, or if there was only supposed to be one girl and one boy thread executing in a loop. Are the girl/boy threads supposed to invoke the girl/boy methods or vice versa? Surely there is some easier way to set this up? I believe that when reading the paper I convinced myself of how it was supposed to be working, but I'm writing this review some days later, and I find myself confused all over again and not able to easily figure it out.
     396
     397(\*) as an aside, I would consider modifying the example to some other form of matching, like customers and support personnel.
     398
     399## Related work
     400
     401The paper offered a number of comparisons to Go, C#, Scala, and so forth, but seems to have overlooked another recent language, Rust. In many ways, Rust seems to be closest in philosophy to Cforall, so it seems like an odd omission. I already mentioned above that Rust is in the process of shipping [async-await syntax][aa], which is definitely an alternative to the generator/coroutine approach in Cforall (though one with clear pros/cons).
     402
     403## Performance
     404
     405In the performance section in particular, you might consider comparing against some of the Rust web servers and threading systems. For example, actix is top of the [single query TechEmpower Framework benchmarks], and tokio is near the top of the [plainthreading benchmarks][pt] (hyper, the top, is more of an HTTP framework, though it is also written in Rust). It would seem worth trying to compare their "context switching" costs as well -- I believe both actix and tokio have a notion of threads that could be readily compared.
     406
     407Another addition that might be worth considering is to compare against node.js promises, although I think the comparison to process creation is not as clean.
     408
     409That said, I think that the performance comparison is not a big focus of the paper, so it may not be necessary to add anything to it.
     410
     411## Authorship of this review
     412
      413I'm going to sign this review. This review was authored by Nicholas D. Matsakis. In the interest of full disclosure, I'm heavily involved in the Rust project, although I don't think that influenced this review in particular. Feel free to reach out to me for clarifying questions.
     414
     415## Links
     416
     417[aa]: https://blog.rust-lang.org/2019/09/30/Async-await-hits-beta.html
     418[zc]: https://aturon.github.io/blog/2016/08/11/futures/
     419[sq]: https://www.techempower.com/benchmarks/#section=data-r18&hw=ph&test=db
     420[pt]: https://www.techempower.com/benchmarks/#section=data-r18&hw=ph&test=plaintext
     421
     422
     423
     424Subject: Re: manuscript SPE-19-0219
     425To: "Peter A. Buhr" <pabuhr@uwaterloo.ca>
     426From: Richard Jones <R.E.Jones@kent.ac.uk>
     427Date: Tue, 12 Nov 2019 22:43:55 +0000
     428
     429Dear Dr Buhr
     430
     431Your should have received a decision letter on this today. I am sorry that this
     432has taken so long. Unfortunately SP&E receives a lot of submissions and getting
     433reviewers is a perennial problem.
     434
     435Regards
     436Richard
     437
     438Peter A. Buhr wrote on 11/11/2019 13:10:
     439>     26-Jun-2019
     440>     Your manuscript entitled "Advanced Control-flow and Concurrency in Cforall"
     441>     has been received by Software: Practice and Experience. It will be given
     442>     full consideration for publication in the journal.
     443>
     444> Hi, it has been over 4 months since submission of our manuscript SPE-19-0219
     445> with no response.
     446>
     447> Currently, I am refereeing a paper for IEEE that already cites our prior SP&E
      448> paper and the Master's thesis forming the basis of the SP&E paper under
      449> review. Hence our work is apropos and we want to get it disseminated as soon as
     450> possible.
     451>
     452> [3] A. Moss, R. Schluntz, and P. A. Buhr, "Cforall: Adding modern programming
     453>      language features to C," Software - Practice and Experience, vol. 48,
     454>      no. 12, pp. 2111-2146, 2018.
     455>
     456> [4] T. Delisle, "Concurrency in C for all," Master's thesis, University of
     457>      Waterloo, 2018.  [Online].  Available:
     458>      https://uwspace.uwaterloo.ca/bitstream/handle/10012/12888
     459
     460
     461
     462Date: Mon, 13 Jan 2020 05:33:15 +0000
     463From: Richard Jones <onbehalfof@manuscriptcentral.com>
     464Reply-To: R.E.Jones@kent.ac.uk
     465To: pabuhr@uwaterloo.ca
     466Subject: Revision reminder - SPE-19-0219
     467
     46813-Jan-2020
     469Dear Dr Buhr
     470SPE-19-0219
     471
     472This is a reminder that your opportunity to revise and re-submit your
     473manuscript will expire 28 days from now. If you require more time please
     474contact me directly and I may grant an extension to this deadline, otherwise
      475the option to submit a revision online will not be available.
     476
     477I look forward to receiving your revision.
     478
     479Sincerely,
     480
     481Prof. Richard Jones
     482Editor, Software: Practice and Experience
     483https://mc.manuscriptcentral.com/spe
     484
     485
     486
     487Date: Wed, 5 Feb 2020 04:22:18 +0000
     488From: Aaron Thomas <onbehalfof@manuscriptcentral.com>
     489Reply-To: speoffice@wiley.com
     490To: tdelisle@uwaterloo.ca, pabuhr@uwaterloo.ca
     491Subject: SPE-19-0219.R1 successfully submitted
     492
     49304-Feb-2020
     494
     495Dear Dr Buhr,
     496
     497Your manuscript entitled "Advanced Control-flow and Concurrency in Cforall" has
     498been successfully submitted online and is presently being given full
     499consideration for publication in Software: Practice and Experience.
     500
     501Your manuscript number is SPE-19-0219.R1.  Please mention this number in all
     502future correspondence regarding this submission.
     503
     504You can view the status of your manuscript at any time by checking your Author
     505Center after logging into https://mc.manuscriptcentral.com/spe.  If you have
     506difficulty using this site, please click the 'Get Help Now' link at the top
     507right corner of the site.
     508
     509Thank you for submitting your manuscript to Software: Practice and Experience.
     510
     511Sincerely,
     512Software: Practice and Experience Editorial Office
     513
  • doc/proposals/vtable.md

    rb7d6a36 r6a490b2  
    237237default is provided or not, the second syntax can be used to pick a
    238238parameter on instantiation.
     239
     240### Extension: Object Access
     241This requires that the resolution scope (see below) is at the type level or
      242has explicit points with names. These are the tables and table names used
     243here.
     244
     245The system already knows where to find the virtual table and the object. If
     246the tables have particular identities, or on the user side names, then it is
     247meaningful to check if a binding virtual table is the same* as another. The
      248main use of this is that virtual table declarations also give the type they bind,
      249and if a binding table matches a known table then the underlying object in the
     250trait object must be of that type.
     251
      252* By identity; comparison by value would work and in some senses be more flexible,
      253  but it would be slower and referring to further away functions would be harder.
     254
     255This gives one of the main new features of the hierarchical use of virtual
      256tables (see below): the ability to recover the underlying object, or a pointer
      257of the appropriate type to it, which both reflects the implementation and gives a
      258convenient way to encode the boolean/conditional aspect of the operation, which
     259is that a different virtual table might be in use.
     260
      261There are two general ways to represent this: a cast or a field access. The
      262cast is traditional and would definitely fit if a single pointer represents
      263a trait object with the virtual table as part of the object. However, for a
      264double pointer, field access might be more appropriate. By this system though
      265it is not the type that is used as the identifier but the virtual table. If
      266there is one table per type then it becomes equivalent again. Otherwise the
      267table has to be used as the identifier and the type is just a result of that,
      268which seems important for syntax.
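
A rough sketch in plain C of what the double-pointer variant could look like
(illustrative names only; nothing here is settled syntax or implementation):

    /* A trait object is a pair of pointers; recovering the underlying object
     * is a comparison of virtual-table identity, not of type. */
    struct summation_instance;                      /* underlying type */

    struct combiner_vtable {                        /* one table per binding */
        int (*sum)( void * obj );
    };

    struct combiner {                               /* trait object: two pointers */
        const struct combiner_vtable * vtable;
        void * object;
    };

    extern const struct combiner_vtable summation_instance_vtable;

    /* Conditional recovery: the result may be null because a different
     * virtual table might be in use. */
    static struct summation_instance * as_summation( struct combiner c ) {
        if ( c.vtable == &summation_instance_vtable )   /* identity, not value */
            return (struct summation_instance *)c.object;
        return 0;
    }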
    239269
    240270Hierarchy
     
    560590be used in only some of the declarations.
    561591
    562     trait combiner fee = (summation_instance, sum);
     592    trait combiner fee = {summation_instance, sum};
    563593    trait combiner foe = summation_instance;
    564594
  • doc/theses/thierry_delisle_PhD/.gitignore

    rb7d6a36 r6a490b2  
    88
    99comp_II/build/
     10comp_II/img/*.fig.bak
    1011comp_II/comp_II.pdf
    1112comp_II/comp_II.ps
  • doc/theses/thierry_delisle_PhD/comp_II/Makefile

    rb7d6a36 r6a490b2  
    22
    33Build = build
    4 Figures = figures
     4Figures = img
    55Macros = ../../../LaTeXmacros
    66TeXLIB = .:${Macros}:${Build}:../../../bibliography:
     
    1818
    1919FIGURES = ${addsuffix .tex, \
     20        base \
     21        empty \
     22        emptybit \
     23        emptytree \
     24        emptytls \
     25        resize \
     26        system \
    2027}
    2128
     
    7077        mkdir -p ${Build}
    7178
    72 %.tex : %.fig ${Build}
     79%.tex : img/%.fig ${Build}
    7380        fig2dev -L eepic $< > ${Build}/$@
    7481
    75 %.ps : %.fig | ${Build}
     82%.ps : img/%.fig | ${Build}
    7683        fig2dev -L ps $< > ${Build}/$@
    7784
    78 %.pstex : %.fig | ${Build}
     85%.pstex : img/%.fig | ${Build}
    7986        fig2dev -L pstex $< > ${Build}/$@
    8087        fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t
  • doc/theses/thierry_delisle_PhD/comp_II/comp_II.tex

    rb7d6a36 r6a490b2  
    1 \documentclass[11pt,fullpage]{article}
     1\documentclass[11pt]{article}
     2\usepackage{fullpage}
    23\usepackage[T1]{fontenc}
    34\usepackage[utf8]{inputenc}
    4 \usepackage{listings}           % for code listings
    55\usepackage{xspace}
    66\usepackage{xcolor}
    77\usepackage{graphicx}
    8 \usepackage[hidelinks]{hyperref}
     8\usepackage{epic,eepic}
     9\usepackage{listings}                   % for code listings
    910\usepackage{glossaries}
    1011\usepackage{textcomp}
    11 \usepackage{geometry}
    12 
    1312% cfa macros used in the document
    1413\input{common}
     14
     15\setlist{topsep=6pt,parsep=0pt}         % global reduce spacing between points
     16\newcommand{\uC}{$\mu$\CC}
     17\usepackage[hidelinks]{hyperref}
     18\setlength{\abovecaptionskip}{5pt plus 3pt minus 2pt}
     19\lstMakeShortInline$%                   % single-character for \lstinline
     20%\usepackage[margin=1in]{geometry}
     21%\usepackage{float}
     22
    1523\input{glossary}
    1624
     
    2432
    2533\author{
    26         \huge Thierry Delisle \\
    27         \Large \vspace*{0.1in} \texttt{tdelisle@uwaterloo.ca} \\
     34        \huge Thierry Delisle \vspace*{5pt} \\
     35        \Large \texttt{tdelisle@uwaterloo.ca} \vspace*{5pt} \\
    2836        \Large Cheriton School of Computer Science \\
    2937        \Large University of Waterloo
     
    3947
    4048\newcommand{\cit}{\textsuperscript{[Citation Needed]}\xspace}
    41 \newcommand{\TODO}{~\newline{\large\bf\color{red} TODO :}\xspace}
     49\newcommand{\TODO}{{\large\bf\color{red} TODO: }\xspace}
    4250
    4351% ===============================================================================
     
    5159\section{Introduction}
    5260\subsection{\CFA and the \CFA concurrency package}
    53 \CFA\cit is a modern, polymorphic, non-object-oriented, backwards-compatible extension of the C programming language. It aims to add high productivity features while maintaning the predictible performance of C. As such concurrency in \CFA\cit aims to offer simple and safe high-level tools while still allowing performant code. Concurrent code is written in the syncrhonous programming paradigm but uses \glspl{uthrd} in order to achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programing. As such the \CFA scheduler is a user-level scheduler that maps \glspl{uthrd} onto \glspl{kthrd}.
    54 
    55 The goal of this research is to produce a scheduler that is simple to use and offers acceptable performance in all cases. Here simplicity does not refer to the API but to how much scheduling concerns programmers need to take into account when using the \CFA concurrency package. Therefore, the main goal of this proposal is as follows :
     61\CFA\cite{Moss18} is a modern, polymorphic, non-object-oriented, concurrent, backwards-compatible extension of the C programming language.
     62It aims to add high-productivity features while maintaining the predictable performance of C.
     63As such, concurrency in \CFA\cite{Delisle19} aims to offer simple and safe high-level tools while still allowing performant code.
      64\CFA concurrent code is written in the synchronous programming paradigm but uses \glspl{uthrd} in order to achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programming.
     65As such, the \CFA \newterm{scheduler} is a preemptive user-level scheduler that maps \glspl{uthrd} onto \glspl{kthrd}.
     66
     67\newterm{Scheduling} occurs when execution switches from one thread to another, where the second thread is implicitly chosen by the scheduler.
     68This scheduling is an indirect handoff, as opposed to generators and coroutines which explicitly switch to the next generator and coroutine respectively.
     69The cost of switching between two threads for an indirect handoff has two components:
     70\begin{enumerate}
     71\item
     72the cost of actually context-switching, \ie changing the relevant registers to move execution from one thread to the other,
     73\item
     74and the cost of scheduling, \ie deciding which thread to run next among all the threads ready to run.
     75\end{enumerate}
     76The first cost is generally constant and fixed\footnote{Affecting the constant context-switch cost is whether it is done in one step, after the scheduling, or in two steps, context-switching to a fixed third-thread before scheduling.}, while the scheduling cost can vary based on the system state.
      77Adding multiple \glspl{kthrd} does not fundamentally change the scheduler semantics or requirements; it simply adds new correctness requirements, \ie \newterm{linearizability}\footnote{Meaning however fast the CPU threads run, there is an equivalent sequential order that gives the same result.}, and a new dimension to performance: scalability, where scheduling cost now also depends on contention.
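
For illustration only, the following plain-C sketch separates the two costs above; the names and the single-lock ready queue are hypothetical, not the \CFA runtime:

    /* Illustrative sketch: the scheduling decision, ready_queue_pop(), varies
     * with system state and contention, while the context switch itself is a
     * fixed register/stack swap (stubbed out here). */
    #include <pthread.h>
    #include <stddef.h>

    typedef struct thread_desc {
        struct thread_desc * next;                  /* intrusive ready-queue link */
    } thread_desc;

    static thread_desc * ready_head;                /* shared ready queue */
    static pthread_mutex_t ready_lock = PTHREAD_MUTEX_INITIALIZER;

    static thread_desc * ready_queue_pop( void ) {  /* scheduling cost */
        pthread_mutex_lock( &ready_lock );
        thread_desc * t = ready_head;
        if ( t != NULL ) ready_head = t->next;
        pthread_mutex_unlock( &ready_lock );
        return t;
    }

    static void context_switch( thread_desc * from, thread_desc * to ) {
        (void)from; (void)to;                       /* a real version swaps registers and stacks */
    }

    void schedule( thread_desc * current ) {
        thread_desc * next = ready_queue_pop();     /* variable, contention-dependent */
        if ( next != NULL ) context_switch( current, next );  /* roughly constant */
    }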
     78
     79The more threads switch, the more the administration cost of scheduling becomes noticeable.
     80It is therefore important to build a scheduler with the lowest possible cost and latency.
     81Another important consideration is \newterm{fairness}.
     82In principle, scheduling should give the illusion of perfect fairness, where all threads ready to run are running \emph{simultaneously}.
     83While the illusion of simultaneity is easier to reason about, it can break down if the scheduler allows too much unfairness.
     84Therefore, the scheduler should offer as much fairness as needed to guarantee eventual progress, but use unfairness to help performance.
      86In practice, threads must wait in turn but there can be advantages to unfair scheduling, similar to the express cash-register at a grocery store.
     86
     87The goal of this research is to produce a scheduler that is simple for programmers to understand and offers good performance.
      88Here understandability does not refer to the API but to how many scheduling concerns programmers need to take into account when writing a \CFA concurrent package.
      89Therefore, the main goal of this proposal is:
    5690\begin{quote}
    57 The \CFA scheduler should be \emph{viable} for any workload.
     91The \CFA scheduler should be \emph{viable} for \emph{any} workload.
    5892\end{quote}
    5993
    60 This objective includes producing a scheduling strategy with minimal fairness guarantees, creating an abstraction layer over the operating system to handle kernel-threads spinning unnecessarily and hide blocking I/O operations and, writing sufficient library tools to allow developpers to properly use the scheduler.
    61 
    62 % ===============================================================================
    63 % ===============================================================================
    64 
    65 \section{Scheduling for \CFA}
    66 While the \CFA concurrency package doesn't have any particular scheduling needs beyond those of any concurrency package which uses \glspl{uthrd}, it is important that the default \CFA Scheduler be viable in general. Indeed, since the \CFA Scheduler does not target any specific workloads, it is unrealistic to demand that it use the best scheduling strategy in all cases. However, it should offer a viable ``out of the box'' solution for most scheduling problems so that programmers can quickly write performant concurrent without needed to think about which scheduling strategy is more appropriate for their workload. Indeed, only programmers with exceptionnaly high performance requirements should need to write their own scheduler. More specifically, two broad types of schedulering strategies should be avoided in order to avoid penalizing certain types of workloads : feedback-based and priority schedulers.
     94For a general purpose scheduler, it is impossible to produce an optimal algorithm as it would require knowledge of the future behaviour of threads.
      95As such, scheduling performance is generally either defined by the best-case scenario, \ie a workload to which the scheduler is tailored, or the worst-case scenario, \ie the scheduler behaves no worse than \emph{X}.
     96For this proposal, the performance is evaluated using the second approach to allow \CFA programmers to rely on scheduling performance.
     97Because there is no optimal scheduler, ultimately \CFA may allow programmers to write their own scheduler; but that is not the subject of this proposal, which considers only the default scheduler.
      98As such, only programmers with exceptionally high performance requirements should need to write their own scheduler and replace the one proposed here.
     99
      100Achieving the \CFA scheduling goal includes:
     101\begin{enumerate}
     102\item
     103producing a scheduling strategy with sufficient fairness guarantees,
     104\item
     105creating an abstraction layer over the operating system to handle kernel-threads spinning unnecessarily,
     106\item
     107scheduling blocking I/O operations,
     108\item
     109and writing sufficient library tools to allow developers to indirectly use the scheduler, either through tuning knobs or replacing the default scheduler.
     110\end{enumerate}
     111
     112% ===============================================================================
     113% ===============================================================================
     114
     115\section{\CFA Scheduling}
     116To schedule user-level threads across all workloads, the scheduler has a number of requirements:
     117
     118\paragraph{Correctness} As with any other concurrent data structure or algorithm, the correctness requirement is paramount.
     119The scheduler cannot allow threads to be dropped from the ready queue, \ie scheduled but never run, or be executed multiple times when only being scheduled once.
     120Since \CFA concurrency has no spurious wakeup, this definition of correctness also means the scheduler should have no spurious wakeup.
     121The \CFA scheduler must be correct.
     122
     123\paragraph{Performance} The performance of a scheduler can generally be measured in terms of scheduling cost, scalability and latency.
     124\newterm{Scheduling cost} is the cost to switch from one thread to another, as mentioned above.
     125For simple applications, where a single kernel thread does most of the scheduling, it is generally the dominating cost.
      126\newterm{Scalability} is the cost of adding multiple kernel threads: context switching takes longer because multiple threads contend for shared resources, \eg the ready queue.
     127Finally, \newterm{tail latency} is service delay and relates to thread fairness.
     128Specifically, latency measures how long a thread waits to run once scheduled and is evaluated in the worst case.
     129The \CFA scheduler should offer good performance for all three metrics.
     130
      131\paragraph{Fairness} Like performance, this requirement has several aspects: eventual progress, predictability and performance reliability.
      132\newterm{Eventual progress} guarantees every scheduled thread is eventually run, \ie preventing starvation.
      133As a hard requirement, the \CFA scheduler must guarantee eventual progress, otherwise the above-mentioned illusion of simultaneous execution is broken and the scheduler becomes much more complex to reason about.
     134\newterm{Predictability} and \newterm{reliability} means similar workloads achieve similar performance and programmer execution intuition is respected.
      135For example, a thread that yields aggressively should not run more often than other tasks.
      136While this is intuitive, it does not hold true for many work-stealing or feedback-based schedulers.
     137The \CFA scheduler must guarantee eventual progress and should be predictable and offer reliable performance.
     138
     139\paragraph{Efficiency} Finally, efficient usage of CPU resources is also an important requirement and is discussed in depth towards the end of the proposal.
      140\newterm{Efficiency} means avoiding the use of CPU cycles when there are no threads to run and, conversely, using all available CPUs when the workload can benefit from them.
     141Balancing these two states is where the complexity lies.
     142The \CFA scheduler should be efficient with respect to the underlying (shared) computer.
     143
      144\bigskip To achieve these requirements, I reject two broad types of scheduling strategies: feedback-based and priority schedulers.
    67145
    68146\subsection{Feedback-Based Schedulers}
    69 Many operating systems use schedulers based on feadback loops in some form, they measure how much CPU a particular thread has used\footnote{Different metrics can be used to here but it is not relevant to the discussion.} and schedule threads based on this metric. These strategies are sensible for operating systems but rely on two assumptions on the workload :
    70 
    71 \begin{enumerate}
    72         \item Threads live long enough to be scheduled many times.
    73         \item Cooperation among all threads is not simply infeasible, it is a security risk.
    74 \end{enumerate}
    75 
    76 While these two assumptions generally hold for operating systems, they may not for \CFA programs. In fact, \CFA uses \glspl{uthrd} which have the explicit goal of reducing the cost of threading primitives to allow many smaller threads. This can naturally lead to have threads with much shorter lifetime and only being scheduled a few times. Scheduling strategies based on feadback loops cannot be effective in these cases because they will not have the opportunity to measure the metrics that underlay the algorithm. Note that the problem of feadback loop convergence (reacting too slowly to scheduling events) is not specific to short lived threads but can also occur with threads that show drastic changes in scheduling event, e.g., threads running for long periods of time and then suddenly blocking and unblocking quickly and repeatedly.
    77 
    78 In the context of operating systems, these concerns can be overshadowed by a more pressing concern : security. When multiple users are involved, it is possible that some users are malevolent and try to exploit the scheduling strategy in order to achieve some nefarious objective. Security concerns mean that more precise and robust fairness metrics must be used. In the case of the \CFA scheduler, every thread runs in the same user-space and are controlled from the same user. It is then possible to safely ignore the possibility that threads are malevolent and assume that all threads will ignore or cooperate with each other. This allows for a much simpler fairness metric and in this proposal ``fairness'' will be considered as equal opportunities to run once scheduled.
    79 
    80 Since feadback is not necessarily feasible within the lifetime of all threads and a simple fairness metric can be used, the scheduling strategy proposed for the \CFA runtime does not user per-threads feedback. Feedback loops in general are not rejected for secondary concerns like idle sleep, but no feedback loop is used to decide which thread to run next.
      147Many operating systems use schedulers based on feedback in some form, \eg measuring how much CPU a particular thread has used\footnote{Different metrics can be measured but it is not relevant to the discussion.} and scheduling threads based on this metric.
     148These strategies are sensible for operating systems but rely on two assumptions for the workload:
     149
     150\begin{enumerate}
      151        \item Threads live long enough for useful feedback information to be gathered.
     152        \item Threads belong to multiple users so fairness across threads is insufficient.
     153\end{enumerate}
     154
     155While these two assumptions generally hold for operating systems, they may not for user-level threading.
     156Since \CFA has the explicit goal of allowing many smaller threads, this can naturally lead to threads with much shorter lifetimes that are only scheduled a few times.
     157Scheduling strategies based on feedback cannot be effective in these cases because there is no opportunity to measure the metrics that underlie the algorithm.
     158Note, the problem of \newterm{feedback convergence} (reacting too slowly to scheduling events) is not specific to short lived threads but can also occur with threads that show drastic changes in scheduling, \eg threads running for long periods of time and then suddenly blocking and unblocking quickly and repeatedly.
     159
      160In the context of operating systems, these concerns can be overshadowed by a more pressing concern: security.
     161When multiple users are involved, it is possible some users are malevolent and try to exploit the scheduling strategy to achieve some nefarious objective.
     162Security concerns mean more precise and robust fairness metrics must be used to guarantee fairness across processes created by users as well as threads created within a process.
     163In the case of the \CFA scheduler, every thread runs in the same user space and is controlled by the same user.
     164Fairness across users is therefore a given and it is then possible to safely ignore the possibility that threads are malevolent.
     165This approach allows for a much simpler fairness metric and in this proposal \emph{fairness} is defined as: when multiple threads are cycling through the system, the total ordering of threads being scheduled, \ie pushed onto the ready-queue, should not differ much from the total ordering of threads being executed, \ie popped from the ready-queue.
     166
      167Since feedback is not necessarily feasible within the lifetime of all threads and a simple fairness metric can be used, the scheduling strategy proposed for the \CFA runtime does not use per-thread feedback.
     168Feedback in general is not rejected for secondary concerns like idle sleep for kernel threads, but no feedback is used to decide which thread to run next.
    81169
    82170\subsection{Priority Schedulers}
    83 Another broad category of schedulers are priority schedulers. In these scheduling strategies threads have priorities and the runtime schedules the threads with the highest priority before scheduling other threads. Threads with equal priority are scheduled using a secondary strategy, often something simple like round-robin or FIFO. These priority mean that, as long as there is a thread with a higher priority that desires to run, a thread with a lower priority will not run. This possible starving of threads can dramatically increase programming complexity since starving threads and priority inversion (prioritising a lower priority thread) can both lead to serious problems, leaving programmers between a rock and a hard place.
    84 
    85 An important observation to make is that threads do not need to have explicit priorities for problems to be possible. Indeed, any system with multiple ready-queues and attempts to exhaust one queue before accessing the other queues, could encounter starvation problems. A popular scheduling strategy that suffers from implicit priorities is work-stealing. Work-stealing is generally presented as follows :
    86 
      171Another broad category of schedulers is priority schedulers.
     172In these scheduling strategies, threads have priorities and the runtime schedules the threads with the highest priority before scheduling other threads.
     173Threads with equal priority are scheduled using a secondary strategy, often something simple like round-robin or FIFO.
     174A consequence of priority is that, as long as there is a thread with a higher priority that desires to run, a thread with a lower priority does not run.
     175This possible starving of threads can dramatically increase programming complexity since starving threads and priority inversion (prioritizing a lower priority thread) can both lead to serious problems.
     176
     177An important observation is that threads do not need to have explicit priorities for problems to occur.
      178Indeed, any system with multiple ready-queues that attempts to exhaust one queue before accessing the other queues essentially provides implicit priorities, which can lead to starvation problems.
     179For example, a popular scheduling strategy that suffers from implicit priorities is work stealing.
     180\newterm{Work stealing} is generally presented as follows:
     181\begin{enumerate}
     182        \item Each processor has a list of ready threads.
     183        \item Each processor runs threads from its ready queue first.
     184        \item If a processor's ready queue is empty, attempt to run threads from some other processor's ready queue.
     185\end{enumerate}
     186
      187In a loaded system\footnote{A \newterm{loaded system} is a system where threads are being run at the same rate they are scheduled.}, if a thread does not yield, block, or get preempted for an extended period of time, threads on the same processor's list starve unless some other processor exhausts its own list and steals them.
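
To make the implicit priority concrete, the following C sketch (hypothetical code, with all synchronization elided and helper names invented for illustration) shows a work-stealing pop: the local list is always exhausted before any other list is examined, so threads on a busy processor's list can wait indefinitely.
\begin{lstlisting}[language=C]
// Hypothetical work-stealing "next thread" selection, synchronization elided.
// Step 2 is only reached when the local list is empty, giving local threads
// implicit priority over threads sitting on other processors' lists.
#include <stddef.h>
struct thread;                                            // opaque user-thread handle
struct list;                                              // opaque per-processor ready list
extern struct thread * list_pop_front( struct list * );   // hypothetical helpers
extern struct thread * list_pop_back ( struct list * );

struct thread * next_thread( unsigned self, struct list * ready[], unsigned nprocs ) {
        struct thread * t = list_pop_front( ready[self] );   // 1. run local threads first
        if( t ) return t;
        for( unsigned v = 0; v < nprocs; v++ ) {              // 2. local list empty: try to steal
                if( v == self ) continue;
                t = list_pop_back( ready[v] );                // steal from the victim's other end
                if( t ) return t;
        }
        return NULL;                                          // nothing to run anywhere
}
\end{lstlisting}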
     188
     189Since priorities can be complex for programmers to incorporate into their execution intuition, the scheduling strategy proposed for the \CFA runtime does not use a strategy with either implicit or explicit thread priorities.
     190
      191\subsection{Schedulers without Feedback or Priorities}
      192This proposal conjectures that it is possible to construct a default scheduler for the \CFA runtime that offers good scalability and a simple fairness guarantee that is easy for programmers to reason about.
     193The simplest fairness guarantee is FIFO ordering, \ie threads scheduled first run first.
     194However, enforcing FIFO ordering generally conflicts with scalability across multiple processors because of the additional synchronization.
     195Thankfully, strict FIFO is not needed for sufficient fairness.
     196Since concurrency is inherently non-deterministic, fairness concerns in scheduling are only a problem if a thread repeatedly runs before another thread can run.
     197Some relaxation is possible because non-determinism means programmers already handle ordering problems to produce correct code and hence rely on weak guarantees, \eg that a specific thread will \emph{eventually} run.
     198Since some reordering does not break correctness, the FIFO fairness guarantee can be significantly relaxed without causing problems.
      199For this proposal, the target guarantee is that the \CFA scheduler provides \emph{probable} FIFO ordering, which allows reordering but makes it improbable that threads are reordered far from their position in the total ordering.
     200
     201The \CFA scheduler fairness is defined as follows:
    87202\begin{itemize}
    88         \item Each processor has a list of threads.
      203        \item Given two threads $X$ and $Y$, the odds that thread $X$ runs $N$ times \emph{after} thread $Y$ is scheduled but \emph{before} it is run decrease exponentially with regard to $N$.
    89204\end{itemize}
    90 \begin{enumerate}
    91         \item Run threads from ``this'' processor's list.
    92         \item If ``this'' processor's list is empty, run threads from some other processor's list.
    93 \end{enumerate}
    94 
    95 In a loaded system\footnote{A loaded system is a system where threads are being run at the same rate they are scheduled}, if a thread does not yield or block for an extended period of time, threads on the same processor list will starve if no other processors can exhaust their list.
    96 
    97 Since priorities can be complex to handle for programmers, the scheduling strategy proposed for the \CFA runtime does not use a strategy with either implicit or explicit thread priorities.
    98 
    99 \subsection{Schedulers without feadback or priorities}
    100 I claim that the ideal default scheduler for the \CFA runtime is a scheduler that offers good scalability and a simple fairness guarantee that is easy for programmers to reason about. The simplest fairness guarantee is to guarantee FIFO ordering, i.e., threads scheduled first will run first. However, enforcing FIFO ordering generally conflicts with scalability across multiple processors because of the additionnal synchronization. Thankfully, strict FIFO is not needed for scheduling. Since concurrency is inherently non-deterministic, fairness concerns in scheduling are only a problem if a thread repeatedly runs before another thread can run\footnote{This is because the non-determinism means that programmers must already handle ordering problems in order to produce correct code and already must rely on weak guarantees, for example that a specific thread will \emph{eventually} run.}. This need for unfairness to persist before problems occur means that the FIFO fairness guarantee can be significantly relaxed without causing problems. For this proposal, the target guarantee is that the \CFA scheduler guarantees \emph{probable} FIFO ordering, which is defined as follows :
    101 \begin{itemize}
    102         \item Given two threads $X$ and $Y$, the odds that thread $X$ runs $N$ times \emph{after} thread $Y$ is scheduled but \emph{before} it is run, decreases exponentially with regards to $N$.
    103 \end{itemize}
    104 
    105 While this is not a strong guarantee, the probability that problems persist for long period of times decreases exponentially, making persisting problems virtually impossible.
    106 
    107 \subsection{Real-Time}
    108 While the objective of this proposed scheduler is similar to the objective of real-time scheduling, this proposal is not a proposal for real-time scheduler and as such makes no attempt to offer either soft or hard guarantees on scheduling delays.
    109 
    110 % ===============================================================================
    111 % ===============================================================================
    112 \section{Proposal}
    113 
    114 \subsection{Ready-Queue}
    115 Using trevor's paper\cit as basis, it is simple to build a relaxed FIFO list that is fast and scalable for loaded or overloaded systems. The described queue uses an array of underlying strictly FIFO queue. Pushing new data is done by selecting one of these underlying queues at random, recording a timestamp for the push and pushing to the selected queue. Popping is done by selecting two queues at random and popping from the queue for which the head has the oldest timestamp. In loaded or overloaded systems, it is higly likely that the queues is far from empty, e.i., several tasks are on each of the underlying queues. This means that selecting a queue at random to pop from is higly likely to yield a queue that is not empty.
    116 
    117 When the ready queue is "more empty", i.e., several of the inner queues are empty, selecting a random queue for popping is less likely to yield a valid selection and more attempts need to be made, resulting in a performance degradation. In cases, with few elements on the ready queue and few processors running, performance can be improved by adding information to help processors find which inner queues are used. Preliminary performance tests indicate that with few processors, a bitmask can be used to identify which inner queues are currently in use. This is especially effective in the single-thread case, where the bitmask will always be up-to-date. Furthermore, modern x86 CPUs have a BMI2 extension which allow using the bitmask with very little overhead over directly accessing the readyqueue offerring decent performance even in cases with many empty inner queues. This technique does not solve the problem completely, it randomly attempts to find a block of 64 queues where at least one is used, instead of attempting to find a used queue. For systems with a large number of cores this does not completely solve the problem, but it is a fixed improvement. The size of the blocks are limited by the maximum size atomic instruction can operate on, therefore atomic instructions on large words would increase the 64 queues per block limit.
    118 
    119 \TODO double check the next sentence
    120 Preliminary result indicate that the bitmask approach with the BMI2 extension can lead to multi-threaded performance that is contention agnostic in the worst case.
    121 This result suggests that the contention penalty and the increase performance for additionnal thread cancel each other exactly. This may indicate that a relatively small reduction in contention may tip the performance into positive scalling even for the worst case. It can be noted that in cases of high-contention, the use of the bitmask to find queues that are not empty is much less reliable. Indeed, if contention on the bitmask is high, it means it probably changes significantly between the moment it is read and the actual operation on the queues it represents. Furthermore, the objective of the bitmask is to avoid probing queues that are empty. Therefore, in cases where the bitmask is highly contented, it may be preferrable to probe queues randomly, either until contention decreases or until a prior prefetch of the bitmask completes. Ideally, the scheduler would be able to observe that the bitmask is highly contented and adjust its behaviour appropriately. However, I am not aware of any mechanism to query whether a cacheline is in cache or to run other instructions until a cacheline is fetch without blocking on the cacheline. As such, an alternative that may have a similar impact would be for each thread to have their own bitmask, which would be updated both after each scheduler action and after a certain number of failed probing. If the bitmask has little contention, the local bitmask will be mostly up-to-date and several threads won't need to contend as much on the global bitmask. If the bitmask has significant contention, then fetching it becomes more expensive and threads may as well probe randomly. This solution claims that probing randomly or against an out-of-date bitmask is equivalent.
    122 
    123 In cases where this is insufficient, another approach is to use a hiearchical data structure. Creating a tree of nodes to reduce contention has been shown to work in similar cases\cit(SNZI: Scalable NonZero Indicators)\footnote{This particular paper seems to be patented in the US. How does that affect \CFA? Can I use it in my work?}. However, this approach may lead to poorer single-threaded performance due to the inherent pointer chasing, as such, it was not considered as the first approach but as a fallback in case the bitmask approach does not satisfy the performance goals.
    124 
    125 Part of this performance relies on contention being low when there are few threads on the readyqueue. However, this can be assumed reliably if the system handles putting idle processors to sleep, which is addressed in section \ref{sleep}.
      205While this is not a bounded guarantee, the probability that unfairness persists for long periods of time decreases exponentially, making persistent unfairness virtually impossible.
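
As a hypothetical illustration of why such a guarantee is plausible (the probability $p$ below is an assumption, not a measured property), suppose each pop operation independently selects a given waiting thread $Y$ with probability at least $p > 0$ once $Y$ is scheduled; then
\[
P(\mbox{$Y$ still not run after $N$ subsequent pops}) \le (1 - p)^{N} ,
\]
which decreases exponentially in $N$. The ready-queue design in Section~\ref{sec:queue} aims to provide such a $p$, so this bound is only a sketch of the intended argument, not a proven property of the scheduler.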
     206
     207% ===============================================================================
     208% ===============================================================================
     209\section{Proposal Details}
     210
     211\subsection{Central Ready Queue} \label{sec:queue}
     212A central ready queue can be built from a FIFO queue, where user threads are pushed onto the queue when they are ready to run, and processors (kernel-threads acting as virtual processors) pop the user threads from the queue and execute them.
     213Alistarh \etal~\cite{alistarh2018relaxed} show it is straightforward to build a relaxed FIFO list that is fast and scalable for loaded or overloaded systems.
     214The described queue uses an array of underlying strictly FIFO queues as shown in Figure~\ref{fig:base}\footnote{For this section, the number of underlying queues is assumed to be constant.
     215Section~\ref{sec:resize} discusses resizing the array.}.
     216Pushing new data is done by selecting one of these underlying queues at random, recording a timestamp for the operation and pushing to the selected queue.
     217Popping is done by selecting two queues at random and popping from the queue with the oldest timestamp.
     218A higher number of underlying queues leads to less contention on each queue and therefore better performance.
     219In a loaded system, it is highly likely the queues are non-empty, \ie several tasks are on each of the underlying queues.
     220This means that selecting a queue at random to pop from is highly likely to yield a queue with available items.
      221In Figure~\ref{fig:base}, ignoring the ellipsis, the chance of getting an empty queue is 2/7 per pick, meaning two random picks yield an item approximately 9 times out of 10.
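
To make the push and pop operations concrete, the following C sketch follows the description above; it is hypothetical code in which the names, the per-lane locking (elided), and the thread representation are illustrative rather than the actual \CFA runtime.
\begin{lstlisting}[language=C]
// Hypothetical sketch of the relaxed-FIFO operations described above;
// per-lane locks and the real user-thread type are deliberately elided.
#include <stdlib.h>
#include <time.h>

struct node { struct node * next; unsigned long long ts; /* user thread elided */ };
struct lane { struct node * head, * tail; };       // strictly FIFO list
enum { NLANES = 64 };                              // assumed constant here; resizing is discussed later
static struct lane lanes[NLANES];

static unsigned long long now( void ) {            // any monotonic clock suffices
        struct timespec t;
        clock_gettime( CLOCK_MONOTONIC, &t );
        return t.tv_sec * 1000000000ull + t.tv_nsec;
}

void push( struct node * n ) {
        struct lane * l = &lanes[ rand() % NLANES ];       // one random lane
        n->ts = now();  n->next = 0;                       // timestamp the operation
        if( l->tail ) l->tail->next = n; else l->head = n;
        l->tail = n;                                       // FIFO append
}

struct node * pop( void ) {                                // assumes the ready queue as a whole
        for( ;; ) {                                        // is non-empty; retries otherwise
                struct lane * a = &lanes[ rand() % NLANES ];   // two random lanes
                struct lane * b = &lanes[ rand() % NLANES ];
                struct lane * l = !a->head ? b                 // empty lanes lose, otherwise
                                : !b->head ? a                 // prefer the oldest head
                                : a->head->ts <= b->head->ts ? a : b;
                struct node * n = l->head;
                if( !n ) continue;                             // both lanes empty: pick again
                l->head = n->next;
                if( !l->head ) l->tail = 0;                    // FIFO remove
                return n;
        }
}
\end{lstlisting}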
     222
     223\begin{figure}
     224        \begin{center}
     225                \input{base}
     226        \end{center}
     227        \caption{Relaxed FIFO list at the base of the scheduler: an array of strictly FIFO lists.
      228The timestamp is in all nodes and array cells.}
     229        \label{fig:base}
     230\end{figure}
     231
     232\begin{figure}
     233        \begin{center}
     234                \input{empty}
     235        \end{center}
     236        \caption{``More empty'' state of the queue: the array contains many empty cells.}
     237        \label{fig:empty}
     238\end{figure}
     239
     240When the ready queue is \emph{more empty}, \ie several of the queues are empty, selecting a random queue for popping is less likely to yield a successful selection and more attempts are needed, resulting in a performance degradation.
      241Figure~\ref{fig:empty} shows an example with fewer elements, where the chance of getting an empty queue is 5/7 per pick, meaning two random picks yield an item only half the time.
     242Since the ready queue is not empty, the pop operation \emph{must} find an element before returning and therefore must retry.
      243Note, the popping kernel thread has no work to do, but its CPU cycles are wasted during the failed pop attempts, cycles that available user threads and other kernel threads could otherwise use.
     244Overall performance is therefore influenced by the contention on the underlying queues and pop performance is influenced by the item density.
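
As a rough estimate (assuming the two picks are independent and ignoring concurrent pushes and pops), if a fraction $e$ of the underlying queues is empty, a two-queue pick fails with probability $e^{2}$ and
\[
E[\mbox{two-queue picks until success}] = \frac{1}{1 - e^{2}} ,
\]
so Figure~\ref{fig:base} ($e = 2/7$) needs about $1.1$ picks on average, while Figure~\ref{fig:empty} ($e = 5/7$) needs about $2$, on top of the work wasted probing the empty queues themselves.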
     245
     246This leads to four performance cases for the centralized ready-queue, as depicted in Table~\ref{tab:perfcases}.
     247The number of processors (many or few) refers to the number of kernel threads \emph{actively} attempting to pop user threads from the queues, not the total number of kernel threads.
     248The number of threads (many or few) refers to the number of user threads ready to be run.
      249Many threads means they significantly outnumber processors and most underlying queues have items; few threads means there are barely more threads than processors and most underlying queues are empty.
     250Cases with fewer threads than processors are discussed in Section~\ref{sec:sleep}.
     251
     252\begin{table}
     253        \begin{center}
     254                \begin{tabular}{|r|l|l|}
     255                        \cline{2-3}
     256                        \multicolumn{1}{r|}{} & \multicolumn{1}{c|}{Many Processors} & \multicolumn{1}{c|}{Few Processors} \\
     257                        \hline
     258                        Many Threads & A: good performance & B: good performance \\
     259                        \hline
     260                        Few Threads  & C: worst performance & D: poor performance \\
     261                        \hline
     262                \end{tabular}
     263        \end{center}
     264        \caption{Expected performance of the relaxed FIFO list in different cases.}
     265        \label{tab:perfcases}
     266\end{table}
     267
     268Performance can be improved in case~D (Table~\ref{tab:perfcases}) by adding information to help processors find which inner queues are used.
     269This addition aims to avoid the cost of retrying the pop operation but does not affect contention on the underlying queues and can incur some management cost for both push and pop operations.
     270The approach used to encode this information can vary in density and be either global or local.
     271\newterm{Density} means the information is either packed in a few cachelines or spread across several cachelines, and \newterm{local information} means each thread uses an independent copy instead of a single global, \ie common, source of information.
     272
     273For example, Figure~\ref{fig:emptybit} shows a dense bitmask to identify which inner queues are currently in use.
     274This approach means processors can often find user threads in constant time, regardless of how many underlying queues are empty.
     275Furthermore, modern x86 CPUs have extended bit manipulation instructions (BMI2) that allow using the bitmask with very little overhead compared to the randomized selection approach for a filled ready queue, offering good performance even in cases with many empty inner queues.
     276However, this technique has its limits: with a single word\footnote{Word refers here to however many bits can be written atomically.} bitmask, the total number of underlying queues in the ready queue is limited to the number of bits in the word.
     277With a multi-word bitmask, this maximum limit can be increased arbitrarily, but it is not possible to check if the queue is empty by reading the bitmask atomically.
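
As an illustration of the single-word case, the following hypothetical C helper uses the BMI2 $pdep$ instruction to turn a random number directly into the index of a (probably) non-empty queue; the helper name and surrounding structure are assumptions, not the actual runtime code.
\begin{lstlisting}[language=C]
// Hypothetical helper: pick a random non-empty queue from a single-word
// bitmask using BMI2 (x86-64, compile with -mbmi2).  The mask may be stale
// under contention, so the caller must still handle an empty chosen queue.
#include <stdint.h>
#include <stdlib.h>
#include <immintrin.h>

// bit i of mask set <=> underlying queue i is believed non-empty
static int pick_nonempty( uint64_t mask ) {
        int used = __builtin_popcountll( mask );     // how many queues are marked non-empty
        if( used == 0 ) return -1;                   // mask says the ready queue is empty
        unsigned r = rand() % used;                  // choose the r-th set bit uniformly
        uint64_t bit = _pdep_u64( 1ull << r, mask ); // pdep deposits bit r onto the r-th set bit of mask
        return __builtin_ctzll( bit );               // its position is the queue index
}
\end{lstlisting}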
     278
      279Finally, a dense bitmap, either single or multi-word, causes additional problems in case~C (Table~\ref{tab:perfcases}), because many processors are continuously scanning the bitmask to find the few available threads.
      280This increased contention on the bitmask(s) reduces performance because of cache misses after updates, and the bitmask is updated more frequently as the scanning processors race to read and/or update that information.
     281This increased update frequency means the information in the bitmask is more often stale before a processor can use it to find an item, \ie mask read says there are available user threads but none on queue.
     282
     283\begin{figure}
     284        \begin{center}
     285                {\resizebox{0.8\textwidth}{!}{\input{emptybit}}}
     286        \end{center}
     287        \caption{``More empty'' queue with added bitmask to indicate which array cells have items.}
     288        \label{fig:emptybit}
     289\end{figure}
     290
      291Figure~\ref{fig:emptytree} shows another approach, using a hierarchical tree data-structure to reduce contention, which has been shown to work in similar cases~\cite{ellen2007snzi}\footnote{This particular paper seems to be patented in the US.
     292How does that affect \CFA? Can I use it in my work?}.
     293However, this approach may lead to poorer performance in case~B (Table~\ref{tab:perfcases}) due to the inherent pointer chasing cost and already low contention cost in that case.
     294
     295\begin{figure}
     296        \begin{center}
     297                {\resizebox{0.8\textwidth}{!}{\input{emptytree}}}
     298        \end{center}
      299        \caption{``More empty'' queue with added binary search tree to indicate which array cells have items.}
     300        \label{fig:emptytree}
     301\end{figure}
     302
      303Finally, a third approach, shown in Figure~\ref{fig:emptytls}, is to use dense information, similar to the bitmap, but have each thread keep its own independent copy of it.
      304While this approach can offer good scalability \emph{and} low latency, the liveness of the information can become a problem.
      305In the simple cases, local copies of which underlying queues are empty can become stale and end up not being useful for the pop operation.
     306A more serious problem is that reliable information is necessary for some parts of this algorithm to be correct.
     307As mentioned in this section, processors must know \emph{reliably} whether the list is empty or not to decide if they can return \texttt{NULL} or if they must keep looking during a pop operation.
     308Section~\ref{sec:sleep} discusses another case where reliable information is required for the algorithm to be correct.
     309
     310\begin{figure}
     311        \begin{center}
     312                \input{emptytls}
     313        \end{center}
     314        \caption{``More empty'' queue with added per processor bitmask to indicate which array cells have items.}
     315        \label{fig:emptytls}
     316\end{figure}
     317
      318There is a fundamental tradeoff among these approaches.
      319Dense global information about empty underlying queues helps zero-contention cases at the cost of high-contention cases.
      320Sparse global information helps high-contention cases but increases latency in zero-contention cases, to read and ``aggregate'' the information\footnote{Hierarchical structures, \eg binary search tree, effectively aggregate information but follow pointer chains, learning information at each node.
     321Similarly, other sparse schemes need to read multiple cachelines to acquire all the information needed.}.
      322Finally, dense local information has both the advantage of low latency in zero-contention cases and scalability in high-contention cases; however, the information can become stale, making it difficult to use for ensuring correctness.
      323The fact that these solutions have fundamental limits suggests to me that a better solution may combine these properties in interesting ways.
     324Also, the lock discussed in Section~\ref{sec:resize} allows for solutions that adapt to the number of processors, which could also prove useful.
    126325
    127326\paragraph{Objectives and Existing Work}
    128 How much scalability is actually needed is highly debatable, libfibre\cit is has compared favorably to other schedulers in webserver tests\cit and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask. As such the single atomic instruction on a shared cacheline may be sufficiently performant.
    129 
    130 I have built a prototype of this ready-queue (including the bitmask and BMI2 usage, but not the sharded bitmask) and ran performance experiments on it but it is difficult to compare this prototype to a thread scheduler as the prototype is used as a data-queue. I have also integrated this prototype into the \CFA runtime, but have not yet created performance experiments to compare results. I believe that the bitmask approach is currently one of the larger risks of the proposal, early tests lead me to believe it may work but it is not clear that the contention problem can be overcome. The worst-case scenario is a case where the number of processors and the number of ready threads are similar, yet scheduling events are very frequent. Fewer threads should lead to the Idle Sleep mechanism reducing contention while having many threads ready leads to optimal performance. It is difficult to evaluate the likeliness of this worst-case scenario in real workloads. I believe, frequent scheduling events suggest a more ``bursty'' workload where new work is finely divided among many threads which race to completion. This type of workload would only see a peek of contention close to the end of the work, but no sustained contention. Very fine-grained pipelines are less ``bursty'', these may lead to more sustained contention. However, they could also easily benefit from a direct hand-off strategy which would circumvent the problem entirely.
    131 
    132 \subsection{Dynamic Resizing}
    133 The \CFA runtime system currently handles dynamically adding and removing processors from clusters at any time. Since this is part of the existing design, the proposed scheduler must also support this behaviour. However, dynamicly resizing the clusters is considered a rare event associated with setup, teardown and major configuration changes. This assumptions is made both in the design of the proposed scheduler as well as in the original design of the \CFA runtime system. As such, the proposed scheduler must honor the correctness of these behaviour but does not have any performance objectives with regards to resizing a cluster. How long adding or removing processors take and how much this disrupts the performance of other threads is considered a secondary concern since it should be amortized over long period of times. This description effectively matches with te description of a Reader-Writer lock, in frequent but invasive updates among frequent (mostly) read operations. In the case of the Ready-Queue described above, read operations are operations that push or pop from the ready-queue but do not invalidate any references to the ready queue data structures. Writes on the other-hand would add or remove inner queues, invalidating references to the array of inner queues in the process. Therefore, the current proposed approach to this problem is the add a per-cluster Reader Writer lock around the ready queue to prevent restructuring of the ready-queue data structure while threads are being pushed or popped.
    134 
    135 There are possible alternatives to the Reader Writer lock solution. This problem is effectively a memory reclamation problem and as such there is a large body of research on the subject. However, the RWlock solution is simple and can be leveraged to solve other problems (e.g. processor ordering and memory reclamation of threads) which makes it an attractive solution.
     327
     328How much scalability is actually needed is highly debatable.
     329\emph{libfibre}\cite{libfibre} has compared favorably to other schedulers in webserver tests\cite{karstenuser} and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask.
     330As such, the single atomic instruction on a shared cacheline may be sufficiently performant.
     331
     332I have built a prototype of this ready queue in the shape of a data queue, \ie nodes on the queue are structures with a single int representing a thread and intrusive data fields.
     333Using this prototype I ran preliminary performance experiments that confirm the expected performance in Table~\ref{tab:perfcases}.
      334However, these experiments only offer a hint at the actual performance of the scheduler since threads involve more complex operations than simple integer nodes, \eg threads are not independent of each other: when a thread blocks, some other thread must intervene to wake it.
     335
     336I have also integrated this prototype into the \CFA runtime, but have not yet created performance experiments to compare results, as creating one-to-one comparisons between the prototype and the \CFA runtime will be complex.
     337
     338\subsection{Dynamic Resizing} \label{sec:resize}
     339
     340\begin{figure}
     341        \begin{center}
     342                \input{system}
     343        \end{center}
     344        \caption{Global structure of the \CFA runtime system.}
     345        \label{fig:system}
     346\end{figure}
     347
     348The \CFA runtime system groups processors together as \newterm{clusters}, as shown in Figure~\ref{fig:system}.
     349Threads on a cluster are always scheduled on one of the processors of the cluster.
     350Currently, the runtime handles dynamically adding and removing processors from clusters at any time.
     351Since this is part of the existing design, the proposed scheduler must also support this behaviour.
     352However, dynamically resizing a cluster is considered a rare event associated with setup, tear down and major configuration changes.
     353This assumption is made both in the design of the proposed scheduler as well as in the original design of the \CFA runtime system.
     354As such, the proposed scheduler must honour the correctness of this behaviour but does not have any performance objectives with regard to resizing a cluster.
      355How long adding or removing processors takes and how much this disrupts the performance of other threads is considered a secondary concern since it should be amortized over long periods of time.
     356However, as mentioned in Section~\ref{sec:queue}, contention on the underlying queues can have a direct impact on performance.
     357The number of underlying queues must therefore be adjusted as the number of processors grows or shrinks.
     358Since the underlying queues are stored in a dense array, changing the number of queues requires resizing the array and expanding the array requires moving it, which can introduce memory reclamation problems if not done correctly.
     359
     360\begin{figure}
     361        \begin{center}
     362                \input{resize}
     363        \end{center}
     364        \caption{Copy of data structure shown in Figure~\ref{fig:base}.}
     365        \label{fig:base2}
     366\end{figure}
     367
     368It is important to note how the array is used in this case.
     369While the array cells are modified by every push and pop operation, the array itself, \ie the pointer that would change when resized, is only read during these operations.
     370Therefore the use of this pointer can be described as frequent reads and infrequent writes.
     371This description effectively matches with the description of a reader-writer lock, infrequent but invasive updates among frequent read operations.
     372In the case of the ready queue described above, read operations are operations that push or pop from the ready queue but do not invalidate any references to the ready queue data structures.
      373Writes on the other hand would add or remove inner queues, invalidating references to the array of inner queues in the process.
     374Therefore, the current proposed approach to this problem is to add a per-cluster reader-writer lock around the ready queue to prevent restructuring of the ready-queue data-structure while threads are being pushed or popped.
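
A conceptual sketch of this locking pattern is shown below, using a plain pthread reader-writer lock as a stand-in for the custom scalable lock discussed next; the helper names and structure layout are hypothetical.
\begin{lstlisting}[language=C]
// Conceptual sketch only: push/pop act as "readers" of the lane array,
// resizing the cluster is the rare "writer".  A plain pthread rwlock stands
// in for the custom per-cluster reader-writer lock.
#include <pthread.h>
#include <stdlib.h>

struct thread;  struct lane;                                  // opaque
extern void lane_push( struct lane *, struct thread * );      // hypothetical helpers
extern struct lane * grow_lanes( struct lane *, unsigned, unsigned );

struct cluster {
        pthread_rwlock_t array_lock;   // protects the lane-array pointer and its size
        struct lane * lanes;           // dense array of underlying FIFO queues
        unsigned nlanes;
};

void ready_push( struct cluster * cl, struct thread * t ) {
        pthread_rwlock_rdlock( &cl->array_lock );      // many concurrent pushes/pops
        lane_push( &cl->lanes[ rand() % cl->nlanes ], t );
        pthread_rwlock_unlock( &cl->array_lock );
}

void cluster_grow( struct cluster * cl, unsigned new_nlanes ) {
        pthread_rwlock_wrlock( &cl->array_lock );      // exclusive: no push/pop in flight
        cl->lanes  = grow_lanes( cl->lanes, cl->nlanes, new_nlanes );
        cl->nlanes = new_nlanes;
        pthread_rwlock_unlock( &cl->array_lock );
}
\end{lstlisting}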
     375
     376There are possible alternatives to the reader-writer lock solution.
     377This problem is effectively a memory reclamation problem and as such there is a large body of research on the subject\cite{michael2004hazard, brown2015reclaiming}.
      378However, the reader-writer lock solution is simple and can be leveraged to solve other problems (\eg processor ordering and memory reclamation of threads), which makes it an attractive solution.
    136379
    137380\paragraph{Objectives and Existing Work}
    138 The lock must offer scalability and performance on par with the actual ready-queue in order not to introduce a new bottle neck. I have already built a lock that fits the desired requirements and preliminary testing show scalability and performance that exceed the target. As such, I do not consider this lock to be a risk on this project.
    139 
    140 \subsection{Idle Sleep} \label{sleep}
    141 As mentionned above, idle sleep is the process of putting processors to sleep while they do not have threads to execute. In this context processors are kernel-threads and sleeping refers to asking the kernel to block a thread. This can be achieved with either thread synchronization operations like pthread\_cond\_wait or using signal operations like sigsuspend.
    142 
    143 Support for idle sleep broadly involves calling the operating system to block the kernel thread but also handling the race between the sleeping and the waking up, and handling which kernel thread should sleep or wake-up.
    144 
    145 When a processor decides to sleep, there is a race that occurs between it signalling that it will go to sleep (so other processors can find sleeping processors) and actually blocking the kernel thread. This is equivalent to the classic problem of missing signals when using condition variables, the ``sleepy'' processor indicates that it will sleep but has not yet gone to sleep, if another processor attempts to wake it up, the waking-up operation may claim nothing needs to be done and the signal will have been missed. In cases where threads are scheduled from processors on the current cluster, loosing signals is not necessarily critical, because at least some processors on the cluster are awake. Individual processors always finish shceduling threads before looking for new work, which means that the last processor to go to sleep cannot miss threads scheduled from inside the cluster (if they do, that demonstrates the ready-queue is not linearizable). However, this guarantee does not hold if threads are shceduled from outside the cluster, either due to an external event like timers and I/O, or due to a thread migrating from a different cluster. In this case, missed signals can lead to the cluster deadlocking where it should not\footnote{Clusters ``should'' never deadlock, but for this proposal, cases where \CFA users \emph{actually} wrote \CFA code that leads to a deadlock it is considered as a deadlock that ``should'' happen. }. Therefore, it is important that the scheduling of threads include a mechanism where signals \emph{cannot} be missed. For performance reasons, it can be advantageous to have a secondary mechanism that allows signals to be missed in cases where it cannot lead to a deadlock. To be safe, this process must include a ``handshake'' where it is guaranteed that either~: the sleepy processor notices that a thread was scheduled after it signalled its intent to block or code scheduling threads well see the intent to sleep before scheduling and be able to wake-up the processor. This matter is complicated by the fact that pthread offers few tools to implement this solution and offers no guarantee of ordering of threads waking up for most of these tools.
    146 
    147 Another issues is trying to avoid kernel sleeping and waking frequently. A possible partial solution is to order the processors so that the one which most recently went to sleep is woken up. This allows other sleeping processors to reach deeper sleep state (when these are available) while keeping ``hot'' processors warmer. Note that while this generally means organising the processors in a stack, I believe that the unique index provided by the ReaderWriter lock can be reused to strictly order the waking order of processors, causing a LIFO like waking order. While a strict LIFO stack is probably better, using the processor index could proove useful and offer a sufficiently LIFO ordering.
    148 
    149 Finally, another important aspect of Idle Sleep is when should processors make the decision to sleep and when it is appropriate for sleeping processors to be woken up. Processors that are unnecessarily awake lead to unnecessary contention and power consumption, while too many sleeping processors can lead to sub-optimal throughput. Furthermore, transitions from sleeping to awake and vice-versa also add unnecessary latency. There is already a wealth of research on the subject and I do not plan to implement a novel idea for the Idle Sleep heuristic in this project.
     381The lock must offer scalability and performance on par with the actual ready-queue in order not to introduce a new bottleneck.
      382I have already built a lock that fits the desired requirements and preliminary testing shows scalability and performance that exceed the target.
     383As such, I do not consider this lock to be a risk for this project.
     384
     385\subsection{Idle Sleep} \label{sec:sleep}
     386
     387\newterm{Idle sleep} is the process of putting processors to sleep when they have no threads to execute.
     388In this context, processors are kernel threads and sleeping refers to asking the kernel to block a thread.
      389This operation can be achieved either with thread-synchronization operations like $pthread_cond_wait$ or with signal operations like $sigsuspend$.
     390The goal of putting idle processors to sleep is:
     391\begin{enumerate}
     392\item
     393reduce contention on the ready queue, since the otherwise idle processors generally contend trying to pop items from the queue,
     394\item
      395give back unneeded CPU time associated with a process to other user processes executing on the computer,
     396\item
     397and reduce energy consumption in cases where more idle kernel-threads translate to idle CPUs, which can cycle down.
     398\end{enumerate}
     399Support for idle sleep broadly involves calling the operating system to block the kernel thread and handling the race between a blocking thread and the waking thread, and handling which kernel thread should sleep or wake up.
     400
      401When a processor decides to sleep, there is a race that occurs between it signalling that it is going to sleep (so other processors can find sleeping processors) and actually blocking the kernel thread.
     402This operation is equivalent to the classic problem of missing signals when using condition variables: the ``sleepy'' processor indicates its intention to block but has not yet gone to sleep when another processor attempts to wake it up.
      403The waking-up operation sees the processor marked as sleeping and signals it, but because the processor is still racing to block, the signal is missed.
      404In cases where kernel threads are managed as processors on the current cluster, losing signals is not necessarily critical, because at least some processors on the cluster are awake and eventually check for more work.
     405Individual processors always finish scheduling user threads before looking for new work, which means that the last processor to go to sleep cannot miss threads scheduled from inside the cluster (if they do, that demonstrates the ready queue is not linearizable).
     406However, this guarantee does not hold if threads are scheduled from outside the cluster, either due to an external event like timers and I/O, or due to a user (or kernel) thread migrating from a different cluster.
      407In this case, missed signals can lead to the cluster deadlocking\footnote{Clusters should only deadlock in cases where a \CFA programmer \emph{actually} writes \CFA code that leads to a deadlock.}.
     408Therefore, it is important that the scheduling of threads include a mechanism where signals \emph{cannot} be missed.
     409For performance reasons, it can be advantageous to have a secondary mechanism that allows signals to be missed in cases where it cannot lead to a deadlock.
      410To be safe, this process must include a ``handshake'' where it is guaranteed that either the sleeping processor notices that a user thread is scheduled after it signalled its intent to block, or the code scheduling threads sees the intent to sleep before scheduling and is able to wake up the processor.
     411This matter is complicated by the fact that pthreads and Linux offer few tools to implement this solution and no guarantee of ordering of threads waking up for most of these tools.
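
A minimal sketch of such a handshake, using a pthread mutex and condition variable so the ``check then block'' and ``publish then signal'' steps cannot interleave badly, is shown below; the actual runtime needs a per-processor variant and a lock-free fast path, so this is illustrative only.
\begin{lstlisting}[language=C]
// Minimal sketch of a race-free sleep/wake handshake with pthreads.
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static bool work_available = false;             // stands in for "ready queue non-empty"

void processor_sleep( void ) {                  // called by an idle processor
        pthread_mutex_lock( &lock );
        while( ! work_available )                   // re-check while holding the lock;
                pthread_cond_wait( &cond, &lock );  // wait atomically releases the lock and
        pthread_mutex_unlock( &lock );              // blocks, so a signal sent after the
}                                                   // check cannot be missed

void external_schedule( void ) {                // e.g., timer, I/O, or another cluster
        pthread_mutex_lock( &lock );
        work_available = true;                  // publish the work first ...
        pthread_cond_signal( &cond );           // ... then wake a sleeping processor
        pthread_mutex_unlock( &lock );
}
\end{lstlisting}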
     412
     413Another important issue is avoiding kernel threads sleeping and waking frequently because there is a significant operating-system cost.
      414This scenario happens when a program oscillates between high and low activity, needing more and then fewer processors.
     415A possible partial solution is to order the processors so that the one which most recently went to sleep is woken up.
     416This allows other sleeping processors to reach deeper sleep state (when these are available) while keeping ``hot'' processors warmer.
     417Note that while this generally means organizing the processors in a stack, I believe that the unique index provided in my reader-writer lock can be reused to strictly order the waking processors, causing a mostly LIFO order.
     418While a strict LIFO stack is probably better, the processor index could prove useful for other reasons, while still offering a sufficiently LIFO ordering.
     419
      420A final important aspect of idle sleep is deciding when processors should go to sleep and when it is appropriate to wake sleeping processors.
     421Processors that are unnecessarily unblocked lead to unnecessary contention, CPU usage, and power consumption, while too many sleeping processors can lead to sub-optimal throughput.
     422Furthermore, transitions from sleeping to awake and vice-versa also add unnecessary latency.
     423There is already a wealth of research on the subject\cite{schillings1996engineering, wiki:thunderherd} and I may use an existing approach for the idle-sleep heuristic in this project, \eg\cite{karstenuser}.
    150424
    151425\subsection{Asynchronous I/O}
    152 The final aspect of this proposal is asynchronous I/O. Without it, user threads that execute I/O operations will block the underlying kernel thread. This leads to poor throughput, it would be preferrable to block the user-thread and reuse the underlying kernel-thread to run other ready threads. This requires intercepting the user-threads' calls to I/O operations, redirecting them to an asynchronous I/O interface and handling the multiplexing between the synchronous and asynchronous API. As such, these are the three components needed to implemented to support asynchronous I/O : an OS abstraction layer over the asynchronous interface, an event-engine to (de)multiplex the operations and a synchronous interface for users to use. None of these components currently exist in \CFA and I will need to build all three for this project.
     426
     427The final aspect of this proposal is asynchronous I/O.
     428Without it, user threads that execute I/O operations block the underlying kernel thread, which leads to poor throughput.
     429It is preferable to block the user thread performing the I/O and reuse the underlying kernel-thread to run other ready user threads.
     430This approach requires intercepting user-thread calls to I/O operations, redirecting them to an asynchronous I/O interface, and handling the multiplexing/demultiplexing between the synchronous and asynchronous API.
      431As such, there are three components needed to implement support for asynchronous I/O:
     432\begin{enumerate}
     433\item
     434an OS abstraction layer over the asynchronous interface,
     435\item
     436an event-engine to (de)multiplex the operations,
     437\item
     438and a synchronous interface for users to use.
     439\end{enumerate}
     440None of these components currently exist in \CFA and I will need to build all three for this project.
    153441
    154442\paragraph{OS Abstraction}
    155 One of the fundamental part of this converting blocking I/O operations into non-blocking ones. This relies on having an underlying asynchronous I/O interface to which to direct the I/O operations. While there exists many different APIs for asynchronous I/O, it is not part of this proposal to create a novel API, simply to use an existing one that is sufficient. uC++ uses the \texttt{select} as its interface, which handles pipes and sockets. It entails significant complexity and has performances problems which make it a less interesting alternative. Another interface which is becoming popular recently\cit is \texttt{epoll}. However, epoll also does not handle file system and seems to have problem to linux pipes and \texttt{TTY}s\cit. A very recent alternative that must still be investigated is \texttt{io\_uring}. It claims to address some of the issues with \texttt{epoll} but is too recent to be confident that it does. Finally, a popular cross-platform alternative is \texttt{libuv}, which offers asynchronous sockets and asynchronous file system operations (among other features). However, as a full-featured library it includes much more than what is needed and could conflict with other features of \CFA unless significant efforts are made to merge them together.
    156 
    157 \paragraph{Event-Engine}
    158 Laying on top of the asynchronous interface layer is the event-engine. This engine is responsible for multiplexing (batching) the synchronous I/O requests into an asynchronous I/O request and demultiplexing the results onto appropriate blocked threads. This can be straightforward for the simple cases, but can become quite complex. Decisions that will need to be made include : whether to poll from a seperate kernel thread or a regularly scheduled user thread, what should be the ordering used when results satisfy many requests, how to handle threads waiting for multiple operations, etc.
      443One fundamental requirement for converting blocking I/O operations into non-blocking ones is an underlying asynchronous I/O interface to which the I/O operations can be directed.
      444While many different APIs exist for asynchronous I/O, it is not part of this proposal to create a novel API.
     445It is sufficient to make one work in the complex context of the \CFA runtime.
      446\uC uses $select$\cite{select} as its interface, which handles ttys, pipes and sockets, but not disk.
      447$select$ entails significant complexity and is being replaced in UNIX operating-systems, which makes it a less interesting alternative.
      448Another popular interface is $epoll$\cite{epoll}, which is supposed to be cheaper than $select$.
      449However, $epoll$ also does not handle the file system, and anecdotal evidence suggests it has problems with Linux pipes and $TTY$s.
     450A popular cross-platform alternative is $libuv$\cite{libuv}, which offers asynchronous sockets and asynchronous file system operations (among other features).
     451However, as a full-featured library it includes much more than I need and could conflict with other features of \CFA unless significant effort is made to merge them together.
     452A very recent alternative that I am investigating is $io_uring$\cite{io_uring}.
      453It claims to address some of the issues with $epoll$, and my early investigation suggests the claim is accurate.
      454$io_uring$ uses a much more general approach, where system calls are registered to a queue and later executed by the kernel, rather than relying on system calls to return an error instead of blocking and subsequently waiting for changes on file descriptors.
      455I believe this approach avoids a class of problems, \eg the manpage for $open$\cite{open} states:
     456\begin{quote}
     457        Note that [the $O_NONBLOCK$ flag] has no effect for regular files and block devices;
     458        that is, I/O operations will (briefly) block when device activity is required, regardless of whether $O_NONBLOCK$ is set.
     459        Since $O_NONBLOCK$ semantics might eventually be implemented, applications should not depend upon blocking behavior when specifying this flag for regular files and block devices.
     460\end{quote}
      461This makes approaches based on $epoll$/$select$ less reliable, since they may not work for every file descriptor.
     462For this reason, I plan to use $io_uring$ as the OS abstraction for the \CFA runtime, unless further work shows problems I haven't encountered yet.
      463However, only a small subset of the $io_uring$ features is available in Ubuntu as of April 2020\cite{wiki:ubuntu-linux}, which will limit performance comparisons.
     464I do not believe this will affect the comparison result.
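To make the queue-based model concrete, the following minimal sketch submits a single read through $io_uring$ and waits for its completion.
It uses the $liburing$ helper library for brevity; the routine names ($io_uring_prep_readv$, $io_uring_submit$, etc.) come from that library, and the ring is assumed to have been created beforehand with $io_uring_queue_init$.
In the actual runtime, the submission and the wait are separated: the submission happens on behalf of the blocking user thread and the completion is harvested by the event engine described below.
\begin{cfa}
#include <liburing.h>	// liburing helper library, assumed for brevity

// Sketch only: read `size' bytes from `fd' through io_uring.
static long uring_read( struct io_uring * ring, int fd, void * buf, unsigned size ) {
	struct iovec iov = { .iov_base = buf, .iov_len = size };
	struct io_uring_sqe * sqe = io_uring_get_sqe( ring );	// grab a submission-queue entry
	io_uring_prep_readv( sqe, fd, &iov, 1, 0 );	// describe the operation; nothing executes yet
	io_uring_submit( ring );	// hand the queued request(s) to the kernel
	struct io_uring_cqe * cqe;
	io_uring_wait_cqe( ring, &cqe );	// completion is delivered asynchronously
	long res = cqe->res;	// bytes read or negative errno
	io_uring_cqe_seen( ring, cqe );	// mark the completion as consumed
	return res;
}
\end{cfa}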
     465
     466\paragraph{Event Engine}
      467Lying on top of the asynchronous interface layer is the event engine.
     468This engine is responsible for multiplexing (batching) the synchronous I/O requests into asynchronous I/O requests and demultiplexing the results to appropriate blocked user threads.
     469This step can be straightforward for simple cases, but becomes quite complex when there are thousands of user threads performing both reads and writes, possibly on overlapping file descriptors.
      470Decisions that need to be made include the following; a sketch of the demultiplexing step is given after the list:
     471\begin{enumerate}
     472\item
     473whether to poll from a separate kernel thread or a regularly scheduled user thread,
     474\item
      475what ordering to use when results satisfy many requests,
     476\item
     477how to handle threads waiting for multiple operations, etc.
     478\end{enumerate}
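As an illustration of the demultiplexing side, the following sketch drains an $io_uring$ completion queue and unblocks the user thread recorded in each request.
The types and calls prefixed with $io_uring$ are from $liburing$; $user_thread$ and $unpark$ are hypothetical stand-ins for the \CFA runtime hooks this project still has to build, and each submission is assumed to have been tagged with $io_uring_sqe_set_data$.
\begin{cfa}
struct user_thread { long io_result; /* ... runtime state ... */ };	// hypothetical
extern void unpark( struct user_thread * );	// hypothetical: make a blocked user thread ready again

static void drain_completions( struct io_uring * ring ) {
	struct io_uring_cqe * cqe;
	unsigned head, count = 0;
	io_uring_for_each_cqe( ring, head, cqe ) {	// iterate over the available completions
		struct user_thread * t = io_uring_cqe_get_data( cqe );
		t->io_result = cqe->res;	// hand the result to the blocked thread
		unpark( t );	// demultiplex: wake exactly that thread
		count += 1;
	}
	io_uring_cq_advance( ring, count );	// release the consumed completion entries
}
\end{cfa}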
    159479
    160480\paragraph{Interface}
    161 Finally, for these components to be available, it is necessary to expose them through a synchronous interface. This can be a novel interface but it is preferrable to attempt to intercept the existing POSIX interface in order to be compatible with existing code. This will allow C programs written using this interface to be transparently converted to \CFA with minimal effeort. Where this is not applicable, a novel interface will be created to fill the gaps.
     481Finally, for these non-blocking I/O components to be available, it is necessary to expose them through a synchronous interface because that is the \CFA concurrent programming style.
     482The interface can be novel but it is preferable to match the existing POSIX interface when possible to be compatible with existing code.
     483Matching allows C programs written using this interface to be transparently converted to \CFA with minimal effort.
     484Where new functionality is needed, I will create a novel interface to fill gaps and provide advanced features.
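As a sketch of what matching the POSIX interface could look like, the runtime can provide a routine with the exact signature of $read$ that parks only the calling user thread and funnels the request into the event engine.
The name $cfa_io_read$ below is a placeholder for that engine entry point; it does not exist yet.
\begin{cfa}
#include <sys/types.h>	// ssize_t, size_t

extern ssize_t cfa_io_read( int fd, void * buf, size_t count );	// hypothetical event-engine entry point

// Same signature as POSIX read(2): existing C code recompiles unchanged,
// but only the calling user thread blocks; the kernel thread keeps running other user threads.
ssize_t read( int fd, void * buf, size_t count ) {
	return cfa_io_read( fd, buf, count );
}
\end{cfa}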
    162485
    163486
     
    165488% ===============================================================================
    166489\section{Discussion}
    167 
      490I believe that runtime systems and scheduling are still open research topics.
      491Many ``state of the art'' production frameworks still use single-threaded event-loops because of performance considerations, \eg \cite{nginx-design}, and, to my knowledge, no widely available systems language offers modern threading facilities.
     492I believe the proposed work offers a novel runtime and scheduling package, where existing work only offers fragments that users must assemble themselves when possible.
    168493
    169494% ===============================================================================
    170495% ===============================================================================
    171496\section{Timeline}
    172 
    173 
    174 \cleardoublepage
     497\begin{center}
     498\begin{tabular}{ | r @{--} l | p{4in} | }
     499\hline May 2020 & October 2020   & Creation of the performance benchmark. \\
     500\hline November 2020 & March 2021   & Completion of the implementation. \\
      501\hline March 2021 & April 2021  & Final performance experiments. \\
     502\hline May 2021 & August 2021 & Thesis writing and defense. \\
     503\hline
     504\end{tabular}
     505\end{center}
    175506
    176507% B I B L I O G R A P H Y
    177508% -----------------------------
    178 \addcontentsline{toc}{chapter}{Bibliography}
     509\cleardoublepage
     510\phantomsection         % allows hyperref to link to the correct page
     511\addcontentsline{toc}{section}{\refname}
    179512\bibliographystyle{plain}
    180513\bibliography{pl,local}
     514
     515% G L O S S A R Y
     516% -----------------------------
    181517\cleardoublepage
    182518\phantomsection         % allows hyperref to link to the correct page
    183 
    184 % G L O S S A R Y
    185 % -----------------------------
    186 \addcontentsline{toc}{chapter}{Glossary}
     519\addcontentsline{toc}{section}{Glossary}
    187520\printglossary
    188 \cleardoublepage
    189 \phantomsection         % allows hyperref to link to the correct page
    190521
    191522\end{document}
  • doc/theses/thierry_delisle_PhD/comp_II/local.bib

    rb7d6a36 r6a490b2  
    7676
    7777@article{finkel1987dib,
    78   title={DIB—a distributed implementation of backtracking},
     78  title={DIB-a distributed implementation of backtracking},
    7979  author={Finkel, Raphael and Manber, Udi},
    8080  journal={ACM Transactions on Programming Languages and Systems (TOPLAS)},
     
    221221  organization={ACM}
    222222}
     223
     224% ===============================================================================
     225% Algorithms
     226% ===============================================================================
     227@article{michael2004hazard,
     228  title={Hazard pointers: Safe memory reclamation for lock-free objects},
     229  author={Michael, Maged M},
     230  journal={IEEE Transactions on Parallel and Distributed Systems},
     231  volume={15},
     232  number={6},
     233  pages={491--504},
     234  year={2004},
     235  publisher={IEEE}
     236}
     237
     238@inproceedings{brown2015reclaiming,
     239  title={Reclaiming memory for lock-free data structures: There has to be a better way},
     240  author={Brown, Trevor Alexander},
     241  booktitle={Proceedings of the 2015 ACM Symposium on Principles of Distributed Computing},
     242  pages={261--270},
     243  year={2015}
     244}
     245
     246% Trevor's relaxed FIFO list
     247@inproceedings{alistarh2018relaxed,
     248  title={Relaxed schedulers can efficiently parallelize iterative algorithms},
     249  author={Alistarh, Dan and Brown, Trevor and Kopinsky, Justin and Nadiradze, Giorgi},
     250  booktitle={Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing},
     251  pages={377--386},
     252  year={2018}
     253}
     254
     255% Scalable counters which only support is !0
     256@inproceedings{ellen2007snzi,
     257  title={SNZI: Scalable nonzero indicators},
     258  author={Ellen, Faith and Lev, Yossi and Luchangco, Victor and Moir, Mark},
     259  booktitle={Proceedings of the twenty-sixth annual ACM symposium on Principles of distributed computing},
     260  pages={13--22},
     261  year={2007}
     262}
     263
     264% ===============================================================================
     265% Linux Man Pages
     266% ===============================================================================
     267@manual{open,
     268  key        = "open",
     269  title      = "open(2) Linux User's Manual",
     270  year       = "2020",
     271  month      = "February",
     272}
     273
     274@manual{epoll,
     275  key        = "epoll",
     276  title      = "epoll(7) Linux User's Manual",
     277  year       = "2019",
     278  month      = "March",
     279}
     280
     281@manual{select,
     282  key        = "select",
     283  title      = "select(7) Linux User's Manual",
     284  year       = "2019",
     285  month      = "March",
     286}
     287
     288@misc{io_uring,
     289  title   = {Efficient IO with io\_uring},
     290  author  = {Axboe, Jens},
     291  year    = "2019",
     292  month   = "March",
      293  version = {0.4},
     294  howpublished = {\url{https://kernel.dk/io_uring.pdf}}
     295}
     296
     297@misc{libuv,
     298  key   = "libuv",
     299  title = {libuv},
     300  howpublished = {\url{https://github.com/libuv/libuv}}
     301}
     302
     303% ===============================================================================
     304% MISC
     305% ===============================================================================
     306
     307@misc{nginx-design,
     308  key   = "nginx",
     309  title={Inside {NGINX}: How We Designed for Performance \& Scale},
     310  howpublished= {\href{https://www.nginx.com/blog/inside-nginx-how-we-designed-for-performance-scale}
     311                {https://\-www.nginx.com/\-blog/\-inside\--nginx\--how\--we\--designed\--for\--performance\--scale}},
     312}
     313
     314@article{schillings1996engineering,
     315  title={Be engineering insights: Benaphores},
     316  author={Schillings, Benoit},
     317  journal={Be Newsletters},
     318  volume={1},
     319  number={26},
     320  year={1996}
     321}
     322
     323@misc{wiki:thunderherd,
     324   author = "{Wikipedia contributors}",
     325   title = "Thundering herd problem --- {W}ikipedia{,} The Free Encyclopedia",
     326   year = "2020",
     327   howpublished = {\href{https://en.wikipedia.org/wiki/Thundering_herd_problem}
      328                  {https://\-en.wikipedia.org/\-wiki/\-Thundering\_herd\_problem}},
     329   note = "[Online; accessed 14-April-2020]"
     330}
     331
     332@misc{wiki:ubuntu-linux,
     333   author = "{Wikipedia contributors}",
     334   title = "Ubuntu version history : Table of versions --- {W}ikipedia{,} The Free Encyclopedia",
     335   year = "2020",
     336   howpublished = {\href{https://en.wikipedia.org/wiki/Ubuntu_version_history\#Table_of_versions}
     337                  {https://\-en.wikipedia.org/\-wiki/\-Ubuntu\_version\_history\#Table\_of\_versions}},
     338   note = "[Online; accessed 15-April-2020]"
     339}
  • doc/user/user.tex

    rb7d6a36 r6a490b2  
    1111%% Created On       : Wed Apr  6 14:53:29 2016
    1212%% Last Modified By : Peter A. Buhr
    13 %% Last Modified On : Sat Jul 13 18:36:18 2019
    14 %% Update Count     : 3876
     13%% Last Modified On : Fri Mar  6 13:34:52 2020
     14%% Update Count     : 3924
    1515%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    1616
     
    211211Even with all its problems, C continues to be popular because it allows writing software at virtually any level in a computer system without restriction.
    212212For system programming, where direct access to hardware, storage management, and real-time issues are a requirement, C is usually the only language of choice.
    213 The TIOBE index~\cite{TIOBE} for July 2018 ranks the top five most \emph{popular} programming languages as \Index*{Java} 16\%, C 14\%, \Index*[C++]{\CC{}} 7.5\%, Python 6\%, Visual Basic 4\% = 47.5\%, where the next 50 languages are less than 4\% each, with a long tail.
    214 The top 3 rankings over the past 30 years are:
     213The TIOBE index~\cite{TIOBE} for February 2020 ranks the top six most \emph{popular} programming languages as \Index*{Java} 17.4\%, C 16.8\%, Python 9.3\%, \Index*[C++]{\CC{}} 6.2\%, \Csharp 5.9\%, Visual Basic 5.9\% = 61.5\%, where the next 50 languages are less than 2\% each, with a long tail.
     214The top 4 rankings over the past 35 years are:
    215215\begin{center}
    216216\setlength{\tabcolsep}{10pt}
    217 \begin{tabular}{@{}rccccccc@{}}
    218                 & 2018  & 2013  & 2008  & 2003  & 1998  & 1993  & 1988  \\ \hline
    219 Java    & 1             & 2             & 1             & 1             & 16    & -             & -             \\
    220 \R{C}   & \R{2} & \R{1} & \R{2} & \R{2} & \R{1} & \R{1} & \R{1} \\
    221 \CC             & 3             & 4             & 3             & 3             & 2             & 2             & 5             \\
     217\begin{tabular}{@{}rcccccccc@{}}
     218                & 2020  & 2015  & 2010  & 2005  & 2000  & 1995  & 1990  & 1985  \\ \hline
     219Java    & 1             & 2             & 1             & 2             & 3             & -             & -             & -             \\
     220\R{C}   & \R{2} & \R{1} & \R{2} & \R{1} & \R{1} & \R{2} & \R{1} & \R{1} \\
     221Python  & 3             & 7             & 6             & 6             & 22    & 21    & -             & -             \\
     222\CC             & 4             & 4             & 4             & 3             & 2             & 1             & 2             & 12    \\
    222223\end{tabular}
    223224\end{center}
     
    512513Keyword clashes are accommodated by syntactic transformations using the \CFA backquote escape-mechanism:
    513514\begin{cfa}
    514 int Ā®`Ā®otypeĀ®`Ā® = 3; §\C{// make keyword an identifier}§
    515 double Ā®`Ā®forallĀ®`Ā® = 3.5;
      515int ®``®otype = 3; §\C{// make keyword an identifier}§
      516double ®``®forall = 3.5;
    516517\end{cfa}
    517518
     
    524525// include file uses the CFA keyword "with".
    525526#if ! defined( with ) §\C{// nesting ?}§
      527#define with ®``®with §\C{// make keyword an identifier}§
     527#define with Ā®``Ā®with §\C{// make keyword an identifier}§
    527528#define __CFA_BFD_H__
    528529#endif
    529 
    530 Ā®#include_next <bfdlink.h> §\C{// must have internal check for multiple expansion}§
    531 Ā®
     530§{\color{red}\#\textbf{include\_next} <bfdlink.h>}§ §\C{// must have internal check for multiple expansion}§
    532531#if defined( with ) && defined( __CFA_BFD_H__ ) §\C{// reset only if set}§
    533532#undef with
     
    576575\section{Exponentiation Operator}
    577576
    578 C, \CC, and Java (and many other programming languages) have no exponentiation operator\index{exponentiation!operator}\index{operator!exponentiation}, \ie $x^y$, and instead use a routine, like \Indexc{pow}, to perform the exponentiation operation.
    579 \CFA extends the basic operators with the exponentiation operator Ā©?\?Ā©\index{?\\?@Ā©?\?Ā©} and Ā©?\=?Ā©\index{?\\=?@Ā©\=?Ā©}, as in, Ā©x \ yĀ© and Ā©x \= yĀ©, which means $x^y$ and $x \leftarrow x^y$.
     577C, \CC, and Java (and many other programming languages) have no exponentiation operator\index{exponentiation!operator}\index{operator!exponentiation}, \ie $x^y$, and instead use a routine, like \Indexc{pow(x,y)}, to perform the exponentiation operation.
      578\CFA extends the basic operators with the exponentiation operator ©?®\®?©\index{?\\?@©?®\®?©} and ©?\=?©\index{?\\=?@©®\®=?©}, as in, ©x ®\® y© and ©x ®\®= y©, which means $x^y$ and $x \leftarrow x^y$.
    580579The priority of the exponentiation operator is between the cast and multiplicative operators, so that ©w * (int)x \ (int)y * z© is parenthesized as ©((w * (((int)x) \ ((int)y))) * z)©.
    581580
    582 As for \Index{division}, there are exponentiation operators for integral and floating types, including the builtin \Index{complex} types.
     581There are exponentiation operators for integral and floating types, including the builtin \Index{complex} types.
    583582Integral exponentiation\index{exponentiation!unsigned integral} is performed with repeated multiplication\footnote{The multiplication computation is $O(\log y)$.} (or shifting if the exponent is 2).
    584 Overflow from large exponents or negative exponents return zero.
     583Overflow for a large exponent or negative exponent returns zero.
    585584Floating exponentiation\index{exponentiation!floating} is performed using \Index{logarithm}s\index{exponentiation!logarithm}, so the exponent cannot be negative.
    586585\begin{cfa}
     
    5895881 1 256 -64 125 ®0® 3273344365508751233 ®0® ®0® -0.015625 18.3791736799526 0.264715-1.1922i
    590589\end{cfa}
    591 Note, Ā©5 Ā®\Ā® 32Ā© and Ā©5L Ā®\Ā® 64Ā© overflow, and Ā©-4 Ā®\Ā® -3Ā© is a fraction but stored in an integer so all three computations generate an integral zero.
      590Note, ©5 \ 32© and ©5L \ 64© overflow, and ©-4 \ -3© is a fraction but stored in an integer so all three computations generate an integral zero.
    592591Parenthesis are necessary for complex constants or the expression is parsed as ©1.0f+®(®2.0fi \ 3.0f®)®+2.0fi©.
    593592The exponentiation operator is available for all the basic types, but for user-defined types, only the integral-computation version is available.
     
    598597OT ?Ā®\Ā®?( OT ep, unsigned long int y );
    599598\end{cfa}
    600 The user type Ā©TĀ© must define multiplication, one, Ā©1Ā©, and, Ā©*Ā©.
     599The user type Ā©TĀ© must define multiplication, one (Ā©1Ā©), and Ā©*Ā©.
    601600
    602601
     
    626625
    627626
    628 \subsection{Loop Control}
    629 
    630 The Ā©forĀ©/Ā©whileĀ©/Ā©do-whileĀ© loop-control allows empty or simplified ranges (see Figure~\ref{f:LoopControlExamples}).
    631 \begin{itemize}
    632 \item
    633 An empty conditional implies Ā©1Ā©.
    634 \item
    635 The up-to range Ā©~Ā©\index{~@Ā©~Ā©} means exclusive range [M,N).
    636 \item
    637 The up-to range Ā©~=Ā©\index{~=@Ā©~=Ā©} means inclusive range [M,N].
    638 \item
    639 The down-to range Ā©-~Ā©\index{-~@Ā©-~Ā©} means exclusive range [N,M).
    640 \item
    641 The down-to range Ā©-~=Ā©\index{-~=@Ā©-~=Ā©} means inclusive range [N,M].
    642 \item
    643 Ā©@Ā© means put nothing in this field.
    644 \item
    645 Ā©0Ā© is the implicit start value;
    646 \item
    647 Ā©1Ā© is the implicit increment value.
    648 \item
    649 The up-to range uses Ā©+=Ā© for increment;
    650 \item
    651 The down-to range uses Ā©-=Ā© for decrement.
    652 \item
    653 The loop index is polymorphic in the type of the start value or comparison value when start is implicitly Ā©0Ā©.
    654 \end{itemize}
    655 
    656 \begin{figure}
     627%\section{\texorpdfstring{\protect\lstinline@case@ Clause}{case Clause}}
     628\subsection{\texorpdfstring{\LstKeywordStyle{case} Clause}{case Clause}}
     629
      630C restricts the ©case© clause of a ©switch© statement to a single value.
      631For multiple ©case© clauses associated with the same statement, it is necessary to have multiple ©case© clauses rather than multiple values.
      632Requiring a ©case© clause for each value does not seem to be in the spirit of brevity normally associated with C.
      633Therefore, the ©case© clause is extended with a list of values, as in:
    657634\begin{cquote}
    658 \begin{tabular}{@{}l|l@{}}
    659 \multicolumn{1}{c|}{loop control} & \multicolumn{1}{c}{output} \\
    660 \hline
    661 \begin{cfa}
    662 sout | nlOff;
    663 while Ā®()Ā® { sout | "empty"; break; } sout | nl;
    664 do { sout | "empty"; break; } while Ā®()Ā®; sout | nl;
    665 for Ā®()Ā® { sout | "empty"; break; } sout | nl;
    666 for ( Ā®0Ā® ) { sout | "A"; } sout | "zero" | nl;
    667 for ( Ā®1Ā® ) { sout | "A"; } sout | nl;
    668 for ( Ā®10Ā® ) { sout | "A"; } sout | nl;
    669 for ( Ā®1 ~= 10 ~ 2Ā® ) { sout | "B"; } sout | nl;
    670 for ( Ā®10 -~= 1 ~ 2Ā® ) { sout | "C"; } sout | nl;
    671 for ( Ā®0.5 ~ 5.5Ā® ) { sout | "D"; } sout | nl;
    672 for ( Ā®5.5 -~ 0.5Ā® ) { sout | "E"; } sout | nl;
    673 for ( Ā®i; 10Ā® ) { sout | i; } sout | nl;
    674 for ( Ā®i; 1 ~= 10 ~ 2Ā® ) { sout | i; } sout | nl;
    675 for ( Ā®i; 10 -~= 1 ~ 2Ā® ) { sout | i; } sout | nl;
    676 for ( Ā®i; 0.5 ~ 5.5Ā® ) { sout | i; } sout | nl;
    677 for ( Ā®i; 5.5 -~ 0.5Ā® ) { sout | i; } sout | nl;
    678 for ( Ā®ui; 2u ~= 10u ~ 2uĀ® ) { sout | ui; } sout | nl;
    679 for ( Ā®ui; 10u -~= 2u ~ 2uĀ® ) { sout | ui; } sout | nl;
    680 enum { N = 10 };
    681 for ( Ā®NĀ® ) { sout | "N"; } sout | nl;
    682 for ( Ā®i; NĀ® ) { sout | i; } sout | nl;
    683 for ( Ā®i; N -~ 0Ā® ) { sout | i; } sout | nl;
    684 const int start = 3, comp = 10, inc = 2;
    685 for ( Ā®i; start ~ comp ~ inc + 1Ā® ) { sout | i; } sout | nl;
    686 for ( Ā®i; 1 ~ @Ā® ) { if ( i > 10 ) break;
    687         sout | i; } sout | nl;
    688 for ( Ā®i; 10 -~ @Ā® ) { if ( i < 0 ) break;
    689         sout | i; } sout | nl;
    690 for ( Ā®i; 2 ~ @ ~ 2Ā® ) { if ( i > 10 ) break;
    691         sout | i; } sout | nl;
    692 for ( Ā®i; 2.1 ~ @ ~ @Ā® ) { if ( i > 10.5 ) break;
    693         sout | i; i += 1.7; } sout | nl;
    694 for ( Ā®i; 10 -~ @ ~ 2Ā® ) { if ( i < 0 ) break;
    695         sout | i; } sout | nl;
    696 for ( Ā®i; 12.1 ~ @ ~ @Ā® ) { if ( i < 2.5 ) break;
    697         sout | i; i -= 1.7; } sout | nl;
    698 for ( Ā®i; 5 : j; -5 ~ @Ā® ) { sout | i | j; } sout | nl;
    699 for ( Ā®i; 5 : j; -5 -~ @Ā® ) { sout | i | j; } sout | nl;
    700 for ( Ā®i; 5 : j; -5 ~ @ ~ 2Ā® ) { sout | i | j; } sout | nl;
    701 for ( Ā®i; 5 : j; -5 -~ @ ~ 2Ā® ) { sout | i | j; } sout | nl;
    702 for ( Ā®j; -5 ~ @ : i; 5Ā® ) { sout | i | j; } sout | nl;
    703 for ( Ā®j; -5 -~ @ : i; 5Ā® ) { sout | i | j; } sout | nl;
    704 for ( Ā®j; -5 ~ @ ~ 2 : i; 5Ā® ) { sout | i | j; } sout | nl;
    705 for ( Ā®j; -5 -~ @ ~ 2 : i; 5Ā® ) { sout | i | j; } sout | nl;
    706 for ( Ā®j; -5 -~ @ ~ 2 : i; 5 : k; 1.5 ~ @Ā® ) {
    707         sout | i | j | k; } sout | nl;
    708 for ( Ā®j; -5 -~ @ ~ 2 : k; 1.5 ~ @ : i; 5Ā® ) {
    709         sout | i | j | k; } sout | nl;
    710 for ( Ā®k; 1.5 ~ @ : j; -5 -~ @ ~ 2 : i; 5Ā® ) {
    711         sout | i | j | k; } sout | nl;
     635\begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
     636\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{C}} \\
     637\begin{cfa}
     638switch ( i ) {
      639  case ®1, 3, 5®:
      640        ...
      641  case ®2, 4, 6®:
     642        ...
     643}
    712644\end{cfa}
    713645&
    714646\begin{cfa}
    715 
    716 empty
    717 empty
    718 empty
    719 zero
    720 A
    721 A A A A A A A A A A
    722 B B B B B
    723 C C C C C
    724 D D D D D
    725 E E E E E
    726 0 1 2 3 4 5 6 7 8 9
    727 1 3 5 7 9
    728 10 8 6 4 2
    729 0.5 1.5 2.5 3.5 4.5
    730 5.5 4.5 3.5 2.5 1.5
    731 2 4 6 8 10
    732 10 8 6 4 2
    733 
    734 N N N N N N N N N N
    735 0 1 2 3 4 5 6 7 8 9
    736 10 9 8 7 6 5 4 3 2 1
    737 
    738 3 6 9
    739 
    740 1 2 3 4 5 6 7 8 9 10
    741 
    742 10 9 8 7 6 5 4 3 2 1 0
    743 
    744 2 4 6 8 10
    745 
    746 2.1 3.8 5.5 7.2 8.9
    747 
    748 10 8 6 4 2 0
    749 
    750 12.1 10.4 8.7 7 5.3 3.6
    751 0 -5 1 -4 2 -3 3 -2 4 -1
    752 0 -5 1 -6 2 -7 3 -8 4 -9
    753 0 -5 1 -3 2 -1 3 1 4 3
    754 0 -5 1 -7 2 -9 3 -11 4 -13
    755 0 -5 1 -4 2 -3 3 -2 4 -1
    756 0 -5 1 -6 2 -7 3 -8 4 -9
    757 0 -5 1 -3 2 -1 3 1 4 3
    758 0 -5 1 -7 2 -9 3 -11 4 -13
    759 
    760 0 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
    761 
    762 0 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
    763 
    764 0 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
     647switch ( i ) {
     648  case 1: case 3 : case 5:
     649        ...
     650  case 2: case 4 : case 6:
     651        ...
     652}
     653\end{cfa}
     654&
     655\begin{cfa}
     656
     657// odd values
     658
     659// even values
     660
     661
    765662\end{cfa}
    766663\end{tabular}
    767664\end{cquote}
    768 \caption{Loop Control Examples}
    769 \label{f:LoopControlExamples}
    770 \end{figure}
     665In addition, subranges are allowed to specify case values.\footnote{
     666gcc has the same mechanism but awkward syntax, \lstinline@2 ...42@, because a space is required after a number, otherwise the period is a decimal point.}
     667\begin{cfa}
     668switch ( i ) {
      669  case ®1~5:® §\C{// 1, 2, 3, 4, 5}§
      670        ...
      671  case ®10~15:® §\C{// 10, 11, 12, 13, 14, 15}§
     672        ...
     673}
     674\end{cfa}
     675Lists of subranges are also allowed.
     676\begin{cfa}
      677case ®1~5, 12~21, 35~42®:
     678\end{cfa}
    771679
    772680
     
    977885
    978886
    979 %\section{\texorpdfstring{\protect\lstinline@case@ Clause}{case Clause}}
    980 \subsection{\texorpdfstring{\LstKeywordStyle{case} Statement}{case Statement}}
    981 
    982 C restricts the Ā©caseĀ© clause of a Ā©switchĀ© statement to a single value.
    983 For multiple Ā©caseĀ© clauses associated with the same statement, it is necessary to have multiple Ā©caseĀ© clauses rather than multiple values.
    984 Requiring a Ā©caseĀ© clause for each value does not seem to be in the spirit of brevity normally associated with C.
    985 Therefore, the Ā©caseĀ© clause is extended with a list of values, as in:
    986 \begin{cquote}
    987 \begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
    988 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{C}} \\
    989 \begin{cfa}
    990 switch ( i ) {
    991   case Ā®1, 3, 5Ā®:
     887\subsection{Non-terminating and Labelled \texorpdfstring{\LstKeywordStyle{fallthrough}}{Non-terminating and Labelled fallthrough}}
     888
      889The ©fallthrough© clause may be non-terminating within a ©case© clause, or may have a target label to transfer to common code shared by multiple ©case© clauses.
     890\begin{center}
     891\begin{tabular}{@{}lll@{}}
     892\begin{cfa}
     893choose ( ... ) {
     894  case 3:
     895        if ( ... ) {
      896                ... ®fallthru;® // goto case 4
     897        } else {
     898                ...
     899        }
     900        // implicit break
     901  case 4:
     902
     903
     904
     905
     906\end{cfa}
     907&
     908\begin{cfa}
     909choose ( ... ) {
     910  case 3:
      911        ... ®fallthrough common;®
      912  case 4:
      913        ... ®fallthrough common;®
      914
      915  ®common:® // below fallthrough
     916                          // at case-clause level
     917        ...     // common code for cases 3/4
     918        // implicit break
     919  case 4:
     920
     921
     922\end{cfa}
     923&
     924\begin{cfa}
     925choose ( ... ) {
     926  case 3:
     927        choose ( ... ) {
     928          case 4:
     929                for ( ... ) {
     930                        // multi-level transfer
      931                        ... ®fallthru common;®
     932                }
     933                ...
     934        }
    992935        ...
    993   case Ā®2, 4, 6Ā®:
    994         ...
    995 }
      936  ®common:® // below fallthrough
     937                          // at case-clause level
     938\end{cfa}
     939\end{tabular}
     940\end{center}
      941The target label must be below the ©fallthrough© and may not be nested in a control structure, and
      942the target label must be at the same or higher level as the containing ©case© clause and located at
      943the same level as a ©case© clause; the target label may be case ©default©, but only associated
      944with the current ©switch©/©choose© statement.
     945
     946
     947\subsection{Loop Control}
     948
      949The ©for©/©while©/©do-while© loop-control allows empty or simplified ranges (see Figure~\ref{f:LoopControlExamples}).
     950\begin{itemize}
     951\item
     952The loop index is polymorphic in the type of the comparison value N (when the start value is implicit) or the start value M.
     953\item
      954An empty conditional implies comparison value of ©1© (true).
      955\item
      956A comparison N is implicit up-to exclusive range [0,N©®)®©.
      957\item
      958A comparison ©=© N is implicit up-to inclusive range [0,N©®]®©.
      959\item
      960The up-to range M ©~©\index{~@©~©} N means exclusive range [M,N©®)®©.
      961\item
      962The up-to range M ©~=©\index{~=@©~=©} N means inclusive range [M,N©®]®©.
      963\item
      964The down-to range M ©-~©\index{-~@©-~©} N means exclusive range [N,M©®)®©.
      965\item
      966The down-to range M ©-~=©\index{-~=@©-~=©} N means inclusive range [N,M©®]®©.
      967\item
      968©0© is the implicit start value;
      969\item
      970©1© is the implicit increment value.
      971\item
      972The up-to range uses operator ©+=© for increment;
      973\item
      974The down-to range uses operator ©-=© for decrement.
      975\item
      976©@© means put nothing in this field.
      977\item
      978©:© means start another index.
     979\end{itemize}
     980
     981\begin{figure}
     982\begin{tabular}{@{}l|l@{}}
     983\multicolumn{1}{c|}{loop control} & \multicolumn{1}{c}{output} \\
     984\hline
     985\begin{cfa}[xleftmargin=0pt]
      986while ®()® { sout | "empty"; break; }
      987do { sout | "empty"; break; } while ®()®;
      988for ®()® { sout | "empty"; break; }
      989for ( ®0® ) { sout | "A"; } sout | "zero";
      990for ( ®1® ) { sout | "A"; }
      991for ( ®10® ) { sout | "A"; }
      992for ( ®= 10® ) { sout | "A"; }
      993for ( ®1 ~= 10 ~ 2® ) { sout | "B"; }
      994for ( ®10 -~= 1 ~ 2® ) { sout | "C"; }
      995for ( ®0.5 ~ 5.5® ) { sout | "D"; }
      996for ( ®5.5 -~ 0.5® ) { sout | "E"; }
      997for ( ®i; 10® ) { sout | i; }
      998for ( ®i; = 10® ) { sout | i; }
      999for ( ®i; 1 ~= 10 ~ 2® ) { sout | i; }
      1000for ( ®i; 10 -~= 1 ~ 2® ) { sout | i; }
      1001for ( ®i; 0.5 ~ 5.5® ) { sout | i; }
      1002for ( ®i; 5.5 -~ 0.5® ) { sout | i; }
      1003for ( ®ui; 2u ~= 10u ~ 2u® ) { sout | ui; }
      1004for ( ®ui; 10u -~= 2u ~ 2u® ) { sout | ui; }
      1005enum { N = 10 };
      1006for ( ®N® ) { sout | "N"; }
      1007for ( ®i; N® ) { sout | i; }
      1008for ( ®i; N -~ 0® ) { sout | i; }
      1009const int start = 3, comp = 10, inc = 2;
      1010for ( ®i; start ~ comp ~ inc + 1® ) { sout | i; }
      1011for ( i; 1 ~ ®@® ) { if ( i > 10 ) break; sout | i; }
      1012for ( i; 10 -~ ®@® ) { if ( i < 0 ) break; sout | i; }
      1013for ( i; 2 ~ ®@® ~ 2 ) { if ( i > 10 ) break; sout | i; }
      1014for ( i; 2.1 ~ ®@® ~ ®@® ) { if ( i > 10.5 ) break; sout | i; i += 1.7; }
      1015for ( i; 10 -~ ®@® ~ 2 ) { if ( i < 0 ) break; sout | i; }
      1016for ( i; 12.1 ~ ®@® ~ ®@® ) { if ( i < 2.5 ) break; sout | i; i -= 1.7; }
      1017for ( i; 5 ®:® j; -5 ~ @ ) { sout | i | j; }
      1018for ( i; 5 ®:® j; -5 -~ @ ) { sout | i | j; }
      1019for ( i; 5 ®:® j; -5 ~ @ ~ 2 ) { sout | i | j; }
      1020for ( i; 5 ®:® j; -5 -~ @ ~ 2 ) { sout | i | j; }
      1021for ( i; 5 ®:® j; -5 ~ @ ) { sout | i | j; }
      1022for ( i; 5 ®:® j; -5 -~ @ ) { sout | i | j; }
      1023for ( i; 5 ®:® j; -5 ~ @ ~ 2 ) { sout | i | j; }
      1024for ( i; 5 ®:® j; -5 -~ @ ~ 2 ) { sout | i | j; }
      1025for ( i; 5 ®:® j; -5 -~ @ ~ 2 ®:® k; 1.5 ~ @ ) { sout | i | j | k; }
      1026for ( i; 5 ®:® j; -5 -~ @ ~ 2 ®:® k; 1.5 ~ @ ) { sout | i | j | k; }
      1027for ( i; 5 ®:® k; 1.5 ~ @ ®:® j; -5 -~ @ ~ 2 ) { sout | i | j | k; }
    9961028\end{cfa}
    9971029&
    9981030\begin{cfa}
    999 switch ( i ) {
    1000   case 1: case 3 : case 5:
    1001         ...
    1002   case 2: case 4 : case 6:
    1003         ...
    1004 }
    1005 \end{cfa}
    1006 &
    1007 \begin{cfa}
    1008 
    1009 // odd values
    1010 
    1011 // even values
    1012 
    1013 
     1031empty
     1032empty
     1033empty
     1034zero
     1035A
     1036A A A A A A A A A A
     1037A A A A A A A A A A A
     1038B B B B B
     1039C C C C C
     1040D D D D D
     1041E E E E E
     10420 1 2 3 4 5 6 7 8 9
     10430 1 2 3 4 5 6 7 8 9 10
     10441 3 5 7 9
     104510 8 6 4 2
     10460.5 1.5 2.5 3.5 4.5
     10475.5 4.5 3.5 2.5 1.5
     10482 4 6 8 10
     104910 8 6 4 2
     1050
     1051N N N N N N N N N N
     10520 1 2 3 4 5 6 7 8 9
     105310 9 8 7 6 5 4 3 2 1
     1054
     10553 6 9
     10561 2 3 4 5 6 7 8 9 10
     105710 9 8 7 6 5 4 3 2 1 0
     10582 4 6 8 10
     10592.1 3.8 5.5 7.2 8.9
     106010 8 6 4 2 0
     106112.1 10.4 8.7 7. 5.3 3.6
     10620 -5 1 -4 2 -3 3 -2 4 -1
     10630 -5 1 -6 2 -7 3 -8 4 -9
     10640 -5 1 -3 2 -1 3 1 4 3
     10650 -5 1 -7 2 -9 3 -11 4 -13
     10660 -5 1 -4 2 -3 3 -2 4 -1
     10670 -5 1 -6 2 -7 3 -8 4 -9
     10680 -5 1 -3 2 -1 3 1 4 3
     10690 -5 1 -7 2 -9 3 -11 4 -13
     10700 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
     10710 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
     10720 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
    10141073\end{cfa}
    10151074\end{tabular}
    1016 \end{cquote}
    1017 In addition, subranges are allowed to specify case values.\footnote{
    1018 gcc has the same mechanism but awkward syntax, \lstinline@2 ...42@, because a space is required after a number, otherwise the period is a decimal point.}
    1019 \begin{cfa}
    1020 switch ( i ) {
    1021   case Ā®1~5:Ā® §\C{// 1, 2, 3, 4, 5}§
    1022         ...
    1023   case Ā®10~15:Ā® §\C{// 10, 11, 12, 13, 14, 15}§
    1024         ...
    1025 }
    1026 \end{cfa}
    1027 Lists of subranges are also allowed.
    1028 \begin{cfa}
    1029 case Ā®1~5, 12~21, 35~42Ā®:
    1030 \end{cfa}
    1031 
     1075\caption{Loop Control Examples}
     1076\label{f:LoopControlExamples}
     1077\end{figure}
    10321078
    10331079% for ()  => for ( ;; )
     
    65476593hence, names in these include files are not mangled\index{mangling!name} (see~\VRef{s:Interoperability}).
    65486594All other C header files must be explicitly wrapped in ©extern "C"© to prevent name mangling.
    6549 For \Index*[C++]{\CC{}}, the name-mangling issue is often handled internally in many C header-files through checks for preprocessor variable Ā©__cplusplusĀ©, which adds appropriate Ā©extern "C"Ā© qualifiers.
      6595This approach is different from \Index*[C++]{\CC{}} where the name-mangling issue is handled internally in C header-files through checks for preprocessor variable ©__cplusplus©, which adds appropriate ©extern "C"© qualifiers.
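For example, a C header that lacks such guards might be included as follows, where the header name is illustrative only:
\begin{cfa}
extern "C" {	// turn off name mangling for the enclosed declarations
#include <some_c_header.h>	// C header without a __cplusplus guard
}
\end{cfa}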
    65506596
    65516597
     
    65616607The storage-management routines extend their C equivalents by overloading, alternate names, providing shallow type-safety, and removing the need to specify the allocation size for non-array types.
    65626608
    6563 Storage management provides the following capabilities:
     6609C storage management provides the following capabilities:
    65646610\begin{description}
    6565 \item[fill]
    6566 after allocation the storage is filled with a specified character.
     6611\item[filled]
     6612after allocation with a specified character or value.
    65676613\item[resize]
    6568 an existing allocation is decreased or increased in size.
    6569 In either case, new storage may or may not be allocated and, if there is a new allocation, as much data from the existing allocation is copied.
      6614an existing allocation is decreased or increased in size.
      6615In either case, new storage may or may not be allocated and, if there is a new allocation, as much data as possible from the existing allocation is copied into the new allocation.
    65706616For an increase in storage size, new storage after the copied data may be filled.
    6571 \item[alignment]
    6572 an allocation starts on a specified memory boundary, \eg, an address multiple of 64 or 128 for cache-line purposes.
     6617\item[align]
     6618an allocation on a specified memory boundary, \eg, an address multiple of 64 or 128 for cache-line purposes.
    65736619\item[array]
    65746620the allocation size is scaled to the specified number of array elements.
    65756621An array may be filled, resized, or aligned.
    65766622\end{description}
    6577 The table shows allocation routines supporting different combinations of storage-management capabilities:
    6578 \begin{center}
    6579 \begin{tabular}{@{}r|r|l|l|l|l@{}}
     6623\VRef[Table]{t:AllocationVersusCapabilities} shows allocation routines supporting different combinations of storage-management capabilities.
     6624\begin{table}
     6625\centering
     6626\begin{minipage}{0.75\textwidth}
     6627\begin{tabular}{@{}r|l|l|l|l|l@{}}
    65806628\multicolumn{1}{c}{}&           & \multicolumn{1}{c|}{fill}     & resize        & alignment     & array \\
    65816629\hline
    65826630C               & Ā©mallocĀ©                      & no                    & no            & no            & no    \\
    65836631                & Ā©callocĀ©                      & yes (0 only)  & no            & no            & yes   \\
    6584                 & Ā©reallocĀ©                     & no/copy               & yes           & no            & no    \\
     6632                & Ā©reallocĀ©                     & copy                  & yes           & no            & no    \\
    65856633                & Ā©memalignĀ©            & no                    & no            & yes           & no    \\
     6634                & Ā©aligned_allocĀ©\footnote{Same as Ā©memalignĀ© but size is an integral multiple of alignment, which is universally ignored.}
     6635                                                        & no                    & no            & yes           & no    \\
    65866636                & Ā©posix_memalignĀ©      & no                    & no            & yes           & no    \\
     6637                & Ā©vallocĀ©                      & no                    & no            & yes (page size)& no   \\
     6638                & Ā©pvallocĀ©\footnote{Same as Ā©vallocĀ© but rounds size to multiple of page size.}
     6639                                                        & no                    & no            & yes (page size)& no   \\
    65876640\hline
    6588 C11             & Ā©aligned_allocĀ©       & no                    & no            & yes           & no    \\
    6589 \hline
    6590 \CFA    & Ā©allocĀ©                       & no/copy/yes   & no/yes        & no            & yes   \\
    6591                 & Ā©align_allocĀ©         & no/yes                & no            & yes           & yes   \\
     6641\CFA    & Ā©cmemalignĀ©           & yes (0 only)  & no            & yes           & yes   \\
     6642                & Ā©reallocĀ©                     & copy                  & yes           & yes           & no    \\
     6643                & Ā©allocĀ©                       & no                    & yes           & no            & yes   \\
     6644                & Ā©alloc_setĀ©           & yes                   & yes           & no            & yes   \\
     6645                & Ā©alloc_alignĀ©         & no                    & yes           & yes           & yes   \\
     6646                & Ā©alloc_align_setĀ©     & yes                   & yes           & yes           & yes   \\
    65926647\end{tabular}
    6593 \end{center}
    6594 It is impossible to resize with alignment because the underlying Ā©reallocĀ© allocates storage if more space is needed, and it does not honour alignment from the original allocation.
     6648\end{minipage}
     6649\caption{Allocation Routines versus Storage-Management Capabilities}
     6650\label{t:AllocationVersusCapabilities}
     6651\end{table}
     6652
      6653\CFA memory management extends the type safety of all allocations by using the type of the left-hand side to determine the allocation size and return a matching type for the new storage.
     6654Type-safe allocation is provided for all C allocation routines and new \CFA allocation routines, \eg in
     6655\begin{cfa}
     6656int * ip = (int *)malloc( sizeof(int) );                §\C{// C}§
     6657int * ip = malloc();                                                    §\C{// \CFA type-safe version of C malloc}§
      6658int * ip = alloc();                                                             §\C{// \CFA type-safe uniform alloc}§
      6659\end{cfa}
      6660the latter two allocations determine the allocation size from the type of ©ip© (©int©) and cast the pointer to the allocated storage to ©int *©.
     6661
     6662\CFA memory management extends allocation safety by implicitly honouring all alignment requirements, \eg in
     6663\begin{cfa}
     6664struct S { int i; } __attribute__(( aligned( 128 ) )); // cache-line alignment
     6665S * sp = malloc();                                                              §\C{// honour type alignment}§
     6666\end{cfa}
     6667the storage allocation is implicitly aligned to 128 rather than the default 16.
     6668The alignment check is performed at compile time so there is no runtime cost.
     6669
     6670\CFA memory management extends the resize capability with the notion of \newterm{sticky properties}.
     6671Hence, initial allocation capabilities are remembered and maintained when resize requires copying.
     6672For example, an initial alignment and fill capability are preserved during a resize copy so the copy has the same alignment and extended storage is filled.
      6673Without sticky properties it is dangerous to use ©realloc©, resulting in an idiom of manually performing the reallocation to maintain correctness.
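The following sketch, built from the allocation routines listed at the end of this section, shows the intended sticky behaviour; the sizes and fill value are arbitrary:
\begin{cfa}
struct S { int i; };
S * p = alloc_align_set( 64, 10, '\0' );	§\C{// 10 elements, 64-byte aligned, zero filled}§
p = realloc( p, 20 * sizeof(S) );	§\C{// sticky: copy keeps 64-byte alignment, extension is zero filled}§
free( p );
\end{cfa}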
     6674
     6675\CFA memory management extends allocation to support constructors for initialization of allocated storage, \eg in
     6676\begin{cfa}
      6677struct S { int i; };                                                    §\C{// cache-line alignment}§
     6678void ?{}( S & s, int i ) { s.i = i; }
     6679// assume ?|? operator for printing an S
     6680
      6681S & sp = *®new®( 3 );                                                   §\C{// call constructor after allocation}§
      6682sout | sp.i;
      6683®delete®( &sp );
      6684
      6685S * spa = ®anew®( 10, 5 );                                              §\C{// allocate array and initialize each array element}§
      6686for ( i; 10 ) sout | spa[i] | nonl;
      6687sout | nl;
      6688®adelete®( 10, spa );
      6689\end{cfa}
      6690Allocation routines ©new©/©anew© allocate a variable/array and initialize storage using the allocated type's constructor.
      6691Note, the matching deallocation routines ©delete©/©adelete©.
    65956692
    65966693\leavevmode
    65976694\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    6598 // C unsafe allocation
    65996695extern "C" {
    6600 void * malloc( size_t size );§\indexc{memset}§
    6601 void * calloc( size_t dim, size_t size );§\indexc{calloc}§
    6602 void * realloc( void * ptr, size_t size );§\indexc{realloc}§
    6603 void * memalign( size_t align, size_t size );§\indexc{memalign}§
    6604 int posix_memalign( void ** ptr, size_t align, size_t size );§\indexc{posix_memalign}§
    6605 
    6606 // C unsafe initialization/copy
    6607 void * memset( void * dest, int c, size_t size );
    6608 void * memcpy( void * dest, const void * src, size_t size );
    6609 }
     6696        // C unsafe allocation
     6697        void * malloc( size_t size );§\indexc{malloc}§
     6698        void * calloc( size_t dim, size_t size );§\indexc{calloc}§
     6699        void * realloc( void * ptr, size_t size );§\indexc{realloc}§
     6700        void * memalign( size_t align, size_t size );§\indexc{memalign}§
     6701        void * aligned_alloc( size_t align, size_t size );§\indexc{aligned_alloc}§
     6702        int posix_memalign( void ** ptr, size_t align, size_t size );§\indexc{posix_memalign}§
     6703        void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize );§\indexc{cmemalign}§ // CFA
     6704
     6705        // C unsafe initialization/copy
     6706        void * memset( void * dest, int c, size_t size );§\indexc{memset}§
     6707        void * memcpy( void * dest, const void * src, size_t size );§\indexc{memcpy}§
     6708}
     6709
     6710void * realloc( void * oaddr, size_t nalign, size_t size ); // CFA heap
    66106711
    66116712forall( dtype T | sized(T) ) {
    6612 // §\CFA§ safe equivalents, i.e., implicit size specification
     6713        // §\CFA§ safe equivalents, i.e., implicit size specification
    66136714        T * malloc( void );
    66146715        T * calloc( size_t dim );
    66156716        T * realloc( T * ptr, size_t size );
    66166717        T * memalign( size_t align );
     6718        T * cmemalign( size_t align, size_t dim  );
    66176719        T * aligned_alloc( size_t align );
    66186720        int posix_memalign( T ** ptr, size_t align );
    66196721
    6620 // §\CFA§ safe general allocation, fill, resize, array
     6722        // §\CFA§ safe general allocation, fill, resize, alignment, array
    66216723        T * alloc( void );§\indexc{alloc}§
    6622         T * alloc( char fill );
    66236724        T * alloc( size_t dim );
    6624         T * alloc( size_t dim, char fill );
    66256725        T * alloc( T ptr[], size_t dim );
    6626         T * alloc( T ptr[], size_t dim, char fill );
    6627 
    6628 // §\CFA§ safe general allocation, align, fill, array
    6629         T * align_alloc( size_t align );
    6630         T * align_alloc( size_t align, char fill );
    6631         T * align_alloc( size_t align, size_t dim );
    6632         T * align_alloc( size_t align, size_t dim, char fill );
    6633 
    6634 // §\CFA§ safe initialization/copy, i.e., implicit size specification
    6635         T * memset( T * dest, char c );§\indexc{memset}§
     6726        T * alloc_set( char fill );§\indexc{alloc_set}§
     6727        T * alloc_set( T fill );
     6728        T * alloc_set( size_t dim, char fill );
     6729        T * alloc_set( size_t dim, T fill );
     6730        T * alloc_set( size_t dim, const T fill[] );
     6731        T * alloc_set( T ptr[], size_t dim, char fill );
     6732
     6733        T * alloc_align( size_t align );
     6734        T * alloc_align( size_t align, size_t dim );
     6735        T * alloc_align( T ptr[], size_t align ); // aligned realloc array
     6736        T * alloc_align( T ptr[], size_t align, size_t dim ); // aligned realloc array
     6737        T * alloc_align_set( size_t align, char fill );
     6738        T * alloc_align_set( size_t align, T fill );
     6739        T * alloc_align_set( size_t align, size_t dim, char fill );
     6740        T * alloc_align_set( size_t align, size_t dim, T fill );
     6741        T * alloc_align_set( size_t align, size_t dim, const T fill[] );
     6742        T * alloc_align_set( T ptr[], size_t align, size_t dim, char fill );
     6743
     6744        // §\CFA§ safe initialization/copy, i.e., implicit size specification
     6745        T * memset( T * dest, char fill );§\indexc{memset}§
    66366746        T * memcpy( T * dest, const T * src );§\indexc{memcpy}§
    66376747
    6638 // §\CFA§ safe initialization/copy array
    6639         T * amemset( T dest[], char c, size_t dim );
     6748        // §\CFA§ safe initialization/copy, i.e., implicit size specification, array types
     6749        T * amemset( T dest[], char fill, size_t dim );
    66406750        T * amemcpy( T dest[], const T src[], size_t dim );
    66416751}
    66426752
    6643 // §\CFA§ allocation/deallocation and constructor/destructor
    6644 forall( dtype T | sized(T), ttype Params | { void ?{}( T *, Params ); } ) T * new( Params p );§\indexc{new}§
    6645 forall( dtype T | { void ^?{}( T * ); } ) void delete( T * ptr );§\indexc{delete}§
    6646 forall( dtype T, ttype Params | { void ^?{}( T * ); void delete( Params ); } )
     6753// §\CFA§ allocation/deallocation and constructor/destructor, non-array types
     6754forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * new( Params p );§\indexc{new}§
     6755forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void delete( T * ptr );§\indexc{delete}§
     6756forall( dtype T, ttype Params | sized(T) | { void ^?{}( T & ); void delete( Params ); } )
    66476757  void delete( T * ptr, Params rest );
    66486758
    6649 // §\CFA§ allocation/deallocation and constructor/destructor, array
    6650 forall( dtype T | sized(T), ttype Params | { void ?{}( T *, Params ); } ) T * anew( size_t dim, Params p );§\indexc{anew}§
    6651 forall( dtype T | sized(T) | { void ^?{}( T * ); } ) void adelete( size_t dim, T arr[] );§\indexc{adelete}§
    6652 forall( dtype T | sized(T) | { void ^?{}( T * ); }, ttype Params | { void adelete( Params ); } )
     6759// §\CFA§ allocation/deallocation and constructor/destructor, array types
     6760forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * anew( size_t dim, Params p );§\indexc{anew}§
     6761forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void adelete( size_t dim, T arr[] );§\indexc{adelete}§
     6762forall( dtype T | sized(T) | { void ^?{}( T & ); }, ttype Params | { void adelete( Params ); } )
    66536763  void adelete( size_t dim, T arr[], Params rest );
    66546764\end{cfa}
  • driver/cfa.cc

    rb7d6a36 r6a490b2  
    385385        } // if
    386386
     387        string preludedir;
    387388        switch(path) {
    388         case Installed   : Putenv( argv, "--prelude-dir=" + libdir ); break;
    389         case BuildTree   : Putenv( argv, "--prelude-dir=" + libdir + "/prelude" ); break;
    390         case Distributed : Putenv( argv, "--prelude-dir=" + dir(argv[0]) ); break;
     389        case Installed   : preludedir = libdir; break;
     390        case BuildTree   : preludedir = libdir + "/prelude"; break;
     391        case Distributed : preludedir = dir(argv[0]); break;
    391392        }
     393
     394        Putenv( argv, "--prelude-dir=" + preludedir );
     395        args[nargs++] = "-include";
     396        args[nargs++] = (*new string(preludedir + "/defines.hfa")).c_str();
    392397
    393398        for ( int i = 0; i < nlibs; i += 1 ) {                          // copy non-user libraries after all user libraries
  • libcfa/Makefile.in

    rb7d6a36 r6a490b2  
    106106 configure.lineno config.status.lineno
    107107mkinstalldirs = $(install_sh) -d
     108CONFIG_HEADER = $(top_builddir)/prelude/defines.hfa
    108109CONFIG_CLEAN_FILES =
    109110CONFIG_CLEAN_VPATH_FILES =
  • libcfa/configure

    rb7d6a36 r6a490b2  
    790790enable_distcc
    791791with_cfa_name
     792enable_static
    792793enable_shared
    793 enable_static
    794794with_pic
    795795enable_fast_install
     
    14521452  --disable-silent-rules  verbose build output (undo: "make V=0")
    14531453  --enable-distcc     whether or not to enable distributed compilation
     1454  --enable-static[=PKGS]  build static libraries [default=no]
    14541455  --enable-shared[=PKGS]  build shared libraries [default=yes]
    1455   --enable-static[=PKGS]  build static libraries [default=yes]
    14561456  --enable-fast-install[=PKGS]
    14571457                          optimize for fast installation [default=yes]
     
    19601960
    19611961} # ac_fn_cxx_try_link
     1962
     1963# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
     1964# -------------------------------------------------------
     1965# Tests whether HEADER exists, giving a warning if it cannot be compiled using
     1966# the include files in INCLUDES and setting the cache variable VAR
     1967# accordingly.
     1968ac_fn_c_check_header_mongrel ()
     1969{
     1970  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
     1971  if eval \${$3+:} false; then :
     1972  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
     1973$as_echo_n "checking for $2... " >&6; }
     1974if eval \${$3+:} false; then :
     1975  $as_echo_n "(cached) " >&6
     1976fi
     1977eval ac_res=\$$3
     1978               { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
     1979$as_echo "$ac_res" >&6; }
     1980else
     1981  # Is the header compilable?
     1982{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
     1983$as_echo_n "checking $2 usability... " >&6; }
     1984cat confdefs.h - <<_ACEOF >conftest.$ac_ext
     1985/* end confdefs.h.  */
     1986$4
     1987#include <$2>
     1988_ACEOF
     1989if ac_fn_c_try_compile "$LINENO"; then :
     1990  ac_header_compiler=yes
     1991else
     1992  ac_header_compiler=no
     1993fi
     1994rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
     1995{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
     1996$as_echo "$ac_header_compiler" >&6; }
     1997
     1998# Is the header present?
     1999{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
     2000$as_echo_n "checking $2 presence... " >&6; }
     2001cat confdefs.h - <<_ACEOF >conftest.$ac_ext
     2002/* end confdefs.h.  */
     2003#include <$2>
     2004_ACEOF
     2005if ac_fn_c_try_cpp "$LINENO"; then :
     2006  ac_header_preproc=yes
     2007else
     2008  ac_header_preproc=no
     2009fi
     2010rm -f conftest.err conftest.i conftest.$ac_ext
     2011{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
     2012$as_echo "$ac_header_preproc" >&6; }
     2013
     2014# So?  What about this header?
     2015case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
     2016  yes:no: )
     2017    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
     2018$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
     2019    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
     2020$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
     2021    ;;
     2022  no:yes:* )
     2023    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
     2024$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
     2025    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     check for missing prerequisite headers?" >&5
     2026$as_echo "$as_me: WARNING: $2:     check for missing prerequisite headers?" >&2;}
     2027    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
     2028$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
     2029    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&5
     2030$as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
     2031    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
     2032$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
     2033( $as_echo "## --------------------------------------- ##
     2034## Report this to cforall@plg.uwaterloo.ca ##
     2035## --------------------------------------- ##"
     2036     ) | sed "s/^/$as_me: WARNING:     /" >&2
     2037    ;;
     2038esac
     2039  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
     2040$as_echo_n "checking for $2... " >&6; }
     2041if eval \${$3+:} false; then :
     2042  $as_echo_n "(cached) " >&6
     2043else
     2044  eval "$3=\$ac_header_compiler"
     2045fi
     2046eval ac_res=\$$3
     2047               { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
     2048$as_echo "$ac_res" >&6; }
     2049fi
     2050  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
     2051
     2052} # ac_fn_c_check_header_mongrel
    19622053cat >config.log <<_ACEOF
    19632054This file contains any messages produced by compilers while
     
    79398030
    79408031# Set options
     8032# Check whether --enable-static was given.
     8033if test "${enable_static+set}" = set; then :
     8034  enableval=$enable_static; p=${PACKAGE-default}
     8035    case $enableval in
     8036    yes) enable_static=yes ;;
     8037    no) enable_static=no ;;
     8038    *)
     8039     enable_static=no
     8040      # Look at the argument we got.  We use all the common list separators.
     8041      lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
     8042      for pkg in $enableval; do
     8043        IFS=$lt_save_ifs
     8044        if test "X$pkg" = "X$p"; then
     8045          enable_static=yes
     8046        fi
     8047      done
     8048      IFS=$lt_save_ifs
     8049      ;;
     8050    esac
     8051else
     8052  enable_static=no
     8053fi
     8054
     8055
     8056
     8057
     8058
     8059
     8060
    79418061
    79428062
     
    79718091fi
    79728092
    7973 
    7974 
    7975 
    7976 
    7977 
    7978 
    7979 
    7980 
    7981   # Check whether --enable-static was given.
    7982 if test "${enable_static+set}" = set; then :
    7983   enableval=$enable_static; p=${PACKAGE-default}
    7984     case $enableval in
    7985     yes) enable_static=yes ;;
    7986     no) enable_static=no ;;
    7987     *)
    7988      enable_static=no
    7989       # Look at the argument we got.  We use all the common list separators.
    7990       lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
    7991       for pkg in $enableval; do
    7992         IFS=$lt_save_ifs
    7993         if test "X$pkg" = "X$p"; then
    7994           enable_static=yes
    7995         fi
    7996       done
    7997       IFS=$lt_save_ifs
    7998       ;;
    7999     esac
    8000 else
    8001   enable_static=yes
    8002 fi
    80038093
    80048094
     
    1685916949
    1686016950
     16951for ac_header in linux/io_uring.h
     16952do :
     16953  ac_fn_c_check_header_mongrel "$LINENO" "linux/io_uring.h" "ac_cv_header_linux_io_uring_h" "$ac_includes_default"
     16954if test "x$ac_cv_header_linux_io_uring_h" = xyes; then :
     16955  cat >>confdefs.h <<_ACEOF
     16956#define HAVE_LINUX_IO_URING_H 1
     16957_ACEOF
     16958
     16959fi
     16960
     16961done
     16962
     16963for ac_func in preadv2 pwritev2
     16964do :
     16965  as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
     16966ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
     16967if eval test \"x\$"$as_ac_var"\" = x"yes"; then :
     16968  cat >>confdefs.h <<_ACEOF
     16969#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
     16970_ACEOF
     16971
     16972fi
     16973done
     16974
     16975
    1686116976ac_config_files="$ac_config_files Makefile src/Makefile prelude/Makefile"
     16977
     16978
     16979ac_config_headers="$ac_config_headers prelude/defines.hfa"
    1686216980
    1686316981
     
    1695217070test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
    1695317071
    16954 # Transform confdefs.h into DEFS.
    16955 # Protect against shell expansion while executing Makefile rules.
    16956 # Protect against Makefile macro expansion.
    16957 #
    16958 # If the first sed substitution is executed (which looks for macros that
    16959 # take arguments), then branch to the quote section.  Otherwise,
    16960 # look for a macro that doesn't take arguments.
    16961 ac_script='
    16962 :mline
    16963 /\\$/{
    16964  N
    16965  s,\\\n,,
    16966  b mline
    16967 }
    16968 t clear
    16969 :clear
    16970 s/^[     ]*#[    ]*define[       ][      ]*\([^  (][^    (]*([^)]*)\)[   ]*\(.*\)/-D\1=\2/g
    16971 t quote
    16972 s/^[     ]*#[    ]*define[       ][      ]*\([^  ][^     ]*\)[   ]*\(.*\)/-D\1=\2/g
    16973 t quote
    16974 b any
    16975 :quote
    16976 s/[      `~#$^&*(){}\\|;'\''"<>?]/\\&/g
    16977 s/\[/\\&/g
    16978 s/\]/\\&/g
    16979 s/\$/$$/g
    16980 H
    16981 :any
    16982 ${
    16983         g
    16984         s/^\n//
    16985         s/\n/ /g
    16986         p
    16987 }
    16988 '
    16989 DEFS=`sed -n "$ac_script" confdefs.h`
    16990 
     17072DEFS=-DHAVE_CONFIG_H
    1699117073
    1699217074ac_libobjs=
     
    1746617548esac
    1746717549
     17550case $ac_config_headers in *"
     17551"*) set x $ac_config_headers; shift; ac_config_headers=$*;;
     17552esac
    1746817553
    1746917554
     
    1747117556# Files that config.status was made for.
    1747217557config_files="$ac_config_files"
     17558config_headers="$ac_config_headers"
    1747317559config_commands="$ac_config_commands"
    1747417560
     
    1749217578      --file=FILE[:TEMPLATE]
    1749317579                   instantiate the configuration file FILE
     17580      --header=FILE[:TEMPLATE]
     17581                   instantiate the configuration header FILE
    1749417582
    1749517583Configuration files:
    1749617584$config_files
     17585
     17586Configuration headers:
     17587$config_headers
    1749717588
    1749817589Configuration commands:
     
    1756217653    as_fn_append CONFIG_FILES " '$ac_optarg'"
    1756317654    ac_need_defaults=false;;
    17564   --he | --h |  --help | --hel | -h )
     17655  --header | --heade | --head | --hea )
     17656    $ac_shift
     17657    case $ac_optarg in
     17658    *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
     17659    esac
     17660    as_fn_append CONFIG_HEADERS " '$ac_optarg'"
     17661    ac_need_defaults=false;;
     17662  --he | --h)
     17663    # Conflict between --help and --header
     17664    as_fn_error $? "ambiguous option: \`$1'
     17665Try \`$0 --help' for more information.";;
     17666  --help | --hel | -h )
    1756517667    $as_echo "$ac_cs_usage"; exit ;;
    1756617668  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
     
    1762517727macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`'
    1762617728macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`'
     17729enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`'
    1762717730enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`'
    17628 enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`'
    1762917731pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`'
    1763017732enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`'
     
    1800918111    "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;;
    1801018112    "prelude/Makefile") CONFIG_FILES="$CONFIG_FILES prelude/Makefile" ;;
     18113    "prelude/defines.hfa") CONFIG_HEADERS="$CONFIG_HEADERS prelude/defines.hfa" ;;
    1801118114
    1801218115  *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
     
    1802118124if $ac_need_defaults; then
    1802218125  test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
     18126  test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
    1802318127  test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
    1802418128fi
     
    1820918313fi # test -n "$CONFIG_FILES"
    1821018314
    18211 
    18212 eval set X "  :F $CONFIG_FILES      :C $CONFIG_COMMANDS"
     18315# Set up the scripts for CONFIG_HEADERS section.
     18316# No need to generate them if there are no CONFIG_HEADERS.
     18317# This happens for instance with `./config.status Makefile'.
     18318if test -n "$CONFIG_HEADERS"; then
     18319cat >"$ac_tmp/defines.awk" <<\_ACAWK ||
     18320BEGIN {
     18321_ACEOF
     18322
     18323# Transform confdefs.h into an awk script `defines.awk', embedded as
     18324# here-document in config.status, that substitutes the proper values into
     18325# config.h.in to produce config.h.
     18326
     18327# Create a delimiter string that does not exist in confdefs.h, to ease
     18328# handling of long lines.
     18329ac_delim='%!_!# '
     18330for ac_last_try in false false :; do
     18331  ac_tt=`sed -n "/$ac_delim/p" confdefs.h`
     18332  if test -z "$ac_tt"; then
     18333    break
     18334  elif $ac_last_try; then
     18335    as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
     18336  else
     18337    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
     18338  fi
     18339done
     18340
     18341# For the awk script, D is an array of macro values keyed by name,
     18342# likewise P contains macro parameters if any.  Preserve backslash
     18343# newline sequences.
     18344
     18345ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
     18346sed -n '
     18347s/.\{148\}/&'"$ac_delim"'/g
     18348t rset
     18349:rset
     18350s/^[     ]*#[    ]*define[       ][      ]*/ /
     18351t def
     18352d
     18353:def
     18354s/\\$//
     18355t bsnl
     18356s/["\\]/\\&/g
     18357s/^ \('"$ac_word_re"'\)\(([^()]*)\)[     ]*\(.*\)/P["\1"]="\2"\
     18358D["\1"]=" \3"/p
     18359s/^ \('"$ac_word_re"'\)[         ]*\(.*\)/D["\1"]=" \2"/p
     18360d
     18361:bsnl
     18362s/["\\]/\\&/g
     18363s/^ \('"$ac_word_re"'\)\(([^()]*)\)[     ]*\(.*\)/P["\1"]="\2"\
     18364D["\1"]=" \3\\\\\\n"\\/p
     18365t cont
     18366s/^ \('"$ac_word_re"'\)[         ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p
     18367t cont
     18368d
     18369:cont
     18370n
     18371s/.\{148\}/&'"$ac_delim"'/g
     18372t clear
     18373:clear
     18374s/\\$//
     18375t bsnlc
     18376s/["\\]/\\&/g; s/^/"/; s/$/"/p
     18377d
     18378:bsnlc
     18379s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p
     18380b cont
     18381' <confdefs.h | sed '
     18382s/'"$ac_delim"'/"\\\
     18383"/g' >>$CONFIG_STATUS || ac_write_fail=1
     18384
     18385cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
     18386  for (key in D) D_is_set[key] = 1
     18387  FS = ""
     18388}
     18389/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ {
     18390  line = \$ 0
     18391  split(line, arg, " ")
     18392  if (arg[1] == "#") {
     18393    defundef = arg[2]
     18394    mac1 = arg[3]
     18395  } else {
     18396    defundef = substr(arg[1], 2)
     18397    mac1 = arg[2]
     18398  }
     18399  split(mac1, mac2, "(") #)
     18400  macro = mac2[1]
     18401  prefix = substr(line, 1, index(line, defundef) - 1)
     18402  if (D_is_set[macro]) {
     18403    # Preserve the white space surrounding the "#".
     18404    print prefix "define", macro P[macro] D[macro]
     18405    next
     18406  } else {
     18407    # Replace #undef with comments.  This is necessary, for example,
     18408    # in the case of _POSIX_SOURCE, which is predefined and required
     18409    # on some systems where configure will not decide to define it.
     18410    if (defundef == "undef") {
     18411      print "/*", prefix defundef, macro, "*/"
     18412      next
     18413    }
     18414  }
     18415}
     18416{ print }
     18417_ACAWK
     18418_ACEOF
     18419cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
     18420  as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
     18421fi # test -n "$CONFIG_HEADERS"
     18422
     18423
     18424eval set X "  :F $CONFIG_FILES  :H $CONFIG_HEADERS    :C $CONFIG_COMMANDS"
    1821318425shift
    1821418426for ac_tag
     
    1842918641  || as_fn_error $? "could not create $ac_file" "$LINENO" 5
    1843018642 ;;
    18431 
     18643  :H)
     18644  #
     18645  # CONFIG_HEADER
     18646  #
     18647  if test x"$ac_file" != x-; then
     18648    {
     18649      $as_echo "/* $configure_input  */" \
     18650      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs"
     18651    } >"$ac_tmp/config.h" \
     18652      || as_fn_error $? "could not create $ac_file" "$LINENO" 5
     18653    if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then
     18654      { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
     18655$as_echo "$as_me: $ac_file is unchanged" >&6;}
     18656    else
     18657      rm -f "$ac_file"
     18658      mv "$ac_tmp/config.h" "$ac_file" \
     18659        || as_fn_error $? "could not create $ac_file" "$LINENO" 5
     18660    fi
     18661  else
     18662    $as_echo "/* $configure_input  */" \
     18663      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \
     18664      || as_fn_error $? "could not create -" "$LINENO" 5
     18665  fi
     18666# Compute "$ac_file"'s index in $config_headers.
     18667_am_arg="$ac_file"
     18668_am_stamp_count=1
     18669for _am_header in $config_headers :; do
     18670  case $_am_header in
     18671    $_am_arg | $_am_arg:* )
     18672      break ;;
     18673    * )
     18674      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
     18675  esac
     18676done
     18677echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" ||
     18678$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
     18679         X"$_am_arg" : 'X\(//\)[^/]' \| \
     18680         X"$_am_arg" : 'X\(//\)$' \| \
     18681         X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null ||
     18682$as_echo X"$_am_arg" |
     18683    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
     18684            s//\1/
     18685            q
     18686          }
     18687          /^X\(\/\/\)[^/].*/{
     18688            s//\1/
     18689            q
     18690          }
     18691          /^X\(\/\/\)$/{
     18692            s//\1/
     18693            q
     18694          }
     18695          /^X\(\/\).*/{
     18696            s//\1/
     18697            q
     18698          }
     18699          s/.*/./; q'`/stamp-h$_am_stamp_count
     18700 ;;
    1843218701
    1843318702  :C)  { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5
     
    1858718856macro_revision=$macro_revision
    1858818857
     18858# Whether or not to build static libraries.
     18859build_old_libs=$enable_static
     18860
    1858918861# Whether or not to build shared libraries.
    1859018862build_libtool_libs=$enable_shared
    18591 
    18592 # Whether or not to build static libraries.
    18593 build_old_libs=$enable_static
    1859418863
    1859518864# What type of objects to build.
  • libcfa/configure.ac

    rb7d6a36 r6a490b2  
    109109
    110110# Checks for programs.
    111 LT_INIT
     111LT_INIT([disable-static])
    112112
    113113AC_PROG_CXX
     
    118118AC_PROG_MAKE_SET
    119119
     120AC_CHECK_HEADERS([linux/io_uring.h])
     121AC_CHECK_FUNCS([preadv2 pwritev2])
     122
    120123AC_CONFIG_FILES([
    121124        Makefile
     
    124127        ])
    125128
     129AC_CONFIG_HEADERS(prelude/defines.hfa)
     130
    126131AC_OUTPUT()
    127132
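The two checks added to configure.ac above surface as preprocessor symbols in the generated prelude/defines.hfa (HAVE_LINUX_IO_URING_H from AC_CHECK_HEADERS, HAVE_PREADV2 and HAVE_PWRITEV2 from AC_CHECK_FUNCS). A minimal C sketch of how runtime code can hedge on them; only the macro names come from the checks above, the do_read helper and its surrounding file are invented for illustration and assume the generated defines are already visible to the compiler:

    #define _GNU_SOURCE
    #include <sys/uio.h>
    #if defined(HAVE_LINUX_IO_URING_H)
        #include <linux/io_uring.h>              /* only when the header check succeeded */
    #endif

    /* Hypothetical helper: prefer preadv2 (which takes a flags argument) when configure found it. */
    static ssize_t do_read(int fd, const struct iovec * iov, int iovcnt, off_t off) {
    #if defined(HAVE_PREADV2)
        return preadv2(fd, iov, iovcnt, off, 0);
    #else
        return preadv(fd, iov, iovcnt, off);     /* portable fallback */
    #endif
    }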
  • libcfa/prelude/Makefile.am

    rb7d6a36 r6a490b2  
    2121# put into lib for now
    2222cfalibdir = ${CFA_LIBDIR}
    23 cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c
     23cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c defines.hfa
    2424
    2525CC = @LOCAL_CFACC@
  • libcfa/prelude/Makefile.in

    rb7d6a36 r6a490b2  
    1 # Makefile.in generated by automake 1.16.1 from Makefile.am.
     1# Makefile.in generated by automake 1.15 from Makefile.am.
    22# @configure_input@
    33
    4 # Copyright (C) 1994-2018 Free Software Foundation, Inc.
     4# Copyright (C) 1994-2014 Free Software Foundation, Inc.
    55
    66# This Makefile.in is free software; the Free Software Foundation
     
    104104DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
    105105mkinstalldirs = $(install_sh) -d
     106CONFIG_HEADER = defines.hfa
    106107CONFIG_CLEAN_FILES =
    107108CONFIG_CLEAN_VPATH_FILES =
     
    154155am__installdirs = "$(DESTDIR)$(cfalibdir)"
    155156DATA = $(cfalib_DATA)
    156 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
    157 am__DIST_COMMON = $(srcdir)/Makefile.in
     157am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \
     158        $(LISP)defines.hfa.in
     159# Read a list of newline-separated strings from the standard input,
     160# and print each of them once, without duplicates.  Input order is
     161# *not* preserved.
     162am__uniquify_input = $(AWK) '\
     163  BEGIN { nonempty = 0; } \
     164  { items[$$0] = 1; nonempty = 1; } \
     165  END { if (nonempty) { for (i in items) print i; }; } \
     166'
     167# Make sure the list of sources is unique.  This is necessary because,
     168# e.g., the same source file might be shared among _SOURCES variables
     169# for different programs/libraries.
     170am__define_uniq_tagged_files = \
     171  list='$(am__tagged_files)'; \
     172  unique=`for i in $$list; do \
     173    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
     174  done | $(am__uniquify_input)`
     175ETAGS = etags
     176CTAGS = ctags
     177am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/defines.hfa.in
    158178DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
    159179ACLOCAL = @ACLOCAL@
     
    306326# put into lib for now
    307327cfalibdir = ${CFA_LIBDIR}
    308 cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c
     328cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c defines.hfa
    309329AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC @ARCH_FLAGS@ @CONFIG_CFLAGS@
    310330AM_CFAFLAGS = @CONFIG_CFAFLAGS@
    311331MOSTLYCLEANFILES = bootloader.c builtins.cf extras.cf gcc-builtins.c gcc-builtins.cf prelude.cfa
    312332MAINTAINERCLEANFILES = ${addprefix ${libdir}/,${cfalib_DATA}} ${addprefix ${libdir}/,${lib_LIBRARIES}}
    313 all: all-am
     333all: defines.hfa
     334        $(MAKE) $(AM_MAKEFLAGS) all-am
    314335
    315336.SUFFIXES:
     
    331352            cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
    332353          *) \
    333             echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
    334             cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
     354            echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
     355            cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
    335356        esac;
    336357
     
    343364        cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
    344365$(am__aclocal_m4_deps):
     366
     367defines.hfa: stamp-h1
     368        @test -f $@ || rm -f stamp-h1
     369        @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1
     370
     371stamp-h1: $(srcdir)/defines.hfa.in $(top_builddir)/config.status
     372        @rm -f stamp-h1
     373        cd $(top_builddir) && $(SHELL) ./config.status prelude/defines.hfa
     374$(srcdir)/defines.hfa.in:  $(am__configure_deps)
     375        ($(am__cd) $(top_srcdir) && $(AUTOHEADER))
     376        rm -f stamp-h1
     377        touch $@
     378
     379distclean-hdr:
     380        -rm -f defines.hfa stamp-h1
    345381
    346382mostlyclean-libtool:
     
    370406        files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
    371407        dir='$(DESTDIR)$(cfalibdir)'; $(am__uninstall_files_from_dir)
    372 tags TAGS:
    373 
    374 ctags CTAGS:
    375 
    376 cscope cscopelist:
    377 
    378 
    379 distdir: $(BUILT_SOURCES)
    380         $(MAKE) $(AM_MAKEFLAGS) distdir-am
    381 
    382 distdir-am: $(DISTFILES)
     408
     409ID: $(am__tagged_files)
     410        $(am__define_uniq_tagged_files); mkid -fID $$unique
     411tags: tags-am
     412TAGS: tags
     413
     414tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
     415        set x; \
     416        here=`pwd`; \
     417        $(am__define_uniq_tagged_files); \
     418        shift; \
     419        if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
     420          test -n "$$unique" || unique=$$empty_fix; \
     421          if test $$# -gt 0; then \
     422            $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
     423              "$$@" $$unique; \
     424          else \
     425            $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
     426              $$unique; \
     427          fi; \
     428        fi
     429ctags: ctags-am
     430
     431CTAGS: ctags
     432ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
     433        $(am__define_uniq_tagged_files); \
     434        test -z "$(CTAGS_ARGS)$$unique" \
     435          || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
     436             $$unique
     437
     438GTAGS:
     439        here=`$(am__cd) $(top_builddir) && pwd` \
     440          && $(am__cd) $(top_srcdir) \
     441          && gtags -i $(GTAGS_ARGS) "$$here"
     442cscopelist: cscopelist-am
     443
     444cscopelist-am: $(am__tagged_files)
     445        list='$(am__tagged_files)'; \
     446        case "$(srcdir)" in \
     447          [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
     448          *) sdir=$(subdir)/$(srcdir) ;; \
     449        esac; \
     450        for i in $$list; do \
     451          if test -f "$$i"; then \
     452            echo "$(subdir)/$$i"; \
     453          else \
     454            echo "$$sdir/$$i"; \
     455          fi; \
     456        done >> $(top_builddir)/cscope.files
     457
     458distclean-tags:
     459        -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
     460
     461distdir: $(DISTFILES)
    383462        @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
    384463        topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
     
    412491check-am: all-am
    413492check: check-am
    414 all-am: Makefile $(DATA)
     493all-am: Makefile $(DATA) defines.hfa
    415494installdirs:
    416495        for dir in "$(DESTDIR)$(cfalibdir)"; do \
     
    455534distclean: distclean-am
    456535        -rm -f Makefile
    457 distclean-am: clean-am distclean-generic
     536distclean-am: clean-am distclean-generic distclean-hdr distclean-tags
    458537
    459538dvi: dvi-am
     
    516595uninstall-am: uninstall-cfalibDATA
    517596
    518 .MAKE: install-am install-strip
    519 
    520 .PHONY: all all-am check check-am clean clean-generic clean-libtool \
    521         cscopelist-am ctags-am distclean distclean-generic \
    522         distclean-libtool distdir dvi dvi-am html html-am info info-am \
     597.MAKE: all install-am install-strip
     598
     599.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
     600        clean-libtool cscopelist-am ctags ctags-am distclean \
     601        distclean-generic distclean-hdr distclean-libtool \
     602        distclean-tags distdir dvi dvi-am html html-am info info-am \
    523603        install install-am install-cfalibDATA install-data \
    524604        install-data-am install-dvi install-dvi-am install-exec \
     
    529609        maintainer-clean-generic maintainer-clean-local mostlyclean \
    530610        mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
    531         tags-am uninstall uninstall-am uninstall-cfalibDATA
     611        tags tags-am uninstall uninstall-am uninstall-cfalibDATA
    532612
    533613.PRECIOUS: Makefile
  • libcfa/prelude/builtins.c

    rb7d6a36 r6a490b2  
    4848void exit( int status, const char fmt[], ... ) __attribute__ (( format(printf, 2, 3), __nothrow__, __leaf__, __noreturn__ ));
    4949void abort( const char fmt[], ... ) __attribute__ (( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
     50
     51forall(dtype T)
     52static inline T & identity(T & i) {
     53        return i;
     54}
     55
     56// generator support
     57struct $generator {
     58        inline int;
     59};
     60
     61static inline void  ?{}($generator & this) { ((int&)this) = 0; }
     62static inline void ^?{}($generator &) {}
     63
     64trait is_generator(dtype T) {
     65      void main(T & this);
     66      $generator * get_generator(T & this);
     67};
     68
     69forall(dtype T | is_generator(T))
     70static inline T & resume(T & gen) {
     71        main(gen);
     72        return gen;
     73}
    5074
    5175// implicit increment, decrement if += defined, and implicit not if != defined
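The generator support added above pairs a one-int state record ($generator, zero-initialised by its constructor) with a resume that simply re-enters main on the object. A rough C analogue of that idea, not the Cforall API; every name below is made up for illustration:

    /* The int inside the generator records where to continue; resume() just
       calls main(), which dispatches on that state and advances it. */
    struct count3 { int state; int value; };             /* hypothetical generator */

    static void count3_main(struct count3 * g) {
        switch (g->state) {
        case 0:  g->value = 1; g->state = 1; return;     /* first resume  */
        case 1:  g->value = 2; g->state = 2; return;     /* second resume */
        default: g->value = 3; g->state = 0; return;     /* wraps around  */
        }
    }

    static struct count3 * count3_resume(struct count3 * g) {
        count3_main(g);                                  /* mirrors resume() in the diff */
        return g;
    }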
  • libcfa/src/Makefile.am

    rb7d6a36 r6a490b2  
    1111## Created On       : Sun May 31 08:54:01 2015
    1212## Last Modified By : Peter A. Buhr
    13 ## Last Modified On : Mon Jul 15 22:43:27 2019
    14 ## Update Count     : 241
     13## Last Modified On : Mon Mar 16 18:07:59 2020
     14## Update Count     : 242
    1515###############################################################################
    1616
     
    3333# The built sources must not depend on the installed headers
    3434AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@
    35 AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
     35AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
    3636AM_CCASFLAGS = -g -Wall -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@
    3737CFACC = @CFACC@
     
    3939#----------------------------------------------------------------------------------------------------------------
    4040if BUILDLIB
    41 headers_nosrc = math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa
     41headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa
    4242headers = fstream.hfa iostream.hfa iterator.hfa limits.hfa rational.hfa time.hfa stdlib.hfa common.hfa \
    4343          containers/maybe.hfa containers/pair.hfa containers/result.hfa containers/vector.hfa
     
    4848thread_headers_nosrc = concurrency/invoke.h
    4949thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    50 thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
     50thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/io.cfa concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
    5151else
    5252headers =
  • libcfa/src/Makefile.in

    rb7d6a36 r6a490b2  
    105105        $(am__nobase_cfa_include_HEADERS_DIST) $(am__DIST_COMMON)
    106106mkinstalldirs = $(install_sh) -d
     107CONFIG_HEADER = $(top_builddir)/prelude/defines.hfa
    107108CONFIG_CLEAN_FILES =
    108109CONFIG_CLEAN_VPATH_FILES =
     
    164165am__libcfathread_la_SOURCES_DIST =  \
    165166        concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \
    166         concurrency/invoke.c concurrency/preemption.cfa \
    167         concurrency/ready_queue.cfa concurrency/coroutine.cfa \
    168         concurrency/thread.cfa concurrency/kernel.cfa \
    169         concurrency/monitor.cfa concurrency/mutex.cfa
     167        concurrency/invoke.c concurrency/io.cfa \
     168        concurrency/preemption.cfa concurrency/ready_queue.cfa \
     169        concurrency/coroutine.cfa concurrency/thread.cfa \
     170        concurrency/kernel.cfa concurrency/monitor.cfa \
     171        concurrency/mutex.cfa
    170172@BUILDLIB_TRUE@am__objects_3 = concurrency/coroutine.lo \
    171173@BUILDLIB_TRUE@ concurrency/thread.lo concurrency/kernel.lo \
     
    174176@BUILDLIB_TRUE@ concurrency/CtxSwitch-@ARCHITECTURE@.lo \
    175177@BUILDLIB_TRUE@ concurrency/alarm.lo concurrency/invoke.lo \
    176 @BUILDLIB_TRUE@ concurrency/preemption.lo \
     178@BUILDLIB_TRUE@ concurrency/io.lo concurrency/preemption.lo \
    177179@BUILDLIB_TRUE@ concurrency/ready_queue.lo $(am__objects_3)
    178180am_libcfathread_la_OBJECTS = $(am__objects_4)
     
    194196am__v_at_0 = @
    195197am__v_at_1 =
    196 DEFAULT_INCLUDES = -I.@am__isrc@
     198DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/prelude
    197199depcomp = $(SHELL) $(top_srcdir)/automake/depcomp
    198200am__depfiles_maybe = depfiles
     
    238240        limits.hfa rational.hfa time.hfa stdlib.hfa common.hfa \
    239241        containers/maybe.hfa containers/pair.hfa containers/result.hfa \
    240         containers/vector.hfa math.hfa gmp.hfa time_t.hfa \
     242        containers/vector.hfa bitmanip.hfa math.hfa gmp.hfa time_t.hfa \
    241243        bits/align.hfa bits/containers.hfa bits/defs.hfa \
    242         bits/debug.hfa bits/locks.hfa concurrency/coroutine.hfa \
    243         concurrency/thread.hfa concurrency/kernel.hfa \
    244         concurrency/monitor.hfa concurrency/mutex.hfa \
    245         concurrency/invoke.h
     244        bits/debug.hfa bits/locks.hfa containers/list.hfa \
     245        concurrency/coroutine.hfa concurrency/thread.hfa \
     246        concurrency/kernel.hfa concurrency/monitor.hfa \
     247        concurrency/mutex.hfa concurrency/invoke.h
    246248HEADERS = $(nobase_cfa_include_HEADERS)
    247249am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
     
    433435am__v_GOC_0 = @echo "  GOC     " $@;
    434436am__v_GOC_1 =
     437AM_V_PY = $(am__v_PY_@AM_V@)
     438am__v_PY_ = $(am__v_PY_@AM_DEFAULT_V@)
     439am__v_PY_0 = @echo "  PYTHON  " $@;
     440am__v_PY_1 =
    435441AM_V_RUST = $(am__v_RUST_@AM_V@)
    436442am__v_RUST_ = $(am__v_RUST_@AM_DEFAULT_V@)
    437 am__v_RUST_0 = @echo "  RUST     " $@;
     443am__v_RUST_0 = @echo "  RUST    " $@;
    438444am__v_RUST_1 =
    439445AM_V_NODEJS = $(am__v_NODEJS_@AM_V@)
    440446am__v_NODEJS_ = $(am__v_NODEJS_@AM_DEFAULT_V@)
    441 am__v_NODEJS_0 = @echo "  NODEJS     " $@;
     447am__v_NODEJS_0 = @echo "  NODEJS  " $@;
    442448am__v_NODEJS_1 =
    443449AM_V_JAVAC = $(am__v_JAVAC_@AM_V@)
     
    453459# The built sources must not depend on the installed headers
    454460AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@
    455 AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
     461AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
    456462AM_CCASFLAGS = -g -Wall -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@
    457463@BUILDLIB_FALSE@headers_nosrc =
    458464
    459465#----------------------------------------------------------------------------------------------------------------
    460 @BUILDLIB_TRUE@headers_nosrc = math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa
     466@BUILDLIB_TRUE@headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa
    461467@BUILDLIB_FALSE@headers =
    462468@BUILDLIB_TRUE@headers = fstream.hfa iostream.hfa iterator.hfa limits.hfa rational.hfa time.hfa stdlib.hfa common.hfa \
     
    471477@BUILDLIB_FALSE@thread_headers =
    472478@BUILDLIB_TRUE@thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    473 @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
     479@BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/io.cfa concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
    474480
    475481#----------------------------------------------------------------------------------------------------------------
     
    605611        concurrency/$(DEPDIR)/$(am__dirstamp)
    606612concurrency/invoke.lo: concurrency/$(am__dirstamp) \
     613        concurrency/$(DEPDIR)/$(am__dirstamp)
     614concurrency/io.lo: concurrency/$(am__dirstamp) \
    607615        concurrency/$(DEPDIR)/$(am__dirstamp)
    608616concurrency/preemption.lo: concurrency/$(am__dirstamp) \
  • libcfa/src/bits/containers.hfa

    rb7d6a36 r6a490b2  
    146146        static inline forall( dtype T | is_node(T) ) {
    147147                void ?{}( __queue(T) & this ) with( this ) {
    148                         head{ 0p };
     148                        head{ 1p };
    149149                        tail{ &head };
     150                        verify(*tail == 1p);
    150151                }
    151152
    152153                void append( __queue(T) & this, T * val ) with( this ) {
    153154                        verify(tail != 0p);
     155                        verify(*tail == 1p);
    154156                        *tail = val;
    155157                        tail = &get_next( *val );
     158                        *tail = 1p;
    156159                }
    157160
    158161                T * pop_head( __queue(T) & this ) {
     162                        verify(*this.tail == 1p);
    159163                        T * head = this.head;
    160                         if( head ) {
     164                        if( head != 1p ) {
    161165                                this.head = get_next( *head );
    162                                 if( !get_next( *head ) ) {
     166                                if( get_next( *head ) == 1p ) {
    163167                                        this.tail = &this.head;
    164168                                }
    165169                                get_next( *head ) = 0p;
    166                         }
    167                         return head;
     170                                verify(*this.tail == 1p);
     171                                verify( get_next(*head) == 0p );
     172                                return head;
     173                        }
     174                        verify(*this.tail == 1p);
     175                        return 0p;
    168176                }
    169177
     
    180188                        get_next( *val ) = 0p;
    181189
    182                         verify( (head == 0p) == (&head == tail) );
    183                         verify( *tail == 0p );
     190                        verify( (head == 1p) == (&head == tail) );
     191                        verify( *tail == 1p );
    184192                        return val;
    185193                }
     
    266274                        return this.head != 0;
    267275                }
     276
     277                void move_to_front( __dllist(T) & src, __dllist(T) & dst, T & node ) {
     278                        remove    (src, node);
     279                        push_front(dst, node);
     280                }
    268281        }
    269282        #undef next
  • libcfa/src/bits/debug.hfa

    rb7d6a36 r6a490b2  
    99// Author           : Thierry Delisle
    1010// Created On       : Mon Nov 28 12:27:26 2016
    11 // Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 12:29:21 2020
    13 // Update Count     : 9
     11// Last Modified By : Andrew Beach
     12// Last Modified On : Mon Apr 27 10:15:00 2020
     13// Update Count     : 10
    1414//
    1515
     
    2323        #define __cfaabi_dbg_ctx_param const char caller[]
    2424        #define __cfaabi_dbg_ctx_param2 , const char caller[]
     25        #define __cfaabi_dbg_ctx_fwd caller
     26        #define __cfaabi_dbg_ctx_fwd2 , caller
    2527#else
    2628        #define __cfaabi_dbg_debug_do(...)
     
    3032        #define __cfaabi_dbg_ctx_param
    3133        #define __cfaabi_dbg_ctx_param2
     34        #define __cfaabi_dbg_ctx_fwd
     35        #define __cfaabi_dbg_ctx_fwd2
    3236#endif
    3337
     
    3640#endif
    3741        #include <stdarg.h>
    38         #include <stdio.h>
    39         #include <unistd.h>
    4042
    4143        extern void __cfaabi_bits_write( int fd, const char buffer[], int len );
     
    4648        extern void __cfaabi_bits_print_vararg( int fd, const char fmt[], va_list arg );
    4749        extern void __cfaabi_bits_print_buffer( int fd, char buffer[], int buffer_size, const char fmt[], ... ) __attribute__(( format(printf, 4, 5) ));
     50
     51#if defined(__CFA_DEBUG_PRINT__) \
     52                || defined(__CFA_DEBUG_PRINT_IO__) || defined(__CFA_DEBUG_PRINT_IO_CORE__) \
     53                || defined(__CFA_DEBUG_PRINT_MONITOR__) || defined(__CFA_DEBUG_PRINT_PREEMPTION__) \
     54                || defined(__CFA_DEBUG_PRINT_RUNTIME_CORE__) || defined(__CFA_DEBUG_PRINT_EXCEPTION__)
     55        #include <stdio.h>
     56        #include <unistd.h>
     57#endif
    4858#ifdef __cforall
    4959}
    5060#endif
    5161
    52 // #define __CFA_DEBUG_PRINT__
    53 
     62// Deprecated: Use the versions with the new module names.
    5463#ifdef __CFA_DEBUG_PRINT__
    5564        #define __cfaabi_dbg_write( buffer, len )         __cfaabi_bits_write( STDERR_FILENO, buffer, len )
    5665        #define __cfaabi_dbg_acquire()                    __cfaabi_bits_acquire()
    5766        #define __cfaabi_dbg_release()                    __cfaabi_bits_release()
    58         #define __cfaabi_dbg_print_safe(...)              __cfaabi_bits_print_safe  ( STDERR_FILENO, __VA_ARGS__ )
    59         #define __cfaabi_dbg_print_nolock(...)            __cfaabi_bits_print_nolock( STDERR_FILENO, __VA_ARGS__ )
    60         #define __cfaabi_dbg_print_buffer(...)            __cfaabi_bits_print_buffer( STDERR_FILENO, __VA_ARGS__ )
     67        #define __cfaabi_dbg_print_safe(...)              __cfaabi_bits_print_safe   ( STDERR_FILENO, __VA_ARGS__ )
     68        #define __cfaabi_dbg_print_nolock(...)            __cfaabi_bits_print_nolock ( STDERR_FILENO, __VA_ARGS__ )
     69        #define __cfaabi_dbg_print_buffer(...)            __cfaabi_bits_print_buffer ( STDERR_FILENO, __VA_ARGS__ )
    6170        #define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( STDERR_FILENO, __dbg_text, __dbg_len );
    62         #define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( STDERR_FILENO, __dbg_text, __dbg_len );
     71        #define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_write( STDERR_FILENO, __dbg_text, __dbg_len );
    6372#else
    6473        #define __cfaabi_dbg_write(...)               ((void)0)
     
    7281#endif
    7382
     83// Debug print functions and statements:
      84// Most are wrappers around the bits printing functions, but they are not always used:
      85// whether a call expands to anything depends on whether its group (the first argument) is active.
      86// The group must be one defined below. The other arguments depend on the wrapped function.
     87#define __cfadbg_write(group, buffer, len) \
     88        __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_write(STDERR_FILENO, buffer, len))
     89#define __cfadbg_acquire(group) \
     90        __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_acquire())
     91#define __cfadbg_release(group) \
     92        __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_release())
     93#define __cfadbg_print_safe(group, ...) \
     94        __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_safe(STDERR_FILENO, __VA_ARGS__))
     95#define __cfadbg_print_nolock(group, ...) \
     96        __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_nolock(STDERR_FILENO, __VA_ARGS__))
     97#define __cfadbg_print_buffer(group, ...) \
     98        __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_buffer(STDERR_FILENO, __VA_ARGS__))
     99#define __cfadbg_print_buffer_decl(group, ...) \
     100        __CFADBG_PRINT_GROUP_##group(char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( __dbg_text, __dbg_len ))
     101#define __cfadbg_print_buffer_local(group, ...) \
     102        __CFADBG_PRINT_GROUP_##group(__dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write(STDERR_FILENO, __dbg_text, __dbg_len))
     103
     104// The debug print groups:
     105#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_IO__)
     106#       define __CFADBG_PRINT_GROUP_io(...) __VA_ARGS__
     107#else
     108#       define __CFADBG_PRINT_GROUP_io(...) ((void)0)
     109#endif
     110#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_IO__) || defined(__CFA_DEBUG_PRINT_IO_CORE__)
     111#       define __CFADBG_PRINT_GROUP_io_core(...) __VA_ARGS__
     112#else
     113#       define __CFADBG_PRINT_GROUP_io_core(...) ((void)0)
     114#endif
     115#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_MONITOR__)
     116#       define __CFADBG_PRINT_GROUP_monitor(...) __VA_ARGS__
     117#else
     118#       define __CFADBG_PRINT_GROUP_monitor(...) ((void)0)
     119#endif
     120#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_PREEMPTION__)
     121#       define __CFADBG_PRINT_GROUP_preemption(...) __VA_ARGS__
     122#else
     123#       define __CFADBG_PRINT_GROUP_preemption(...) ((void)0)
     124#endif
     125#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_RUNTIME_CORE__)
     126#       define __CFADBG_PRINT_GROUP_runtime_core(...) __VA_ARGS__
     127#else
     128#       define __CFADBG_PRINT_GROUP_runtime_core(...) ((void)0)
     129#endif
     130#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
     131#       define __CFADBG_PRINT_GROUP_ready_queue(...) __VA_ARGS__
     132#else
     133#       define __CFADBG_PRINT_GROUP_ready_queue(...) ((void)0)
     134#endif
     135#if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_EXCEPTION__)
     136#       define __CFADBG_PRINT_GROUP_exception(...) __VA_ARGS__
     137#else
     138#       define __CFADBG_PRINT_GROUP_exception(...) ((void)0)
     139#endif
     140
    74141// Local Variables: //
    75142// mode: c //
  • libcfa/src/bits/locks.hfa

    rb7d6a36 r6a490b2  
    5454
    5555                #ifdef __CFA_DEBUG__
    56                         void __cfaabi_dbg_record(__spinlock_t & this, const char prev_name[]);
     56                        void __cfaabi_dbg_record_lock(__spinlock_t & this, const char prev_name[]);
    5757                #else
    58                         #define __cfaabi_dbg_record(x, y)
     58                        #define __cfaabi_dbg_record_lock(x, y)
    5959                #endif
    6060        }
    61 
    62         extern void yield( unsigned int );
    6361
    6462        static inline void ?{}( __spinlock_t & this ) {
     
    6866        // Lock the spinlock, return false if already acquired
    6967        static inline bool try_lock  ( __spinlock_t & this __cfaabi_dbg_ctx_param2 ) {
     68                disable_interrupts();
    7069                bool result = (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0);
    7170                if( result ) {
    72                         disable_interrupts();
    73                         __cfaabi_dbg_record( this, caller );
     71                        __cfaabi_dbg_record_lock( this, caller );
     72                } else {
     73                        enable_interrupts_noPoll();
    7474                }
    7575                return result;
     
    8383                #endif
    8484
     85                disable_interrupts();
    8586                for ( unsigned int i = 1;; i += 1 ) {
    8687                        if ( (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0) ) break;
     
    9899                        #endif
    99100                }
    100                 disable_interrupts();
    101                 __cfaabi_dbg_record( this, caller );
     101                __cfaabi_dbg_record_lock( this, caller );
    102102        }
    103103
    104104        static inline void unlock( __spinlock_t & this ) {
     105                __atomic_clear( &this.lock, __ATOMIC_RELEASE );
    105106                enable_interrupts_noPoll();
    106                 __atomic_clear( &this.lock, __ATOMIC_RELEASE );
    107107        }
    108108
     
    112112        #endif
    113113
     114        extern "C" {
     115                char * strerror(int);
     116        }
     117        #define CHECKED(x) { int err = x; if( err != 0 ) abort("KERNEL ERROR: Operation \"" #x "\" return error %d - %s\n", err, strerror(err)); }
     118
    114119        struct __bin_sem_t {
    115                 bool                    signaled;
    116120                pthread_mutex_t         lock;
    117121                pthread_cond_t          cond;
     122                int                     val;
    118123        };
    119124
    120125        static inline void ?{}(__bin_sem_t & this) with( this ) {
    121                 signaled = false;
    122                 pthread_mutex_init(&lock, NULL);
    123                 pthread_cond_init (&cond, NULL);
     126                // Create the mutex with error checking
     127                pthread_mutexattr_t mattr;
     128                pthread_mutexattr_init( &mattr );
     129                pthread_mutexattr_settype( &mattr, PTHREAD_MUTEX_ERRORCHECK_NP);
     130                pthread_mutex_init(&lock, &mattr);
     131
     132                pthread_cond_init (&cond, 0p);
     133                val = 0;
    124134        }
    125135
    126136        static inline void ^?{}(__bin_sem_t & this) with( this ) {
    127                 pthread_mutex_destroy(&lock);
    128                 pthread_cond_destroy (&cond);
     137                CHECKED( pthread_mutex_destroy(&lock) );
     138                CHECKED( pthread_cond_destroy (&cond) );
    129139        }
    130140
    131141        static inline void wait(__bin_sem_t & this) with( this ) {
    132142                verify(__cfaabi_dbg_in_kernel());
    133                 pthread_mutex_lock(&lock);
    134                         if(!signaled) {   // this must be a loop, not if!
     143                CHECKED( pthread_mutex_lock(&lock) );
     144                        while(val < 1) {
    135145                                pthread_cond_wait(&cond, &lock);
    136146                        }
    137                         signaled = false;
    138                 pthread_mutex_unlock(&lock);
     147                        val -= 1;
     148                CHECKED( pthread_mutex_unlock(&lock) );
    139149        }
    140150
    141         static inline void post(__bin_sem_t & this) with( this ) {
    142                 verify(__cfaabi_dbg_in_kernel());
     151        static inline bool post(__bin_sem_t & this) with( this ) {
     152                bool needs_signal = false;
    143153
    144                 pthread_mutex_lock(&lock);
    145                         bool needs_signal = !signaled;
    146                         signaled = true;
    147                 pthread_mutex_unlock(&lock);
     154                CHECKED( pthread_mutex_lock(&lock) );
     155                        if(val < 1) {
     156                                val += 1;
     157                                pthread_cond_signal(&cond);
     158                                needs_signal = true;
     159                        }
     160                CHECKED( pthread_mutex_unlock(&lock) );
    148161
    149                 if (needs_signal)
    150                         pthread_cond_signal(&cond);
     162                return needs_signal;
    151163        }
     164
     165        #undef CHECKED
    152166#endif
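The __bin_sem_t rewrite above switches from a boolean flag to a counter, builds the mutex with the error-checking attribute, and wraps every lock operation in the new CHECKED macro. A self-contained C sketch of the same pattern with plain pthreads (names are hypothetical; the diff uses the glibc PTHREAD_MUTEX_ERRORCHECK_NP variant, the portable constant is used here):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define CHECKED(x) do { int err = (x); if (err != 0) { \
        fprintf(stderr, "Operation \"" #x "\" returned %d - %s\n", err, strerror(err)); abort(); } } while (0)

    struct bin_sem { pthread_mutex_t lock; pthread_cond_t cond; int val; };

    static void bin_sem_init(struct bin_sem * s) {
        pthread_mutexattr_t mattr;
        CHECKED( pthread_mutexattr_init(&mattr) );
        CHECKED( pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_ERRORCHECK) );
        CHECKED( pthread_mutex_init(&s->lock, &mattr) );
        CHECKED( pthread_cond_init(&s->cond, NULL) );
        s->val = 0;
    }

    static void bin_sem_wait(struct bin_sem * s) {
        CHECKED( pthread_mutex_lock(&s->lock) );
        while (s->val < 1) pthread_cond_wait(&s->cond, &s->lock);
        s->val -= 1;
        CHECKED( pthread_mutex_unlock(&s->lock) );
    }

    static int bin_sem_post(struct bin_sem * s) {        /* returns 1 when a signal was needed */
        int needed = 0;
        CHECKED( pthread_mutex_lock(&s->lock) );
        if (s->val < 1) { s->val += 1; pthread_cond_signal(&s->cond); needed = 1; }
        CHECKED( pthread_mutex_unlock(&s->lock) );
        return needed;
    }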
  • libcfa/src/bits/signal.hfa

    rb7d6a36 r6a490b2  
    5454                        sig, handler, flags, errno, strerror( errno )
    5555                );
    56                 _exit( EXIT_FAILURE );
     56                _Exit( EXIT_FAILURE );
    5757        } // if
    5858}
  • libcfa/src/concurrency/CtxSwitch-arm.S

    rb7d6a36 r6a490b2  
    1313        .text
    1414        .align  2
    15         .global CtxSwitch
    16         .type   CtxSwitch, %function
     15        .global __cfactx_switch
     16        .type   __cfactx_switch, %function
    1717
    18 CtxSwitch:
     18__cfactx_switch:
    1919        @ save callee-saved registers: r4-r8, r10, r11, r13(sp) (plus r9 depending on platform specification)
    2020        @ I've seen reference to 31 registers on 64-bit, if this is the case, more need to be saved
     
    5252        mov r15, r14
    5353        #endif // R9_SPECIAL
    54        
     54
    5555        .text
    5656        .align  2
    57         .global CtxInvokeStub
    58         .type   CtxInvokeStub, %function
     57        .global __cfactx_invoke_stub
     58        .type   __cfactx_invoke_stub, %function
    5959
    60 CtxInvokeStub:
     60__cfactx_invoke_stub:
    6161        ldmfd r13!, {r0-r1}
    6262        mov r15, r1
  • libcfa/src/concurrency/CtxSwitch-i386.S

    rb7d6a36 r6a490b2  
    4343        .text
    4444        .align 2
    45         .globl CtxSwitch
    46         .type  CtxSwitch, @function
    47 CtxSwitch:
     45        .globl __cfactx_switch
     46        .type  __cfactx_switch, @function
     47__cfactx_switch:
    4848
    4949        // Copy the "from" context argument from the stack to register eax
     
    8383
    8484        ret
    85         .size  CtxSwitch, .-CtxSwitch
     85        .size  __cfactx_switch, .-__cfactx_switch
    8686
    8787// Local Variables: //
  • libcfa/src/concurrency/CtxSwitch-x86_64.S

    rb7d6a36 r6a490b2  
    4444        .text
    4545        .align 2
    46         .globl CtxSwitch
    47         .type  CtxSwitch, @function
    48 CtxSwitch:
     46        .globl __cfactx_switch
     47        .type  __cfactx_switch, @function
     48__cfactx_switch:
    4949
    5050        // Save volatile registers on the stack.
     
    7777
    7878        ret
    79         .size  CtxSwitch, .-CtxSwitch
     79        .size  __cfactx_switch, .-__cfactx_switch
    8080
    8181//-----------------------------------------------------------------------------
     
    8383        .text
    8484        .align 2
    85         .globl CtxInvokeStub
    86         .type    CtxInvokeStub, @function
    87 CtxInvokeStub:
     85        .globl __cfactx_invoke_stub
     86        .type    __cfactx_invoke_stub, @function
     87__cfactx_invoke_stub:
    8888        movq %rbx, %rdi
    8989        movq %r12, %rsi
    9090        jmp *%r13
    91         .size  CtxInvokeStub, .-CtxInvokeStub
     91        .size  __cfactx_invoke_stub, .-__cfactx_invoke_stub
    9292
    9393// Local Variables: //
  • libcfa/src/concurrency/alarm.cfa

    rb7d6a36 r6a490b2  
    4747//=============================================================================================
    4848
    49 void ?{}( alarm_node_t & this, thread_desc * thrd, Time alarm, Duration period ) with( this ) {
     49void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period ) with( this ) {
    5050        this.thrd = thrd;
    5151        this.alarm = alarm;
    5252        this.period = period;
    53         next = 0;
    5453        set = false;
    5554        kernel_alarm = false;
     
    6059        this.alarm = alarm;
    6160        this.period = period;
    62         next = 0;
    6361        set = false;
    6462        kernel_alarm = true;
     
    7169}
    7270
    73 #if !defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__))
    74 bool validate( alarm_list_t * this ) {
    75         alarm_node_t ** it = &this->head;
    76         while( (*it) ) {
    77                 it = &(*it)->next;
     71void insert( alarm_list_t * this, alarm_node_t * n ) {
     72        alarm_node_t * it = & (*this)`first;
     73        while( it && (n->alarm > it->alarm) ) {
     74                it = & (*it)`next;
     75        }
     76        if ( it ) {
     77                insert_before( *it, *n );
     78        } else {
     79                insert_last(*this, *n);
    7880        }
    7981
    80         return it == this->tail;
    81 }
    82 #endif
    83 
    84 static inline void insert_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t p ) {
    85         verify( !n->next );
    86         if( p == this->tail ) {
    87                 this->tail = &n->next;
    88         }
    89         else {
    90                 n->next = *p;
    91         }
    92         *p = n;
    93 
    94         verify( validate( this ) );
    95 }
    96 
    97 void insert( alarm_list_t * this, alarm_node_t * n ) {
    98         alarm_node_t ** it = &this->head;
    99         while( (*it) && (n->alarm > (*it)->alarm) ) {
    100                 it = &(*it)->next;
    101         }
    102 
    103         insert_at( this, n, it );
    104 
    105         verify( validate( this ) );
     82        verify( validate( *this ) );
    10683}
    10784
    10885alarm_node_t * pop( alarm_list_t * this ) {
    109         alarm_node_t * head = this->head;
     86        verify( validate( *this ) );
     87        alarm_node_t * head = & (*this)`first;
    11088        if( head ) {
    111                 this->head = head->next;
    112                 if( !head->next ) {
    113                         this->tail = &this->head;
    114                 }
    115                 head->next = 0p;
     89                remove(*head);
    11690        }
    117         verify( validate( this ) );
     91        verify( validate( *this ) );
    11892        return head;
    11993}
    12094
    121 static inline void remove_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t it ) {
    122         verify( it );
    123         verify( (*it) == n );
    124 
    125         (*it) = n->next;
    126         if( !n-> next ) {
    127                 this->tail = it;
    128         }
    129         n->next = 0p;
    130 
    131         verify( validate( this ) );
    132 }
    133 
    134 static inline void remove( alarm_list_t * this, alarm_node_t * n ) {
    135         alarm_node_t ** it = &this->head;
    136         while( (*it) && (*it) != n ) {
    137                 it = &(*it)->next;
    138         }
    139 
    140         verify( validate( this ) );
    141 
    142         if( *it ) { remove_at( this, n, it ); }
    143 
    144         verify( validate( this ) );
    145 }
    146 
    14795void register_self( alarm_node_t * this ) {
    148         alarm_list_t * alarms = &event_kernel->alarms;
     96        alarm_list_t & alarms = event_kernel->alarms;
    14997
    15098        disable_interrupts();
     
    152100        {
    153101                verify( validate( alarms ) );
    154                 bool first = !alarms->head;
     102                bool first = ! & alarms`first;
    155103
    156                 insert( alarms, this );
     104                insert( &alarms, this );
    157105                if( first ) {
    158                         __kernel_set_timer( alarms->head->alarm - __kernel_get_time() );
     106                        __kernel_set_timer( alarms`first.alarm - __kernel_get_time() );
    159107                }
    160108        }
     
    168116        lock( event_kernel->lock __cfaabi_dbg_ctx2 );
    169117        {
    170                 verify( validate( &event_kernel->alarms ) );
    171                 remove( &event_kernel->alarms, this );
     118                verify( validate( event_kernel->alarms ) );
     119                remove( *this );
    172120        }
    173121        unlock( event_kernel->lock );
     
    176124}
    177125
     126//=============================================================================================
     127// Utilities
     128//=============================================================================================
     129
     130void sleep( Duration duration ) {
     131        alarm_node_t node = { active_thread(), __kernel_get_time() + duration, 0`s };
     132
     133        register_self( &node );
     134        park( __cfaabi_dbg_ctx );
     135
     136        /* paranoid */ verify( !node.set );
     137        /* paranoid */ verify( & node`next == 0p );
     138        /* paranoid */ verify( & node`prev == 0p );
     139}
     140
    178141// Local Variables: //
    179142// mode: c //
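The hunks above replace the hand-rolled sorted singly-linked alarm list (insert_at/remove_at walking an alarm_node_t ** cursor and patching an explicit tail pointer) with the intrusive dlist from <containers/list.hfa>: insert now delegates to insert_before/insert_last, pop and unregister_self unlink in place with remove, and the new sleep() parks on a stack-allocated alarm_node_t. For comparison, a minimal plain-C sketch of the deleted pointer-to-pointer scheme; the names and the driver are illustrative, not the runtime's.

/* Plain-C sketch of the scheme the deleted code used: a sorted singly-linked
 * list threaded through the nodes, walked with a pointer-to-pointer cursor so
 * head insertion needs no special case, plus an explicit tail pointer that has
 * to be repaired on every mutation. */
#include <stdio.h>

struct node { int alarm; struct node * next; };
struct list { struct node * head; struct node ** tail; };

static void list_init(struct list * l) { l->head = 0; l->tail = &l->head; }

static void insert_sorted(struct list * l, struct node * n) {
	struct node ** it = &l->head;
	while (*it && n->alarm >= (*it)->alarm) it = &(*it)->next;   // find the slot
	n->next = *it;
	if (*it == 0) l->tail = &n->next;      // inserted at the end: repoint the tail
	*it = n;
}

static struct node * pop(struct list * l) {
	struct node * head = l->head;
	if (head) {
		l->head = head->next;
		if (!head->next) l->tail = &l->head;   // list emptied: reset the tail
		head->next = 0;
	}
	return head;
}

int main(void) {
	struct list l; list_init(&l);
	struct node a = { 3, 0 }, b = { 1, 0 }, c = { 2, 0 };
	insert_sorted(&l, &a); insert_sorted(&l, &b); insert_sorted(&l, &c);
	for (struct node * n; (n = pop(&l)) != 0; ) printf("%d ", n->alarm);   // prints 1 2 3
	printf("\n");
	return 0;
}

The cursor trick keeps insertion uniform, but removing an arbitrary node still means walking the list to find its predecessor, which is exactly the cost the doubly-linked replacement eliminates.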
  • libcfa/src/concurrency/alarm.hfa

    rb7d6a36 r6a490b2  
    2323#include "time.hfa"
    2424
    25 struct thread_desc;
     25#include <containers/list.hfa>
     26
     27struct $thread;
    2628struct processor;
    2729
     
    4042        Time alarm;                             // time when alarm goes off
    4143        Duration period;                        // if > 0 => period of alarm
    42         alarm_node_t * next;            // intrusive link list field
     44
     45        DLISTED_MGD_IMPL_IN(alarm_node_t)
    4346
    4447        union {
    45                 thread_desc * thrd;     // thrd who created event
     48                $thread * thrd; // thrd who created event
    4649                processor * proc;               // proc who created event
    4750        };
     
    5053        bool kernel_alarm       :1;             // true if this is not a user defined alarm
    5154};
     55DLISTED_MGD_IMPL_OUT(alarm_node_t)
    5256
    53 typedef alarm_node_t ** __alarm_it_t;
    54 
    55 void ?{}( alarm_node_t & this, thread_desc * thrd, Time alarm, Duration period );
     57void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period );
    5658void ?{}( alarm_node_t & this, processor   * proc, Time alarm, Duration period );
    5759void ^?{}( alarm_node_t & this );
    5860
    59 struct alarm_list_t {
    60         alarm_node_t * head;
    61         __alarm_it_t tail;
    62 };
    63 
    64 static inline void ?{}( alarm_list_t & this ) with( this ) {
    65         head = 0;
    66         tail = &head;
    67 }
     61typedef dlist(alarm_node_t, alarm_node_t) alarm_list_t;
    6862
    6963void insert( alarm_list_t * this, alarm_node_t * n );
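Here DLISTED_MGD_IMPL_IN/OUT embed managed prev/next links inside alarm_node_t itself, and the handwritten head/tail struct and its constructor collapse into a dlist typedef. The payoff shows up in the .cfa hunks above: remove(*head) unlinks a node in O(1), with no traversal and no tail pointer to repair. Below is a plain-C model of the underlying idea, an intrusive doubly-linked node hanging off a sentinel header; the real macros generate a richer, type-checked interface, so everything here is only illustrative.

/* Plain-C model of an intrusive doubly-linked node: the links live inside the
 * element, so unlinking needs only the node itself and is O(1). */
#include <stdio.h>

struct alarm_node {
	int alarm;
	struct alarm_node * prev, * next;        // what DLISTED_MGD_IMPL_IN embeds
};

struct alarm_list { struct alarm_node head; };   // sentinel node: an empty list points at itself

static void list_init(struct alarm_list * l) { l->head.prev = l->head.next = &l->head; }

static void insert_last(struct alarm_list * l, struct alarm_node * n) {
	n->prev = l->head.prev;  n->next = &l->head;
	l->head.prev->next = n;  l->head.prev = n;
}

static void remove_node(struct alarm_node * n) {   // O(1): no traversal, no tail to patch
	n->prev->next = n->next;
	n->next->prev = n->prev;
	n->prev = n->next = 0;
}

static struct alarm_node * first(struct alarm_list * l) {
	return l->head.next == &l->head ? 0 : l->head.next;
}

int main(void) {
	struct alarm_list l; list_init(&l);
	struct alarm_node a = { 1, 0, 0 }, b = { 2, 0, 0 };
	insert_last(&l, &a); insert_last(&l, &b);
	remove_node(&a);                               // unlink without walking the list
	printf("first alarm now: %d\n", first(&l)->alarm);   // prints 2
	return 0;
}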
  • libcfa/src/concurrency/coroutine.cfa

    rb7d6a36 r6a490b2  
    3737
    3838extern "C" {
    39         void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc *) __attribute__ ((__noreturn__));
     39        void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct $coroutine *) __attribute__ ((__noreturn__));
    4040        static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) __attribute__ ((__noreturn__));
    4141        static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) {
     
    8989}
    9090
    91 void ?{}( coroutine_desc & this, const char name[], void * storage, size_t storageSize ) with( this ) {
     91void ?{}( $coroutine & this, const char name[], void * storage, size_t storageSize ) with( this ) {
    9292        (this.context){0p, 0p};
    9393        (this.stack){storage, storageSize};
     
    9999}
    100100
    101 void ^?{}(coroutine_desc& this) {
     101void ^?{}($coroutine& this) {
    102102        if(this.state != Halted && this.state != Start && this.state != Primed) {
    103                 coroutine_desc * src = TL_GET( this_thread )->curr_cor;
    104                 coroutine_desc * dst = &this;
     103                $coroutine * src = TL_GET( this_thread )->curr_cor;
     104                $coroutine * dst = &this;
    105105
    106106                struct _Unwind_Exception storage;
     
    115115                }
    116116
    117                 CoroutineCtxSwitch( src, dst );
     117                $ctx_switch( src, dst );
    118118        }
    119119}
     
    123123forall(dtype T | is_coroutine(T))
    124124void prime(T& cor) {
    125         coroutine_desc* this = get_coroutine(cor);
     125        $coroutine* this = get_coroutine(cor);
    126126        assert(this->state == Start);
    127127
     
    187187// is not inline (We can't inline Cforall in C)
    188188extern "C" {
    189         void __leave_coroutine( struct coroutine_desc * src ) {
    190                 coroutine_desc * starter = src->cancellation != 0 ? src->last : src->starter;
     189        void __cfactx_cor_leave( struct $coroutine * src ) {
     190                $coroutine * starter = src->cancellation != 0 ? src->last : src->starter;
    191191
    192192                src->state = Halted;
     
    201201                        src->name, src, starter->name, starter );
    202202
    203                 CoroutineCtxSwitch( src, starter );
    204         }
    205 
    206         struct coroutine_desc * __finish_coroutine(void) {
    207                 struct coroutine_desc * cor = kernelTLS.this_thread->curr_cor;
     203                $ctx_switch( src, starter );
     204        }
     205
     206        struct $coroutine * __cfactx_cor_finish(void) {
     207                struct $coroutine * cor = kernelTLS.this_thread->curr_cor;
    208208
    209209                if(cor->state == Primed) {
    210                         suspend();
     210                        __cfactx_suspend();
    211211                }
    212212
  • libcfa/src/concurrency/coroutine.hfa

    rb7d6a36 r6a490b2  
    2525trait is_coroutine(dtype T) {
    2626      void main(T & this);
    27       coroutine_desc * get_coroutine(T & this);
     27      $coroutine * get_coroutine(T & this);
    2828};
    2929
    30 #define DECL_COROUTINE(X) static inline coroutine_desc* get_coroutine(X& this) { return &this.__cor; } void main(X& this)
     30#define DECL_COROUTINE(X) static inline $coroutine* get_coroutine(X& this) { return &this.__cor; } void main(X& this)
    3131
    3232//-----------------------------------------------------------------------------
     
    3535// void ^?{}( coStack_t & this );
    3636
    37 void ?{}( coroutine_desc & this, const char name[], void * storage, size_t storageSize );
    38 void ^?{}( coroutine_desc & this );
     37void  ?{}( $coroutine & this, const char name[], void * storage, size_t storageSize );
     38void ^?{}( $coroutine & this );
    3939
    40 static inline void ?{}( coroutine_desc & this)                                       { this{ "Anonymous Coroutine", 0p, 0 }; }
    41 static inline void ?{}( coroutine_desc & this, size_t stackSize)                     { this{ "Anonymous Coroutine", 0p, stackSize }; }
    42 static inline void ?{}( coroutine_desc & this, void * storage, size_t storageSize )  { this{ "Anonymous Coroutine", storage, storageSize }; }
    43 static inline void ?{}( coroutine_desc & this, const char name[])                    { this{ name, 0p, 0 }; }
    44 static inline void ?{}( coroutine_desc & this, const char name[], size_t stackSize ) { this{ name, 0p, stackSize }; }
     40static inline void ?{}( $coroutine & this)                                       { this{ "Anonymous Coroutine", 0p, 0 }; }
     41static inline void ?{}( $coroutine & this, size_t stackSize)                     { this{ "Anonymous Coroutine", 0p, stackSize }; }
     42static inline void ?{}( $coroutine & this, void * storage, size_t storageSize )  { this{ "Anonymous Coroutine", storage, storageSize }; }
     43static inline void ?{}( $coroutine & this, const char name[])                    { this{ name, 0p, 0 }; }
     44static inline void ?{}( $coroutine & this, const char name[], size_t stackSize ) { this{ name, 0p, stackSize }; }
    4545
    4646//-----------------------------------------------------------------------------
    4747// Public coroutine API
    48 static inline void suspend(void);
    49 
    50 forall(dtype T | is_coroutine(T))
    51 static inline T & resume(T & cor);
    52 
    5348forall(dtype T | is_coroutine(T))
    5449void prime(T & cor);
    5550
    56 static inline struct coroutine_desc * active_coroutine() { return TL_GET( this_thread )->curr_cor; }
     51static inline struct $coroutine * active_coroutine() { return TL_GET( this_thread )->curr_cor; }
    5752
    5853//-----------------------------------------------------------------------------
     
    6156// Start coroutine routines
    6257extern "C" {
    63         void CtxInvokeCoroutine(void (*main)(void *), void * this);
     58        void __cfactx_invoke_coroutine(void (*main)(void *), void * this);
    6459
    6560        forall(dtype T)
    66         void CtxStart(void (*main)(T &), struct coroutine_desc * cor, T & this, void (*invoke)(void (*main)(void *), void *));
     61        void __cfactx_start(void (*main)(T &), struct $coroutine * cor, T & this, void (*invoke)(void (*main)(void *), void *));
    6762
    68         extern void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc *) __attribute__ ((__noreturn__));
     63        extern void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct $coroutine *) __attribute__ ((__noreturn__));
    6964
    70         extern void CtxSwitch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("CtxSwitch");
     65        extern void __cfactx_switch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("__cfactx_switch");
    7166}
    7267
    7368// Private wrappers for context switch and stack creation
    7469// Wrapper for co
    75 static inline void CoroutineCtxSwitch(coroutine_desc* src, coroutine_desc* dst) {
     70static inline void $ctx_switch( $coroutine * src, $coroutine * dst ) __attribute__((nonnull (1, 2))) {
    7671        // set state of current coroutine to inactive
    77         src->state = src->state == Halted ? Halted : Inactive;
     72        src->state = src->state == Halted ? Halted : Blocked;
    7873
    7974        // set new coroutine that task is executing
     
    8277        // context switch to specified coroutine
    8378        verify( dst->context.SP );
    84         CtxSwitch( &src->context, &dst->context );
    85         // when CtxSwitch returns we are back in the src coroutine
     79        __cfactx_switch( &src->context, &dst->context );
     80        // when __cfactx_switch returns we are back in the src coroutine
    8681
    8782        // set state of new coroutine to active
     
    8984
    9085        if( unlikely(src->cancellation != 0p) ) {
    91                 _CtxCoroutine_Unwind(src->cancellation, src);
     86                __cfactx_coroutine_unwind(src->cancellation, src);
    9287        }
    9388}
     
    9691
    9792// Suspend implementation inlined for performance
    98 static inline void suspend(void) {
    99         // optimization : read TLS once and reuse it
    100         // Safety note: this is preemption safe since if
    101         // preemption occurs after this line, the pointer
    102         // will also migrate which means this value will
    103         // stay in syn with the TLS
    104         coroutine_desc * src = TL_GET( this_thread )->curr_cor;
     93extern "C" {
     94        static inline void __cfactx_suspend(void) {
     95                // optimization : read TLS once and reuse it
     96                // Safety note: this is preemption safe since if
     97                // preemption occurs after this line, the pointer
     98                // will also migrate which means this value will
     99                // stay in syn with the TLS
     100                $coroutine * src = TL_GET( this_thread )->curr_cor;
    105101
    106         assertf( src->last != 0,
    107                 "Attempt to suspend coroutine \"%.256s\" (%p) that has never been resumed.\n"
    108                 "Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
    109                 src->name, src );
    110         assertf( src->last->state != Halted,
    111                 "Attempt by coroutine \"%.256s\" (%p) to suspend back to terminated coroutine \"%.256s\" (%p).\n"
    112                 "Possible cause is terminated coroutine's main routine has already returned.",
    113                 src->name, src, src->last->name, src->last );
     102                assertf( src->last != 0,
     103                        "Attempt to suspend coroutine \"%.256s\" (%p) that has never been resumed.\n"
     104                        "Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
     105                        src->name, src );
     106                assertf( src->last->state != Halted,
     107                        "Attempt by coroutine \"%.256s\" (%p) to suspend back to terminated coroutine \"%.256s\" (%p).\n"
     108                        "Possible cause is terminated coroutine's main routine has already returned.",
     109                        src->name, src, src->last->name, src->last );
    114110
    115         CoroutineCtxSwitch( src, src->last );
     111                $ctx_switch( src, src->last );
     112        }
    116113}
    117114
     
    124121        // will also migrate which means this value will
    125122        // stay in syn with the TLS
    126         coroutine_desc * src = TL_GET( this_thread )->curr_cor;
    127         coroutine_desc * dst = get_coroutine(cor);
     123        $coroutine * src = TL_GET( this_thread )->curr_cor;
     124        $coroutine * dst = get_coroutine(cor);
    128125
    129126        if( unlikely(dst->context.SP == 0p) ) {
    130127                TL_GET( this_thread )->curr_cor = dst;
    131128                __stack_prepare(&dst->stack, 65000);
    132                 CtxStart(main, dst, cor, CtxInvokeCoroutine);
     129                __cfactx_start(main, dst, cor, __cfactx_invoke_coroutine);
    133130                TL_GET( this_thread )->curr_cor = src;
    134131        }
     
    147144
    148145        // always done for performance testing
    149         CoroutineCtxSwitch( src, dst );
     146        $ctx_switch( src, dst );
    150147
    151148        return cor;
    152149}
    153150
    154 static inline void resume(coroutine_desc * dst) {
     151static inline void resume( $coroutine * dst ) __attribute__((nonnull (1))) {
    155152        // optimization : read TLS once and reuse it
    156153        // Safety note: this is preemption safe since if
     
    158155        // will also migrate which means this value will
    159156        // stay in syn with the TLS
    160         coroutine_desc * src = TL_GET( this_thread )->curr_cor;
     157        $coroutine * src = TL_GET( this_thread )->curr_cor;
    161158
    162159        // not resuming self ?
     
    172169
    173170        // always done for performance testing
    174         CoroutineCtxSwitch( src, dst );
     171        $ctx_switch( src, dst );
    175172}
    176173
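The public coroutine API is untouched by this hunk; what changes is the private machinery: coroutine_desc becomes $coroutine, CoroutineCtxSwitch becomes $ctx_switch (now annotated __attribute__((nonnull))), the Inactive state is renamed Blocked, and suspend moves into an extern "C" wrapper, __cfactx_suspend, matching the __cfactx_ naming used by the generated invoke code. The wrapper's job is unchanged: record the state of the coroutine being left, keep curr_cor in sync, switch stacks, and handle a pending cancellation on return. Below is a stand-alone C model of that resume/suspend handshake using ucontext; the real runtime uses its own assembly routine __cfactx_switch and per-thread TLS, so the globals and states here are purely illustrative.

/* Stand-alone C model of the resume/suspend handshake: a thin wrapper performs
 * the switch while the two sides update a shared notion of the coroutine state,
 * which is the role $ctx_switch plays around __cfactx_switch.  ucontext stands
 * in for the runtime's assembly context switch. */
#include <stdio.h>
#include <ucontext.h>

enum state { Start, Active, Blocked, Halted };

static ucontext_t main_ctx, cor_ctx;
static char cor_stack[64 * 1024];
static enum state cor_state = Start;

static void ctx_switch(ucontext_t * from, ucontext_t * to) {
	// model of $ctx_switch: save 'from', run 'to'; callers adjust cor_state
	// around the call the way the real wrapper adjusts src->state
	swapcontext(from, to);
}

static void cor_main(void) {
	for (int i = 0; i < 3; i++) {
		cor_state = Active;                  // running (first entry or just resumed)
		printf("coroutine step %d\n", i);
		cor_state = Blocked;                 // suspend: give control back to the resumer
		ctx_switch(&cor_ctx, &main_ctx);
	}
	cor_state = Halted;                          // returning falls back to main via uc_link
}

int main(void) {
	getcontext(&cor_ctx);
	cor_ctx.uc_stack.ss_sp   = cor_stack;
	cor_ctx.uc_stack.ss_size = sizeof(cor_stack);
	cor_ctx.uc_link          = &main_ctx;        // where control goes when cor_main returns
	makecontext(&cor_ctx, cor_main, 0);

	while (cor_state != Halted) {                // resume until the coroutine halts
		ctx_switch(&main_ctx, &cor_ctx);
		printf("back in the resumer, coroutine state %d\n", (int)cor_state);
	}
	return 0;
}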
  • libcfa/src/concurrency/invoke.c

    rb7d6a36 r6a490b2  
    2929// Called from the kernel when starting a coroutine or task so must switch back to user mode.
    3030
    31 extern void __leave_coroutine ( struct coroutine_desc * );
    32 extern struct coroutine_desc * __finish_coroutine(void);
    33 extern void __leave_thread_monitor();
     31extern struct $coroutine * __cfactx_cor_finish(void);
     32extern void __cfactx_cor_leave ( struct $coroutine * );
     33extern void __cfactx_thrd_leave();
     34
    3435extern void disable_interrupts() OPTIONAL_THREAD;
    3536extern void enable_interrupts( __cfaabi_dbg_ctx_param );
    3637
    37 void CtxInvokeCoroutine(
     38void __cfactx_invoke_coroutine(
    3839        void (*main)(void *),
    3940        void *this
    4041) {
    4142        // Finish setting up the coroutine by setting its state
    42         struct coroutine_desc * cor = __finish_coroutine();
     43        struct $coroutine * cor = __cfactx_cor_finish();
    4344
    4445        // Call the main of the coroutine
     
    4647
    4748        //Final suspend, should never return
    48         __leave_coroutine( cor );
     49        __cfactx_cor_leave( cor );
    4950        __cabi_abort( "Resumed dead coroutine" );
    5051}
    5152
    52 static _Unwind_Reason_Code _CtxCoroutine_UnwindStop(
     53static _Unwind_Reason_Code __cfactx_coroutine_unwindstop(
    5354        __attribute((__unused__)) int version,
    5455        _Unwind_Action actions,
     
    6162                // We finished unwinding the coroutine,
    6263                // leave it
    63                 __leave_coroutine( param );
     64                __cfactx_cor_leave( param );
    6465                __cabi_abort( "Resumed dead coroutine" );
    6566        }
     
    6970}
    7071
    71 void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc * cor) __attribute__ ((__noreturn__));
    72 void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc * cor) {
    73         _Unwind_Reason_Code ret = _Unwind_ForcedUnwind( storage, _CtxCoroutine_UnwindStop, cor );
     72void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct $coroutine * cor) __attribute__ ((__noreturn__));
     73void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct $coroutine * cor) {
     74        _Unwind_Reason_Code ret = _Unwind_ForcedUnwind( storage, __cfactx_coroutine_unwindstop, cor );
    7475        printf("UNWIND ERROR %d after force unwind\n", ret);
    7576        abort();
    7677}
    7778
    78 void CtxInvokeThread(
     79void __cfactx_invoke_thread(
    7980        void (*main)(void *),
    8081        void *this
     
    9394        // The order of these 4 operations is very important
    9495        //Final suspend, should never return
    95         __leave_thread_monitor();
     96        __cfactx_thrd_leave();
    9697        __cabi_abort( "Resumed dead thread" );
    9798}
    9899
    99 void CtxStart(
     100void __cfactx_start(
    100101        void (*main)(void *),
    101         struct coroutine_desc * cor,
     102        struct $coroutine * cor,
    102103        void *this,
    103104        void (*invoke)(void *)
     
    139140
    140141        fs->dummyReturn = NULL;
    141         fs->rturn = CtxInvokeStub;
     142        fs->rturn = __cfactx_invoke_stub;
    142143        fs->fixedRegisters[0] = main;
    143144        fs->fixedRegisters[1] = this;
     
    157158        struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
    158159
    159         fs->intRegs[8] = CtxInvokeStub;
     160        fs->intRegs[8] = __cfactx_invoke_stub;
    160161        fs->arg[0] = this;
    161162        fs->arg[1] = invoke;
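Apart from the rename to the __cfactx_ prefix, the cancellation mechanism is the same: a coroutine stack is torn down by _Unwind_ForcedUnwind driving a stop function that is consulted at every frame, and once the whole stack has been consumed the stop function leaves the coroutine via __cfactx_cor_leave instead of returning. The condition it tests is elided in this hunk, so the sketch below simply follows the standard forced-unwind pattern (return _URC_NO_REASON per frame, act when _UA_END_OF_STACK is signalled); it is a self-contained C illustration, not the runtime's code, and it assumes unwind tables are available (add -funwind-tables on targets that do not emit them by default).

/* Standard forced-unwind pattern: the stop function is called once per frame;
 * when the unwinder signals the end of the stack it must not return, which is
 * where the runtime would switch away from the dead coroutine.  Here a print
 * and exit() stand in for that hand-off. */
#include <stdio.h>
#include <stdlib.h>
#include <unwind.h>

static _Unwind_Reason_Code stop_fn(int version, _Unwind_Action actions,
                                   _Unwind_Exception_Class cls,
                                   struct _Unwind_Exception * exc,
                                   struct _Unwind_Context * ctx, void * param) {
	(void)version; (void)cls; (void)exc; (void)ctx;
	if (actions & _UA_END_OF_STACK) {
		// every frame has been unwound: hand control back instead of returning
		printf("stack fully unwound, parameter: %s\n", (const char *)param);
		exit(0);
	}
	return _URC_NO_REASON;                   // keep unwinding, frame by frame
}

static void leaf(struct _Unwind_Exception * exc) {
	// unwind every frame between here and the bottom of the stack
	_Unwind_ForcedUnwind(exc, stop_fn, (void *)"demo");
	abort();                                 // forced unwind only comes back on error
}

int main(void) {
	struct _Unwind_Exception exc = { 0 };    // no language-specific class or cleanup
	leaf(&exc);
	return 0;
}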
  • libcfa/src/concurrency/invoke.h

    rb7d6a36 r6a490b2  
    4747        extern "Cforall" {
    4848                extern __attribute__((aligned(128))) thread_local struct KernelThreadData {
    49                         struct thread_desc    * volatile this_thread;
     49                        struct $thread    * volatile this_thread;
    5050                        struct processor      * volatile this_processor;
    5151
     
    9292        };
    9393
    94         enum coroutine_state { Halted, Start, Inactive, Active, Primed };
    95 
    96         struct coroutine_desc {
    97                 // context that is switch during a CtxSwitch
     94        enum coroutine_state { Halted, Start, Primed, Blocked, Ready, Active, Rerun };
     95        enum __Preemption_Reason { __NO_PREEMPTION, __ALARM_PREEMPTION, __POLL_PREEMPTION, __MANUAL_PREEMPTION };
     96
     97        struct $coroutine {
     98                // context that is switch during a __cfactx_switch
    9899                struct __stack_context_t context;
    99100
     
    108109
    109110                // first coroutine to resume this one
    110                 struct coroutine_desc * starter;
     111                struct $coroutine * starter;
    111112
    112113                // last coroutine to resume this one
    113                 struct coroutine_desc * last;
     114                struct $coroutine * last;
    114115
    115116                // If non-null stack must be unwound with this exception
     
    117118
    118119        };
     120
     121        static inline struct __stack_t * __get_stack( struct $coroutine * cor ) { return (struct __stack_t*)(((uintptr_t)cor->stack.storage) & ((uintptr_t)-2)); }
    119122
    120123        // struct which calls the monitor is accepting
     
    127130        };
    128131
    129         struct monitor_desc {
     132        struct $monitor {
    130133                // spinlock to protect internal data
    131134                struct __spinlock_t lock;
    132135
    133136                // current owner of the monitor
    134                 struct thread_desc * owner;
     137                struct $thread * owner;
    135138
    136139                // queue of threads that are blocked waiting for the monitor
    137                 __queue_t(struct thread_desc) entry_queue;
     140                __queue_t(struct $thread) entry_queue;
    138141
    139142                // stack of conditions to run next once we exit the monitor
     
    152155        struct __monitor_group_t {
    153156                // currently held monitors
    154                 __cfa_anonymous_object( __small_array_t(monitor_desc*) );
     157                __cfa_anonymous_object( __small_array_t($monitor*) );
    155158
    156159                // last function that acquired monitors
     
    161164        // instrusive link field for threads
    162165        struct __thread_desc_link {
    163                 struct thread_desc * next;
    164                 struct thread_desc * prev;
     166                struct $thread * next;
     167                struct $thread * prev;
    165168                unsigned long long ts;
    166169        };
    167170
    168         struct thread_desc {
     171        struct $thread {
    169172                // Core threading fields
    170                 // context that is switch during a CtxSwitch
     173                // context that is switch during a __cfactx_switch
    171174                struct __stack_context_t context;
    172175
    173176                // current execution status for coroutine
    174                 enum coroutine_state state;
     177                volatile int state;
     178                enum __Preemption_Reason preempted;
    175179
    176180                //SKULLDUGGERY errno is not save in the thread data structure because returnToKernel appears to be the only function to require saving and restoring it
    177181
    178182                // coroutine body used to store context
    179                 struct coroutine_desc  self_cor;
     183                struct $coroutine  self_cor;
    180184
    181185                // current active context
    182                 struct coroutine_desc * curr_cor;
     186                struct $coroutine * curr_cor;
    183187
    184188                // monitor body used for mutual exclusion
    185                 struct monitor_desc    self_mon;
     189                struct $monitor    self_mon;
    186190
    187191                // pointer to monitor with sufficient lifetime for current monitors
    188                 struct monitor_desc *  self_mon_p;
     192                struct $monitor *  self_mon_p;
    189193
    190194                // pointer to the cluster on which the thread is running
     
    199203
    200204                struct {
    201                         struct thread_desc * next;
    202                         struct thread_desc * prev;
     205                        struct $thread * next;
     206                        struct $thread * prev;
    203207                } node;
    204         };
     208
     209                #ifdef __CFA_DEBUG__
     210                        // previous function to park/unpark the thread
     211                        const char * park_caller;
     212                        enum coroutine_state park_result;
     213                        bool park_stale;
     214                        const char * unpark_caller;
     215                        enum coroutine_state unpark_result;
     216                        bool unpark_stale;
     217                #endif
     218        };
     219
     220        #ifdef __CFA_DEBUG__
     221                void __cfaabi_dbg_record_thrd($thread & this, bool park, const char prev_name[]);
     222        #else
     223                #define __cfaabi_dbg_record_thrd(x, y, z)
     224        #endif
    205225
    206226        #ifdef __cforall
    207227        extern "Cforall" {
    208                 static inline thread_desc *& get_next( thread_desc & this ) {
     228
     229                static inline $thread *& get_next( $thread & this ) __attribute__((const)) {
    209230                        return this.link.next;
    210231                }
    211232
    212                 static inline [thread_desc *&, thread_desc *& ] __get( thread_desc & this ) {
     233                static inline [$thread *&, $thread *& ] __get( $thread & this ) __attribute__((const)) {
    213234                        return this.node.[next, prev];
    214235                }
     
    220241                }
    221242
    222                 static inline void ?{}(__monitor_group_t & this, struct monitor_desc ** data, __lock_size_t size, fptr_t func) {
     243                static inline void ?{}(__monitor_group_t & this, struct $monitor ** data, __lock_size_t size, fptr_t func) {
    223244                        (this.data){data};
    224245                        (this.size){size};
     
    226247                }
    227248
    228                 static inline bool ?==?( const __monitor_group_t & lhs, const __monitor_group_t & rhs ) {
     249                static inline bool ?==?( const __monitor_group_t & lhs, const __monitor_group_t & rhs ) __attribute__((const)) {
    229250                        if( (lhs.data != 0) != (rhs.data != 0) ) return false;
    230251                        if( lhs.size != rhs.size ) return false;
     
    260281
    261282        // assembler routines that performs the context switch
    262         extern void CtxInvokeStub( void );
    263         extern void CtxSwitch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("CtxSwitch");
     283        extern void __cfactx_invoke_stub( void );
     284        extern void __cfactx_switch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("__cfactx_switch");
    264285        // void CtxStore ( void * this ) asm ("CtxStore");
    265286        // void CtxRet   ( void * dst  ) asm ("CtxRet");
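Two small but load-bearing additions sit alongside the descriptor renames: the assembler entry points pick up the __cfactx_ names used everywhere else, and the new __get_stack helper recovers the stack descriptor by masking the low bit off cor->stack.storage, meaning the runtime keeps a one-bit flag in the otherwise-unused alignment bit of that pointer. This hunk does not show what the bit records, so the flag name in the sketch below is only a guess; the sketch demonstrates the pointer-tagging technique itself.

/* Pointer tagging as used by __get_stack: an aligned pointer's low bit is free,
 * so a boolean can ride along in the pointer and be masked off on access.
 * The "user_storage" flag name is illustrative; the diff does not name it. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct stack_desc { char * base; size_t size; };

static struct stack_desc * tag(struct stack_desc * p, int user_storage) {
	assert(((uintptr_t)p & 1) == 0);               // needs at least 2-byte alignment
	return (struct stack_desc *)((uintptr_t)p | (uintptr_t)(user_storage & 1));
}

static struct stack_desc * untag(struct stack_desc * p) {
	return (struct stack_desc *)((uintptr_t)p & (uintptr_t)-2);   // mirrors __get_stack
}

static int tag_bit(struct stack_desc * p) {
	return (int)((uintptr_t)p & 1);
}

int main(void) {
	struct stack_desc * d = malloc(sizeof(struct stack_desc));    // malloc is suitably aligned
	struct stack_desc * stored = tag(d, 1);        // remember "flag set" inside the pointer
	printf("flag = %d, same object = %d\n", tag_bit(stored), untag(stored) == d);
	free(untag(stored));
	return 0;
}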
  • libcfa/src/concurrency/kernel.cfa

    rb7d6a36 r6a490b2  
    1515
    1616#define __cforall_thread__
     17// #define __CFA_DEBUG_PRINT_RUNTIME_CORE__
    1718
    1819//C Includes
     
    4041#include "invoke.h"
    4142
     43
    4244//-----------------------------------------------------------------------------
    4345// Some assembly required
     
    110112//-----------------------------------------------------------------------------
    111113//Start and stop routine for the kernel, declared first to make sure they run first
    112 static void kernel_startup(void)  __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
    113 static void kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
     114static void __kernel_startup (void) __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
     115static void __kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
     116
     117//-----------------------------------------------------------------------------
     118// Kernel Scheduling logic
     119static $thread * __next_thread(cluster * this);
     120static void __run_thread(processor * this, $thread * dst);
     121static $thread * __halt(processor * this);
     122static bool __wake_one(cluster * cltr, bool was_empty);
     123static bool __wake_proc(processor *);
    114124
    115125//-----------------------------------------------------------------------------
     
    117127KERNEL_STORAGE(cluster,         mainCluster);
    118128KERNEL_STORAGE(processor,       mainProcessor);
    119 KERNEL_STORAGE(thread_desc,     mainThread);
     129KERNEL_STORAGE($thread, mainThread);
    120130KERNEL_STORAGE(__stack_t,       mainThreadCtx);
    121131
    122132cluster     * mainCluster;
    123133processor   * mainProcessor;
    124 thread_desc * mainThread;
     134$thread * mainThread;
    125135
    126136extern "C" {
     
    164174// Main thread construction
    165175
    166 void ?{}( coroutine_desc & this, current_stack_info_t * info) with( this ) {
     176void ?{}( $coroutine & this, current_stack_info_t * info) with( this ) {
    167177        stack.storage = info->storage;
    168178        with(*stack.storage) {
     
    179189}
    180190
    181 void ?{}( thread_desc & this, current_stack_info_t * info) with( this ) {
     191void ?{}( $thread & this, current_stack_info_t * info) with( this ) {
    182192        state = Start;
    183193        self_cor{ info };
     
    209219}
    210220
    211 static void start(processor * this);
     221static void * __invoke_processor(void * arg);
     222
    212223void ?{}(processor & this, const char name[], cluster & cltr) with( this ) {
    213224        this.name = name;
     
    215226        id = -1u;
    216227        terminated{ 0 };
     228        destroyer = 0p;
    217229        do_terminate = false;
    218230        preemption_alarm = 0p;
     
    220232        runner.proc = &this;
    221233
    222         idleLock{};
    223 
    224         start( &this );
     234        idle{};
     235
     236        __cfadbg_print_safe(runtime_core, "Kernel : Starting core %p\n", &this);
     237
     238        this.stack = __create_pthread( &this.kernel_thread, __invoke_processor, (void *)&this );
     239
     240        __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
    225241}
    226242
    227243void ^?{}(processor & this) with( this ){
    228244        if( ! __atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ) {
    229                 __cfaabi_dbg_print_safe("Kernel : core %p signaling termination\n", &this);
     245                __cfadbg_print_safe(runtime_core, "Kernel : core %p signaling termination\n", &this);
    230246
    231247                __atomic_store_n(&do_terminate, true, __ATOMIC_RELAXED);
    232                 wake( &this );
     248                __wake_proc( &this );
    233249
    234250                P( terminated );
     
    236252        }
    237253
    238         pthread_join( kernel_thread, 0p );
     254        int err = pthread_join( kernel_thread, 0p );
     255        if( err != 0 ) abort("KERNEL ERROR: joining processor %p caused error %s\n", &this, strerror(err));
     256
    239257        free( this.stack );
    240258}
    241259
    242 void ?{}(cluster & this, const char name[], Duration preemption_rate) with( this ) {
     260void ?{}(cluster & this, const char name[], Duration preemption_rate, int io_flags) with( this ) {
    243261        this.name = name;
    244262        this.preemption_rate = preemption_rate;
     
    246264        ready_lock{};
    247265
     266        #if !defined(__CFA_NO_STATISTICS__)
     267                print_stats = false;
     268        #endif
     269
     270        procs{ __get };
    248271        idles{ __get };
    249272        threads{ __get };
    250273
     274        __kernel_io_startup( this, io_flags, &this == mainCluster );
     275
    251276        doregister(this);
    252277}
    253278
    254279void ^?{}(cluster & this) {
     280        __kernel_io_shutdown( this, &this == mainCluster );
     281
    255282        unregister(this);
    256283}
     
    259286// Kernel Scheduling logic
    260287//=============================================================================================
    261 static void runThread(processor * this, thread_desc * dst);
    262 static void finishRunning(processor * this);
    263 static void halt(processor * this);
    264 
    265288//Main of the processor contexts
    266289void main(processorCtx_t & runner) {
     
    272295        verify(this);
    273296
    274         __cfaabi_dbg_print_safe("Kernel : core %p starting\n", this);
     297        __cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this);
    275298
    276299        // register the processor unless it's the main thread which is handled in the boot sequence
     
    285308                preemption_scope scope = { this };
    286309
    287                 __cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
    288 
    289                 thread_desc * readyThread = 0p;
     310                __cfadbg_print_safe(runtime_core, "Kernel : core %p started\n", this);
     311
     312                $thread * readyThread = 0p;
    290313                for( unsigned int spin_count = 0; ! __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST); spin_count++ ) {
    291                         readyThread = nextThread( this->cltr );
    292 
    293                         if(readyThread) {
    294                                 verify( ! kernelTLS.preemption_state.enabled );
    295 
    296                                 runThread(this, readyThread);
    297 
    298                                 verify( ! kernelTLS.preemption_state.enabled );
    299 
    300                                 //Some actions need to be taken from the kernel
    301                                 finishRunning(this);
    302 
    303                                 spin_count = 0;
    304                         } else {
    305                                 // spin(this, &spin_count);
    306                                 halt(this);
     314                        // Try to get the next thread
     315                        readyThread = __next_thread( this->cltr );
     316
     317                        // If no ready thread
     318                        if( readyThread == 0p ) {
     319                                // Block until a thread is ready
     320                                readyThread = __halt(this);
     321                        }
     322
     323                        // Check if we actually found a thread
     324                        if( readyThread ) {
     325                                /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     326                                /* paranoid */ verifyf( readyThread->state == Ready || readyThread->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", readyThread->state, readyThread->preempted);
     327                                /* paranoid */ verifyf( readyThread->next == 0p, "Expected null got %p", readyThread->next );
     328
     329                                // We found a thread run it
     330                                __run_thread(this, readyThread);
     331
     332                                /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    307333                        }
    308334                }
    309335
    310                 __cfaabi_dbg_print_safe("Kernel : core %p stopping\n", this);
     336                __cfadbg_print_safe(runtime_core, "Kernel : core %p stopping\n", this);
    311337        }
    312338
    313339        V( this->terminated );
    314 
    315340
    316341        // unregister the processor unless it's the main thread which is handled in the boot sequence
     
    319344                unregister(this->cltr, this);
    320345        }
    321 
    322         __cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
     346        else {
     347                // HACK : the coroutine context switch expects this_thread to be set
      348                // and it makes sense for it to be set in all other cases except here
     349                // fake it
     350                kernelTLS.this_thread = mainThread;
     351        }
     352
     353        __cfadbg_print_safe(runtime_core, "Kernel : core %p terminated\n", this);
    323354
    324355        stats_tls_tally(this->cltr);
     
    331362// runThread runs a thread by context switching
    332363// from the processor coroutine to the target thread
    333 static void runThread(processor * this, thread_desc * thrd_dst) {
    334         coroutine_desc * proc_cor = get_coroutine(this->runner);
    335 
    336         // Reset the terminating actions here
    337         this->finish.action_code = No_Action;
     364static void __run_thread(processor * this, $thread * thrd_dst) {
     365        $coroutine * proc_cor = get_coroutine(this->runner);
    338366
    339367        // Update global state
    340368        kernelTLS.this_thread = thrd_dst;
    341369
    342         // set state of processor coroutine to inactive and the thread to active
    343         proc_cor->state = proc_cor->state == Halted ? Halted : Inactive;
    344         thrd_dst->state = Active;
    345 
    346         // set context switch to the thread that the processor is executing
    347         verify( thrd_dst->context.SP );
    348         CtxSwitch( &proc_cor->context, &thrd_dst->context );
    349         // when CtxSwitch returns we are back in the processor coroutine
    350 
    351         // set state of processor coroutine to active and the thread to inactive
    352         thrd_dst->state = thrd_dst->state == Halted ? Halted : Inactive;
     370        // set state of processor coroutine to inactive
     371        verify(proc_cor->state == Active);
     372        proc_cor->state = Blocked;
     373
     374        // Actually run the thread
     375        RUNNING:  while(true) {
     376                if(unlikely(thrd_dst->preempted)) {
     377                        thrd_dst->preempted = __NO_PREEMPTION;
     378                        verify(thrd_dst->state == Active  || thrd_dst->state == Rerun);
     379                } else {
     380                        verify(thrd_dst->state == Blocked || thrd_dst->state == Ready); // Ready means scheduled normally, blocked means rerun
     381                        thrd_dst->state = Active;
     382                }
     383
     384                __cfaabi_dbg_debug_do(
     385                        thrd_dst->park_stale   = true;
     386                        thrd_dst->unpark_stale = true;
     387                )
     388
     389                /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     390                /* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
     391                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst ); // add escape condition if we are setting up the processor
     392                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst ); // add escape condition if we are setting up the processor
     393
     394                // set context switch to the thread that the processor is executing
     395                verify( thrd_dst->context.SP );
     396                __cfactx_switch( &proc_cor->context, &thrd_dst->context );
     397                // when __cfactx_switch returns we are back in the processor coroutine
     398
     399                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst );
     400                /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
     401                /* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
     402                /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     403
     404
      405                // We just finished running a thread; there are a few things that could have happened.
      406                // 1 - Regular case : the thread has blocked and no one has scheduled it yet.
     407                // 2 - Racy case    : the thread has blocked but someone has already tried to schedule it.
     408                // 4 - Preempted
     409                // In case 1, we may have won a race so we can't write to the state again.
     410                // In case 2, we lost the race so we now own the thread.
     411
     412                if(unlikely(thrd_dst->preempted != __NO_PREEMPTION)) {
     413                        // The thread was preempted, reschedule it and reset the flag
     414                        __schedule_thread( thrd_dst );
     415                        break RUNNING;
     416                }
     417
     418                // set state of processor coroutine to active and the thread to inactive
     419                static_assert(sizeof(thrd_dst->state) == sizeof(int));
     420                enum coroutine_state old_state = __atomic_exchange_n(&thrd_dst->state, Blocked, __ATOMIC_SEQ_CST);
     421                __cfaabi_dbg_debug_do( thrd_dst->park_result = old_state; )
     422                switch(old_state) {
     423                        case Halted:
     424                                // The thread has halted, it should never be scheduled/run again, leave it back to Halted and move on
     425                                thrd_dst->state = Halted;
     426
     427                                // We may need to wake someone up here since
     428                                unpark( this->destroyer __cfaabi_dbg_ctx2 );
     429                                this->destroyer = 0p;
     430                                break RUNNING;
     431                        case Active:
     432                                // This is case 1, the regular case, nothing more is needed
     433                                break RUNNING;
     434                        case Rerun:
     435                                // This is case 2, the racy case, someone tried to run this thread before it finished blocking
     436                                // In this case, just run it again.
     437                                continue RUNNING;
     438                        default:
     439                                // This makes no sense, something is wrong abort
     440                                abort("Finished running a thread that was Blocked/Start/Primed %d\n", old_state);
     441                }
     442        }
     443
     444        // Just before returning to the processor, set the processor coroutine to active
    353445        proc_cor->state = Active;
     446        kernelTLS.this_thread = 0p;
    354447}
    355448
    356449// KERNEL_ONLY
    357 static void returnToKernel() {
    358         coroutine_desc * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
    359         thread_desc * thrd_src = kernelTLS.this_thread;
    360 
    361         // set state of current coroutine to inactive
    362         thrd_src->state = thrd_src->state == Halted ? Halted : Inactive;
    363         proc_cor->state = Active;
    364         int local_errno = *__volatile_errno();
    365         #if defined( __i386 ) || defined( __x86_64 )
    366                 __x87_store;
    367         #endif
    368 
    369         // set new coroutine that the processor is executing
    370         // and context switch to it
    371         verify( proc_cor->context.SP );
    372         CtxSwitch( &thrd_src->context, &proc_cor->context );
    373 
    374         // set state of new coroutine to active
    375         proc_cor->state = proc_cor->state == Halted ? Halted : Inactive;
    376         thrd_src->state = Active;
    377 
    378         #if defined( __i386 ) || defined( __x86_64 )
    379                 __x87_load;
    380         #endif
    381         *__volatile_errno() = local_errno;
    382 }
    383 
    384 // KERNEL_ONLY
    385 // Once a thread has finished running, some of
    386 // its final actions must be executed from the kernel
    387 static void finishRunning(processor * this) with( this->finish ) {
    388         verify( ! kernelTLS.preemption_state.enabled );
    389         choose( action_code ) {
    390         case No_Action:
    391                 break;
    392         case Release:
    393                 unlock( *lock );
    394         case Schedule:
    395                 ScheduleThread( thrd );
    396         case Release_Schedule:
    397                 unlock( *lock );
    398                 ScheduleThread( thrd );
    399         case Release_Multi:
    400                 for(int i = 0; i < lock_count; i++) {
    401                         unlock( *locks[i] );
    402                 }
    403         case Release_Multi_Schedule:
    404                 for(int i = 0; i < lock_count; i++) {
    405                         unlock( *locks[i] );
    406                 }
    407                 for(int i = 0; i < thrd_count; i++) {
    408                         ScheduleThread( thrds[i] );
    409                 }
    410         case Callback:
    411                 callback();
    412         default:
    413                 abort("KERNEL ERROR: Unexpected action to run after thread");
    414         }
     450void returnToKernel() {
     451        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     452        $coroutine * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
     453        $thread * thrd_src = kernelTLS.this_thread;
     454
     455        // Run the thread on this processor
     456        {
     457                int local_errno = *__volatile_errno();
     458                #if defined( __i386 ) || defined( __x86_64 )
     459                        __x87_store;
     460                #endif
     461                verify( proc_cor->context.SP );
     462                __cfactx_switch( &thrd_src->context, &proc_cor->context );
     463                #if defined( __i386 ) || defined( __x86_64 )
     464                        __x87_load;
     465                #endif
     466                *__volatile_errno() = local_errno;
     467        }
     468
     469        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     470        /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) < ((uintptr_t)__get_stack(thrd_src->curr_cor)->base ), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too small.\n", thrd_src );
     471        /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) > ((uintptr_t)__get_stack(thrd_src->curr_cor)->limit), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too large.\n", thrd_src );
    415472}
    416473
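The rewritten __run_thread above and the __unpark/park pair further down replace finishRunning's post-switch action codes with one atomic handshake on the thread's state: each side does a single __atomic_exchange_n, and whichever side exchanges second sees the other's value and therefore owns the follow-up work (rerun the thread, or put it back on the ready queue). Below is a stripped-down C model of just that handshake; scheduling, TLS, preemption and the Halted path are left out, so only the state values and the exchange logic mirror the code above.

/* Model of the park/unpark race: the kernel marks a finished thread Blocked with
 * an atomic exchange, a waker marks it Rerun the same way, and the returned old
 * value tells each side whether it raced with the other. */
#include <stdio.h>

enum { Blocked = 1, Active = 2, Rerun = 3 };

static int state = Active;       // the modelled thread is currently running

/* kernel side: runs right after the thread context-switched back to the processor */
static int kernel_finish_running(void) {
	int old = __atomic_exchange_n(&state, Blocked, __ATOMIC_SEQ_CST);
	if (old == Rerun) return 1;  // an unpark already happened: run the thread again
	return 0;                    // old == Active: a regular block, leave it Blocked
}

/* waker side: may run before or after the kernel's exchange above */
static void unpark_model(void) {
	int old = __atomic_exchange_n(&state, Rerun, __ATOMIC_SEQ_CST);
	if (old == Blocked) {
		// lost the race to the kernel, so this call owns the wake-up:
		// restore Blocked and hand the thread to the scheduler (modelled by a print)
		state = Blocked;
		printf("unpark: rescheduling the thread\n");
	} else {
		// old == Active: the thread has not finished blocking yet; the kernel's
		// exchange will see Rerun and just keep running it
		printf("unpark: raced with the block, kernel will rerun\n");
	}
}

int main(void) {
	/* scenario A: the wake-up arrives while the thread is still running */
	unpark_model();                                                     // exchange sees Active
	printf("kernel reruns the thread? %d\n", kernel_finish_running());  // sees Rerun -> 1
	state = Active;                                                     // the thread runs again

	/* scenario B: the thread blocks before anyone wakes it */
	printf("kernel reruns the thread? %d\n", kernel_finish_running());  // sees Active -> 0
	unpark_model();                                                     // exchange sees Blocked
	return 0;
}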
     
    419476// This is the entry point for processors (kernel threads)
    420477// It effectively constructs a coroutine by stealing the pthread stack
    421 static void * CtxInvokeProcessor(void * arg) {
     478static void * __invoke_processor(void * arg) {
    422479        processor * proc = (processor *) arg;
    423480        kernelTLS.this_processor = proc;
     
    438495
    439496        //We now have a proper context from which to schedule threads
    440         __cfaabi_dbg_print_safe("Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);
     497        __cfadbg_print_safe(runtime_core, "Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);
    441498
    442499        // SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't
     
    449506
    450507        // Main routine of the core returned, the core is now fully terminated
    451         __cfaabi_dbg_print_safe("Kernel : core %p main ended (%p)\n", proc, &proc->runner);
     508        __cfadbg_print_safe(runtime_core, "Kernel : core %p main ended (%p)\n", proc, &proc->runner);
    452509
    453510        return 0p;
     
    460517} // Abort
    461518
    462 void * create_pthread( pthread_t * pthread, void * (*start)(void *), void * arg ) {
     519void * __create_pthread( pthread_t * pthread, void * (*start)(void *), void * arg ) {
    463520        pthread_attr_t attr;
    464521
     
    488545}
    489546
    490 static void start(processor * this) {
    491         __cfaabi_dbg_print_safe("Kernel : Starting core %p\n", this);
    492 
    493         this->stack = create_pthread( &this->kernel_thread, CtxInvokeProcessor, (void *)this );
    494 
    495         __cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
    496 }
    497 
    498547// KERNEL_ONLY
    499 void kernel_first_resume( processor * this ) {
    500         thread_desc * src = mainThread;
    501         coroutine_desc * dst = get_coroutine(this->runner);
     548static void __kernel_first_resume( processor * this ) {
     549        $thread * src = mainThread;
     550        $coroutine * dst = get_coroutine(this->runner);
    502551
    503552        verify( ! kernelTLS.preemption_state.enabled );
     
    505554        kernelTLS.this_thread->curr_cor = dst;
    506555        __stack_prepare( &dst->stack, 65000 );
    507         CtxStart(main, dst, this->runner, CtxInvokeCoroutine);
     556        __cfactx_start(main, dst, this->runner, __cfactx_invoke_coroutine);
    508557
    509558        verify( ! kernelTLS.preemption_state.enabled );
     
    512561        dst->starter = dst->starter ? dst->starter : &src->self_cor;
    513562
    514         // set state of current coroutine to inactive
    515         src->state = src->state == Halted ? Halted : Inactive;
     563        // make sure the current state is still correct
     564        /* paranoid */ verify(src->state == Ready);
    516565
    517566        // context switch to specified coroutine
    518567        verify( dst->context.SP );
    519         CtxSwitch( &src->context, &dst->context );
    520         // when CtxSwitch returns we are back in the src coroutine
     568        __cfactx_switch( &src->context, &dst->context );
     569        // when __cfactx_switch returns we are back in the src coroutine
    521570
    522571        mainThread->curr_cor = &mainThread->self_cor;
    523572
    524         // set state of new coroutine to active
    525         src->state = Active;
      573        // make sure the current state has been updated
     574        /* paranoid */ verify(src->state == Active);
    526575
    527576        verify( ! kernelTLS.preemption_state.enabled );
     
    529578
    530579// KERNEL_ONLY
    531 void kernel_last_resume( processor * this ) {
    532         coroutine_desc * src = &mainThread->self_cor;
    533         coroutine_desc * dst = get_coroutine(this->runner);
     580static void __kernel_last_resume( processor * this ) {
     581        $coroutine * src = &mainThread->self_cor;
     582        $coroutine * dst = get_coroutine(this->runner);
    534583
    535584        verify( ! kernelTLS.preemption_state.enabled );
     
    537586        verify( dst->context.SP );
    538587
     588        // SKULLDUGGERY in debug the processors check that the
     589        // stack is still within the limit of the stack limits after running a thread.
     590        // that check doesn't make sense if we context switch to the processor using the
     591        // coroutine semantics. Since this is a special case, use the current context
     592        // info to populate these fields.
     593        __cfaabi_dbg_debug_do(
     594                __stack_context_t ctx;
     595                CtxGet( ctx );
     596                mainThread->context.SP = ctx.SP;
     597                mainThread->context.FP = ctx.FP;
     598        )
     599
    539600        // context switch to the processor
    540         CtxSwitch( &src->context, &dst->context );
     601        __cfactx_switch( &src->context, &dst->context );
    541602}
    542603
    543604//-----------------------------------------------------------------------------
    544605// Scheduler routines
    545 
    546606// KERNEL ONLY
    547 void ScheduleThread( thread_desc * thrd ) {
    548         verify( thrd );
    549         verify( thrd->state != Halted );
    550 
    551         verify( ! kernelTLS.preemption_state.enabled );
    552 
    553         verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
    554 
     607void __schedule_thread( $thread * thrd ) {
     608        /* paranoid */ verify( thrd );
     609        /* paranoid */ verify( thrd->state != Halted );
     610        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     611        /* paranoid */ #if defined( __CFA_WITH_VERIFY__ )
     612        /* paranoid */ if( thrd->state == Blocked || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
     613                          "Error inactive thread marked as preempted, state %d, preemption %d\n", thrd->state, thrd->preempted );
     614        /* paranoid */ if( thrd->preempted != __NO_PREEMPTION ) assertf(thrd->state == Active || thrd->state == Rerun,
     615                          "Error preempted thread marked as not currently running, state %d, preemption %d\n", thrd->state, thrd->preempted );
     616        /* paranoid */ #endif
     617        /* paranoid */ verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
     618
     619        if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
    555620
    556621        ready_schedule_lock(thrd->curr_cluster, kernelTLS.this_processor);
     
    558623        ready_schedule_unlock(thrd->curr_cluster, kernelTLS.this_processor);
    559624
    560         with( *thrd->curr_cluster ) {
    561                 // if(was_empty) {
    562                 //      lock      (proc_list_lock __cfaabi_dbg_ctx2);
    563                 //      if(idles) {
    564                 //              wake_fast(idles.head);
    565                 //      }
    566                 //      unlock    (proc_list_lock);
    567                 // }
    568                 // else if( struct processor * idle = idles.head ) {
    569                 //      wake_fast(idle);
    570                 // }
    571         }
    572 
    573         verify( ! kernelTLS.preemption_state.enabled );
     625        __wake_one(thrd->curr_cluster, was_empty);
     626
     627        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    574628}
    575629
    576630// KERNEL ONLY
    577 thread_desc * nextThread(cluster * this) with( *this ) {
    578         verify( ! kernelTLS.preemption_state.enabled );
     631static $thread * __next_thread(cluster * this) with( *this ) {
     632        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    579633
    580634        ready_schedule_lock(this, kernelTLS.this_processor);
    581                 thread_desc * head = pop( this );
     635                $thread * head = pop( this );
    582636        ready_schedule_unlock(this, kernelTLS.this_processor);
    583637
    584         verify( ! kernelTLS.preemption_state.enabled );
     638        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    585639        return head;
    586640}
    587641
    588 void BlockInternal() {
     642// KERNEL ONLY unpark with out disabling interrupts
     643void __unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ) {
     644        static_assert(sizeof(thrd->state) == sizeof(int));
     645
     646        // record activity
     647        __cfaabi_dbg_record_thrd( *thrd, false, caller );
     648
     649        enum coroutine_state old_state = __atomic_exchange_n(&thrd->state, Rerun, __ATOMIC_SEQ_CST);
     650        __cfaabi_dbg_debug_do( thrd->unpark_result = old_state; )
     651        switch(old_state) {
     652                case Active:
     653                        // Wake won the race, the thread will reschedule/rerun itself
     654                        break;
     655                case Blocked:
     656                        /* paranoid */ verify( ! thrd->preempted != __NO_PREEMPTION );
     657
      658                        // Wake lost the race; restore the blocked state and reschedule the thread
     659                        thrd->state = Blocked;
     660                        __schedule_thread( thrd );
     661                        break;
     662                case Rerun:
     663                        abort("More than one thread attempted to schedule thread %p\n", thrd);
     664                        break;
     665                case Halted:
     666                case Start:
     667                case Primed:
     668                default:
      669                        // This makes no sense, something is wrong; abort
     670                        abort();
     671        }
     672}
     673
     674void unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ) {
     675        if( !thrd ) return;
     676
    589677        disable_interrupts();
    590         verify( ! kernelTLS.preemption_state.enabled );
     678        __unpark( thrd __cfaabi_dbg_ctx_fwd2 );
     679        enable_interrupts( __cfaabi_dbg_ctx );
     680}
     681
     682void park( __cfaabi_dbg_ctx_param ) {
     683        /* paranoid */ verify( kernelTLS.preemption_state.enabled );
     684        disable_interrupts();
     685        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     686        /* paranoid */ verify( kernelTLS.this_thread->preempted == __NO_PREEMPTION );
     687
     688        // record activity
     689        __cfaabi_dbg_record_thrd( *kernelTLS.this_thread, true, caller );
     690
    591691        returnToKernel();
    592         verify( ! kernelTLS.preemption_state.enabled );
     692
     693        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    593694        enable_interrupts( __cfaabi_dbg_ctx );
    594 }
    595 
    596 void BlockInternal( __spinlock_t * lock ) {
     695        /* paranoid */ verify( kernelTLS.preemption_state.enabled );
     696
     697}
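
Taken together, park()/unpark() replace the removed BlockInternal() variants: the blocking side publishes itself under a spinlock, releases the lock, then parks; the waking side dequeues and unparks. A minimal runtime-side sketch of that pattern (waiter_queue, block_one and wake_one are illustrative names, not part of the changeset), shaped after P()/V() on semaphore further down:

	struct waiter_queue {
		__spinlock_t lock;
		__queue_t($thread) waiting;
	};

	void block_one( waiter_queue & this ) {
		lock( this.lock __cfaabi_dbg_ctx2 );
		append( this.waiting, kernelTLS.this_thread );  // enqueue ourselves while holding the lock
		unlock( this.lock );                            // release before blocking, as in P(semaphore)
		park( __cfaabi_dbg_ctx );                       // returns once another thread unparks us
	}

	void wake_one( waiter_queue & this ) {
		lock( this.lock __cfaabi_dbg_ctx2 );
		$thread * thrd = pop_head( this.waiting );      // may be 0p when no one is waiting
		unlock( this.lock );
		unpark( thrd __cfaabi_dbg_ctx2 );               // unpark(0p) is a no-op
	}

The Active/Blocked/Rerun exchange in __unpark() is what makes the unlock-before-park window safe: if the waker gets there first, park() simply returns.
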
     698
     699// KERNEL ONLY
     700void __leave_thread() {
     701        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     702        returnToKernel();
     703        abort();
     704}
     705
     706// KERNEL ONLY
     707bool force_yield( __Preemption_Reason reason ) {
     708        /* paranoid */ verify( kernelTLS.preemption_state.enabled );
    597709        disable_interrupts();
    598         with( *kernelTLS.this_processor ) {
    599                 finish.action_code = Release;
    600                 finish.lock        = lock;
    601         }
    602 
    603         verify( ! kernelTLS.preemption_state.enabled );
    604         returnToKernel();
    605         verify( ! kernelTLS.preemption_state.enabled );
    606 
    607         enable_interrupts( __cfaabi_dbg_ctx );
    608 }
    609 
    610 void BlockInternal( thread_desc * thrd ) {
    611         disable_interrupts();
    612         with( * kernelTLS.this_processor ) {
    613                 finish.action_code = Schedule;
    614                 finish.thrd        = thrd;
    615         }
    616 
    617         verify( ! kernelTLS.preemption_state.enabled );
    618         returnToKernel();
    619         verify( ! kernelTLS.preemption_state.enabled );
    620 
    621         enable_interrupts( __cfaabi_dbg_ctx );
    622 }
    623 
    624 void BlockInternal( __spinlock_t * lock, thread_desc * thrd ) {
    625         assert(thrd);
    626         disable_interrupts();
    627         with( * kernelTLS.this_processor ) {
    628                 finish.action_code = Release_Schedule;
    629                 finish.lock        = lock;
    630                 finish.thrd        = thrd;
    631         }
    632 
    633         verify( ! kernelTLS.preemption_state.enabled );
    634         returnToKernel();
    635         verify( ! kernelTLS.preemption_state.enabled );
    636 
    637         enable_interrupts( __cfaabi_dbg_ctx );
    638 }
    639 
    640 void BlockInternal(__spinlock_t * locks [], unsigned short count) {
    641         disable_interrupts();
    642         with( * kernelTLS.this_processor ) {
    643                 finish.action_code = Release_Multi;
    644                 finish.locks       = locks;
    645                 finish.lock_count  = count;
    646         }
    647 
    648         verify( ! kernelTLS.preemption_state.enabled );
    649         returnToKernel();
    650         verify( ! kernelTLS.preemption_state.enabled );
    651 
    652         enable_interrupts( __cfaabi_dbg_ctx );
    653 }
    654 
    655 void BlockInternal(__spinlock_t * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
    656         disable_interrupts();
    657         with( *kernelTLS.this_processor ) {
    658                 finish.action_code = Release_Multi_Schedule;
    659                 finish.locks       = locks;
    660                 finish.lock_count  = lock_count;
    661                 finish.thrds       = thrds;
    662                 finish.thrd_count  = thrd_count;
    663         }
    664 
    665         verify( ! kernelTLS.preemption_state.enabled );
    666         returnToKernel();
    667         verify( ! kernelTLS.preemption_state.enabled );
    668 
    669         enable_interrupts( __cfaabi_dbg_ctx );
    670 }
    671 
    672 void BlockInternal(__finish_callback_fptr_t callback) {
    673         disable_interrupts();
    674         with( *kernelTLS.this_processor ) {
    675                 finish.action_code = Callback;
    676                 finish.callback    = callback;
    677         }
    678 
    679         verify( ! kernelTLS.preemption_state.enabled );
    680         returnToKernel();
    681         verify( ! kernelTLS.preemption_state.enabled );
    682 
    683         enable_interrupts( __cfaabi_dbg_ctx );
    684 }
    685 
    686 // KERNEL ONLY
    687 void LeaveThread(__spinlock_t * lock, thread_desc * thrd) {
    688         verify( ! kernelTLS.preemption_state.enabled );
    689         with( * kernelTLS.this_processor ) {
    690                 finish.action_code = thrd ? Release_Schedule : Release;
    691                 finish.lock        = lock;
    692                 finish.thrd        = thrd;
    693         }
    694 
    695         returnToKernel();
     710        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     711
     712        $thread * thrd = kernelTLS.this_thread;
     713        /* paranoid */ verify(thrd->state == Active || thrd->state == Rerun);
     714
     715        // SKULLDUGGERY: It is possible that we are preempting this thread just before
     716        // it was going to park itself. If that is the case and it is already using the
      717        // intrusive fields then we can't use them to preempt the thread.
      718        // In that case, abandon the preemption.
     719        bool preempted = false;
     720        if(thrd->next == 0p) {
     721                preempted = true;
     722                thrd->preempted = reason;
     723                returnToKernel();
     724        }
     725
     726        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     727        enable_interrupts_noPoll();
     728        /* paranoid */ verify( kernelTLS.preemption_state.enabled );
     729
     730        return preempted;
    696731}
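
A hedged sketch of the intended force_yield() caller, e.g. the preemption handler (the handler name and call site are assumptions; only force_yield() and __Preemption_Reason come from the code above):

	static void example_preempt( __Preemption_Reason reason ) {
		if( ! force_yield( reason ) ) {
			// the thread was already parking on its own, so the preemption was abandoned
		}
	}
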
    697732
     
    701736//-----------------------------------------------------------------------------
    702737// Kernel boot procedures
    703 static void kernel_startup(void) {
     738static void __kernel_startup(void) {
    704739        verify( ! kernelTLS.preemption_state.enabled );
    705         __cfaabi_dbg_print_safe("Kernel : Starting\n");
     740        __cfadbg_print_safe(runtime_core, "Kernel : Starting\n");
    706741
    707742        __page_size = sysconf( _SC_PAGESIZE );
     
    714749        (*mainCluster){"Main Cluster"};
    715750
    716         __cfaabi_dbg_print_safe("Kernel : Main cluster ready\n");
     751        __cfadbg_print_safe(runtime_core, "Kernel : Main cluster ready\n");
    717752
    718753        // Start by initializing the main thread
    719754        // SKULLDUGGERY: the mainThread steals the process main thread
    720755        // which will then be scheduled by the mainProcessor normally
    721         mainThread = (thread_desc *)&storage_mainThread;
     756        mainThread = ($thread *)&storage_mainThread;
    722757        current_stack_info_t info;
    723758        info.storage = (__stack_t*)&storage_mainThreadCtx;
    724759        (*mainThread){ &info };
    725760
    726         __cfaabi_dbg_print_safe("Kernel : Main thread ready\n");
     761        __cfadbg_print_safe(runtime_core, "Kernel : Main thread ready\n");
    727762
    728763
     
    746781
    747782                runner{ &this };
    748                 __cfaabi_dbg_print_safe("Kernel : constructed main processor context %p\n", &runner);
     783                __cfadbg_print_safe(runtime_core, "Kernel : constructed main processor context %p\n", &runner);
    749784        }
    750785
     
    765800        // Add the main thread to the ready queue
    766801        // once resume is called on mainProcessor->runner the mainThread needs to be scheduled like any normal thread
    767         ScheduleThread(mainThread);
     802        __schedule_thread(mainThread);
    768803
    769804        // SKULLDUGGERY: Force a context switch to the main processor to set the main thread's context to the current UNIX
    770         // context. Hence, the main thread does not begin through CtxInvokeThread, like all other threads. The trick here is that
     805        // context. Hence, the main thread does not begin through __cfactx_invoke_thread, like all other threads. The trick here is that
    771806        // mainThread is on the ready queue when this call is made.
    772         kernel_first_resume( kernelTLS.this_processor );
    773 
     807        __kernel_first_resume( kernelTLS.this_processor );
    774808
    775809
    776810        // THE SYSTEM IS NOW COMPLETELY RUNNING
    777         __cfaabi_dbg_print_safe("Kernel : Started\n--------------------------------------------------\n\n");
     811
     812
     813        // Now that the system is up, finish creating systems that need threading
     814        __kernel_io_finish_start( *mainCluster );
     815
     816
     817        __cfadbg_print_safe(runtime_core, "Kernel : Started\n--------------------------------------------------\n\n");
    778818
    779819        verify( ! kernelTLS.preemption_state.enabled );
     
    782822}
    783823
    784 static void kernel_shutdown(void) {
    785         __cfaabi_dbg_print_safe("\n--------------------------------------------------\nKernel : Shutting down\n");
    786 
    787         verify( TL_GET( preemption_state.enabled ) );
     824static void __kernel_shutdown(void) {
      825        // Before we start shutting things down, wait for systems that need threading to shut down
     826        __kernel_io_prepare_stop( *mainCluster );
     827
     828        /* paranoid */ verify( TL_GET( preemption_state.enabled ) );
    788829        disable_interrupts();
    789         verify( ! kernelTLS.preemption_state.enabled );
     830        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     831
     832        __cfadbg_print_safe(runtime_core, "\n--------------------------------------------------\nKernel : Shutting down\n");
    790833
     791834        // SKULLDUGGERY: Notify the mainProcessor it needs to terminate.
     
    793836        // which is currently here
    794837        __atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE);
    795         kernel_last_resume( kernelTLS.this_processor );
     838        __kernel_last_resume( kernelTLS.this_processor );
    796839        mainThread->self_cor.state = Halted;
    797840
     
    805848        // Destroy the main processor and its context in reverse order of construction
    806849        // These were manually constructed so we need manually destroy them
    807         void ^?{}(processor & this) with( this ) {
    808                 //don't join the main thread here, that wouldn't make any sense
      850        void ^?{}(processor & this) with( this ) {
     851                /* paranoid */ verify( this.do_terminate == true );
    809852                __cfaabi_dbg_print_safe("Kernel : destroyed main processor context %p\n", &runner);
    810853        }
     
    813856
    814857        // Final step, destroy the main thread since it is no longer needed
    815         // Since we provided a stack to this task it will not destroy anything
     858
      859        // Since we provided a stack to this task it will not destroy anything
     860        /* paranoid */ verify(mainThread->self_cor.stack.storage == (__stack_t*)(((uintptr_t)&storage_mainThreadCtx)| 0x1));
    816861        ^(*mainThread){};
    817862
     
    821866        ^(__cfa_dbg_global_clusters.lock){};
    822867
    823         __cfaabi_dbg_print_safe("Kernel : Shutdown complete\n");
     868        __cfadbg_print_safe(runtime_core, "Kernel : Shutdown complete\n");
    824869}
    825870
    826871//=============================================================================================
    827 // Kernel Quiescing
     872// Kernel Idle Sleep
    828873//=============================================================================================
    829 static void halt(processor * this) with( *this ) {
    830         // // verify( ! __atomic_load_n(&do_terminate, __ATOMIC_SEQ_CST) );
    831 
    832         // with( *cltr ) {
    833         //      lock      (proc_list_lock __cfaabi_dbg_ctx2);
    834         //      push_front(idles, *this);
    835         //      unlock    (proc_list_lock);
    836         // }
    837 
    838         // __cfaabi_dbg_print_safe("Kernel : Processor %p ready to sleep\n", this);
    839 
    840         // wait( idleLock );
    841 
    842         // __cfaabi_dbg_print_safe("Kernel : Processor %p woke up and ready to run\n", this);
    843 
    844         // with( *cltr ) {
    845         //      lock      (proc_list_lock __cfaabi_dbg_ctx2);
    846         //      remove    (idles, *this);
    847         //      unlock    (proc_list_lock);
    848         // }
     874static $thread * __halt(processor * this) with( *this ) {
     875        if( do_terminate ) return 0p;
     876
      877        // First, lock the cluster's idle lock
     878        lock( cltr->idle_lock __cfaabi_dbg_ctx2 );
     879
     880        // Check if we can find a thread
     881        if( $thread * found = __next_thread( cltr ) ) {
     882                unlock( cltr->idle_lock );
     883                return found;
     884        }
     885
     886        // Move this processor from the active list to the idle list
     887        move_to_front(cltr->procs, cltr->idles, *this);
     888
     889        // Unlock the idle lock so we don't go to sleep with a lock
     890        unlock    (cltr->idle_lock);
     891
     892        // We are ready to sleep
     893        __cfadbg_print_safe(runtime_core, "Kernel : Processor %p ready to sleep\n", this);
     894        wait( idle );
     895
     896        // We have woken up
     897        __cfadbg_print_safe(runtime_core, "Kernel : Processor %p woke up and ready to run\n", this);
     898
     899        // Get ourself off the idle list
     900        with( *cltr ) {
     901                lock  (idle_lock __cfaabi_dbg_ctx2);
     902                move_to_front(idles, procs, *this);
     903                unlock(idle_lock);
     904        }
     905
     906        // Don't check the ready queue again, we may not be in a position to run a thread
     907        return 0p;
     908}
     909
      910// Wake a processor from the front of the idle list if there are any
     911static bool __wake_one(cluster * this, __attribute__((unused)) bool force) {
      912        // unless forced, return early if we already know there is no one to wake
     913        // if( !this->idles.head && !force ) return false;
     914
      915        // First, lock the cluster's idle lock
     916        lock( this->idle_lock __cfaabi_dbg_ctx2 );
     917
     918        // Check if there is someone to wake up
     919        if( !this->idles.head ) {
      920                // Nope, unlock and return false
     921                unlock( this->idle_lock );
     922                return false;
     923        }
     924
     925        // Wake them up
     926        __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this->idles.head);
     927        /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     928        post( this->idles.head->idle );
     929
     930        // Unlock and return true
     931        unlock( this->idle_lock );
     932        return true;
     933}
     934
      935// Unconditionally wake a processor
     936static bool __wake_proc(processor * this) {
     937        __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this);
     938
     939        disable_interrupts();
     940                /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     941                bool ret = post( this->idle );
     942        enable_interrupts( __cfaabi_dbg_ctx );
     943
     944        return ret;
    849945}
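
The idle-sleep protocol is a handshake around cltr->idle_lock: a processor with no work re-checks the ready queue under the lock, moves itself to the idle list, and waits on its idle semaphore, which __wake_one()/__wake_proc() post. A rough sketch of how a processor loop would drive it (the loop shape and name are assumptions, not the changeset's actual main loop):

	static void example_idle_loop( processor * this ) {
		while( ! this->do_terminate ) {
			$thread * thrd = __next_thread( this->cltr );
			if( !thrd ) thrd = __halt( this );   // may sleep; can also return a thread found under the lock
			if(  thrd ) { /* run thrd, then loop */ }
		}
	}
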
    850946
     
    880976
    881977void kernel_abort_msg( void * kernel_data, char * abort_text, int abort_text_size ) {
    882         thread_desc * thrd = kernel_data;
     978        $thread * thrd = kernel_data;
    883979
    884980        if(thrd) {
     
    9281024void ^?{}(semaphore & this) {}
    9291025
    930 void P(semaphore & this) with( this ){
     1026bool P(semaphore & this) with( this ){
    9311027        lock( lock __cfaabi_dbg_ctx2 );
    9321028        count -= 1;
     
    9361032
    9371033                // atomically release spin lock and block
    938                 BlockInternal( &lock );
     1034                unlock( lock );
     1035                park( __cfaabi_dbg_ctx );
     1036                return true;
    9391037        }
    9401038        else {
    9411039            unlock( lock );
    942         }
    943 }
    944 
    945 void V(semaphore & this) with( this ) {
    946         thread_desc * thrd = 0p;
     1040            return false;
     1041        }
     1042}
     1043
     1044bool V(semaphore & this) with( this ) {
     1045        $thread * thrd = 0p;
    9471046        lock( lock __cfaabi_dbg_ctx2 );
    9481047        count += 1;
     
    9551054
    9561055        // make new owner
    957         WakeThread( thrd );
     1056        unpark( thrd __cfaabi_dbg_ctx2 );
     1057
     1058        return thrd != 0p;
     1059}
     1060
     1061bool V(semaphore & this, unsigned diff) with( this ) {
     1062        $thread * thrd = 0p;
     1063        lock( lock __cfaabi_dbg_ctx2 );
     1064        int release = max(-count, (int)diff);
     1065        count += diff;
     1066        for(release) {
     1067                unpark( pop_head( waiting ) __cfaabi_dbg_ctx2 );
     1068        }
     1069
     1070        unlock( lock );
     1071
     1072        return thrd != 0p;
    9581073}
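
The semaphore operations now report whether blocking or waking actually happened; a hypothetical caller (not from the changeset) showing the new results:

	semaphore work = { 0 };                  // start unavailable

	void wait_for_work() {
		if( P( work ) ) {
			// true: this thread had to park before the count was available
		}
	}

	void post_work() {
		if( ! V( work ) ) {
			// no thread was waiting; the count was simply incremented
		}
	}

	void post_many( unsigned n ) {
		V( work, n );                        // hand the count to several parked threads in one call
	}
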
    9591074
     
    9721087}
    9731088
    974 void doregister( cluster * cltr, thread_desc & thrd ) {
     1089void doregister( cluster * cltr, $thread & thrd ) {
    9751090        lock      (cltr->thread_list_lock __cfaabi_dbg_ctx2);
    9761091        cltr->nthreads += 1;
     
    9791094}
    9801095
    981 void unregister( cluster * cltr, thread_desc & thrd ) {
     1096void unregister( cluster * cltr, $thread & thrd ) {
    9821097        lock  (cltr->thread_list_lock __cfaabi_dbg_ctx2);
    9831098        remove(cltr->threads, thrd );
     
    9901105__cfaabi_dbg_debug_do(
    9911106        extern "C" {
    992                 void __cfaabi_dbg_record(__spinlock_t & this, const char prev_name[]) {
     1107                void __cfaabi_dbg_record_lock(__spinlock_t & this, const char prev_name[]) {
    9931108                        this.prev_name = prev_name;
    9941109                        this.prev_thrd = kernelTLS.this_thread;
    9951110                }
     1111
     1112                void __cfaabi_dbg_record_thrd($thread & this, bool park, const char prev_name[]) {
     1113                        if(park) {
     1114                                this.park_caller   = prev_name;
     1115                                this.park_stale    = false;
     1116                        }
     1117                        else {
     1118                                this.unpark_caller = prev_name;
     1119                                this.unpark_stale  = false;
     1120                        }
     1121                }
    9961122        }
    9971123)
     
    9991125//-----------------------------------------------------------------------------
    10001126// Debug
    1001 bool threading_enabled(void) {
     1127bool threading_enabled(void) __attribute__((const)) {
    10021128        return true;
    10031129}
  • libcfa/src/concurrency/kernel.hfa

    rb7d6a36 r6a490b2  
    1717
    1818#include <stdbool.h>
     19#include <stdint.h>
    1920
    2021#include "invoke.h"
     
    3233        __spinlock_t lock;
    3334        int count;
    34         __queue_t(thread_desc) waiting;
     35        __queue_t($thread) waiting;
    3536};
    3637
    3738void  ?{}(semaphore & this, int count = 1);
    3839void ^?{}(semaphore & this);
    39 void   P (semaphore & this);
    40 void   V (semaphore & this);
     40bool   P (semaphore & this);
     41bool   V (semaphore & this);
     42bool   V (semaphore & this, unsigned count);
    4143
    4244
     
    4446// Processor
    4547extern struct cluster * mainCluster;
    46 
    47 enum FinishOpCode { No_Action, Release, Schedule, Release_Schedule, Release_Multi, Release_Multi_Schedule, Callback };
    48 
    49 typedef void (*__finish_callback_fptr_t)(void);
    50 
    51 //TODO use union, many of these fields are mutually exclusive (i.e. MULTI vs NOMULTI)
    52 struct FinishAction {
    53         FinishOpCode action_code;
    54         /*
    55         // Union of possible actions
    56         union {
    57                 // Option 1 : locks and threads
    58                 struct {
    59                         // 1 thread or N thread
    60                         union {
    61                                 thread_desc * thrd;
    62                                 struct {
    63                                         thread_desc ** thrds;
    64                                         unsigned short thrd_count;
    65                                 };
    66                         };
    67                         // 1 lock or N lock
    68                         union {
    69                                 __spinlock_t * lock;
    70                                 struct {
    71                                         __spinlock_t ** locks;
    72                                         unsigned short lock_count;
    73                                 };
    74                         };
    75                 };
    76                 // Option 2 : action pointer
    77                 __finish_callback_fptr_t callback;
    78         };
    79         /*/
    80         thread_desc * thrd;
    81         thread_desc ** thrds;
    82         unsigned short thrd_count;
    83         __spinlock_t * lock;
    84         __spinlock_t ** locks;
    85         unsigned short lock_count;
    86         __finish_callback_fptr_t callback;
    87         //*/
    88 };
    89 static inline void ?{}(FinishAction & this) {
    90         this.action_code = No_Action;
    91         this.thrd = 0p;
    92         this.lock = 0p;
    93 }
    94 static inline void ^?{}(FinishAction &) {}
    9548
    9649// Processor
     
    11770        // RunThread data
    11871        // Action to do after a thread is ran
    119         struct FinishAction finish;
     72        $thread * destroyer;
    12073
    12174        // Preemption data
     
    12679        bool pending_preemption;
    12780
    128         // Idle lock
    129         __bin_sem_t idleLock;
     81        // Idle lock (kernel semaphore)
     82        __bin_sem_t idle;
    13083
    13184        // Termination
     
    13386        volatile bool do_terminate;
    13487
    135         // Termination synchronisation
     88        // Termination synchronisation (user semaphore)
    13689        semaphore terminated;
    13790
     
    158111static inline void  ?{}(processor & this, const char name[]) { this{name, *mainCluster }; }
    159112
    160 static inline [processor *&, processor *& ] __get( processor & this ) {
    161         return this.node.[next, prev];
    162 }
     113static inline [processor *&, processor *& ] __get( processor & this ) __attribute__((const)) { return this.node.[next, prev]; }
     114
     115//-----------------------------------------------------------------------------
     116// I/O
     117struct __io_data;
     118
     119#define CFA_CLUSTER_IO_POLLER_USER_THREAD 1 << 0
     120// #define CFA_CLUSTER_IO_POLLER_KERNEL_SIDE 1 << 1
    163121
    164122
     
    333291        // List of threads
    334292        __spinlock_t thread_list_lock;
    335         __dllist_t(struct thread_desc) threads;
     293        __dllist_t(struct $thread) threads;
    336294        unsigned int nthreads;
    337295
     
    341299                cluster * prev;
    342300        } node;
     301
     302        struct __io_data * io;
     303
     304        #if !defined(__CFA_NO_STATISTICS__)
     305                bool print_stats;
     306        #endif
    343307};
    344308extern Duration default_preemption();
    345309
    346 void ?{} (cluster & this, const char name[], Duration preemption_rate);
     310void ?{} (cluster & this, const char name[], Duration preemption_rate, int flags);
    347311void ^?{}(cluster & this);
    348312
    349 static inline void ?{} (cluster & this)                           { this{"Anonymous Cluster", default_preemption()}; }
    350 static inline void ?{} (cluster & this, Duration preemption_rate) { this{"Anonymous Cluster", preemption_rate}; }
    351 static inline void ?{} (cluster & this, const char name[])        { this{name, default_preemption()}; }
    352 
    353 static inline [cluster *&, cluster *& ] __get( cluster & this ) {
    354         return this.node.[next, prev];
    355 }
     313static inline void ?{} (cluster & this)                                      { this{"Anonymous Cluster", default_preemption(), 0}; }
     314static inline void ?{} (cluster & this, Duration preemption_rate)            { this{"Anonymous Cluster", preemption_rate, 0}; }
     315static inline void ?{} (cluster & this, const char name[])                   { this{name, default_preemption(), 0}; }
     316static inline void ?{} (cluster & this, int flags)                           { this{"Anonymous Cluster", default_preemption(), flags}; }
     317static inline void ?{} (cluster & this, Duration preemption_rate, int flags) { this{"Anonymous Cluster", preemption_rate, flags}; }
     318static inline void ?{} (cluster & this, const char name[], int flags)        { this{name, default_preemption(), flags}; }
     319
     320static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }
    356321
    357322static inline struct processor * active_processor() { return TL_GET( this_processor ); } // UNSAFE
    358323static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }
     324
     325#if !defined(__CFA_NO_STATISTICS__)
     326        static inline void print_stats_at_exit( cluster & this ) {
     327                this.print_stats = true;
     328        }
     329#endif
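
The cluster constructors now take an int of flags that is threaded through to the I/O subsystem, and a cluster can opt into printing statistics at exit; a hedged usage sketch (the variable name is illustrative):

	cluster io_cluster = { "io", CFA_CLUSTER_IO_POLLER_USER_THREAD };  // poll I/O from a user-level thread
	print_stats_at_exit( io_cluster );                                 // only available without __CFA_NO_STATISTICS__
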
    359330
    360331// Local Variables: //
  • libcfa/src/concurrency/kernel_private.hfa

    rb7d6a36 r6a490b2  
    3131}
    3232
    33 void ScheduleThread( thread_desc * );
    34 static inline void WakeThread( thread_desc * thrd ) {
    35         if( !thrd ) return;
    36 
    37         verify(thrd->state == Inactive);
    38 
    39         disable_interrupts();
    40         ScheduleThread( thrd );
    41         enable_interrupts( __cfaabi_dbg_ctx );
    42 }
    43 thread_desc * nextThread(cluster * this);
     33void __schedule_thread( $thread * ) __attribute__((nonnull (1)));
    4434
    4535//Block current thread and release/wake-up the following resources
    46 void BlockInternal(void);
    47 void BlockInternal(__spinlock_t * lock);
    48 void BlockInternal(thread_desc * thrd);
    49 void BlockInternal(__spinlock_t * lock, thread_desc * thrd);
    50 void BlockInternal(__spinlock_t * locks [], unsigned short count);
    51 void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
    52 void BlockInternal(__finish_callback_fptr_t callback);
    53 void LeaveThread(__spinlock_t * lock, thread_desc * thrd);
     36void __leave_thread() __attribute__((noreturn));
    5437
    5538//-----------------------------------------------------------------------------
     
    5740void main(processorCtx_t *);
    5841
    59 void * create_pthread( pthread_t *, void * (*)(void *), void * );
    60 
    61 static inline void wake_fast(processor * this) {
    62         __cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
    63         post( this->idleLock );
    64 }
    65 
    66 static inline void wake(processor * this) {
    67         disable_interrupts();
    68         wake_fast(this);
    69         enable_interrupts( __cfaabi_dbg_ctx );
    70 }
     42void * __create_pthread( pthread_t *, void * (*)(void *), void * );
     43
     44
    7145
    7246struct event_kernel_t {
     
    8559extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
    8660
     61extern cluster * mainCluster;
     62
    8763//-----------------------------------------------------------------------------
    8864// Threads
    8965extern "C" {
    90       void CtxInvokeThread(void (*main)(void *), void * this);
    91 }
    92 
    93 extern void ThreadCtxSwitch(coroutine_desc * src, coroutine_desc * dst);
     66      void __cfactx_invoke_thread(void (*main)(void *), void * this);
     67}
    9468
    9569__cfaabi_dbg_debug_do(
    96         extern void __cfaabi_dbg_thread_register  ( thread_desc * thrd );
    97         extern void __cfaabi_dbg_thread_unregister( thread_desc * thrd );
     70        extern void __cfaabi_dbg_thread_register  ( $thread * thrd );
     71        extern void __cfaabi_dbg_thread_unregister( $thread * thrd );
    9872)
     73
      74// KERNEL ONLY unpark without disabling interrupts
     75void __unpark( $thread * thrd __cfaabi_dbg_ctx_param2 );
     76
     77//-----------------------------------------------------------------------------
     78// I/O
     79void __kernel_io_startup     ( cluster &, int, bool );
     80void __kernel_io_finish_start( cluster & );
     81void __kernel_io_prepare_stop( cluster & );
     82void __kernel_io_shutdown    ( cluster &, bool );
    9983
    10084//-----------------------------------------------------------------------------
     
    10286#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
    10387
    104 static inline uint32_t tls_rand() {
     88static inline uint32_t __tls_rand() {
    10589        kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
    10690        kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
     
    11397void unregister( struct cluster & cltr );
    11498
    115 void doregister( struct cluster * cltr, struct thread_desc & thrd );
    116 void unregister( struct cluster * cltr, struct thread_desc & thrd );
     99void doregister( struct cluster * cltr, struct $thread & thrd );
     100void unregister( struct cluster * cltr, struct $thread & thrd );
    117101
    118102//=======================================================================
  • libcfa/src/concurrency/monitor.cfa

    rb7d6a36 r6a490b2  
    55// file "LICENCE" distributed with Cforall.
    66//
    7 // monitor_desc.c --
     7// $monitor.c --
    88//
    99// Author           : Thierry Delisle
     
    2727//-----------------------------------------------------------------------------
    2828// Forward declarations
    29 static inline void set_owner ( monitor_desc * this, thread_desc * owner );
    30 static inline void set_owner ( monitor_desc * storage [], __lock_size_t count, thread_desc * owner );
    31 static inline void set_mask  ( monitor_desc * storage [], __lock_size_t count, const __waitfor_mask_t & mask );
    32 static inline void reset_mask( monitor_desc * this );
    33 
    34 static inline thread_desc * next_thread( monitor_desc * this );
    35 static inline bool is_accepted( monitor_desc * this, const __monitor_group_t & monitors );
     29static inline void __set_owner ( $monitor * this, $thread * owner );
     30static inline void __set_owner ( $monitor * storage [], __lock_size_t count, $thread * owner );
     31static inline void set_mask  ( $monitor * storage [], __lock_size_t count, const __waitfor_mask_t & mask );
     32static inline void reset_mask( $monitor * this );
     33
     34static inline $thread * next_thread( $monitor * this );
     35static inline bool is_accepted( $monitor * this, const __monitor_group_t & monitors );
    3636
    3737static inline void lock_all  ( __spinlock_t * locks [], __lock_size_t count );
    38 static inline void lock_all  ( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count );
     38static inline void lock_all  ( $monitor * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count );
    3939static inline void unlock_all( __spinlock_t * locks [], __lock_size_t count );
    40 static inline void unlock_all( monitor_desc * locks [], __lock_size_t count );
    41 
    42 static inline void save   ( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*out*/ recursions [], __waitfor_mask_t /*out*/ masks [] );
    43 static inline void restore( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
    44 
    45 static inline void init     ( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
    46 static inline void init_push( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
    47 
    48 static inline thread_desc *        check_condition   ( __condition_criterion_t * );
     40static inline void unlock_all( $monitor * locks [], __lock_size_t count );
     41
     42static inline void save   ( $monitor * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*out*/ recursions [], __waitfor_mask_t /*out*/ masks [] );
     43static inline void restore( $monitor * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
     44
     45static inline void init     ( __lock_size_t count, $monitor * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
     46static inline void init_push( __lock_size_t count, $monitor * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
     47
     48static inline $thread *        check_condition   ( __condition_criterion_t * );
    4949static inline void                 brand_condition   ( condition & );
    50 static inline [thread_desc *, int] search_entry_queue( const __waitfor_mask_t &, monitor_desc * monitors [], __lock_size_t count );
     50static inline [$thread *, int] search_entry_queue( const __waitfor_mask_t &, $monitor * monitors [], __lock_size_t count );
    5151
    5252forall(dtype T | sized( T ))
    5353static inline __lock_size_t insert_unique( T * array [], __lock_size_t & size, T * val );
    5454static inline __lock_size_t count_max    ( const __waitfor_mask_t & mask );
    55 static inline __lock_size_t aggregate    ( monitor_desc * storage [], const __waitfor_mask_t & mask );
     55static inline __lock_size_t aggregate    ( $monitor * storage [], const __waitfor_mask_t & mask );
    5656
    5757//-----------------------------------------------------------------------------
     
    6868
    6969#define monitor_ctx( mons, cnt )                                /* Define that create the necessary struct for internal/external scheduling operations */ \
    70         monitor_desc ** monitors = mons;                          /* Save the targeted monitors                                                          */ \
     70        $monitor ** monitors = mons;                          /* Save the targeted monitors                                                          */ \
    7171        __lock_size_t count = cnt;                                /* Save the count to a local variable                                                  */ \
    7272        unsigned int recursions[ count ];                         /* Save the current recursion levels to restore them later                             */ \
     
    8080//-----------------------------------------------------------------------------
    8181// Enter/Leave routines
    82 
    83 
    84 extern "C" {
    85         // Enter single monitor
    86         static void __enter_monitor_desc( monitor_desc * this, const __monitor_group_t & group ) {
    87                 // Lock the monitor spinlock
    88                 lock( this->lock __cfaabi_dbg_ctx2 );
    89                 // Interrupts disable inside critical section
    90                 thread_desc * thrd = kernelTLS.this_thread;
    91 
    92                 __cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
    93 
    94                 if( !this->owner ) {
    95                         // No one has the monitor, just take it
    96                         set_owner( this, thrd );
    97 
    98                         __cfaabi_dbg_print_safe( "Kernel :  mon is free \n" );
    99                 }
    100                 else if( this->owner == thrd) {
    101                         // We already have the monitor, just note how many times we took it
    102                         this->recursion += 1;
    103 
    104                         __cfaabi_dbg_print_safe( "Kernel :  mon already owned \n" );
    105                 }
    106                 else if( is_accepted( this, group) ) {
    107                         // Some one was waiting for us, enter
    108                         set_owner( this, thrd );
    109 
    110                         // Reset mask
    111                         reset_mask( this );
    112 
    113                         __cfaabi_dbg_print_safe( "Kernel :  mon accepts \n" );
    114                 }
    115                 else {
    116                         __cfaabi_dbg_print_safe( "Kernel :  blocking \n" );
    117 
    118                         // Some one else has the monitor, wait in line for it
    119                         append( this->entry_queue, thrd );
    120 
    121                         BlockInternal( &this->lock );
    122 
    123                         __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
    124 
    125                         // BlockInternal will unlock spinlock, no need to unlock ourselves
    126                         return;
    127                 }
     82// Enter single monitor
     83static void __enter( $monitor * this, const __monitor_group_t & group ) {
     84        // Lock the monitor spinlock
     85        lock( this->lock __cfaabi_dbg_ctx2 );
     86        // Interrupts disable inside critical section
     87        $thread * thrd = kernelTLS.this_thread;
     88
     89        __cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
     90
     91        if( !this->owner ) {
     92                // No one has the monitor, just take it
     93                __set_owner( this, thrd );
     94
     95                __cfaabi_dbg_print_safe( "Kernel :  mon is free \n" );
     96        }
     97        else if( this->owner == thrd) {
     98                // We already have the monitor, just note how many times we took it
     99                this->recursion += 1;
     100
     101                __cfaabi_dbg_print_safe( "Kernel :  mon already owned \n" );
     102        }
     103        else if( is_accepted( this, group) ) {
     104                // Some one was waiting for us, enter
     105                __set_owner( this, thrd );
     106
     107                // Reset mask
     108                reset_mask( this );
     109
     110                __cfaabi_dbg_print_safe( "Kernel :  mon accepts \n" );
     111        }
     112        else {
     113                __cfaabi_dbg_print_safe( "Kernel :  blocking \n" );
     114
      115                // Someone else has the monitor, wait in line for it
     116                /* paranoid */ verify( thrd->next == 0p );
     117                append( this->entry_queue, thrd );
     118                /* paranoid */ verify( thrd->next == 1p );
     119
     120                unlock( this->lock );
     121                park( __cfaabi_dbg_ctx );
    128122
    129123                __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
    130124
    131                 // Release the lock and leave
     125                /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     126                return;
     127        }
     128
     129        __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
     130
     131        /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     132        /* paranoid */ verify( this->lock.lock );
     133
     134        // Release the lock and leave
     135        unlock( this->lock );
     136        return;
     137}
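
For context, __enter()/__leave() are what the translator-generated monitor guards end up calling; a minimal user-level illustration under that assumption (the monitor type and routine are hypothetical):

	monitor bank_account { int balance; };

	void deposit( bank_account & mutex acc, int amount ) {
		// the mutex parameter creates a monitor_guard_t, which calls __enter() on
		// the way in and __leave() when the routine returns
		acc.balance += amount;
	}
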
     138
     139static void __dtor_enter( $monitor * this, fptr_t func ) {
     140        // Lock the monitor spinlock
     141        lock( this->lock __cfaabi_dbg_ctx2 );
     142        // Interrupts disable inside critical section
     143        $thread * thrd = kernelTLS.this_thread;
     144
     145        __cfaabi_dbg_print_safe( "Kernel : %10p Entering dtor for mon %p (%p)\n", thrd, this, this->owner);
     146
     147
     148        if( !this->owner ) {
     149                __cfaabi_dbg_print_safe( "Kernel : Destroying free mon %p\n", this);
     150
     151                // No one has the monitor, just take it
     152                __set_owner( this, thrd );
     153
     154                verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     155
    132156                unlock( this->lock );
    133157                return;
    134158        }
    135 
    136         static void __enter_monitor_dtor( monitor_desc * this, fptr_t func ) {
    137                 // Lock the monitor spinlock
    138                 lock( this->lock __cfaabi_dbg_ctx2 );
    139                 // Interrupts disable inside critical section
    140                 thread_desc * thrd = kernelTLS.this_thread;
    141 
    142                 __cfaabi_dbg_print_safe( "Kernel : %10p Entering dtor for mon %p (%p)\n", thrd, this, this->owner);
    143 
    144 
    145                 if( !this->owner ) {
    146                         __cfaabi_dbg_print_safe( "Kernel : Destroying free mon %p\n", this);
    147 
    148                         // No one has the monitor, just take it
    149                         set_owner( this, thrd );
    150 
    151                         unlock( this->lock );
    152                         return;
     159        else if( this->owner == thrd) {
      160                // We already have the monitor... but we're about to destroy it, so the nesting will fail
     161                // Abort!
     162                abort( "Attempt to destroy monitor %p by thread \"%.256s\" (%p) in nested mutex.", this, thrd->self_cor.name, thrd );
     163        }
     164
     165        __lock_size_t count = 1;
     166        $monitor ** monitors = &this;
     167        __monitor_group_t group = { &this, 1, func };
     168        if( is_accepted( this, group) ) {
     169                __cfaabi_dbg_print_safe( "Kernel :  mon accepts dtor, block and signal it \n" );
     170
     171                // Wake the thread that is waiting for this
     172                __condition_criterion_t * urgent = pop( this->signal_stack );
     173                /* paranoid */ verify( urgent );
     174
     175                // Reset mask
     176                reset_mask( this );
     177
     178                // Create the node specific to this wait operation
     179                wait_ctx_primed( thrd, 0 )
     180
      181                // Someone else has the monitor, wait for it to finish and then run
     182                unlock( this->lock );
     183
     184                // Release the next thread
     185                /* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     186                unpark( urgent->owner->waiting_thread __cfaabi_dbg_ctx2 );
     187
     188                // Park current thread waiting
     189                park( __cfaabi_dbg_ctx );
     190
      191                // Someone was waiting for us, enter
     192                /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     193        }
     194        else {
     195                __cfaabi_dbg_print_safe( "Kernel :  blocking \n" );
     196
     197                wait_ctx( thrd, 0 )
     198                this->dtor_node = &waiter;
     199
     200                // Some one else has the monitor, wait in line for it
     201                /* paranoid */ verify( thrd->next == 0p );
     202                append( this->entry_queue, thrd );
     203                /* paranoid */ verify( thrd->next == 1p );
     204                unlock( this->lock );
     205
     206                // Park current thread waiting
     207                park( __cfaabi_dbg_ctx );
     208
     209                /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     210                return;
     211        }
     212
     213        __cfaabi_dbg_print_safe( "Kernel : Destroying %p\n", this);
     214
     215}
     216
     217// Leave single monitor
     218void __leave( $monitor * this ) {
     219        // Lock the monitor spinlock
     220        lock( this->lock __cfaabi_dbg_ctx2 );
     221
     222        __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", kernelTLS.this_thread, this, this->owner);
     223
     224        /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     225
     226        // Leaving a recursion level, decrement the counter
     227        this->recursion -= 1;
     228
     229        // If we haven't left the last level of recursion
     230        // it means we don't need to do anything
     231        if( this->recursion != 0) {
     232                __cfaabi_dbg_print_safe( "Kernel :  recursion still %d\n", this->recursion);
     233                unlock( this->lock );
     234                return;
     235        }
     236
     237        // Get the next thread, will be null on low contention monitor
     238        $thread * new_owner = next_thread( this );
     239
     240        // Check the new owner is consistent with who we wake-up
     241        // new_owner might be null even if someone owns the monitor when the owner is still waiting for another monitor
     242        /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
     243
     244        // We can now let other threads in safely
     245        unlock( this->lock );
     246
     247        //We need to wake-up the thread
     248        /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
     249        unpark( new_owner __cfaabi_dbg_ctx2 );
     250}
     251
     252// Leave single monitor for the last time
     253void __dtor_leave( $monitor * this ) {
     254        __cfaabi_dbg_debug_do(
     255                if( TL_GET( this_thread ) != this->owner ) {
     256                        abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
    153257                }
    154                 else if( this->owner == thrd) {
    155                         // We already have the monitor... but where about to destroy it so the nesting will fail
    156                         // Abort!
    157                         abort( "Attempt to destroy monitor %p by thread \"%.256s\" (%p) in nested mutex.", this, thrd->self_cor.name, thrd );
     258                if( this->recursion != 1 ) {
     259                        abort( "Destroyed monitor %p has %d outstanding nested calls.\n", this, this->recursion - 1);
    158260                }
    159 
    160                 __lock_size_t count = 1;
    161                 monitor_desc ** monitors = &this;
    162                 __monitor_group_t group = { &this, 1, func };
    163                 if( is_accepted( this, group) ) {
    164                         __cfaabi_dbg_print_safe( "Kernel :  mon accepts dtor, block and signal it \n" );
    165 
    166                         // Wake the thread that is waiting for this
    167                         __condition_criterion_t * urgent = pop( this->signal_stack );
    168                         verify( urgent );
    169 
    170                         // Reset mask
    171                         reset_mask( this );
    172 
    173                         // Create the node specific to this wait operation
    174                         wait_ctx_primed( thrd, 0 )
    175 
    176                         // Some one else has the monitor, wait for him to finish and then run
    177                         BlockInternal( &this->lock, urgent->owner->waiting_thread );
    178 
    179                         // Some one was waiting for us, enter
    180                         set_owner( this, thrd );
    181                 }
    182                 else {
    183                         __cfaabi_dbg_print_safe( "Kernel :  blocking \n" );
    184 
    185                         wait_ctx( thrd, 0 )
    186                         this->dtor_node = &waiter;
    187 
    188                         // Some one else has the monitor, wait in line for it
    189                         append( this->entry_queue, thrd );
    190                         BlockInternal( &this->lock );
    191 
    192                         // BlockInternal will unlock spinlock, no need to unlock ourselves
    193                         return;
    194                 }
    195 
    196                 __cfaabi_dbg_print_safe( "Kernel : Destroying %p\n", this);
    197 
    198         }
    199 
    200         // Leave single monitor
    201         void __leave_monitor_desc( monitor_desc * this ) {
    202                 // Lock the monitor spinlock
    203                 lock( this->lock __cfaabi_dbg_ctx2 );
    204 
    205                 __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", kernelTLS.this_thread, this, this->owner);
    206 
    207                 verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
    208 
    209                 // Leaving a recursion level, decrement the counter
    210                 this->recursion -= 1;
    211 
    212                 // If we haven't left the last level of recursion
    213                 // it means we don't need to do anything
    214                 if( this->recursion != 0) {
    215                         __cfaabi_dbg_print_safe( "Kernel :  recursion still %d\n", this->recursion);
    216                         unlock( this->lock );
    217                         return;
    218                 }
    219 
    220                 // Get the next thread, will be null on low contention monitor
    221                 thread_desc * new_owner = next_thread( this );
    222 
    223                 // We can now let other threads in safely
    224                 unlock( this->lock );
    225 
    226                 //We need to wake-up the thread
    227                 WakeThread( new_owner );
    228         }
    229 
    230         // Leave single monitor for the last time
    231         void __leave_dtor_monitor_desc( monitor_desc * this ) {
    232                 __cfaabi_dbg_debug_do(
    233                         if( TL_GET( this_thread ) != this->owner ) {
    234                                 abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
    235                         }
    236                         if( this->recursion != 1 ) {
    237                                 abort( "Destroyed monitor %p has %d outstanding nested calls.\n", this, this->recursion - 1);
    238                         }
    239                 )
    240         }
    241 
     261        )
     262}
     263
     264extern "C" {
    242265        // Leave the thread monitor
    243266        // last routine called by a thread.
    244267        // Should never return
    245         void __leave_thread_monitor() {
    246                 thread_desc * thrd = TL_GET( this_thread );
    247                 monitor_desc * this = &thrd->self_mon;
     268        void __cfactx_thrd_leave() {
     269                $thread * thrd = TL_GET( this_thread );
     270                $monitor * this = &thrd->self_mon;
    248271
    249272                // Lock the monitor now
     
    252275                disable_interrupts();
    253276
    254                 thrd->self_cor.state = Halted;
    255 
    256                 verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
     277                thrd->state = Halted;
     278
     279                /* paranoid */ verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
    257280
    258281                // Leaving a recursion level, decrement the counter
     
    264287
    265288                // Fetch the next thread, can be null
    266                 thread_desc * new_owner = next_thread( this );
    267 
    268                 // Leave the thread, this will unlock the spinlock
    269                 // Use leave thread instead of BlockInternal which is
    270                 // specialized for this case and supports null new_owner
    271                 LeaveThread( &this->lock, new_owner );
     289                $thread * new_owner = next_thread( this );
     290
     291                // Release the monitor lock
     292                unlock( this->lock );
     293
     294                // Unpark the next owner if needed
     295                /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
     296                /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     297                /* paranoid */ verify( ! kernelTLS.this_processor->destroyer );
     298                /* paranoid */ verify( thrd->state == Halted );
     299
     300                kernelTLS.this_processor->destroyer = new_owner;
     301
     302                // Leave the thread
     303                __leave_thread();
    272304
    273305                // Control flow should never reach here!
     
    279311static inline void enter( __monitor_group_t monitors ) {
    280312        for( __lock_size_t i = 0; i < monitors.size; i++) {
    281                 __enter_monitor_desc( monitors[i], monitors );
     313                __enter( monitors[i], monitors );
    282314        }
    283315}
     
    285317// Leave multiple monitor
    286318// relies on the monitor array being sorted
    287 static inline void leave(monitor_desc * monitors [], __lock_size_t count) {
     319static inline void leave($monitor * monitors [], __lock_size_t count) {
    288320        for( __lock_size_t i = count - 1; i >= 0; i--) {
    289                 __leave_monitor_desc( monitors[i] );
     321                __leave( monitors[i] );
    290322        }
    291323}
     
    293325// Ctor for monitor guard
    294326// Sorts monitors before entering
    295 void ?{}( monitor_guard_t & this, monitor_desc * m [], __lock_size_t count, fptr_t func ) {
    296         thread_desc * thrd = TL_GET( this_thread );
     327void ?{}( monitor_guard_t & this, $monitor * m [], __lock_size_t count, fptr_t func ) {
     328        $thread * thrd = TL_GET( this_thread );
    297329
    298330        // Store current array
     
    334366// Ctor for monitor guard
    335367// Sorts monitors before entering
    336 void ?{}( monitor_dtor_guard_t & this, monitor_desc * m [], fptr_t func ) {
     368void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func ) {
    337369        // optimization
    338         thread_desc * thrd = TL_GET( this_thread );
     370        $thread * thrd = TL_GET( this_thread );
    339371
    340372        // Store current array
     
    347379        (thrd->monitors){m, 1, func};
    348380
    349         __enter_monitor_dtor( this.m, func );
     381        __dtor_enter( this.m, func );
    350382}
    351383
     
    353385void ^?{}( monitor_dtor_guard_t & this ) {
    354386        // Leave the monitors in order
    355         __leave_dtor_monitor_desc( this.m );
     387        __dtor_leave( this.m );
    356388
    357389        // Restore thread context
     
    361393//-----------------------------------------------------------------------------
    362394// Internal scheduling types
    363 void ?{}(__condition_node_t & this, thread_desc * waiting_thread, __lock_size_t count, uintptr_t user_info ) {
     395void ?{}(__condition_node_t & this, $thread * waiting_thread, __lock_size_t count, uintptr_t user_info ) {
    364396        this.waiting_thread = waiting_thread;
    365397        this.count = count;
     
    375407}
    376408
    377 void ?{}(__condition_criterion_t & this, monitor_desc * target, __condition_node_t & owner ) {
     409void ?{}(__condition_criterion_t & this, $monitor * target, __condition_node_t & owner ) {
    378410        this.ready  = false;
    379411        this.target = target;
     
    400432        // Append the current wait operation to the ones already queued on the condition
    401433        // We don't need locks for that since conditions must always be waited on inside monitor mutual exclusion
     434        /* paranoid */ verify( waiter.next == 0p );
    402435        append( this.blocked, &waiter );
     436        /* paranoid */ verify( waiter.next == 1p );
    403437
    404438        // Lock all monitors (aggregates the locks as well)
     
    407441        // Find the next thread(s) to run
    408442        __lock_size_t thread_count = 0;
    409         thread_desc * threads[ count ];
     443        $thread * threads[ count ];
    410444        __builtin_memset( threads, 0, sizeof( threads ) );
    411445
     
    415449        // Remove any duplicate threads
    416450        for( __lock_size_t i = 0; i < count; i++) {
    417                 thread_desc * new_owner = next_thread( monitors[i] );
     451                $thread * new_owner = next_thread( monitors[i] );
    418452                insert_unique( threads, thread_count, new_owner );
    419453        }
    420454
     455        // Unlock the locks, we don't need them anymore
     456        for(int i = 0; i < count; i++) {
     457                unlock( *locks[i] );
     458        }
     459
     460        // Wake the threads
     461        for(int i = 0; i < thread_count; i++) {
     462                unpark( threads[i] __cfaabi_dbg_ctx2 );
     463        }
     464
    421465        // Everything is ready to go to sleep
    422         BlockInternal( locks, count, threads, thread_count );
     466        park( __cfaabi_dbg_ctx );
    423467
    424468        // We are back, restore the owners and recursions
     
    435479        //Some more checking in debug
    436480        __cfaabi_dbg_debug_do(
    437                 thread_desc * this_thrd = TL_GET( this_thread );
     481                $thread * this_thrd = TL_GET( this_thread );
    438482                if ( this.monitor_count != this_thrd->monitors.size ) {
    439483                        abort( "Signal on condition %p made with different number of monitor(s), expected %zi got %zi", &this, this.monitor_count, this_thrd->monitors.size );
     
    489533
    490534        //Find the thread to run
    491         thread_desc * signallee = pop_head( this.blocked )->waiting_thread;
    492         set_owner( monitors, count, signallee );
     535        $thread * signallee = pop_head( this.blocked )->waiting_thread;
     536        __set_owner( monitors, count, signallee );
    493537
    494538        __cfaabi_dbg_print_buffer_decl( "Kernel : signal_block condition %p (s: %p)\n", &this, signallee );
    495539
     540        // unlock all the monitors
     541        unlock_all( locks, count );
     542
     543        // unpark the thread we signalled
     544        unpark( signallee __cfaabi_dbg_ctx2 );
     545
    496546        //Everything is ready to go to sleep
    497         BlockInternal( locks, count, &signallee, 1 );
     547        park( __cfaabi_dbg_ctx );
    498548
    499549
     
    536586        // Create one!
    537587        __lock_size_t max = count_max( mask );
    538         monitor_desc * mon_storage[max];
     588        $monitor * mon_storage[max];
    539589        __builtin_memset( mon_storage, 0, sizeof( mon_storage ) );
    540590        __lock_size_t actual_count = aggregate( mon_storage, mask );
     
    554604        {
    555605                // Check if the entry queue already holds an acceptable thread
    556                 thread_desc * next; int index;
     606                $thread * next; int index;
    557607                [next, index] = search_entry_queue( mask, monitors, count );
    558608
     
    564614                                verifyf( accepted.size == 1,  "ERROR: Accepted dtor has more than 1 mutex parameter." );
    565615
    566                                 monitor_desc * mon2dtor = accepted[0];
     616                                $monitor * mon2dtor = accepted[0];
    567617                                verifyf( mon2dtor->dtor_node, "ERROR: Accepted monitor has no dtor_node." );
    568618
     
    590640
    591641                                // Set the owners to be the next thread
    592                                 set_owner( monitors, count, next );
    593 
    594                                 // Everything is ready to go to sleep
    595                                 BlockInternal( locks, count, &next, 1 );
     642                                __set_owner( monitors, count, next );
     643
     644                                // unlock all the monitors
     645                                unlock_all( locks, count );
     646
     647                                // unpark the thread we signalled
     648                                unpark( next __cfaabi_dbg_ctx2 );
     649
     650                                //Everything is ready to go to sleep
     651                                park( __cfaabi_dbg_ctx );
    596652
    597653                                // We are back, restore the owners and recursions
     
    631687        }
    632688
     689        // unlock all the monitors
     690        unlock_all( locks, count );
     691
    633692        //Everything is ready to go to sleep
    634         BlockInternal( locks, count );
     693        park( __cfaabi_dbg_ctx );
    635694
    636695
     
    649708// Utilities
    650709
    651 static inline void set_owner( monitor_desc * this, thread_desc * owner ) {
    652         // __cfaabi_dbg_print_safe( "Kernal :   Setting owner of %p to %p ( was %p)\n", this, owner, this->owner );
     710static inline void __set_owner( $monitor * this, $thread * owner ) {
     711        /* paranoid */ verify( this->lock.lock );
    653712
    654713        //Pass the monitor appropriately
     
    659718}
    660719
    661 static inline void set_owner( monitor_desc * monitors [], __lock_size_t count, thread_desc * owner ) {
    662         monitors[0]->owner     = owner;
    663         monitors[0]->recursion = 1;
     720static inline void __set_owner( $monitor * monitors [], __lock_size_t count, $thread * owner ) {
     721        /* paranoid */ verify ( monitors[0]->lock.lock );
     722        /* paranoid */ verifyf( monitors[0]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[0]->owner, monitors[0]->recursion, monitors[0] );
     723        monitors[0]->owner        = owner;
     724        monitors[0]->recursion    = 1;
    664725        for( __lock_size_t i = 1; i < count; i++ ) {
    665                 monitors[i]->owner     = owner;
    666                 monitors[i]->recursion = 0;
    667         }
    668 }
    669 
    670 static inline void set_mask( monitor_desc * storage [], __lock_size_t count, const __waitfor_mask_t & mask ) {
     726                /* paranoid */ verify ( monitors[i]->lock.lock );
     727                /* paranoid */ verifyf( monitors[i]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[i]->owner, monitors[i]->recursion, monitors[i] );
     728                monitors[i]->owner        = owner;
     729                monitors[i]->recursion    = 0;
     730        }
     731}
     732
     733static inline void set_mask( $monitor * storage [], __lock_size_t count, const __waitfor_mask_t & mask ) {
    671734        for( __lock_size_t i = 0; i < count; i++) {
    672735                storage[i]->mask = mask;
     
    674737}
    675738
    676 static inline void reset_mask( monitor_desc * this ) {
     739static inline void reset_mask( $monitor * this ) {
    677740        this->mask.accepted = 0p;
    678741        this->mask.data = 0p;
     
    680743}
    681744
    682 static inline thread_desc * next_thread( monitor_desc * this ) {
     745static inline $thread * next_thread( $monitor * this ) {
    683746        //Check the signaller stack
    684747        __cfaabi_dbg_print_safe( "Kernel :  mon %p AS-stack top %p\n", this, this->signal_stack.top);
     
    688751                //regardless of whether we are ready to baton pass,
    689752                //we need to set the monitor as in use
    690                 set_owner( this,  urgent->owner->waiting_thread );
     753                /* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     754                __set_owner( this,  urgent->owner->waiting_thread );
    691755
    692756                return check_condition( urgent );
     
    695759        // No signaller thread
    696760        // Get the next thread in the entry_queue
    697         thread_desc * new_owner = pop_head( this->entry_queue );
    698         set_owner( this, new_owner );
     761        $thread * new_owner = pop_head( this->entry_queue );
     762        /* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     763        /* paranoid */ verify( !new_owner || new_owner->next == 0p );
     764        __set_owner( this, new_owner );
    699765
    700766        return new_owner;
    701767}
    702768
    703 static inline bool is_accepted( monitor_desc * this, const __monitor_group_t & group ) {
     769static inline bool is_accepted( $monitor * this, const __monitor_group_t & group ) {
    704770        __acceptable_t * it = this->mask.data; // Optim
    705771        __lock_size_t count = this->mask.size;
     
    723789}
    724790
    725 static inline void init( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
     791static inline void init( __lock_size_t count, $monitor * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
    726792        for( __lock_size_t i = 0; i < count; i++) {
    727793                (criteria[i]){ monitors[i], waiter };
     
    731797}
    732798
    733 static inline void init_push( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
     799static inline void init_push( __lock_size_t count, $monitor * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
    734800        for( __lock_size_t i = 0; i < count; i++) {
    735801                (criteria[i]){ monitors[i], waiter };
     
    747813}
    748814
    749 static inline void lock_all( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count ) {
     815static inline void lock_all( $monitor * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count ) {
    750816        for( __lock_size_t i = 0; i < count; i++ ) {
    751817                __spinlock_t * l = &source[i]->lock;
     
    761827}
    762828
    763 static inline void unlock_all( monitor_desc * locks [], __lock_size_t count ) {
     829static inline void unlock_all( $monitor * locks [], __lock_size_t count ) {
    764830        for( __lock_size_t i = 0; i < count; i++ ) {
    765831                unlock( locks[i]->lock );
     
    768834
    769835static inline void save(
    770         monitor_desc * ctx [],
     836        $monitor * ctx [],
    771837        __lock_size_t count,
    772838        __attribute((unused)) __spinlock_t * locks [],
     
    781847
    782848static inline void restore(
    783         monitor_desc * ctx [],
     849        $monitor * ctx [],
    784850        __lock_size_t count,
    785851        __spinlock_t * locks [],
     
    799865// 2 - Checks if all the monitors are ready to run
    800866//     if so return the thread to run
    801 static inline thread_desc * check_condition( __condition_criterion_t * target ) {
     867static inline $thread * check_condition( __condition_criterion_t * target ) {
    802868        __condition_node_t * node = target->owner;
    803869        unsigned short count = node->count;
     
    822888
    823889static inline void brand_condition( condition & this ) {
    824         thread_desc * thrd = TL_GET( this_thread );
     890        $thread * thrd = TL_GET( this_thread );
    825891        if( !this.monitors ) {
    826892                // __cfaabi_dbg_print_safe( "Branding\n" );
     
    828894                this.monitor_count = thrd->monitors.size;
    829895
    830                 this.monitors = (monitor_desc **)malloc( this.monitor_count * sizeof( *this.monitors ) );
     896                this.monitors = ($monitor **)malloc( this.monitor_count * sizeof( *this.monitors ) );
    831897                for( int i = 0; i < this.monitor_count; i++ ) {
    832898                        this.monitors[i] = thrd->monitors[i];
     
    835901}
    836902
    837 static inline [thread_desc *, int] search_entry_queue( const __waitfor_mask_t & mask, monitor_desc * monitors [], __lock_size_t count ) {
    838 
    839         __queue_t(thread_desc) & entry_queue = monitors[0]->entry_queue;
     903static inline [$thread *, int] search_entry_queue( const __waitfor_mask_t & mask, $monitor * monitors [], __lock_size_t count ) {
     904
     905        __queue_t($thread) & entry_queue = monitors[0]->entry_queue;
    840906
    841907        // For each thread in the entry-queue
    842         for(    thread_desc ** thrd_it = &entry_queue.head;
     908        for(    $thread ** thrd_it = &entry_queue.head;
    843909                *thrd_it;
    844910                thrd_it = &(*thrd_it)->link.next
     
    884950}
    885951
    886 static inline __lock_size_t aggregate( monitor_desc * storage [], const __waitfor_mask_t & mask ) {
     952static inline __lock_size_t aggregate( $monitor * storage [], const __waitfor_mask_t & mask ) {
    887953        __lock_size_t size = 0;
    888954        for( __lock_size_t i = 0; i < mask.size; i++ ) {
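
Note: the hunks above replace BlockInternal/WakeThread with unlock-then-park plus unpark, and route every ownership hand-off through __set_owner and next_thread, which drains the signaller (AS) stack before the entry queue so signalled waiters cannot be barged past. As a reminder of what this machinery implements at the user level, here is a minimal Cforall monitor sketch; the Rendezvous type, its constructor, and its routines are illustrative only and are not part of this changeset.

    #include <monitor.hfa>

    monitor Rendezvous {                        // mutual exclusion provided by the monitor lock
        condition notFull, notEmpty;
        int slot;
        bool full;
    };
    void ?{}( Rendezvous & this ) { this.full = false; }   // other members are default-constructed

    void put( Rendezvous & mutex this, int x ) {
        if ( this.full ) wait( this.notFull );  // parks the caller, releasing the monitor
        this.slot = x;
        this.full = true;
        signal( this.notEmpty );                // woken waiter is handed the monitor via next_thread()
    }

    int take( Rendezvous & mutex this ) {
        if ( ! this.full ) wait( this.notEmpty );
        this.full = false;
        signal( this.notFull );
        return this.slot;
    }

Because next_thread() checks the signaller stack before the entry queue, the condition tested with a plain if still holds when the signalled thread resumes; a defensive while loop would also be correct.
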
  • libcfa/src/concurrency/monitor.hfa

    rb7d6a36 r6a490b2  
    2323
    2424trait is_monitor(dtype T) {
    25         monitor_desc * get_monitor( T & );
     25        $monitor * get_monitor( T & );
    2626        void ^?{}( T & mutex );
    2727};
    2828
    29 static inline void ?{}(monitor_desc & this) with( this ) {
     29static inline void ?{}($monitor & this) with( this ) {
    3030        lock{};
    3131        entry_queue{};
     
    3939}
    4040
    41 static inline void ^?{}(monitor_desc & ) {}
     41static inline void ^?{}($monitor & ) {}
    4242
    4343struct monitor_guard_t {
    44         monitor_desc **         m;
     44        $monitor **     m;
    4545        __lock_size_t           count;
    4646        __monitor_group_t prev;
    4747};
    4848
    49 void ?{}( monitor_guard_t & this, monitor_desc ** m, __lock_size_t count, void (*func)() );
     49void ?{}( monitor_guard_t & this, $monitor ** m, __lock_size_t count, void (*func)() );
    5050void ^?{}( monitor_guard_t & this );
    5151
    5252struct monitor_dtor_guard_t {
    53         monitor_desc *    m;
     53        $monitor *    m;
    5454        __monitor_group_t prev;
    5555};
    5656
    57 void ?{}( monitor_dtor_guard_t & this, monitor_desc ** m, void (*func)() );
     57void ?{}( monitor_dtor_guard_t & this, $monitor ** m, void (*func)() );
    5858void ^?{}( monitor_dtor_guard_t & this );
    5959
     
    7272
    7373        // The monitor this criterion concerns
    74         monitor_desc * target;
     74        $monitor * target;
    7575
    7676        // The parent node to which this criterion belongs
     
    8787struct __condition_node_t {
    8888        // Thread that needs to be woken when all criteria are met
    89         thread_desc * waiting_thread;
     89        $thread * waiting_thread;
    9090
    9191        // Array of criteria (Criterions are contiguous in memory)
     
    106106}
    107107
    108 void ?{}(__condition_node_t & this, thread_desc * waiting_thread, __lock_size_t count, uintptr_t user_info );
     108void ?{}(__condition_node_t & this, $thread * waiting_thread, __lock_size_t count, uintptr_t user_info );
    109109void ?{}(__condition_criterion_t & this );
    110 void ?{}(__condition_criterion_t & this, monitor_desc * target, __condition_node_t * owner );
     110void ?{}(__condition_criterion_t & this, $monitor * target, __condition_node_t * owner );
    111111
    112112struct condition {
     
    115115
    116116        // Array of monitor pointers (Monitors are NOT contiguous in memory)
    117         monitor_desc ** monitors;
     117        $monitor ** monitors;
    118118
    119119        // Number of monitors in the array
     
    133133              bool signal      ( condition & this );
    134134              bool signal_block( condition & this );
    135 static inline bool is_empty    ( condition & this ) { return !this.blocked.head; }
     135static inline bool is_empty    ( condition & this ) { return this.blocked.head == 1p; }
    136136         uintptr_t front       ( condition & this );
    137137
  • libcfa/src/concurrency/mutex.cfa

    rb7d6a36 r6a490b2  
    4040        if( is_locked ) {
    4141                append( blocked_threads, kernelTLS.this_thread );
    42                 BlockInternal( &lock );
     42                unlock( lock );
     43                park( __cfaabi_dbg_ctx );
    4344        }
    4445        else {
     
    6263        lock( this.lock __cfaabi_dbg_ctx2 );
    6364        this.is_locked = (this.blocked_threads != 0);
    64         WakeThread(
    65                 pop_head( this.blocked_threads )
     65        unpark(
     66                pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
    6667        );
    6768        unlock( this.lock );
     
    9495        else {
    9596                append( blocked_threads, kernelTLS.this_thread );
    96                 BlockInternal( &lock );
     97                unlock( lock );
     98                park( __cfaabi_dbg_ctx );
    9799        }
    98100}
     
    118120        recursion_count--;
    119121        if( recursion_count == 0 ) {
    120                 thread_desc * thrd = pop_head( blocked_threads );
     122                $thread * thrd = pop_head( blocked_threads );
    121123                owner = thrd;
    122124                recursion_count = (thrd ? 1 : 0);
    123                 WakeThread( thrd );
     125                unpark( thrd __cfaabi_dbg_ctx2 );
    124126        }
    125127        unlock( lock );
     
    138140void notify_one(condition_variable & this) with(this) {
    139141        lock( lock __cfaabi_dbg_ctx2 );
    140         WakeThread(
    141                 pop_head( this.blocked_threads )
     142        unpark(
     143                pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
    142144        );
    143145        unlock( lock );
     
    147149        lock( lock __cfaabi_dbg_ctx2 );
    148150        while(this.blocked_threads) {
    149                 WakeThread(
    150                         pop_head( this.blocked_threads )
     151                unpark(
     152                        pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
    151153                );
    152154        }
     
    157159        lock( this.lock __cfaabi_dbg_ctx2 );
    158160        append( this.blocked_threads, kernelTLS.this_thread );
    159         BlockInternal( &this.lock );
     161        unlock( this.lock );
     162        park( __cfaabi_dbg_ctx );
    160163}
    161164
     
    164167        lock( this.lock __cfaabi_dbg_ctx2 );
    165168        append( this.blocked_threads, kernelTLS.this_thread );
    166         void __unlock(void) {
    167                 unlock(l);
    168                 unlock(this.lock);
    169         }
    170         BlockInternal( __unlock );
     169        unlock(l);
     170        unlock(this.lock);
     171        park( __cfaabi_dbg_ctx );
    171172        lock(l);
    172173}
  • libcfa/src/concurrency/mutex.hfa

    rb7d6a36 r6a490b2  
    3636
    3737        // List of blocked threads
    38         __queue_t(struct thread_desc) blocked_threads;
     38        __queue_t(struct $thread) blocked_threads;
    3939
    4040        // Locked flag
     
    5555
    5656        // List of blocked threads
    57         __queue_t(struct thread_desc) blocked_threads;
     57        __queue_t(struct $thread) blocked_threads;
    5858
    5959        // Current thread owning the lock
    60         struct thread_desc * owner;
     60        struct $thread * owner;
    6161
    6262        // Number of recursion level
     
    8383
    8484        // List of blocked threads
    85         __queue_t(struct thread_desc) blocked_threads;
     85        __queue_t(struct $thread) blocked_threads;
    8686};
    8787
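
Note: these queues back the header's simple, recursive, and condition-variable locks, and the matching wait in mutex.cfa now releases the caller's lock before parking and re-acquires it on wake-up. A usage sketch follows; the public names mutex_lock and condition_variable and the producer/consumer routines are assumptions made for illustration, not something introduced by this changeset.

    #include <mutex.hfa>

    mutex_lock l;                          // assumed name for the simple lock declared above
    condition_variable cv;                 // assumed name for the last structure declared above
    bool ready = false;

    void consumer() {
        lock( l );
        while ( ! ready ) wait( cv, l );   // drops l while parked, re-locks it before returning
        unlock( l );
    }

    void producer() {
        lock( l );
        ready = true;
        notify_one( cv );                  // unparks one blocked thread, as in mutex.cfa above
        unlock( l );
    }
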
  • libcfa/src/concurrency/preemption.cfa

    rb7d6a36 r6a490b2  
    3939// FwdDeclarations : timeout handlers
    4040static void preempt( processor   * this );
    41 static void timeout( thread_desc * this );
     41static void timeout( $thread * this );
    4242
    4343// FwdDeclarations : Signal handlers
    4444static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ );
     45static void sigHandler_alarm    ( __CFA_SIGPARMS__ );
    4546static void sigHandler_segv     ( __CFA_SIGPARMS__ );
    4647static void sigHandler_ill      ( __CFA_SIGPARMS__ );
     
    8384// Get next expired node
    8485static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) {
    85         if( !alarms->head ) return 0p;                                          // If no alarms return null
    86         if( alarms->head->alarm >= currtime ) return 0p;        // If alarms head not expired return null
     86        if( ! & (*alarms)`first ) return 0p;                                            // If no alarms return null
     87        if( (*alarms)`first.alarm >= currtime ) return 0p;      // If alarms head not expired return null
    8788        return pop(alarms);                                                                     // Otherwise just pop head
    8889}
     
    9798        while( node = get_expired( alarms, currtime ) ) {
    9899                // __cfaabi_dbg_print_buffer_decl( " KERNEL: preemption tick.\n" );
     100                Duration period = node->period;
     101                if( period == 0) {
     102                        node->set = false;                  // Node is one-shot, just mark it as not pending
     103                }
    99104
    100105                // Check if this is a kernel
     
    107112
    108113                // Check if this is a periodic alarm
    109                 Duration period = node->period;
    110114                if( period > 0 ) {
    111115                        // __cfaabi_dbg_print_buffer_local( " KERNEL: alarm period is %lu.\n", period.tv );
     
    113117                        insert( alarms, node );             // Reinsert the node for the next time it triggers
    114118                }
    115                 else {
    116                         node->set = false;                  // Node is one-shot, just mark it as not pending
    117                 }
    118119        }
    119120
    120121        // If there are still alarms pending, reset the timer
    121         if( alarms->head ) {
    122                 // __cfaabi_dbg_print_buffer_decl( " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
    123                 Duration delta = alarms->head->alarm - currtime;
    124                 Duration caped = max(delta, 50`us);
     122        if( & (*alarms)`first ) {
     123                __cfadbg_print_buffer_decl(preemption, " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
     124                Duration delta = (*alarms)`first.alarm - currtime;
     125                Duration capped = max(delta, 50`us);
    125126                // itimerval tim  = { caped };
    126127                // __cfaabi_dbg_print_buffer_local( "    Values are %lu, %lu, %lu %lu.\n", delta.tv, caped.tv, tim.it_value.tv_sec, tim.it_value.tv_usec);
    127128
    128                 __kernel_set_timer( caped );
     129                __kernel_set_timer( capped );
    129130        }
    130131}
     
    184185
    185186        // Enable interrupts by decrementing the counter
    186         // If counter reaches 0, execute any pending CtxSwitch
     187        // If counter reaches 0, execute any pending __cfactx_switch
    187188        void enable_interrupts( __cfaabi_dbg_ctx_param ) {
    188189                processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
    189                 thread_desc * thrd = kernelTLS.this_thread;       // Cache the thread now since interrupts can start happening after the atomic store
    190190
    191191                with( kernelTLS.preemption_state ){
     
    209209                                if( proc->pending_preemption ) {
    210210                                        proc->pending_preemption = false;
    211                                         BlockInternal( thrd );
     211                                        force_yield( __POLL_PREEMPTION );
    212212                                }
    213213                        }
     
    219219
    220220        // Disable interrupts by incrementing the counter
    221         // Don't execute any pending CtxSwitch even if counter reaches 0
     221        // Don't execute any pending __cfactx_switch even if counter reaches 0
    222222        void enable_interrupts_noPoll() {
    223223                unsigned short prev = kernelTLS.preemption_state.disable_count;
     
    257257
    258258        if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
    259             abort( "internal error, pthread_sigmask" );
     259                abort( "internal error, pthread_sigmask" );
    260260        }
    261261}
     
    268268
    269269// reserved for future use
    270 static void timeout( thread_desc * this ) {
    271         //TODO : implement waking threads
     270static void timeout( $thread * this ) {
     271        __unpark( this __cfaabi_dbg_ctx2 );
    272272}
    273273
    274274// KERNEL ONLY
    275 // Check if a CtxSwitch signal handler should defer
     275// Check if a __cfactx_switch signal handler should defer
    276276// If true  : preemption is safe
    277277// If false : preemption is unsafe and marked as pending
     
    303303
    304304        // Setup proper signal handlers
    305         __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // CtxSwitch handler
     305        __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // __cfactx_switch handler
     306        __cfaabi_sigaction( SIGALRM, sigHandler_alarm    , SA_SIGINFO | SA_RESTART ); // debug handler
    306307
    307308        signal_block( SIGALRM );
    308309
    309         alarm_stack = create_pthread( &alarm_thread, alarm_loop, 0p );
     310        alarm_stack = __create_pthread( &alarm_thread, alarm_loop, 0p );
    310311}
    311312
     
    394395        // Preemption can occur here
    395396
    396         BlockInternal( kernelTLS.this_thread ); // Do the actual CtxSwitch
     397        force_yield( __ALARM_PREEMPTION ); // Do the actual __cfactx_switch
     398}
     399
     400static void sigHandler_alarm( __CFA_SIGPARMS__ ) {
     401        abort("SIGALRM should never reach the signal handler");
    397402}
    398403
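
Note: condensed, the tick_preemption changes above (a) clear a one-shot node's set flag before its handler runs, (b) re-arm and reinsert periodic nodes, and (c) rename caped to capped while keeping the 50`us floor on the timer re-arm. The sketch below restates that flow over a hypothetical, simplified node type; demo_alarm is not a library type, the real nodes are alarm_node_t.

    #include <time.hfa>

    struct demo_alarm { Time deadline; Duration period; bool set; };

    // Illustrative restatement of the drain step in tick_preemption.
    static void demo_handle_expired( demo_alarm & node, Time currtime ) {
        if ( node.period == 0 ) node.set = false;        // one-shot: marked not pending first
        // ... run the kernel or thread handler for this node ...
        if ( node.period > 0 ) {
            node.deadline = currtime + node.period;      // periodic: re-arm for the next round
        }
    }

    // Illustrative restatement of the timer reset: never arm below the 50`us floor.
    static Duration demo_rearm_delay( Time first_deadline, Time currtime ) {
        return max( first_deadline - currtime, 50`us );
    }
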
  • libcfa/src/concurrency/thread.cfa

    rb7d6a36 r6a490b2  
    2323#include "invoke.h"
    2424
    25 extern "C" {
    26         #include <fenv.h>
    27         #include <stddef.h>
    28 }
    29 
    30 //extern volatile thread_local processor * this_processor;
    31 
    3225//-----------------------------------------------------------------------------
    3326// Thread ctors and dtors
    34 void ?{}(thread_desc & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {
     27void ?{}($thread & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {
    3528        context{ 0p, 0p };
    3629        self_cor{ name, storage, storageSize };
    3730        state = Start;
     31        preempted = __NO_PREEMPTION;
    3832        curr_cor = &self_cor;
    3933        self_mon.owner = &this;
     
    5145}
    5246
    53 void ^?{}(thread_desc& this) with( this ) {
     47void ^?{}($thread& this) with( this ) {
    5448        unregister(curr_cluster, this);
    5549        ^self_cor{};
    5650}
    5751
     52//-----------------------------------------------------------------------------
     53// Starting and stopping threads
     54forall( dtype T | is_thread(T) )
     55void __thrd_start( T & this, void (*main_p)(T &) ) {
     56        $thread * this_thrd = get_thread(this);
     57
     58        disable_interrupts();
     59        __cfactx_start(main_p, get_coroutine(this), this, __cfactx_invoke_thread);
     60
     61        this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];
     62        verify( this_thrd->context.SP );
     63
     64        __schedule_thread(this_thrd);
     65        enable_interrupts( __cfaabi_dbg_ctx );
     66}
     67
     68//-----------------------------------------------------------------------------
     69// Support for threads that don't use the thread keyword
    5870forall( dtype T | sized(T) | is_thread(T) | { void ?{}(T&); } )
    5971void ?{}( scoped(T)& this ) with( this ) {
     
    7385}
    7486
    75 //-----------------------------------------------------------------------------
    76 // Starting and stopping threads
    77 forall( dtype T | is_thread(T) )
    78 void __thrd_start( T & this, void (*main_p)(T &) ) {
    79         thread_desc * this_thrd = get_thread(this);
    80 
    81         disable_interrupts();
    82         CtxStart(main_p, get_coroutine(this), this, CtxInvokeThread);
    83 
    84         this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];
    85         verify( this_thrd->context.SP );
    86 
    87         ScheduleThread(this_thrd);
    88         enable_interrupts( __cfaabi_dbg_ctx );
    89 }
    90 
    91 void yield( void ) {
    92         // Safety note : This could cause some false positives due to preemption
    93       verify( TL_GET( preemption_state.enabled ) );
    94         BlockInternal( TL_GET( this_thread ) );
    95         // Safety note : This could cause some false positives due to preemption
    96       verify( TL_GET( preemption_state.enabled ) );
    97 }
    98 
    99 void yield( unsigned times ) {
    100         for( unsigned i = 0; i < times; i++ ) {
    101                 yield();
    102         }
    103 }
    104 
    10587// Local Variables: //
    10688// mode: c //
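
Note: __thrd_start above is never called directly by user code; it runs as part of thread construction, copying the coroutine context and handing the descriptor to __schedule_thread. For context, a minimal sketch of the user-facing convention it supports, under the usual CFA rule that a thread starts when its constructor completes and is joined by its destructor (the Worker type is illustrative):

    #include <thread.hfa>

    thread Worker {};                      // the thread keyword generates get_thread, satisfying is_thread

    void main( Worker & this ) {           // thread body, eventually entered via __cfactx_invoke_thread
        // ... do some work ...
    }

    int main() {
        Worker w;                          // construction schedules the thread to run concurrently
        // ... main continues while w executes ...
        return 0;
    }                                      // w's destructor joins the thread here
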
  • libcfa/src/concurrency/thread.hfa

    rb7d6a36 r6a490b2  
    2828      void ^?{}(T& mutex this);
    2929      void main(T& this);
    30       thread_desc* get_thread(T& this);
     30      $thread* get_thread(T& this);
    3131};
    3232
    33 #define DECL_THREAD(X) thread_desc* get_thread(X& this) { return &this.__thrd; } void main(X& this)
     33// define that satisfies the trait without using the thread keyword
     34#define DECL_THREAD(X) $thread* get_thread(X& this) __attribute__((const)) { return &this.__thrd; } void main(X& this)
     35
     36// Inline getters for threads/coroutines/monitors
     37forall( dtype T | is_thread(T) )
     38static inline $coroutine* get_coroutine(T & this) __attribute__((const)) { return &get_thread(this)->self_cor; }
    3439
    3540forall( dtype T | is_thread(T) )
    36 static inline coroutine_desc* get_coroutine(T & this) {
    37         return &get_thread(this)->self_cor;
    38 }
     41static inline $monitor  * get_monitor  (T & this) __attribute__((const)) { return &get_thread(this)->self_mon; }
    3942
    40 forall( dtype T | is_thread(T) )
    41 static inline monitor_desc* get_monitor(T & this) {
    42         return &get_thread(this)->self_mon;
    43 }
     43static inline $coroutine* get_coroutine($thread * this) __attribute__((const)) { return &this->self_cor; }
     44static inline $monitor  * get_monitor  ($thread * this) __attribute__((const)) { return &this->self_mon; }
    4445
    45 static inline coroutine_desc* get_coroutine(thread_desc * this) {
    46         return &this->self_cor;
    47 }
    48 
    49 static inline monitor_desc* get_monitor(thread_desc * this) {
    50         return &this->self_mon;
    51 }
    52 
     46//-----------------------------------------------------------------------------
     47// forward declarations needed for threads
    5348extern struct cluster * mainCluster;
    5449
     
    5853//-----------------------------------------------------------------------------
    5954// Ctors and dtors
    60 void ?{}(thread_desc & this, const char * const name, struct cluster & cl, void * storage, size_t storageSize );
    61 void ^?{}(thread_desc & this);
     55void ?{}($thread & this, const char * const name, struct cluster & cl, void * storage, size_t storageSize );
     56void ^?{}($thread & this);
    6257
    63 static inline void ?{}(thread_desc & this)                                                                  { this{ "Anonymous Thread", *mainCluster, 0p, 65000 }; }
    64 static inline void ?{}(thread_desc & this, size_t stackSize )                                               { this{ "Anonymous Thread", *mainCluster, 0p, stackSize }; }
    65 static inline void ?{}(thread_desc & this, void * storage, size_t storageSize )                             { this{ "Anonymous Thread", *mainCluster, storage, storageSize }; }
    66 static inline void ?{}(thread_desc & this, struct cluster & cl )                                            { this{ "Anonymous Thread", cl, 0p, 65000 }; }
    67 static inline void ?{}(thread_desc & this, struct cluster & cl, size_t stackSize )                          { this{ "Anonymous Thread", cl, 0p, stackSize }; }
    68 static inline void ?{}(thread_desc & this, struct cluster & cl, void * storage, size_t storageSize )        { this{ "Anonymous Thread", cl, storage, storageSize }; }
    69 static inline void ?{}(thread_desc & this, const char * const name)                                         { this{ name, *mainCluster, 0p, 65000 }; }
    70 static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl )                   { this{ name, cl, 0p, 65000 }; }
    71 static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, 0p, stackSize }; }
     58static inline void ?{}($thread & this)                                                                  { this{ "Anonymous Thread", *mainCluster, 0p, 65000 }; }
     59static inline void ?{}($thread & this, size_t stackSize )                                               { this{ "Anonymous Thread", *mainCluster, 0p, stackSize }; }
     60static inline void ?{}($thread & this, void * storage, size_t storageSize )                             { this{ "Anonymous Thread", *mainCluster, storage, storageSize }; }
     61static inline void ?{}($thread & this, struct cluster & cl )                                            { this{ "Anonymous Thread", cl, 0p, 65000 }; }
     62static inline void ?{}($thread & this, struct cluster & cl, size_t stackSize )                          { this{ "Anonymous Thread", cl, 0p, stackSize }; }
     63static inline void ?{}($thread & this, struct cluster & cl, void * storage, size_t storageSize )        { this{ "Anonymous Thread", cl, storage, storageSize }; }
     64static inline void ?{}($thread & this, const char * const name)                                         { this{ name, *mainCluster, 0p, 65000 }; }
     65static inline void ?{}($thread & this, const char * const name, struct cluster & cl )                   { this{ name, cl, 0p, 65000 }; }
     66static inline void ?{}($thread & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, 0p, stackSize }; }
    7267
    7368//-----------------------------------------------------------------------------
     
    8883void ^?{}( scoped(T)& this );
    8984
    90 void yield();
    91 void yield( unsigned times );
     85//-----------------------------------------------------------------------------
     86// Thread getters
     87static inline struct $thread * active_thread () { return TL_GET( this_thread ); }
    9288
    93 static inline struct thread_desc * active_thread () { return TL_GET( this_thread ); }
     89//-----------------------------------------------------------------------------
     90// Scheduler API
     91
     92//----------
     93// Park thread: block until corresponding call to unpark, won't block if unpark is already called
     94void park( __cfaabi_dbg_ctx_param );
     95
     96//----------
     97// Unpark a thread, if the thread is already blocked, schedule it
     98//                  if the thread is not yet block, signal that it should rerun immediately
     99void unpark( $thread * this __cfaabi_dbg_ctx_param2 );
     100
     101forall( dtype T | is_thread(T) )
     102static inline void unpark( T & this __cfaabi_dbg_ctx_param2 ) { if(!&this) return; unpark( get_thread( this ) __cfaabi_dbg_ctx_fwd2 );}
     103
     104//----------
     105// Yield: force thread to block and be rescheduled
     106bool force_yield( enum __Preemption_Reason );
     107
     108static inline void yield() {
     109        force_yield(__MANUAL_PREEMPTION);
     110}
     111
     112// Yield: yield N times
     113static inline void yield( unsigned times ) {
     114        for( times ) {
     115                yield();
     116        }
     117}
     118
     119//----------
     120// sleep: force thread to block and be rescheduled after Duration duration
     121void sleep( Duration duration );
    94122
    95123// Local Variables: //
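
Note: park and unpark above replace the old yield/BlockInternal entry points, and the stated contract is that an unpark delivered before the matching park is remembered, so the parking thread does not block. A sketch of that handshake using only the declarations above (Waiter is illustrative; the debug-context macros are passed exactly as the library passes them internally):

    #include <thread.hfa>

    thread Waiter {};

    void main( Waiter & this ) {
        park( __cfaabi_dbg_ctx );          // blocks here only if no unpark has arrived yet
    }

    int main() {
        Waiter w;                          // w starts and races toward its park()
        unpark( w __cfaabi_dbg_ctx2 );     // whichever call wins the race, w runs exactly once
    }                                      // w is joined here once its main() returns
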
  • libcfa/src/exception.c

    rb7d6a36 r6a490b2  
    99// Author           : Andrew Beach
    1010// Created On       : Mon Jun 26 15:13:00 2017
    11 // Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Feb 22 18:17:34 2018
    13 // Update Count     : 11
     11// Last Modified By : Andrew Beach
     12// Last Modified On : Tue Apr 14 12:01:00 2020
     13// Update Count     : 18
    1414//
    1515
     16// Normally we would get this from the CFA prelude.
    1617#include <stddef.h> // for size_t
    1718
    1819#include "exception.h"
    1920
    20 // Implementation of the secret header.
     21// Implementation of the secret header is hardware dependent.
     22#if !( defined( __x86_64 ) || defined( __i386 ) )
     23#error Exception Handling: No known architecture detected.
     24#endif
    2125
    2226#include <stdlib.h>
     
    2428#include <unwind.h>
    2529#include <bits/debug.hfa>
     30#include "stdhdr/assert.h"
    2631
    2732// FIX ME: temporary hack to keep ARM build working
    2833#ifndef _URC_FATAL_PHASE1_ERROR
    29 #define _URC_FATAL_PHASE1_ERROR 2
     34#define _URC_FATAL_PHASE1_ERROR 3
    3035#endif // ! _URC_FATAL_PHASE1_ERROR
    3136#ifndef _URC_FATAL_PHASE2_ERROR
     
    3540#include "lsda.h"
    3641
     42/* The exception class for our exceptions. Because of the vendor component
     43 * its value would not be standard.
     44 * Vendor: UWPL
     45 * Language: CFA\0
     46 */
     47const _Unwind_Exception_Class __cfaehm_exception_class = 0x4c50575500414643;
    3748
    3849// Base exception vtable is abstract, you should not have base exceptions.
    39 struct __cfaabi_ehm__base_exception_t_vtable
    40                 ___cfaabi_ehm__base_exception_t_vtable_instance = {
     50struct __cfaehm_base_exception_t_vtable
     51                ___cfaehm_base_exception_t_vtable_instance = {
    4152        .parent = NULL,
    4253        .size = 0,
     
    4960// Temporary global exception context. Does not work with concurrency.
    5061struct exception_context_t {
    51     struct __cfaabi_ehm__try_resume_node * top_resume;
    52     struct __cfaabi_ehm__try_resume_node * current_resume;
    53 
    54     exception_t * current_exception;
    55     int current_handler_index;
    56 } shared_stack = {NULL, NULL, 0, 0};
     62        struct __cfaehm_try_resume_node * top_resume;
     63
     64        exception_t * current_exception;
     65        int current_handler_index;
     66} static shared_stack = {NULL, NULL, 0};
    5767
    5868// Get the current exception context.
     
    6272        return &shared_stack;
    6373}
    64 //#define SAVE_EXCEPTION_CONTEXT(to_name)
    65 //struct exception_context_t * to_name = this_exception_context();
    66 //exception * this_exception() {
    67 //    return this_exception_context()->current_exception;
    68 //}
    69 
    70 
    71 // This macro should be the only thing that needs to change across machines.
    72 // Used in the personality function, way down in termination.
    73 // struct _Unwind_Context * -> _Unwind_Reason_Code(*)(exception_t *)
    74 #define MATCHER_FROM_CONTEXT(ptr_to_context) \
    75         (*(_Unwind_Reason_Code(**)(exception_t *))(_Unwind_GetCFA(ptr_to_context) + 8))
    7674
    7775
    7876// RESUMPTION ================================================================
    7977
    80 void __cfaabi_ehm__throw_resume(exception_t * except) {
    81 
    82         __cfaabi_dbg_print_safe("Throwing resumption exception\n");
    83 
    84         struct __cfaabi_ehm__try_resume_node * original_head = shared_stack.current_resume;
    85         struct __cfaabi_ehm__try_resume_node * current =
    86                 (original_head) ? original_head->next : shared_stack.top_resume;
     78static void reset_top_resume(struct __cfaehm_try_resume_node ** store) {
     79        this_exception_context()->top_resume = *store;
     80}
     81
     82void __cfaehm_throw_resume(exception_t * except) {
     83        struct exception_context_t * context = this_exception_context();
     84
     85        __cfadbg_print_safe(exception, "Throwing resumption exception\n");
     86
     87        __attribute__((cleanup(reset_top_resume)))
     88        struct __cfaehm_try_resume_node * original_head = context->top_resume;
     89        struct __cfaehm_try_resume_node * current = context->top_resume;
    8790
    8891        for ( ; current ; current = current->next) {
    89                 shared_stack.current_resume = current;
     92                context->top_resume = current->next;
    9093                if (current->handler(except)) {
    91                         shared_stack.current_resume = original_head;
    9294                        return;
    9395                }
    9496        }
    9597
    96         __cfaabi_dbg_print_safe("Unhandled exception\n");
    97         shared_stack.current_resume = original_head;
     98        __cfadbg_print_safe(exception, "Unhandled exception\n");
    9899
    99100        // Fall back to termination:
    100         __cfaabi_ehm__throw_terminate(except);
     101        __cfaehm_throw_terminate(except);
    101102        // TODO: Default handler for resumption.
    102103}
     
    106107// be added after the node is built but before it is made the top node.
    107108
    108 void __cfaabi_ehm__try_resume_setup(struct __cfaabi_ehm__try_resume_node * node,
     109void __cfaehm_try_resume_setup(struct __cfaehm_try_resume_node * node,
    109110                        _Bool (*handler)(exception_t * except)) {
    110         node->next = shared_stack.top_resume;
     111        struct exception_context_t * context = this_exception_context();
     112        node->next = context->top_resume;
    111113        node->handler = handler;
    112         shared_stack.top_resume = node;
    113 }
    114 
    115 void __cfaabi_ehm__try_resume_cleanup(struct __cfaabi_ehm__try_resume_node * node) {
    116         shared_stack.top_resume = node->next;
     114        context->top_resume = node;
     115}
     116
     117void __cfaehm_try_resume_cleanup(struct __cfaehm_try_resume_node * node) {
     118        struct exception_context_t * context = this_exception_context();
     119        context->top_resume = node->next;
    117120}
    118121
     
    123126// May have to move to cfa for constructors and destructors (references).
    124127
    125 struct __cfaabi_ehm__node {
    126         struct __cfaabi_ehm__node * next;
     128// How to clean up an exception in various situations.
     129static void __cfaehm_exception_cleanup(
     130                _Unwind_Reason_Code reason,
     131                struct _Unwind_Exception * exception) {
     132        switch (reason) {
     133        case _URC_FOREIGN_EXCEPTION_CAUGHT:
     134                // This one we could clean-up to allow cross-language exceptions.
     135        case _URC_FATAL_PHASE1_ERROR:
     136        case _URC_FATAL_PHASE2_ERROR:
     137        default:
     138                abort();
     139        }
     140}
     141
     142// We need a piece of storage to raise the exception, for now it's a single
     143// piece.
     144static struct _Unwind_Exception this_exception_storage;
     145
     146struct __cfaehm_node {
     147        struct __cfaehm_node * next;
    127148};
    128149
    129150#define NODE_TO_EXCEPT(node) ((exception_t *)(1 + (node)))
    130 #define EXCEPT_TO_NODE(except) ((struct __cfaabi_ehm__node *)(except) - 1)
     151#define EXCEPT_TO_NODE(except) ((struct __cfaehm_node *)(except) - 1)
    131152
    132153// Creates a copy of the indicated exception and sets current_exception to it.
    133 static void __cfaabi_ehm__allocate_exception( exception_t * except ) {
     154static void __cfaehm_allocate_exception( exception_t * except ) {
    134155        struct exception_context_t * context = this_exception_context();
    135156
    136157        // Allocate memory for the exception.
    137         struct __cfaabi_ehm__node * store = malloc(
    138                 sizeof( struct __cfaabi_ehm__node ) + except->virtual_table->size );
     158        struct __cfaehm_node * store = malloc(
     159                sizeof( struct __cfaehm_node ) + except->virtual_table->size );
    139160
    140161        if ( ! store ) {
     
    149170        // Copy the exception to storage.
    150171        except->virtual_table->copy( context->current_exception, except );
     172
     173        // Set up the exception storage.
     174        this_exception_storage.exception_class = __cfaehm_exception_class;
     175        this_exception_storage.exception_cleanup = __cfaehm_exception_cleanup;
    151176}
    152177
    153178// Delete the provided exception, unsetting current_exception if relevant.
    154 static void __cfaabi_ehm__delete_exception( exception_t * except ) {
     179static void __cfaehm_delete_exception( exception_t * except ) {
    155180        struct exception_context_t * context = this_exception_context();
    156181
    157         __cfaabi_dbg_print_safe("Deleting Exception\n");
     182        __cfadbg_print_safe(exception, "Deleting Exception\n");
    158183
    159184        // Remove the exception from the list.
    160         struct __cfaabi_ehm__node * to_free = EXCEPT_TO_NODE(except);
    161         struct __cfaabi_ehm__node * node;
     185        struct __cfaehm_node * to_free = EXCEPT_TO_NODE(except);
     186        struct __cfaehm_node * node;
    162187
    163188        if ( context->current_exception == except ) {
     
    167192                node = EXCEPT_TO_NODE(context->current_exception);
    168193                // It may always be in the first or second position.
    169                 while( to_free != node->next ) {
     194                while ( to_free != node->next ) {
    170195                        node = node->next;
    171196                }
     
    179204
    180205// If this isn't a rethrow (*except==0), delete the provided exception.
    181 void __cfaabi_ehm__cleanup_terminate( void * except ) {
    182         if ( *(void**)except ) __cfaabi_ehm__delete_exception( *(exception_t **)except );
    183 }
    184 
    185 
    186 // We need a piece of storage to raise the exception
    187 struct _Unwind_Exception this_exception_storage;
     206void __cfaehm_cleanup_terminate( void * except ) {
     207        if ( *(void**)except ) __cfaehm_delete_exception( *(exception_t **)except );
     208}
    188209
    189210// Function needed by force unwind
     
    192213                int version,
    193214                _Unwind_Action actions,
    194                 _Unwind_Exception_Class exceptionClass,
     215                _Unwind_Exception_Class exception_class,
    195216                struct _Unwind_Exception * unwind_exception,
    196                 struct _Unwind_Context * context,
    197                 void * some_param) {
    198         if( actions & _UA_END_OF_STACK  ) exit(1);
    199         if( actions & _UA_CLEANUP_PHASE ) return _URC_NO_REASON;
    200 
    201         return _URC_FATAL_PHASE2_ERROR;
     217                struct _Unwind_Context * unwind_context,
     218                void * stop_param) {
     219        // Verify actions follow the rules we expect.
     220        verify((actions & _UA_CLEANUP_PHASE) && (actions & _UA_FORCE_UNWIND));
     221        verify(!(actions & (_UA_SEARCH_PHASE | _UA_HANDLER_FRAME)));
     222
     223        if ( actions & _UA_END_OF_STACK ) {
     224                exit(1);
     225        } else {
     226                return _URC_NO_REASON;
     227        }
    202228}
    203229
    204230// The exception that is being thrown must already be stored.
    205 __attribute__((noreturn)) void __cfaabi_ehm__begin_unwind(void) {
     231static __attribute__((noreturn)) void __cfaehm_begin_unwind(void) {
    206232        if ( ! this_exception_context()->current_exception ) {
    207233                printf("UNWIND ERROR missing exception in begin unwind\n");
    208234                abort();
    209235        }
    210 
    211236
    212237        // Call stdlibc to raise the exception
     
    220245        // the whole stack.
    221246
    222         if( ret == _URC_END_OF_STACK ) {
     247        if ( ret == _URC_END_OF_STACK ) {
    223248                // No proper handler was found. This can be handled in many ways, C++ calls std::terminate.
    224249                // Here we force unwind the stack, basically raising a cancellation.
     
    235260}
    236261
    237 void __cfaabi_ehm__throw_terminate( exception_t * val ) {
    238         __cfaabi_dbg_print_safe("Throwing termination exception\n");
    239 
    240         __cfaabi_ehm__allocate_exception( val );
    241         __cfaabi_ehm__begin_unwind();
    242 }
    243 
    244 void __cfaabi_ehm__rethrow_terminate(void) {
    245         __cfaabi_dbg_print_safe("Rethrowing termination exception\n");
    246 
    247         __cfaabi_ehm__begin_unwind();
    248 }
    249 
    250 #pragma GCC push_options
    251 #pragma GCC optimize("O0")
     262void __cfaehm_throw_terminate( exception_t * val ) {
     263        __cfadbg_print_safe(exception, "Throwing termination exception\n");
     264
     265        __cfaehm_allocate_exception( val );
     266        __cfaehm_begin_unwind();
     267}
     268
     269void __cfaehm_rethrow_terminate(void) {
     270        __cfadbg_print_safe(exception, "Rethrowing termination exception\n");
     271
     272        __cfaehm_begin_unwind();
     273}
    252274
    253275// This is our personality routine. For every stack frame annotated with
    254276// ".cfi_personality 0x3,__gcfa_personality_v0" this function will be called twice when unwinding.
    255277//  Once in the search phase and once in the cleanup phase.
    256 _Unwind_Reason_Code __gcfa_personality_v0 (
    257                 int version, _Unwind_Action actions, unsigned long long exceptionClass,
    258                 struct _Unwind_Exception* unwind_exception,
    259                 struct _Unwind_Context* context)
     278_Unwind_Reason_Code __gcfa_personality_v0(
     279                int version,
     280                _Unwind_Action actions,
     281                unsigned long long exception_class,
     282                struct _Unwind_Exception * unwind_exception,
     283                struct _Unwind_Context * unwind_context)
    260284{
    261285
    262         //__cfaabi_dbg_print_safe("CFA: 0x%lx\n", _Unwind_GetCFA(context));
    263         __cfaabi_dbg_print_safe("Personality function (%d, %x, %llu, %p, %p):",
    264                         version, actions, exceptionClass, unwind_exception, context);
    265 
    266         // If we've reached the end of the stack then there is nothing much we can do...
    267         if( actions & _UA_END_OF_STACK ) return _URC_END_OF_STACK;
    268 
     286        //__cfadbg_print_safe(exception, "CFA: 0x%lx\n", _Unwind_GetCFA(context));
     287        __cfadbg_print_safe(exception, "Personality function (%d, %x, %llu, %p, %p):",
     288                        version, actions, exception_class, unwind_exception, unwind_context);
     289
     290        // Verify that actions follow the rules we expect.
     291        // This function should never be called at the end of the stack.
     292        verify(!(actions & _UA_END_OF_STACK));
     293        // Either only the search phase flag is set or...
    269294        if (actions & _UA_SEARCH_PHASE) {
    270                 __cfaabi_dbg_print_safe(" lookup phase");
    271         }
    272         else if (actions & _UA_CLEANUP_PHASE) {
    273                 __cfaabi_dbg_print_safe(" cleanup phase");
    274         }
    275         // Just in case, probably can't actually happen
    276         else {
    277                 printf(" error\n");
    278                 return _URC_FATAL_PHASE1_ERROR;
     295                verify(actions == _UA_SEARCH_PHASE);
     296                __cfadbg_print_safe(exception, " lookup phase");
     297        // ... we are in clean-up phase.
     298        } else {
     299                verify(actions & _UA_CLEANUP_PHASE);
     300                __cfadbg_print_safe(exception, " cleanup phase");
     301                // We shouldn't be the handler frame during forced unwind.
     302                if (actions & _UA_HANDLER_FRAME) {
     303                        verify(!(actions & _UA_FORCE_UNWIND));
     304                        __cfadbg_print_safe(exception, " (handler frame)");
     305                } else if (actions & _UA_FORCE_UNWIND) {
     306                        __cfadbg_print_safe(exception, " (force unwind)");
     307                }
    279308        }
    280309
    281310        // Get a pointer to the language specific data from which we will read what we need
    282         const unsigned char * lsd = (const unsigned char*) _Unwind_GetLanguageSpecificData( context );
    283 
    284         if( !lsd ) {    //Nothing to do, keep unwinding
     311        const unsigned char * lsd = _Unwind_GetLanguageSpecificData( unwind_context );
     312
     313        if ( !lsd ) {   //Nothing to do, keep unwinding
    285314                printf(" no LSD");
    286315                goto UNWIND;
     
     289318        // Get the instruction pointer and a reading pointer into the exception table
    290319        lsda_header_info lsd_info;
    291         const unsigned char * cur_ptr = parse_lsda_header(context, lsd, &lsd_info);
    292         _Unwind_Ptr instruction_ptr = _Unwind_GetIP( context );
     320        const unsigned char * cur_ptr = parse_lsda_header(unwind_context, lsd, &lsd_info);
     321        _Unwind_Ptr instruction_ptr = _Unwind_GetIP(unwind_context);
     322
     323        struct exception_context_t * context = this_exception_context();
    293324
    294325        // Linearly search the table for stuff to do
    295         while( cur_ptr < lsd_info.action_table ) {
     326        while ( cur_ptr < lsd_info.action_table ) {
    296327                _Unwind_Ptr callsite_start;
    297328                _Unwind_Ptr callsite_len;
     
    306337
     307338                // Have we reached the correct frame info yet?
    308                 if( lsd_info.Start + callsite_start + callsite_len < instruction_ptr ) {
     339                if ( lsd_info.Start + callsite_start + callsite_len < instruction_ptr ) {
    309340#ifdef __CFA_DEBUG_PRINT__
    310341                        void * ls = (void*)lsd_info.Start;
     
    314345                        void * ep = (void*)lsd_info.Start + callsite_start + callsite_len;
    315346                        void * ip = (void*)instruction_ptr;
    316                         __cfaabi_dbg_print_safe("\nfound %p - %p (%p, %p, %p), looking for %p\n",
     347                        __cfadbg_print_safe(exception, "\nfound %p - %p (%p, %p, %p), looking for %p\n",
    317348                                        bp, ep, ls, cs, cl, ip);
    318349#endif // __CFA_DEBUG_PRINT__
     
    321352
    322353                // Have we gone too far?
    323                 if( lsd_info.Start + callsite_start > instruction_ptr ) {
     354                if ( lsd_info.Start + callsite_start > instruction_ptr ) {
    324355                        printf(" gone too far");
    325356                        break;
    326357                }
    327358
    328                 // Something to do?
    329                 if( callsite_landing_pad ) {
    330                         // Which phase are we in
    331                         if (actions & _UA_SEARCH_PHASE) {
    332                                 // In search phase, these means we found a potential handler we must check.
    333 
    334                                 // We have arbitrarily decided that 0 means nothing to do and 1 means there is
    335                                 // a potential handler. This doesn't seem to conflict the gcc default behavior.
    336                                 if (callsite_action != 0) {
    337                                         // Now we want to run some code to see if the handler matches
    338                                         // This is the tricky part where we want to the power to run arbitrary code
    339                                         // However, generating a new exception table entry and try routine every time
    340                                         // is way more expansive than we might like
    341                                         // The information we have is :
    342                                         //  - The GR (Series of registers)
    343                                         //    GR1=GP Global Pointer of frame ref by context
    344                                         //  - The instruction pointer
    345                                         //  - The instruction pointer info (???)
    346                                         //  - The CFA (Canonical Frame Address)
    347                                         //  - The BSP (Probably the base stack pointer)
    348 
    349 
    350                                         // The current apprach uses one exception table entry per try block
    351                                         _uleb128_t imatcher;
    352                                         // Get the relative offset to the {...}?
    353                                         cur_ptr = read_uleb128(cur_ptr, &imatcher);
    354 
    355                                         _Unwind_Reason_Code (*matcher)(exception_t *) =
    356                                                 MATCHER_FROM_CONTEXT(context);
    357                                         int index = matcher(shared_stack.current_exception);
    358                                         _Unwind_Reason_Code ret = (0 == index)
    359                                                 ? _URC_CONTINUE_UNWIND : _URC_HANDLER_FOUND;
    360                                         shared_stack.current_handler_index = index;
    361 
    362                                         // Based on the return value, check if we matched the exception
    363                                         if( ret == _URC_HANDLER_FOUND) {
    364                                                 __cfaabi_dbg_print_safe(" handler found\n");
    365                                         } else {
    366                                                 __cfaabi_dbg_print_safe(" no handler\n");
    367                                         }
    368                                         return ret;
     359                // Check for what we must do:
     360                if ( 0 == callsite_landing_pad ) {
     361                        // Nothing to do, move along
     362                        __cfadbg_print_safe(exception, " no landing pad");
     363                } else if (actions & _UA_SEARCH_PHASE) {
      364                        // In search phase, this means we found a potential handler we must check.
     365
     366                        // We have arbitrarily decided that 0 means nothing to do and 1 means there is
      367                        // a potential handler. This doesn't seem to conflict with the gcc default behavior.
     368                        if (callsite_action != 0) {
     369                                // Now we want to run some code to see if the handler matches
      370                                // This is the tricky part where we want the power to run arbitrary code.
      371                                // However, generating a new exception table entry and try routine every time
      372                                // is far more expensive than we would like.
      373                                // The information we have is:
     374                                //  - The GR (Series of registers)
     375                                //    GR1=GP Global Pointer of frame ref by context
     376                                //  - The instruction pointer
     377                                //  - The instruction pointer info (???)
     378                                //  - The CFA (Canonical Frame Address)
     379                                //  - The BSP (Probably the base stack pointer)
     380
      381                                // The current approach uses one exception table entry per try block.
     382                                _uleb128_t imatcher;
     383                                // Get the relative offset to the {...}?
     384                                cur_ptr = read_uleb128(cur_ptr, &imatcher);
     385
     386#                               if defined( __x86_64 )
     387                                _Unwind_Word match_pos = _Unwind_GetCFA(unwind_context) + 8;
     388#                               elif defined( __i386 )
     389                                _Unwind_Word match_pos = _Unwind_GetCFA(unwind_context) + 24;
     390#                               endif
     391                                int (*matcher)(exception_t *) = *(int(**)(exception_t *))match_pos;
     392
     393                                int index = matcher(context->current_exception);
     394                                _Unwind_Reason_Code ret = (0 == index)
     395                                        ? _URC_CONTINUE_UNWIND : _URC_HANDLER_FOUND;
     396                                context->current_handler_index = index;
     397
     398                                // Based on the return value, check if we matched the exception
     399                                if (ret == _URC_HANDLER_FOUND) {
     400                                        __cfadbg_print_safe(exception, " handler found\n");
     401                                } else {
     402                                        __cfadbg_print_safe(exception, " no handler\n");
    369403                                }
    370 
    371                                 // This is only a cleanup handler, ignore it
    372                                 __cfaabi_dbg_print_safe(" no action");
     404                                return ret;
    373405                        }
    374                         else if (actions & _UA_CLEANUP_PHASE) {
    375 
    376                                 if( (callsite_action != 0) && !(actions & _UA_HANDLER_FRAME) ){
    377                                         // If this is a potential exception handler
    378                                         // but not the one that matched the exception in the seach phase,
    379                                         // just ignore it
    380                                         goto UNWIND;
    381                                 }
    382 
    383                                 // We need to run some clean-up or a handler
    384                                 // These statment do the right thing but I don't know any specifics at all
    385                                 _Unwind_SetGR( context, __builtin_eh_return_data_regno(0), (_Unwind_Ptr) unwind_exception );
    386                                 _Unwind_SetGR( context, __builtin_eh_return_data_regno(1), 0 );
    387 
    388                                 // I assume this sets the instruction pointer to the adress of the landing pad
    389                                 // It doesn't actually set it, it only state the value that needs to be set once we return _URC_INSTALL_CONTEXT
    390                                 _Unwind_SetIP( context, ((lsd_info.LPStart) + (callsite_landing_pad)) );
    391 
    392                                 __cfaabi_dbg_print_safe(" action\n");
    393 
    394                                 // Return have some action to run
    395                                 return _URC_INSTALL_CONTEXT;
     406
     407                        // This is only a cleanup handler, ignore it
     408                        __cfadbg_print_safe(exception, " no action");
     409                } else {
     410                        // In clean-up phase, no destructors here but this could be the handler.
     411
     412                        if ( (callsite_action != 0) && !(actions & _UA_HANDLER_FRAME) ){
     413                                // If this is a potential exception handler
     414                                // but not the one that matched the exception in the seach phase,
     415                                // just ignore it
     416                                goto UNWIND;
    396417                        }
     418
     419                        // We need to run some clean-up or a handler
      420                        // These statements do the right thing, but I don't know the specifics at all.
     421                        _Unwind_SetGR( unwind_context, __builtin_eh_return_data_regno(0),
     422                                (_Unwind_Ptr)unwind_exception );
     423                        _Unwind_SetGR( unwind_context, __builtin_eh_return_data_regno(1), 0 );
     424
      425                        // I assume this sets the instruction pointer to the address of the landing pad.
      426                        // It doesn't actually set it, it only states the value that needs to be set once we
     427                        // return _URC_INSTALL_CONTEXT
     428                        _Unwind_SetIP( unwind_context, ((lsd_info.LPStart) + (callsite_landing_pad)) );
     429
      430                        // Return that we have some action to run.
     431
     432                        // Return have some action to run
     433                        return _URC_INSTALL_CONTEXT;
    397434                }
    398 
    399                 // Nothing to do, move along
    400                 __cfaabi_dbg_print_safe(" no landing pad");
    401435        }
    402436        // No handling found
    403         __cfaabi_dbg_print_safe(" table end reached\n");
     437        __cfadbg_print_safe(exception, " table end reached");
    404438
    405439        UNWIND:
    406         __cfaabi_dbg_print_safe(" unwind\n");
     440        __cfadbg_print_safe(exception, " unwind\n");
    407441
    408442        // Keep unwinding the stack
    409443        return _URC_CONTINUE_UNWIND;
    410444}
     445
     446#pragma GCC push_options
     447#pragma GCC optimize(0)
    411448
    412449// Try statements are hoisted out; see comments for details. While this could probably be unique
    413450// and simply linked from libcfa, there is one problem left; see the exception table for details.
    414451__attribute__((noinline))
    415 void __cfaabi_ehm__try_terminate(void (*try_block)(),
     452void __cfaehm_try_terminate(void (*try_block)(),
    416453                void (*catch_block)(int index, exception_t * except),
    417454                __attribute__((unused)) int (*match_block)(exception_t * except)) {
     
    419456        //! printf("%p %p %p %p\n", &try_block, &catch_block, &match_block, &xy);
    420457
    421         // Setup statments: These 2 statments won't actually result in any code, they only setup global tables.
    422         // However, they clobber gcc cancellation support from gcc.  We can replace the personality routine but
    423         // replacing the exception table gcc generates is not really doable, it generates labels based on how the
    424         // assembly works.
    425 
    426458        // Setup the personality routine and exception table.
      459        // Unfortunately these clobber gcc cancellation support, which means we can't get access to
     460        // the attribute cleanup tables at the same time. We would have to inspect the assembly to
     461        // create a new set ourselves.
    427462#ifdef __PIC__
    428463        asm volatile (".cfi_personality 0x9b,CFA.ref.__gcfa_personality_v0");
     
    449484        // Label which defines the end of the area for which the handler is setup.
    450485        asm volatile (".TRYEND:");
    451         // Label which defines the start of the exception landing pad.  Basically what is called when the exception is
    452         // caught.  Note, if multiple handlers are given, the multiplexing should be done by the generated code, not the
    453         // exception runtime.
     486        // Label which defines the start of the exception landing pad. Basically what is called when
     487        // the exception is caught. Note, if multiple handlers are given, the multiplexing should be
     488        // done by the generated code, not the exception runtime.
    454489        asm volatile (".CATCH:");
    455490
    456491        // Exception handler
    457         catch_block( shared_stack.current_handler_index,
    458                      shared_stack.current_exception );
     492        // Note: Saving the exception context on the stack breaks termination exceptions.
     493        catch_block( this_exception_context()->current_handler_index,
     494                     this_exception_context()->current_exception );
    459495}
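
To show the calling convention expected by __cfaehm_try_terminate, here is a rough sketch of the shape of code the CFA translator is expected to emit around a try statement. The thunk names and bodies are assumptions for illustration, not the translator's actual output; exception_t and the runtime entry point come from the exception.h header shown later in this changeset.

#include "exception.h"   // exception_t, __cfaehm_try_terminate

static void try_thunk( void ) {
	/* body of the try block goes here */
}

// Return 0 for "no handler matches", otherwise the 1-based index of the handler.
static int match_thunk( exception_t * except ) {
	(void)except;
	return 1;   // a real matcher compares virtual-table pointers
}

// The runtime passes back the index chosen by match_thunk; multiplexing between
// handlers is done by the generated code, not by the exception runtime.
static void catch_thunk( int index, exception_t * except ) {
	(void)except;
	switch ( index ) {
	  case 1: /* body of the first catch clause */ break;
	}
}

void example_try_statement( void ) {
	__cfaehm_try_terminate( try_thunk, catch_thunk, match_thunk );
}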
    460496
     
    464500
    465501#ifdef __PIC__
    466 #if defined( __i386 ) || defined( __x86_64 )
    467502asm (
    468503        // HEADER
     
    481516        // handler landing pad offset and 1 (action code, gcc seems to use 0).
    482517        ".LLSDACSBCFA2:\n"
    483         "       .uleb128 .TRYSTART-__cfaabi_ehm__try_terminate\n"
     518        "       .uleb128 .TRYSTART-__cfaehm_try_terminate\n"
    484519        "       .uleb128 .TRYEND-.TRYSTART\n"
    485         "       .uleb128 .CATCH-__cfaabi_ehm__try_terminate\n"
     520        "       .uleb128 .CATCH-__cfaehm_try_terminate\n"
    486521        "       .uleb128 1\n"
    487522        ".LLSDACSECFA2:\n"
    488523        // TABLE FOOTER
    489524        "       .text\n"
    490         "       .size   __cfaabi_ehm__try_terminate, .-__cfaabi_ehm__try_terminate\n"
     525        "       .size   __cfaehm_try_terminate, .-__cfaehm_try_terminate\n"
    491526);
    492527
     
    507542        "       .quad __gcfa_personality_v0\n"
    508543#else // then __i386
    509         "   .long __gcfa_personality_v0\n"
     544        "       .long __gcfa_personality_v0\n"
    510545#endif
    511546);
    512 #else
    513 #error Exception Handling: unknown architecture for position independent code.
    514 #endif // __i386 || __x86_64
    515547#else // __PIC__
    516 #if defined( __i386 ) || defined( __x86_64 )
    517548asm (
    518549        // HEADER
     
    529560        ".LLSDACSBCFA2:\n"
    530561        //      Handled area start (relative to start of function)
    531         "       .uleb128 .TRYSTART-__cfaabi_ehm__try_terminate\n"
     562        "       .uleb128 .TRYSTART-__cfaehm_try_terminate\n"
    532563        //      Handled area length
    533564        "       .uleb128 .TRYEND-.TRYSTART\n"
    534565        //      Handler landing pad address (relative to start of function)
    535         "       .uleb128 .CATCH-__cfaabi_ehm__try_terminate\n"
     566        "       .uleb128 .CATCH-__cfaehm_try_terminate\n"
    536567        //      Action code, gcc seems to always use 0.
    537568        "       .uleb128 1\n"
     
    539570        ".LLSDACSECFA2:\n"
    540571        "       .text\n"
    541         "       .size   __cfaabi_ehm__try_terminate, .-__cfaabi_ehm__try_terminate\n"
     572        "       .size   __cfaehm_try_terminate, .-__cfaehm_try_terminate\n"
    542573        "       .ident  \"GCC: (Ubuntu 6.2.0-3ubuntu11~16.04) 6.2.0 20160901\"\n"
    543574        "       .section        .note.GNU-stack,\"x\",@progbits\n"
    544575);
    545 #else
    546 #error Exception Handling: unknown architecture for position dependent code.
    547 #endif // __i386 || __x86_64
    548576#endif // __PIC__
    549577
  • libcfa/src/exception.h

    rb7d6a36 r6a490b2  
    99// Author           : Andrew Beach
    1010// Created On       : Mon Jun 26 15:11:00 2017
    11 // Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Feb 22 18:11:15 2018
    13 // Update Count     : 8
     11// Last Modified By : Andrew Beach
     12// Last Modified On : Fri Mar 27 10:16:00 2020
     13// Update Count     : 9
    1414//
    1515
     
    2121#endif
    2222
    23 struct __cfaabi_ehm__base_exception_t;
    24 typedef struct __cfaabi_ehm__base_exception_t exception_t;
    25 struct __cfaabi_ehm__base_exception_t_vtable {
    26         const struct __cfaabi_ehm__base_exception_t_vtable * parent;
     23struct __cfaehm_base_exception_t;
     24typedef struct __cfaehm_base_exception_t exception_t;
     25struct __cfaehm_base_exception_t_vtable {
     26        const struct __cfaehm_base_exception_t_vtable * parent;
    2727        size_t size;
    28         void (*copy)(struct __cfaabi_ehm__base_exception_t *this,
    29                      struct __cfaabi_ehm__base_exception_t * other);
    30         void (*free)(struct __cfaabi_ehm__base_exception_t *this);
    31         const char * (*msg)(struct __cfaabi_ehm__base_exception_t *this);
     28        void (*copy)(struct __cfaehm_base_exception_t *this,
     29                     struct __cfaehm_base_exception_t * other);
     30        void (*free)(struct __cfaehm_base_exception_t *this);
     31        const char * (*msg)(struct __cfaehm_base_exception_t *this);
    3232};
    33 struct __cfaabi_ehm__base_exception_t {
    34         struct __cfaabi_ehm__base_exception_t_vtable const * virtual_table;
     33struct __cfaehm_base_exception_t {
     34        struct __cfaehm_base_exception_t_vtable const * virtual_table;
    3535};
    36 extern struct __cfaabi_ehm__base_exception_t_vtable
    37         ___cfaabi_ehm__base_exception_t_vtable_instance;
     36extern struct __cfaehm_base_exception_t_vtable
     37        ___cfaehm_base_exception_t_vtable_instance;
    3838
    3939
    4040// Used in throw statement translation.
    41 void __cfaabi_ehm__throw_terminate(exception_t * except) __attribute__((noreturn));
    42 void __cfaabi_ehm__rethrow_terminate() __attribute__((noreturn));
    43 void __cfaabi_ehm__throw_resume(exception_t * except);
     41void __cfaehm_throw_terminate(exception_t * except) __attribute__((noreturn));
     42void __cfaehm_rethrow_terminate() __attribute__((noreturn));
     43void __cfaehm_throw_resume(exception_t * except);
    4444
    4545// Function catches termination exceptions.
    46 void __cfaabi_ehm__try_terminate(
     46void __cfaehm_try_terminate(
    4747    void (*try_block)(),
    4848    void (*catch_block)(int index, exception_t * except),
     
    5050
    5151// Clean-up the exception in catch blocks.
    52 void __cfaabi_ehm__cleanup_terminate(void * except);
     52void __cfaehm_cleanup_terminate(void * except);
    5353
    5454// Data structure creates a list of resume handlers.
    55 struct __cfaabi_ehm__try_resume_node {
    56     struct __cfaabi_ehm__try_resume_node * next;
     55struct __cfaehm_try_resume_node {
     56    struct __cfaehm_try_resume_node * next;
    5757    _Bool (*handler)(exception_t * except);
    5858};
    5959
    6060// These act as constructor and destructor for the resume node.
    61 void __cfaabi_ehm__try_resume_setup(
    62     struct __cfaabi_ehm__try_resume_node * node,
     61void __cfaehm_try_resume_setup(
     62    struct __cfaehm_try_resume_node * node,
    6363    _Bool (*handler)(exception_t * except));
    64 void __cfaabi_ehm__try_resume_cleanup(
    65     struct __cfaabi_ehm__try_resume_node * node);
     64void __cfaehm_try_resume_cleanup(
     65    struct __cfaehm_try_resume_node * node);
    6666
    6767// Check for a standard way to call fake destructors.
    68 struct __cfaabi_ehm__cleanup_hook {};
     68struct __cfaehm_cleanup_hook {};
    6969
    7070#ifdef __cforall
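
To make the vtable layout above concrete, here is a hedged C sketch of how a derived exception and its vtable might line up with the base declarations; the names my_exception, my_copy, my_free and my_msg are illustrative, and the real CFA translator generates the equivalent structures (with casts to the base pointer types) rather than hand-written code like this.

#include <stddef.h>
#include "exception.h"   // base vtable type and its instance

struct my_exception_vtable;

struct my_exception {
	struct my_exception_vtable const * virtual_table;   // must be first, mirrors the base layout
	int code;
};

struct my_exception_vtable {
	const struct __cfaehm_base_exception_t_vtable * parent;
	size_t size;
	void (*copy)(struct my_exception * this, struct my_exception * other);
	void (*free)(struct my_exception * this);
	const char * (*msg)(struct my_exception * this);
};

static void my_copy( struct my_exception * this, struct my_exception * other ) { *this = *other; }
static void my_free( struct my_exception * this ) { (void)this; }
static const char * my_msg( struct my_exception * this ) { (void)this; return "my_exception"; }

struct my_exception_vtable _my_exception_vtable_instance = {
	&___cfaehm_base_exception_t_vtable_instance,   // parent: the base vtable instance
	sizeof(struct my_exception),
	my_copy, my_free, my_msg,
};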
  • libcfa/src/heap.cfa

    rb7d6a36 r6a490b2  
    1010// Created On       : Tue Dec 19 21:58:35 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 10:04:51 2020
    13 // Update Count     : 648
     12// Last Modified On : Wed May  6 17:29:26 2020
     13// Update Count     : 727
    1414//
    1515
     
    1919#include <errno.h>                                                                              // errno
    2020#include <string.h>                                                                             // memset, memcpy
     21#include <limits.h>                                                                             // ULONG_MAX
    2122extern "C" {
    2223#include <sys/mman.h>                                                                   // mmap, munmap
    2324} // extern "C"
    2425
    25 // #comment TD : Many of these should be merged into math I believe
    2626#include "bits/align.hfa"                                                               // libPow2
    2727#include "bits/defs.hfa"                                                                // likely, unlikely
     
    3030//#include "stdlib.hfa"                                                                 // bsearchl
    3131#include "malloc.h"
     32#include "bitmanip.hfa"                                                                 // ceiling
    3233
    3334#define MIN(x, y) (y > x ? x : y)
     
    8182};
    8283
     84size_t default_heap_expansion() __attribute__(( weak )) {
     85        return __CFA_DEFAULT_HEAP_EXPANSION__;
     86} // default_heap_expansion
     87
    8388size_t default_mmap_start() __attribute__(( weak )) {
    8489        return __CFA_DEFAULT_MMAP_START__;
    8590} // default_mmap_start
    86 
    87 size_t default_heap_expansion() __attribute__(( weak )) {
    88         return __CFA_DEFAULT_HEAP_EXPANSION__;
    89 } // default_heap_expansion
    9091
    9192
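
Because these two hooks are weak, an application can replace them at link time to tune the allocator without rebuilding libcfa. A minimal sketch of such an override follows, assuming it is compiled as Cforall so the symbol matches the library's; the threshold value is illustrative, and setMmapStart() (further down) requires it to lie between the page size and the largest bucket size or the heap aborts at startup.

// Strong definition; overrides the weak default_mmap_start() in heap.cfa.
size_t default_mmap_start() {
	return 512 * 1024;              // illustrative: switch to mmap at 512 KiB
}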
     
    150151                                                        union {
    151152//                                                              FreeHeader * home;              // allocated block points back to home locations (must overlay alignment)
     153                                                                // 2nd low-order bit => zero filled
    152154                                                                void * home;                    // allocated block points back to home locations (must overlay alignment)
    153155                                                                size_t blockSize;               // size for munmap (must overlay alignment)
     
    169171                                struct FakeHeader {
    170172                                        #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    171                                         uint32_t alignment;                                     // low-order bits of home/blockSize used for tricks
     173                                        // 1st low-order bit => fake header & alignment
     174                                        uint32_t alignment;
    172175                                        #endif // __ORDER_LITTLE_ENDIAN__
    173176
     
    179182                                } fake; // FakeHeader
    180183                        } kind; // Kind
     184                        size_t size;                                                            // allocation size in bytes
    181185                } header; // Header
    182186                char pad[libAlign() - sizeof( Header )];
     
    262266static unsigned long long int free_storage;
    263267static unsigned int free_calls;
     268static unsigned long long int aalloc_storage;
     269static unsigned int aalloc_calls;
    264270static unsigned long long int calloc_storage;
    265271static unsigned int calloc_calls;
    266272static unsigned long long int memalign_storage;
    267273static unsigned int memalign_calls;
     274static unsigned long long int amemalign_storage;
     275static unsigned int amemalign_calls;
    268276static unsigned long long int cmemalign_storage;
    269277static unsigned int cmemalign_calls;
     278static unsigned long long int resize_storage;
     279static unsigned int resize_calls;
    270280static unsigned long long int realloc_storage;
    271281static unsigned int realloc_calls;
     
    275285// Use "write" because streams may be shutdown when calls are made.
    276286static void printStats() {
    277         char helpText[512];
     287        char helpText[1024];
    278288        __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),
    279289                                                                        "\nHeap statistics:\n"
    280290                                                                        "  malloc: calls %u / storage %llu\n"
     291                                                                        "  aalloc: calls %u / storage %llu\n"
    281292                                                                        "  calloc: calls %u / storage %llu\n"
    282293                                                                        "  memalign: calls %u / storage %llu\n"
     294                                                                        "  amemalign: calls %u / storage %llu\n"
    283295                                                                        "  cmemalign: calls %u / storage %llu\n"
     296                                                                        "  resize: calls %u / storage %llu\n"
    284297                                                                        "  realloc: calls %u / storage %llu\n"
    285298                                                                        "  free: calls %u / storage %llu\n"
     
    288301                                                                        "  sbrk: calls %u / storage %llu\n",
    289302                                                                        malloc_calls, malloc_storage,
      303                                                                        aalloc_calls, aalloc_storage,
    290304                                                                        calloc_calls, calloc_storage,
    291305                                                                        memalign_calls, memalign_storage,
     306                                                                        amemalign_calls, amemalign_storage,
    292307                                                                        cmemalign_calls, cmemalign_storage,
     308                                                                        resize_calls, resize_storage,
    293309                                                                        realloc_calls, realloc_storage,
    294310                                                                        free_calls, free_storage,
     
    300316
    301317static int printStatsXML( FILE * stream ) {                             // see malloc_info
    302         char helpText[512];
     318        char helpText[1024];
    303319        int len = snprintf( helpText, sizeof(helpText),
    304320                                                "<malloc version=\"1\">\n"
     
    307323                                                "</sizes>\n"
    308324                                                "<total type=\"malloc\" count=\"%u\" size=\"%llu\"/>\n"
     325                                                "<total type=\"aalloc\" count=\"%u\" size=\"%llu\"/>\n"
    309326                                                "<total type=\"calloc\" count=\"%u\" size=\"%llu\"/>\n"
    310327                                                "<total type=\"memalign\" count=\"%u\" size=\"%llu\"/>\n"
     328                                                "<total type=\"amemalign\" count=\"%u\" size=\"%llu\"/>\n"
    311329                                                "<total type=\"cmemalign\" count=\"%u\" size=\"%llu\"/>\n"
     330                                                "<total type=\"resize\" count=\"%u\" size=\"%llu\"/>\n"
    312331                                                "<total type=\"realloc\" count=\"%u\" size=\"%llu\"/>\n"
    313332                                                "<total type=\"free\" count=\"%u\" size=\"%llu\"/>\n"
     
    317336                                                "</malloc>",
    318337                                                malloc_calls, malloc_storage,
     338                                                aalloc_calls, aalloc_storage,
    319339                                                calloc_calls, calloc_storage,
    320340                                                memalign_calls, memalign_storage,
     341                                                amemalign_calls, amemalign_storage,
    321342                                                cmemalign_calls, cmemalign_storage,
     343                                                resize_calls, resize_storage,
    322344                                                realloc_calls, realloc_storage,
    323345                                                free_calls, free_storage,
     
    339361
    340362
    341 static inline void checkAlign( size_t alignment ) {
    342         if ( alignment < libAlign() || ! libPow2( alignment ) ) {
    343                 abort( "Alignment %zu for memory allocation is less than %d and/or not a power of 2.", alignment, libAlign() );
    344         } // if
    345 } // checkAlign
    346 
    347 
    348 static inline bool setHeapExpand( size_t value ) {
    349   if ( heapExpand < pageSize ) return true;
    350         heapExpand = value;
    351         return false;
    352 } // setHeapExpand
    353 
    354 
    355363// thunk problem
    356364size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) {
     
    369377
    370378static inline bool setMmapStart( size_t value ) {               // true => mmapped, false => sbrk
    371   if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return true;
     379  if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return false;
    372380        mmapStart = value;                                                                      // set global
    373381
     
    376384        assert( maxBucketsUsed < NoBucketSizes );                       // subscript failure ?
    377385        assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
    378         return false;
     386        return true;
    379387} // setMmapStart
     388
     389
     390// <-------+----------------------------------------------------> bsize (bucket size)
     391// |header |addr
     392//==================================================================================
     393//                   align/offset |
     394// <-----------------<------------+-----------------------------> bsize (bucket size)
     395//                   |fake-header | addr
     396#define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) ))
     397#define realHeader( header ) ((HeapManager.Storage.Header *)((char *)header - header->kind.fake.offset))
     398
     399// <-------<<--------------------- dsize ---------------------->> bsize (bucket size)
     400// |header |addr
     401//==================================================================================
     402//                   align/offset |
     403// <------------------------------<<---------- dsize --------->>> bsize (bucket size)
     404//                   |fake-header |addr
     405#define dataStorage( bsize, addr, header ) (bsize - ( (char *)addr - (char *)header ))
     406
     407
     408static inline void checkAlign( size_t alignment ) {
     409        if ( alignment < libAlign() || ! libPow2( alignment ) ) {
     410                abort( "Alignment %zu for memory allocation is less than %d and/or not a power of 2.", alignment, libAlign() );
     411        } // if
     412} // checkAlign
    380413
    381414
     
    391424static inline void fakeHeader( HeapManager.Storage.Header *& header, size_t & alignment ) {
    392425        if ( unlikely( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
    393                 size_t offset = header->kind.fake.offset;
    394426                alignment = header->kind.fake.alignment & -2;   // remove flag from value
    395427                #ifdef __CFA_DEBUG__
    396428                checkAlign( alignment );                                                // check alignment
    397429                #endif // __CFA_DEBUG__
    398                 header = (HeapManager.Storage.Header *)((char *)header - offset);
     430                header = realHeader( header );                                  // backup from fake to real header
    399431        } // if
    400432} // fakeHeader
    401 
    402 
    403 // <-------+----------------------------------------------------> bsize (bucket size)
    404 // |header |addr
    405 //==================================================================================
    406 //                                | alignment
    407 // <-----------------<------------+-----------------------------> bsize (bucket size)
    408 //                   |fake-header | addr
    409 #define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) ))
    410 
    411 // <-------<<--------------------- dsize ---------------------->> bsize (bucket size)
    412 // |header |addr
    413 //==================================================================================
    414 //                                | alignment
    415 // <------------------------------<<---------- dsize --------->>> bsize (bucket size)
    416 //                   |fake-header |addr
    417 #define dataStorage( bsize, addr, header ) (bsize - ( (char *)addr - (char *)header ))
    418433
    419434
     
    428443
    429444        #ifdef __CFA_DEBUG__
    430         checkHeader( addr < heapBegin || header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
     445        checkHeader( addr < heapBegin, name, addr );            // bad low address ?
    431446        #endif // __CFA_DEBUG__
    432447
     
    487502        // along with the block and is a multiple of the alignment size.
    488503
    489   if ( unlikely( size > ~0ul - sizeof(HeapManager.Storage) ) ) return 0p;
     504  if ( unlikely( size > ULONG_MAX - sizeof(HeapManager.Storage) ) ) return 0p;
    490505        size_t tsize = size + sizeof(HeapManager.Storage);
    491506        if ( likely( tsize < mmapStart ) ) {                            // small size => sbrk
     
     539554                block->header.kind.real.home = freeElem;                // pointer back to free list of appropriate size
    540555        } else {                                                                                        // large size => mmap
    541   if ( unlikely( size > ~0ul - pageSize ) ) return 0p;
     556  if ( unlikely( size > ULONG_MAX - pageSize ) ) return 0p;
    542557                tsize = libCeiling( tsize, pageSize );                  // must be multiple of page size
    543558                #ifdef __STATISTICS__
     
    557572        } // if
    558573
     574        block->header.size = size;                                                      // store allocation size
    559575        void * addr = &(block->data);                                           // adjust off header to user bytes
    560576
     
    680696        #endif // FASTLOOKUP
    681697
    682         if ( setMmapStart( default_mmap_start() ) ) {
     698        if ( ! setMmapStart( default_mmap_start() ) ) {
    683699                abort( "HeapManager : internal error, mmap start initialization failure." );
    684700        } // if
     
    686702
    687703        char * end = (char *)sbrk( 0 );
    688         sbrk( (char *)libCeiling( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment
    689         heapBegin = heapEnd = sbrk( 0 );                                        // get new start point
     704        heapBegin = heapEnd = sbrk( (char *)libCeiling( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment
    690705} // HeapManager
    691706
     
    713728        //assert( heapManager.heapBegin != 0 );
    714729        //heapManager{};
    715         if ( heapManager.heapBegin == 0p ) heapManager{};
     730        if ( heapManager.heapBegin == 0p ) heapManager{};       // sanity check
    716731} // memory_startup
    717732
     
    725740        //assert( heapManager.heapBegin != 0 );
    726741        if ( unlikely( heapManager.heapBegin == 0p ) ) heapManager{}; // called before memory_startup ?
     742#if __SIZEOF_POINTER__ == 8
     743        verify( size < ((typeof(size_t))1 << 48) );
     744#endif // __SIZEOF_POINTER__ == 8
    727745        void * addr = doMalloc( size );
    728746        if ( unlikely( addr == 0p ) ) errno = ENOMEM;           // POSIX
     
    731749
    732750
    733 static inline void * callocNoStats( size_t noOfElems, size_t elemSize ) {
    734         size_t size = noOfElems * elemSize;
     751static inline void * callocNoStats( size_t dim, size_t elemSize ) {
     752        size_t size = dim * elemSize;
    735753        char * addr = (char *)mallocNoStats( size );
    736754  if ( unlikely( addr == 0p ) ) return 0p;
     
    790808
    791809
    792 static inline void * cmemalignNoStats( size_t alignment, size_t noOfElems, size_t elemSize ) {
    793         size_t size = noOfElems * elemSize;
     810static inline void * cmemalignNoStats( size_t alignment, size_t dim, size_t elemSize ) {
     811        size_t size = dim * elemSize;
    794812        char * addr = (char *)memalignNoStats( alignment, size );
    795813  if ( unlikely( addr == 0p ) ) return 0p;
     
    803821        #endif // __CFA_DEBUG__
    804822                memset( addr, '\0', dataStorage( bsize, addr, header ) ); // set to zeros
    805         header->kind.real.blockSize |= 2;                               // mark as zero filled
    806 
     823
     824        header->kind.real.blockSize |= 2;                                       // mark as zero filled
    807825        return addr;
    808826} // cmemalignNoStats
     
    819837
    820838extern "C" {
    821         // The malloc() function allocates size bytes and returns a pointer to the allocated memory. The memory is not
    822         // initialized. If size is 0, then malloc() returns either 0p, or a unique pointer value that can later be
    823         // successfully passed to free().
     839        // Allocates size bytes and returns a pointer to the allocated memory.  The contents are undefined. If size is 0,
     840        // then malloc() returns a unique pointer value that can later be successfully passed to free().
    824841        void * malloc( size_t size ) {
    825842                #ifdef __STATISTICS__
     
    831848        } // malloc
    832849
    833         // The calloc() function allocates memory for an array of nmemb elements of size bytes each and returns a pointer to
    834         // the allocated memory. The memory is set to zero. If nmemb or size is 0, then calloc() returns either 0p, or a
    835         // unique pointer value that can later be successfully passed to free().
    836         void * calloc( size_t noOfElems, size_t elemSize ) {
     850
     851        // Same as malloc() except size bytes is an array of dim elements each of elemSize bytes.
     852        void * aalloc( size_t dim, size_t elemSize ) {
     853                #ifdef __STATISTICS__
     854                __atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST );
     855                __atomic_add_fetch( &aalloc_storage, dim * elemSize, __ATOMIC_SEQ_CST );
     856                #endif // __STATISTICS__
     857
     858                return mallocNoStats( dim * elemSize );
     859        } // aalloc
     860
     861
     862        // Same as aalloc() with memory set to zero.
     863        void * calloc( size_t dim, size_t elemSize ) {
    837864                #ifdef __STATISTICS__
    838865                __atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST );
    839                 __atomic_add_fetch( &calloc_storage, noOfElems * elemSize, __ATOMIC_SEQ_CST );
    840                 #endif // __STATISTICS__
    841 
    842                 return callocNoStats( noOfElems, elemSize );
     866                __atomic_add_fetch( &calloc_storage, dim * elemSize, __ATOMIC_SEQ_CST );
     867                #endif // __STATISTICS__
     868
     869                return callocNoStats( dim, elemSize );
    843870        } // calloc
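
A short usage sketch of the two array allocators just defined: aalloc() is the array form of malloc() and leaves the contents undefined, while calloc() additionally zero fills and marks the block with the zero-fill sticky property. The extern declaration below is an assumption about where the libcfa extension is declared; the helper name is illustrative.

#include <stdlib.h>

extern void * aalloc( size_t dim, size_t elemSize );   // libcfa extension

int * make_counters( size_t n ) {
	int * scratch = (int *)aalloc( n, sizeof(int) );   // contents undefined
	int * zeroed  = (int *)calloc( n, sizeof(int) );   // same shape, zero filled
	free( scratch );
	return zeroed;
}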
    844871
    845         // The realloc() function changes the size of the memory block pointed to by ptr to size bytes. The contents will be
    846         // unchanged in the range from the start of the region up to the minimum of the old and new sizes. If the new size
    847         // is larger than the old size, the added memory will not be initialized.  If ptr is 0p, then the call is
    848         // equivalent to malloc(size), for all values of size; if size is equal to zero, and ptr is not 0p, then the call
    849         // is equivalent to free(ptr). Unless ptr is 0p, it must have been returned by an earlier call to malloc(),
    850         // calloc() or realloc(). If the area pointed to was moved, a free(ptr) is done.
     872        // Change the size of the memory block pointed to by oaddr to size bytes. The contents are undefined.  If oaddr is
     873        // 0p, then the call is equivalent to malloc(size), for all values of size; if size is equal to zero, and oaddr is
     874        // not 0p, then the call is equivalent to free(oaddr). Unless oaddr is 0p, it must have been returned by an earlier
     875        // call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done.
     876        void * resize( void * oaddr, size_t size ) {
     877                #ifdef __STATISTICS__
     878                __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST );
     879                __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
     880                #endif // __STATISTICS__
     881
     882                // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
     883          if ( unlikely( size == 0 ) ) { free( oaddr ); return mallocNoStats( size ); } // special cases
     884          if ( unlikely( oaddr == 0p ) ) return mallocNoStats( size );
     885
     886                HeapManager.Storage.Header * header;
     887                HeapManager.FreeHeader * freeElem;
     888                size_t bsize, oalign = 0;
     889                headers( "resize", oaddr, header, freeElem, bsize, oalign );
     890
     891                size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
     892                // same size, DO NOT preserve STICKY PROPERTIES.
     893          if ( oalign == 0 && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
     894                        header->kind.real.blockSize &= -2;                      // no alignment and turn off 0 fill
     895                        return oaddr;
     896                } // if
     897       
     898                // change size, DO NOT preserve STICKY PROPERTIES.
     899                free( oaddr );
     900                void * naddr = mallocNoStats( size );                   // create new area
     901                return naddr;
     902        } // resize
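
A hedged usage sketch contrasting resize() with realloc(): resize() keeps only the storage (contents are undefined and sticky properties such as alignment and zero fill are dropped), whereas realloc() preserves the data up to the minimum of the old and new sizes. The extern declaration is an assumption about where the libcfa extension is declared; the function name is illustrative.

#include <stdlib.h>
#include <string.h>

extern void * resize( void * oaddr, size_t size );   // libcfa extension

void grow_buffers( void ) {
	char * keep = (char *)malloc( 16 );
	strcpy( keep, "hello" );
	keep = (char *)realloc( keep, 64 );        // "hello" is still at the front of the new block

	char * scratch = (char *)malloc( 16 );
	scratch = (char *)resize( scratch, 64 );   // only the storage is kept; contents are undefined

	free( keep );
	free( scratch );
}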
     903
     904
     905        // Same as resize() but the contents are unchanged in the range from the start of the region up to the minimum of
     906        // the old and new sizes.
    851907        void * realloc( void * oaddr, size_t size ) {
    852908                #ifdef __STATISTICS__
    853909                __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
     910                __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
    854911                #endif // __STATISTICS__
    855912
     
    867924                        // Do not know size of original allocation => cannot do 0 fill for any additional space because do not know
    868925                        // where to start filling, i.e., do not overwrite existing values in space.
    869                         //
    870                         // This case does not result in a new profiler entry because the previous one still exists and it must match with
    871                         // the free for this memory.  Hence, this realloc does not appear in the profiler output.
    872926                        return oaddr;
    873927                } // if
    874 
    875                 #ifdef __STATISTICS__
    876                 __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
    877                 #endif // __STATISTICS__
    878928
    879929                // change size and copy old content to new storage
     
    903953        } // realloc
    904954
    905         // The obsolete function memalign() allocates size bytes and returns a pointer to the allocated memory. The memory
    906         // address will be a multiple of alignment, which must be a power of two.
     955        // Same as malloc() except the memory address is a multiple of alignment, which must be a power of two. (obsolete)
    907956        void * memalign( size_t alignment, size_t size ) {
    908957                #ifdef __STATISTICS__
     
    915964
    916965
    917         // The cmemalign() function is the same as calloc() with memory alignment.
    918         void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ) {
     966        // Same as aalloc() with memory alignment.
     967        void * amemalign( size_t alignment, size_t dim, size_t elemSize ) {
    919968                #ifdef __STATISTICS__
    920969                __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
    921                 __atomic_add_fetch( &cmemalign_storage, noOfElems * elemSize, __ATOMIC_SEQ_CST );
    922                 #endif // __STATISTICS__
    923 
    924                 return cmemalignNoStats( alignment, noOfElems, elemSize );
     970                __atomic_add_fetch( &cmemalign_storage, dim * elemSize, __ATOMIC_SEQ_CST );
     971                #endif // __STATISTICS__
     972
     973                return memalignNoStats( alignment, dim * elemSize );
     974        } // amemalign
     975
     976
     977        // Same as calloc() with memory alignment.
     978        void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) {
     979                #ifdef __STATISTICS__
     980                __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
     981                __atomic_add_fetch( &cmemalign_storage, dim * elemSize, __ATOMIC_SEQ_CST );
     982                #endif // __STATISTICS__
     983
     984                return cmemalignNoStats( alignment, dim, elemSize );
    925985        } // cmemalign
    926986
    927         // The function aligned_alloc() is the same as memalign(), except for the added restriction that size should be a
    928         // multiple of alignment.
     987        // Same as memalign(), but ISO/IEC 2011 C11 Section 7.22.2 states: the value of size shall be an integral multiple
      988        // of alignment. This requirement is universally ignored.
    929989        void * aligned_alloc( size_t alignment, size_t size ) {
    930990                return memalign( alignment, size );
     
    932992
    933993
    934         // The function posix_memalign() allocates size bytes and places the address of the allocated memory in *memptr. The
    935         // address of the allocated memory will be a multiple of alignment, which must be a power of two and a multiple of
    936         // sizeof(void *). If size is 0, then posix_memalign() returns either 0p, or a unique pointer value that can later
    937         // be successfully passed to free(3).
     994        // Allocates size bytes and places the address of the allocated memory in *memptr. The address of the allocated
     995        // memory shall be a multiple of alignment, which must be a power of two and a multiple of sizeof(void *). If size
     996        // is 0, then posix_memalign() returns either 0p, or a unique pointer value that can later be successfully passed to
     997        // free(3).
    938998        int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
    939999          if ( alignment < sizeof(void *) || ! libPow2( alignment ) ) return EINVAL; // check alignment
     
    9431003        } // posix_memalign
    9441004
    945         // The obsolete function valloc() allocates size bytes and returns a pointer to the allocated memory. The memory
    946         // address will be a multiple of the page size.  It is equivalent to memalign(sysconf(_SC_PAGESIZE),size).
     1005        // Allocates size bytes and returns a pointer to the allocated memory. The memory address shall be a multiple of the
     1006        // page size.  It is equivalent to memalign(sysconf(_SC_PAGESIZE),size).
    9471007        void * valloc( size_t size ) {
    9481008                return memalign( pageSize, size );
     
    9501010
    9511011
    952         // The free() function frees the memory space pointed to by ptr, which must have been returned by a previous call to
    953         // malloc(), calloc() or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behavior
    954         // occurs. If ptr is 0p, no operation is performed.
     1012        // Same as valloc but rounds size to multiple of page size.
     1013        void * pvalloc( size_t size ) {
     1014                return memalign( pageSize, libCeiling( size, pageSize ) );
     1015        } // pvalloc
     1016
     1017
     1018        // Frees the memory space pointed to by ptr, which must have been returned by a previous call to malloc(), calloc()
     1019        // or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behaviour occurs. If ptr is
     1020        // 0p, no operation is performed.
    9551021        void free( void * addr ) {
    9561022                #ifdef __STATISTICS__
     
    9731039
    9741040
    975         // The malloc_alignment() function returns the alignment of the allocation.
     1041        // Returns the alignment of an allocation.
    9761042        size_t malloc_alignment( void * addr ) {
    9771043          if ( unlikely( addr == 0p ) ) return libAlign();      // minimum alignment
     
    9801046                        return header->kind.fake.alignment & -2;        // remove flag from value
    9811047                } else {
    982                         return libAlign ();                                                     // minimum alignment
     1048                        return libAlign();                                                      // minimum alignment
    9831049                } // if
    9841050        } // malloc_alignment
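A hedged sketch of how this introspection routine is expected to behave, assuming the CFA heap is linked in and the extension prototype from stdhdr/malloc.h further down (malloc_alignment is not standard C):

        #include <stdio.h>
        #include <stdlib.h>
        #include <malloc.h>                             // memalign

        extern size_t malloc_alignment( void * );       // CFA heap extension (see stdhdr/malloc.h below)

        void alignment_demo( void ) {
                void * a = malloc( 100 );               // default alignment, real header
                void * b = memalign( 256, 100 );        // over-aligned, fake header stores the alignment
                // expected output: the minimum alignment for a, 256 for b
                printf( "%zu %zu\n", malloc_alignment( a ), malloc_alignment( b ) );
                free( a ); free( b );
        }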
    9851051
    986 
    987         // The malloc_zero_fill() function returns true if the allocation is zero filled, i.e., initially allocated by calloc().
      1052        // Set the alignment for an allocation and return the previous alignment, or 0 if there was no alignment.
     1053        size_t $malloc_alignment_set( void * addr, size_t alignment ) {
     1054          if ( unlikely( addr == 0p ) ) return libAlign();      // minimum alignment
     1055                size_t ret;
     1056                HeapManager.Storage.Header * header = headerAddr( addr );
     1057                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
     1058                        ret = header->kind.fake.alignment & -2;         // remove flag from old value
     1059                        header->kind.fake.alignment = alignment | 1; // add flag to new value
     1060                } else {
     1061                        ret = 0;                                                                        // => no alignment to change
     1062                } // if
     1063                return ret;
     1064        } // $malloc_alignment_set
     1065
     1066
     1067        // Returns true if the allocation is zero filled, e.g., allocated by calloc().
    9881068        bool malloc_zero_fill( void * addr ) {
    9891069          if ( unlikely( addr == 0p ) ) return false;           // null allocation is not zero fill
    9901070                HeapManager.Storage.Header * header = headerAddr( addr );
    9911071                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
    992                         header = (HeapManager.Storage.Header *)((char *)header - header->kind.fake.offset);
    993                 } // if
    994                 return (header->kind.real.blockSize & 2) != 0;  // zero filled (calloc/cmemalign) ?
     1072                        header = realHeader( header );                          // backup from fake to real header
     1073                } // if
     1074                return (header->kind.real.blockSize & 2) != 0;  // zero filled ?
    9951075        } // malloc_zero_fill
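Similarly, a small sketch of the zero-fill query, again assuming the CFA heap and its extension prototype; the expected values follow from the zero-fill flag set by calloc/cmemalign:

        #include <stdbool.h>
        #include <stdio.h>
        #include <stdlib.h>

        extern bool malloc_zero_fill( void * );         // CFA heap extension

        void zero_fill_demo( void ) {
                int * a = malloc( 10 * sizeof(int) );   // uninitialized storage
                int * b = calloc( 10, sizeof(int) );    // zero-filled storage
                printf( "%d %d\n", malloc_zero_fill( a ), malloc_zero_fill( b ) ); // expected: 0 1
                free( a ); free( b );
        }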
    9961076
    997 
    998         // The malloc_usable_size() function returns the number of usable bytes in the block pointed to by ptr, a pointer to
    999         // a block of memory allocated by malloc(3) or a related function.
      1077        // Mark the allocation as zero filled and return the previous zero-filled state.
     1078        bool $malloc_zero_fill_set( void * addr ) {
     1079          if ( unlikely( addr == 0p ) ) return false;           // null allocation is not zero fill
     1080                HeapManager.Storage.Header * header = headerAddr( addr );
     1081                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
     1082                        header = realHeader( header );                          // backup from fake to real header
     1083                } // if
     1084                bool ret = (header->kind.real.blockSize & 2) != 0; // zero filled ?
     1085                header->kind.real.blockSize |= 2;                               // mark as zero filled
     1086                return ret;
     1087        } // $malloc_zero_fill_set
     1088
     1089
      1090        // Returns the original total allocation size (not the bucket size) => array size is dimension * sizeof(T).
     1091        size_t malloc_size( void * addr ) {
      1092          if ( unlikely( addr == 0p ) ) return 0;               // null allocation has zero size
     1093                HeapManager.Storage.Header * header = headerAddr( addr );
     1094                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
     1095                        header = realHeader( header );                          // backup from fake to real header
     1096                } // if
     1097                return header->size;
     1098        } // malloc_size
     1099
     1100        // Set allocation size and return previous size.
     1101        size_t $malloc_size_set( void * addr, size_t size ) {
      1102          if ( unlikely( addr == 0p ) ) return 0;               // null allocation has zero size
     1103                HeapManager.Storage.Header * header = headerAddr( addr );
     1104                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
     1105                        header = realHeader( header );                          // backup from fake to real header
     1106                } // if
     1107                size_t ret = header->size;
     1108                header->size = size;
     1109                return ret;
     1110        } // $malloc_size_set
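The distinction between malloc_size (the size originally requested) and malloc_usable_size (the bucket actually reserved) can be seen with a sketch like the following, assuming the CFA heap extensions are linked in:

        #include <stdio.h>
        #include <stdlib.h>
        #include <malloc.h>                             // malloc_usable_size

        extern size_t malloc_size( void * );            // CFA heap extension: size as requested

        void size_demo( void ) {
                void * p = malloc( 100 );
                // malloc_size reports the 100 bytes requested; malloc_usable_size reports the
                // possibly larger bucket the request was rounded up to
                printf( "%zu %zu\n", malloc_size( p ), malloc_usable_size( p ) );
                free( p );
        }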
     1111
     1112
     1113        // Returns the number of usable bytes in the block pointed to by ptr, a pointer to a block of memory allocated by
     1114        // malloc or a related function.
    10001115        size_t malloc_usable_size( void * addr ) {
    10011116          if ( unlikely( addr == 0p ) ) return 0;                       // null allocation has 0 size
     
    10091124
    10101125
    1011         // The malloc_stats() function prints (on default standard error) statistics about memory allocated by malloc(3) and
    1012         // related functions.
     1126        // Prints (on default standard error) statistics about memory allocated by malloc and related functions.
    10131127        void malloc_stats( void ) {
    10141128                #ifdef __STATISTICS__
     
    10181132        } // malloc_stats
    10191133
    1020         // The malloc_stats_fd() function changes the file descripter where malloc_stats() writes the statistics.
      1134        // Changes the file descriptor where malloc_stats() writes statistics.
    10211135        int malloc_stats_fd( int fd __attribute__(( unused )) ) {
    10221136                #ifdef __STATISTICS__
     
    10301144
    10311145
    1032         // The mallopt() function adjusts parameters that control the behavior of the memory-allocation functions (see
    1033         // malloc(3)). The param argument specifies the parameter to be modified, and value specifies the new value for that
    1034         // parameter.
     1146        // Adjusts parameters that control the behaviour of the memory-allocation functions (see malloc). The param argument
     1147        // specifies the parameter to be modified, and value specifies the new value for that parameter.
    10351148        int mallopt( int option, int value ) {
    10361149                choose( option ) {
    10371150                  case M_TOP_PAD:
    1038                         if ( setHeapExpand( value ) ) return 1;
     1151                        heapExpand = ceiling( value, pageSize ); return 1;
    10391152                  case M_MMAP_THRESHOLD:
    10401153                        if ( setMmapStart( value ) ) return 1;
     1154                        break;
    10411155                } // switch
    10421156                return 0;                                                                               // error, unsupported
    10431157        } // mallopt
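Per the switch above, only M_TOP_PAD and M_MMAP_THRESHOLD are honoured and a return value of 1 signals success; a minimal usage sketch:

        #include <malloc.h>                             // mallopt, M_TOP_PAD, M_MMAP_THRESHOLD

        void mallopt_demo( void ) {
                // M_TOP_PAD is rounded up to a multiple of the page size by the implementation above
                if ( mallopt( M_TOP_PAD, 1024 * 1024 ) != 1 ) { /* option rejected */ }
                // raise the size at which allocations switch to mmap
                if ( mallopt( M_MMAP_THRESHOLD, 8 * 1024 * 1024 ) != 1 ) { /* option rejected */ }
        }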
    10441158
    1045         // The malloc_trim() function attempts to release free memory at the top of the heap (by calling sbrk(2) with a
    1046         // suitable argument).
      1159        // Attempts to release free memory at the top of the heap (by calling sbrk with a suitable argument).
    10471160        int malloc_trim( size_t ) {
    10481161                return 0;                                                                               // => impossible to release memory
     
    10501163
    10511164
    1052         // The malloc_info() function exports an XML string that describes the current state of the memory-allocation
    1053         // implementation in the caller.  The string is printed on the file stream stream.  The exported string includes
    1054         // information about all arenas (see malloc(3)).
     1165        // Exports an XML string that describes the current state of the memory-allocation implementation in the caller.
     1166        // The string is printed on the file stream stream.  The exported string includes information about all arenas (see
     1167        // malloc).
    10551168        int malloc_info( int options, FILE * stream ) {
    10561169                if ( options != 0 ) { errno = EINVAL; return -1; }
     
    10591172
    10601173
    1061         // The malloc_get_state() function records the current state of all malloc(3) internal bookkeeping variables (but
    1062         // not the actual contents of the heap or the state of malloc_hook(3) functions pointers).  The state is recorded in
    1063         // a system-dependent opaque data structure dynamically allocated via malloc(3), and a pointer to that data
    1064         // structure is returned as the function result.  (It is the caller's responsibility to free(3) this memory.)
     1174        // Records the current state of all malloc internal bookkeeping variables (but not the actual contents of the heap
      1175        // or the state of malloc_hook function pointers).  The state is recorded in a system-dependent opaque data
     1176        // structure dynamically allocated via malloc, and a pointer to that data structure is returned as the function
     1177        // result.  (The caller must free this memory.)
    10651178        void * malloc_get_state( void ) {
    10661179                return 0p;                                                                              // unsupported
     
    10681181
    10691182
    1070         // The malloc_set_state() function restores the state of all malloc(3) internal bookkeeping variables to the values
    1071         // recorded in the opaque data structure pointed to by state.
     1183        // Restores the state of all malloc internal bookkeeping variables to the values recorded in the opaque data
     1184        // structure pointed to by state.
    10721185        int malloc_set_state( void * ptr ) {
    10731186                return 0;                                                                               // unsupported
     
    10771190
    10781191// Must have CFA linkage to overload with C linkage realloc.
    1079 void * realloc( void * oaddr, size_t nalign, size_t size ) {
     1192void * resize( void * oaddr, size_t nalign, size_t size ) {
    10801193        #ifdef __STATISTICS__
    1081         __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
     1194        __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST );
     1195        __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
    10821196        #endif // __STATISTICS__
    10831197
    10841198        // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
    1085   if ( unlikely( size == 0 ) ) { free( oaddr ); return mallocNoStats( size ); } // special cases
    1086   if ( unlikely( oaddr == 0p ) ) return mallocNoStats( size );
     1199  if ( unlikely( size == 0 ) ) { free( oaddr ); return memalignNoStats( nalign, size ); } // special cases
     1200  if ( unlikely( oaddr == 0p ) ) return memalignNoStats( nalign, size );
     1201
    10871202
    10881203        if ( unlikely( nalign == 0 ) ) nalign = libAlign();     // reset alignment to minimum
     
    10951210        HeapManager.FreeHeader * freeElem;
    10961211        size_t bsize, oalign = 0;
    1097         headers( "realloc", oaddr, header, freeElem, bsize, oalign );
     1212        headers( "resize", oaddr, header, freeElem, bsize, oalign );
    10981213        size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
    10991214
    1100   if ( oalign != 0 && (uintptr_t)oaddr % nalign == 0 ) { // has alignment and just happens to work out
    1101                 headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
    1102                 return realloc( oaddr, size );
    1103         } // if
    1104 
    1105         #ifdef __STATISTICS__
    1106         __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
    1107         #endif // __STATISTICS__
    1108 
    1109         // change size and copy old content to new storage
     1215        if ( oalign <= nalign && (uintptr_t)oaddr % nalign == 0 ) { // <= alignment and new alignment happens to match
     1216                if ( oalign >= libAlign() ) {                                   // fake header ?
     1217                        headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
     1218                } // if
     1219                if ( size <= odsize && odsize <= size * 2 ) {   // allow 50% wasted storage for smaller size
     1220                        header->kind.real.blockSize &= -2;                      // turn off 0 fill
     1221                        return oaddr;
     1222                } // if
     1223        } // if
     1224
     1225        // change size
    11101226
    11111227        void * naddr;
     
    11161232        } // if
    11171233
     1234        free( oaddr );
     1235        return naddr;
     1236} // resize
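Unlike realloc below, resize does not copy the old contents, which makes it cheaper when the data is going to be overwritten anyway. A hedged usage sketch using the C-linkage two-argument form declared in stdlib.hfa further down (the three-argument form above additionally takes an alignment):

        #include <stdlib.h>
        #include <string.h>

        extern void * resize( void * oaddr, size_t size ); // CFA heap (see stdlib.hfa below)

        void resize_demo( void ) {
                char * buf = malloc( 128 );
                // grow to 4096 bytes; the previous contents are NOT preserved, so refill before use
                buf = resize( buf, 4096 );
                memset( buf, 0, 4096 );
                free( buf );
        }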
     1237
     1238
     1239void * realloc( void * oaddr, size_t nalign, size_t size ) {
     1240        if ( unlikely( nalign == 0 ) ) nalign = libAlign();     // reset alignment to minimum
     1241        #ifdef __CFA_DEBUG__
     1242        else
     1243                checkAlign( nalign );                                                   // check alignment
     1244        #endif // __CFA_DEBUG__
     1245
     1246        HeapManager.Storage.Header * header;
     1247        HeapManager.FreeHeader * freeElem;
     1248        size_t bsize, oalign = 0;
     1249        headers( "realloc", oaddr, header, freeElem, bsize, oalign );
     1250        size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
     1251
     1252        if ( oalign <= nalign && (uintptr_t)oaddr % nalign == 0 ) { // <= alignment and new alignment happens to match
     1253                if ( oalign >= libAlign() ) {                                   // fake header ?
     1254                        headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
     1255                } // if
     1256                return realloc( oaddr, size );
     1257        } // if
     1258
     1259        // change size and copy old content to new storage
     1260
     1261        #ifdef __STATISTICS__
     1262        __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
     1263        __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
     1264        #endif // __STATISTICS__
     1265
     1266        // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
     1267  if ( unlikely( size == 0 ) ) { free( oaddr ); return memalignNoStats( nalign, size ); } // special cases
     1268  if ( unlikely( oaddr == 0p ) ) return memalignNoStats( nalign, size );
     1269
     1270        void * naddr;
     1271        if ( unlikely( header->kind.real.blockSize & 2 ) ) { // previous request zero fill
     1272                naddr = cmemalignNoStats( nalign, 1, size );    // create new aligned area
     1273        } else {
     1274                naddr = memalignNoStats( nalign, size );                // create new aligned area
     1275        } // if
     1276
    11181277        headers( "realloc", naddr, header, freeElem, bsize, oalign );
    1119         size_t ndsize = dataStorage( bsize, naddr, header ); // data storage avilable in bucket
     1278        size_t ndsize = dataStorage( bsize, naddr, header ); // data storage available in bucket
    11201279        // To preserve prior fill, the entire bucket must be copied versus the size.
    11211280        memcpy( naddr, oaddr, MIN( odsize, ndsize ) );          // copy bytes
  • libcfa/src/interpose.cfa

    rb7d6a36 r6a490b2  
    1010// Created On       : Wed Mar 29 16:10:31 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Feb 17 10:18:53 2020
    13 // Update Count     : 166
     12// Last Modified On : Fri Mar 13 17:35:37 2020
     13// Update Count     : 178
    1414//
    1515
    1616#include <stdarg.h>                                                                             // va_start, va_end
     17#include <stdio.h>
    1718#include <string.h>                                                                             // strlen
    1819#include <unistd.h>                                                                             // _exit, getpid
     
    143144void abort( const char fmt[], ... ) __attribute__(( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
    144145void abort( bool signalAbort, const char fmt[], ... ) __attribute__(( format(printf, 2, 3), __nothrow__, __leaf__, __noreturn__ ));
     146void __abort( bool signalAbort, const char fmt[], va_list args ) __attribute__(( __nothrow__, __leaf__, __noreturn__ ));
    145147
    146148extern "C" {
     
    152154                va_list argp;
    153155                va_start( argp, fmt );
    154                 abort( false, fmt, argp );
     156                __abort( false, fmt, argp );
    155157                va_end( argp );
    156158        }
     
    218220}
    219221
    220 void abort( bool signalAbort, const char fmt[], ... ) {
     222// Cannot forward va_list.
     223void __abort( bool signalAbort, const char fmt[], va_list args ) {
    221224        void * kernel_data = kernel_abort();                            // must be done here to lock down kernel
    222225        int len;
     
    228231
    229232        assert( fmt );
    230         va_list args;
    231         va_start( args, fmt );
    232 
    233233        len = vsnprintf( abort_text, abort_text_size, fmt, args );
    234         va_end( args );
    235234        __cfaabi_bits_write( STDERR_FILENO, abort_text, len );
    236235
    237236        if ( fmt[strlen( fmt ) - 1] != '\n' ) {                         // add optional newline if missing at the end of the format text
    238                 __cfaabi_dbg_write( "\n", 1 );
     237                __cfaabi_bits_write( STDERR_FILENO, "\n", 1 );
    239238        } // if
    240239        kernel_abort_msg( kernel_data, abort_text, abort_text_size );
     
    248247        va_list args;
    249248        va_start( args, fmt );
    250         abort( false, fmt, args );
     249        __abort( false, fmt, args );
     250    // CONTROL NEVER REACHES HERE!
    251251        va_end( args );
     252}
     253
     254void abort( bool signalAbort, const char fmt[], ... ) {
     255    va_list args;
     256    va_start( args, fmt );
     257    __abort( signalAbort, fmt, args );
     258    // CONTROL NEVER REACHES HERE!
     259    va_end( args );
    252260}
    253261
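The "Cannot forward va_list" note above reflects the usual C rule: a variadic function cannot pass its own ... on to another variadic function, so both abort overloads capture their arguments and call a single va_list-taking helper. A minimal sketch of the same pattern with hypothetical names:

        #include <stdarg.h>
        #include <stdio.h>

        // back end takes the already-captured argument list (names are illustrative)
        static void vreport( const char fmt[], va_list args ) {
                vfprintf( stderr, fmt, args );
        }

        // each variadic front end captures its own arguments and forwards the va_list
        static void report( const char fmt[], ... ) {
                va_list args;
                va_start( args, fmt );
                vreport( fmt, args );
                va_end( args );
        }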
  • libcfa/src/iostream.cfa

    rb7d6a36 r6a490b2  
    1010// Created On       : Wed May 27 17:56:53 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Feb 20 15:53:23 2020
    13 // Update Count     : 829
     12// Last Modified On : Sat May  2 18:30:25 2020
     13// Update Count     : 1017
    1414//
    1515
     
    2929#include <complex.h>                                                                    // creal, cimag
    3030} // extern "C"
     31
     32#include <bitmanip.hfa>                                                                 // fms
    3133
    3234
     
    459461\
    460462                if ( f.base == 'b' || f.base == 'B' ) {                 /* bespoke binary format */ \
    461                         int bits;                                                                                                       \
    462                         if ( f.val == (T){0} ) bits = 1;                        /* force at least one bit to print */ \
    463                         else bits = sizeof(long long int) * 8 - __builtin_clzll( f.val ); /* position of most significant bit */ \
    464                         bits = bits > sizeof(f.val) * 8 ? sizeof(f.val) * 8 : bits; \
    465                         int spaces = f.wd - bits;                                       /* can be negative */ \
    466                         if ( ! f.flags.nobsdp ) { spaces -= 2; }        /* base prefix takes space */ \
    467                         /* printf( "%d %d\n", bits, spaces ); */ \
     463                        int bits = high1( f.val );                                      /* position of most significant bit */ \
     464                        if ( bits == 0 ) bits = 1;                                      /* 0 value => force one bit to print */ \
     465                        int spaces; \
    468466                        if ( ! f.flags.left ) {                                         /* right justified ? */ \
    469467                                /* Note, base prefix then zero padding or spacing then prefix. */ \
    470                                 if ( f.flags.pad0 || f.flags.pc ) { \
     468                                if ( f.flags.pc ) { \
     469                                        spaces = f.wd - f.pc; \
     470                                        if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \
     471                                        if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \
    471472                                        if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
    472                                         if ( f.flags.pc ) spaces = f.pc - bits; \
     473                                        spaces = f.pc - bits; \
    473474                                        if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \
    474475                                } else { \
    475                                         if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \
    476                                         if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
     476                                        spaces = f.wd - bits; \
     477                                        if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \
     478                                        if ( f.flags.pad0 ) { \
     479                                                if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
     480                                                if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \
     481                                        } else { \
     482                                                if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \
     483                                                if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
     484                                        } /* if */ \
    477485                                } /* if */ \
    478                         } else if ( ! f.flags.nobsdp ) { \
    479                                 fmt( os, "0%c", f.base ); \
     486                        } else { \
     487                                if ( ! f.flags.nobsdp ) fmt( os, "0%c", f.base ); \
     488                                if ( f.flags.pc ) { \
     489                                        spaces = f.pc - bits; \
     490                                        if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \
     491                                        spaces = f.wd - f.pc; \
     492                                } else { /* pad0 flag ignored with left flag */ \
     493                                        spaces = f.wd - bits; \
     494                                } /* if */ \
     495                                if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \
    480496                        } /* if */ \
    481                         int shift = (bits - 1) / 4 * 4; /* floor( bits - 1, 4 ) */ \
     497                        int shift = floor( bits - 1, 4 ); \
    482498                        typeof( f.val ) temp = f.val; \
    483499                        fmt( os, "%s", shortbin[(temp >> shift) & 0xf] ); \
     
    534550#define IntegralFMTImpl128( T, SIGNED, CODE, IFMTNP, IFMTP ) \
    535551forall( dtype ostype | ostream( ostype ) ) \
    536 static void base10_128( ostype & os, _Ostream_Manip(T) fmt ) { \
    537         if ( fmt.val > UINT64_MAX ) { \
    538                 fmt.val /= P10_UINT64; \
    539                 base10_128( os, fmt ); /* recursive */ \
    540                 _Ostream_Manip(unsigned long long int) fmt2 @= { (uint64_t)(fmt.val % P10_UINT64), 0, 19, 'u', { .all : 0 } }; \
    541                 fmt2.flags.nobsdp = true; \
    542                 printf( "fmt2 %c %lld %d\n", fmt2.base, fmt2.val, fmt2.all );   \
     552static void base10_128( ostype & os, _Ostream_Manip(T) f ) { \
     553        if ( f.val > UINT64_MAX ) { \
     554                unsigned long long int lsig = f.val % P10_UINT64; \
     555                f.val /= P10_UINT64; /* msig */ \
     556                base10_128( os, f ); /* recursion */ \
     557                _Ostream_Manip(unsigned long long int) fmt @= { lsig, 0, 19, 'u', { .all : 0 } }; \
     558                fmt.flags.nobsdp = true; \
     559                /* printf( "fmt1 %c %lld %d\n", fmt.base, fmt.val, fmt.all ); */ \
    543560                sepOff( os ); \
    544                 (ostype &)(os | fmt2); \
     561                (ostype &)(os | fmt); \
    545562        } else { \
    546                 printf( "fmt %c %lld %d\n", fmt.base, fmt.val, fmt.all ); \
     563                /* printf( "fmt2 %c %lld %d\n", f.base, (unsigned long long int)f.val, f.all ); */ \
     564                _Ostream_Manip(SIGNED long long int) fmt @= { (SIGNED long long int)f.val, f.wd, f.pc, f.base, { .all : f.all } }; \
    547565                (ostype &)(os | fmt); \
    548566        } /* if */ \
    549 } /* base10_128 */                                                \
     567} /* base10_128 */ \
    550568forall( dtype ostype | ostream( ostype ) ) { \
    551569        ostype & ?|?( ostype & os, _Ostream_Manip(T) f ) { \
    552570                if ( $sepPrt( os ) ) fmt( os, "%s", $sepGetCur( os ) ); \
    553571\
    554                 if ( f.base == 'b' | f.base == 'o' | f.base == 'x' | f.base == 'X' ) { \
     572                if ( f.base == 'b' | f.base == 'B' | f.base == 'o' | f.base == 'x' | f.base == 'X' ) { \
    555573                        unsigned long long int msig = (unsigned long long int)(f.val >> 64); \
    556574                        unsigned long long int lsig = (unsigned long long int)(f.val); \
     
    562580                        } else { \
    563581                                fmt2.flags.pad0 = fmt2.flags.nobsdp = true;     \
    564                                 if ( f.base == 'b' ) { \
    565                                         if ( f.wd > 64 ) fmt.wd = f.wd - 64; \
    566                                         fmt2.wd = 64; \
     582                                if ( f.base == 'b' | f.base == 'B' ) { \
     583                                        if ( fmt.flags.pc && fmt.pc > 64 ) fmt.pc -= 64; else { fmt.flags.pc = false; fmt.pc = 0; } \
     584                                        if ( fmt.flags.left ) { \
     585                                                fmt.flags.left = false; \
     586                                                fmt.wd = 0; \
     587                                                /* printf( "L %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
     588                                                fmt2.flags.left = true; \
     589                                                int msigd = high1( msig ); \
     590                                                fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
     591                                                if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; /* compensate for 0b base specifier */ \
     592                                                if ( (int)fmt2.wd < 64 ) fmt2.wd = 64; /* cast deals with negative value */ \
     593                                                fmt2.flags.pc = true; fmt2.pc = 64; \
     594                                        } else { \
     595                                                if ( fmt.wd > 64 ) fmt.wd -= 64; \
     596                                                else fmt.wd = 1; \
     597                                                /* printf( "R %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
     598                                                fmt2.wd = 64; \
     599                                        } /* if */ \
     600                                        /* printf( "C %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
    567601                                        (ostype &)(os | fmt | "" | fmt2); \
    568602                                } else if ( f.base == 'o' ) { \
     603                                        if ( fmt.flags.pc && fmt.pc > 22 ) fmt.pc -= 22; else { fmt.flags.pc = false; fmt.pc = 0; } \
    569604                                        fmt.val = (unsigned long long int)fmt.val >> 2; \
    570                                         if ( f.wd > 21 ) fmt.wd = f.wd - 21; \
    571                                         fmt2.wd = 1; \
    572                                         fmt2.val = ((msig & 0x3) << 1) + 1; \
    573                                         (ostype &)(os | fmt | "" | fmt2); \
    574                                         sepOff( os ); \
    575                                         fmt2.wd = 21; \
    576                                         fmt2.val = lsig & 0x7fffffffffffffff; \
     605                                        fmt2.val = ((msig & 0x3) << 1) + ((lsig & 0x8000000000000000U) != 0); \
     606                                        if ( fmt.flags.left ) { \
     607                                                fmt.flags.left = false; \
     608                                                fmt.wd = 0; \
     609                                                /* printf( "L %llo %llo %llo %d %d '%c' %x %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all, fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
     610                                                (ostype &)(os | fmt | "" | fmt2); \
     611                                                sepOff( os ); \
     612                                                fmt2.flags.left = true; \
     613                                                int msigd = ceiling( high1( fmt.val ), 3 ); \
     614                                                fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
     615                                                if ( ! fmt.flags.nobsdp ) fmt2.wd -= 1; /* compensate for 0 base specifier */ \
     616                                                if ( (int)fmt2.wd < 21 ) fmt2.wd = 21; /* cast deals with negative value */ \
     617                                                fmt2.flags.pc = true; fmt2.pc = 21; \
     618                                        } else { \
     619                                                if ( fmt.wd > 22 ) fmt.wd -= 22; \
     620                                                else fmt.wd = 1; \
     621                                                /* printf( "R %llo %llo %llo %d %d '%c' %x %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all, fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
     622                                                (ostype &)(os | fmt | "" | fmt2); \
     623                                                sepOff( os ); \
     624                                                fmt2.wd = 21; \
     625                                        } /* if */ \
     626                                        fmt2.val = lsig & 0x7fffffffffffffffU; \
     627                                        /* printf( "\nC %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
    577628                                        (ostype &)(os | fmt2); \
    578                                 } else { \
    579                                         if ( f.flags.left ) { \
    580                                                 if ( f.wd > 16 ) fmt2.wd = f.wd - 16;   \
    581                                                 fmt.wd = 16;                                                    \
     629                                } else { /* f.base == 'x'  | f.base == 'X' */ \
     630                                        if ( fmt.flags.pc && fmt.pc > 16 ) fmt.pc -= 16; else { fmt.flags.pc = false; fmt.pc = 0; } \
     631                                        if ( fmt.flags.left ) { \
     632                                                fmt.flags.left = false; \
     633                                                fmt.wd = 0; \
     634                                                /* printf( "L %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
     635                                                fmt2.flags.left = true; \
     636                                                int msigd = high1( msig ); \
     637                                                fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
     638                                                if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; /* compensate for 0x base specifier */ \
     639                                                if ( (int)fmt2.wd < 16 ) fmt2.wd = 16; /* cast deals with negative value */ \
     640                                                fmt2.flags.pc = true; fmt2.pc = 16; \
    582641                                        } else { \
    583                                                 if ( f.wd > 16 ) fmt.wd = f.wd - 16;    \
    584                                                 fmt2.wd = 16;                                                   \
     642                                                if ( fmt.wd > 16 ) fmt.wd -= 16; \
     643                                                else fmt.wd = 1; \
     644                                                /* printf( "R %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
     645                                                fmt2.wd = 16; \
    585646                                        } /* if */ \
     647                                        /* printf( "C %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
    586648                                        (ostype &)(os | fmt | "" | fmt2); \
    587649                                } /* if */ \
    588650                        } /* if */ \
    589651                } else { \
     652                        if ( CODE == 'd' ) { \
     653                                if ( f.val < 0 )  { fmt( os, "-" ); sepOff( os ); f.val = -f.val; f.flags.sign = false; } \
     654                        } /* if */ \
    590655                        base10_128( os, f ); \
    591656                } /* if */ \
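The base10_128 recursion above prints a 128-bit value by repeatedly splitting off the low 19 decimal digits, since 10^19 is the largest power of ten that fits in 64 bits. A rough C sketch of the same idea, assuming the GCC/Clang unsigned __int128 extension and that P10_UINT64 is 10^19:

        #include <inttypes.h>
        #include <stdio.h>

        #define P10_UINT64 10000000000000000000ULL      // 10^19, assumed definition

        static void print_u128( unsigned __int128 val ) {
                if ( val > UINT64_MAX ) {
                        uint64_t lsig = (uint64_t)(val % P10_UINT64); // low 19 decimal digits
                        print_u128( val / P10_UINT64 );  // print the more significant digits first
                        printf( "%019" PRIu64, lsig );   // zero pad to preserve place value
                } else {
                        printf( "%" PRIu64, (uint64_t)val ); // small enough to print directly
                } // if
        }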
  • libcfa/src/startup.cfa

    rb7d6a36 r6a490b2  
    1414//
    1515
    16 #include <time.h>                                                                               // tzset
     16#include <time.h>                // tzset
     17#include <locale.h>        // setlocale
    1718#include "startup.hfa"
    1819
     
    2122    void __cfaabi_appready_startup( void ) {
    2223                tzset();                                                                                // initialize time global variables
     24                setlocale(LC_NUMERIC, "");
    2325                #ifdef __CFA_DEBUG__
    2426                extern void heapAppStart();
     
    4143struct __spinlock_t;
    4244extern "C" {
    43         void __cfaabi_dbg_record(struct __spinlock_t & this, const char prev_name[]) __attribute__(( weak )) {}
     45        void __cfaabi_dbg_record_lock(struct __spinlock_t & this, const char prev_name[]) __attribute__(( weak )) {}
    4446}
    4547
  • libcfa/src/stdhdr/malloc.h

    rb7d6a36 r6a490b2  
    1010// Created On       : Thu Jul 20 15:58:16 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sat Aug 11 09:06:31 2018
    13 // Update Count     : 10
     12// Last Modified On : Thu Apr 16 22:44:06 2020
     13// Update Count     : 13
    1414//
    1515
     
    3131
    3232extern "C" {
     33void * aalloc( size_t noOfElems, size_t elemSize );
     34void * amemalign( size_t alignment, size_t noOfElems, size_t elemSize );
     35void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize );
    3336size_t malloc_alignment( void * );
    3437bool malloc_zero_fill( void * );
     38size_t malloc_size( void * );
    3539int malloc_stats_fd( int fd );
    36 void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize );
    3740} // extern "C"
    3841
  • libcfa/src/stdlib.cfa

    rb7d6a36 r6a490b2  
    1010// Created On       : Thu Jan 28 17:10:29 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 08:27:08 2020
    13 // Update Count     : 486
     12// Last Modified On : Thu Apr 16 22:43:33 2020
     13// Update Count     : 498
    1414//
    1515
     
    2020#define _XOPEN_SOURCE 600                                                               // posix_memalign, *rand48
    2121#include <string.h>                                                                             // memcpy, memset
    22 #include <malloc.h>                                                                             // malloc_usable_size
    2322//#include <math.h>                                                                             // fabsf, fabs, fabsl
    2423#include <complex.h>                                                                    // _Complex_I
     
    3837        } // alloc_set
    3938
     39        T * alloc_set( T ptr[], size_t dim, T fill ) {          // realloc array with fill
     40                size_t olen = malloc_usable_size( ptr );                // current allocation
     41                void * nptr = (void *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
     42                size_t nlen = malloc_usable_size( nptr );               // new allocation
     43                if ( nlen > olen ) {                                                    // larger ?
     44                        for ( i; malloc_size( ptr ) / sizeof(T) ~ dim ) {
     45                                memcpy( &ptr[i], &fill, sizeof(T) );    // initialize with fill value
     46                        } // for
     47                } // if
     48                return (T *)nptr;
      49        } // alloc_set
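The fill variant above grows an array and initializes only the newly added elements with the fill value. A plain C approximation of the pattern, monomorphic on int and passing the old dimension explicitly instead of recovering it with malloc_size:

        #include <stdlib.h>

        // grow an int array to dim elements, filling any slots beyond olddim with fill
        static int * grow_with_fill( int * ptr, size_t olddim, size_t dim, int fill ) {
                int * nptr = realloc( ptr, dim * sizeof(int) );
                if ( nptr == NULL ) return NULL;        // original array left untouched
                for ( size_t i = olddim; i < dim; i += 1 ) {
                        nptr[i] = fill;                 // initialize added elements only
                } // for
                return nptr;
        }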
     50
    4051        T * alloc_align_set( T ptr[], size_t align, char fill ) { // aligned realloc with fill
    4152                size_t olen = malloc_usable_size( ptr );                // current allocation
     
    4556                if ( nlen > olen ) {                                                    // larger ?
    4657                        memset( (char *)nptr + olen, (int)fill, nlen - olen ); // initialize added storage
     58                } // if
     59                return (T *)nptr;
     60        } // alloc_align_set
     61
     62        T * alloc_align_set( T ptr[], size_t align, size_t dim, T fill ) { // aligned realloc with fill
     63                size_t olen = malloc_usable_size( ptr );                // current allocation
     64                void * nptr = (void *)realloc( (void *)ptr, align, sizeof(T) ); // CFA realloc
     65                // char * nptr = alloc_align( ptr, align );
     66                size_t nlen = malloc_usable_size( nptr );               // new allocation
     67                if ( nlen > olen ) {                                                    // larger ?
     68                        for ( i; dim ) { memcpy( &ptr[i], &fill, sizeof(T) ); } // initialize with fill value
    4769                } // if
    4870                return (T *)nptr;
  • libcfa/src/stdlib.hfa

    rb7d6a36 r6a490b2  
    1010// Created On       : Thu Jan 28 17:12:35 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Tue Feb  4 08:27:01 2020
    13 // Update Count     : 401
     12// Last Modified On : Thu Apr 16 22:44:05 2020
     13// Update Count     : 432
    1414//
    1515
     
    2121#include <stdlib.h>                                                                             // *alloc, strto*, ato*
    2222
     23// Reduce includes by explicitly defining these routines.
    2324extern "C" {
    2425        void * memalign( size_t align, size_t size );           // malloc.h
     26        size_t malloc_usable_size( void * ptr );                        // malloc.h
     27        size_t malloc_size( void * addr );                                      // CFA heap
     28        void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ); // CFA heap
    2529        void * memset( void * dest, int fill, size_t size ); // string.h
    2630        void * memcpy( void * dest, const void * src, size_t size ); // string.h
    27     void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ); // CFA heap
     31        void * resize( void * oaddr, size_t size );                     // CFA heap
    2832} // extern "C"
    2933
     34void * resize( void * oaddr, size_t nalign, size_t size ); // CFA heap
    3035void * realloc( void * oaddr, size_t nalign, size_t size ); // CFA heap
    3136
     
    4045
    4146static inline forall( dtype T | sized(T) ) {
    42         // C dynamic allocation
     47        // Cforall safe equivalents, i.e., implicit size specification
    4348
    4449        T * malloc( void ) {
     
    7176                return posix_memalign( (void **)ptr, align, sizeof(T) ); // C posix_memalign
    7277        } // posix_memalign
    73 
    74         // Cforall dynamic allocation
     78} // distribution
     79
     80static inline forall( dtype T | sized(T) ) {
     81        // Cforall safe general allocation, fill, resize, array
    7582
    7683        T * alloc( void ) {
     
    8390        } // alloc
    8491
    85         T * alloc( T ptr[], size_t dim ) {                                      // realloc
    86                 return (T *)(void *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
     92        forall( dtype S | sized(S) )
     93        T * alloc( S ptr[], size_t dim = 1 ) {                          // singleton/array resize
     94                size_t len = malloc_usable_size( ptr );                 // current bucket size
     95                if ( sizeof(T) * dim > len ) {                                  // not enough space ?
     96                        T * temp = alloc( dim );                                        // new storage
     97                        free( ptr );                                                            // free old storage
     98                        return temp;
     99                } else {
     100                        return (T *)ptr;
     101                } // if
     102        } // alloc
     103
     104        T * alloc( T ptr[], size_t dim, bool copy = true ) {
     105                if ( copy ) {                                                                   // realloc
     106                        return (T *)(void *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
     107                } else {
     108                        struct __Unknown {};
     109                        return alloc( (__Unknown *)ptr, dim );          // reuse, cheat making T/S different types
     110                } // if
    87111        } // alloc
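The copy flag above chooses between preserving the old contents and simply obtaining fresh storage of the right size. In plain C the same decision looks roughly like this (hypothetical helper, int-specific):

        #include <stdlib.h>

        // resize an int array to dim elements; when keep is false the old contents are discarded,
        // which avoids the copy that realloc would otherwise perform
        static int * resize_ints( int * ptr, size_t dim, int keep ) {
                if ( keep ) return realloc( ptr, dim * sizeof(int) );
                free( ptr );
                return malloc( dim * sizeof(int) );
        }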
    88112
     
    112136forall( dtype T | sized(T) ) {
    113137        T * alloc_set( T ptr[], size_t dim, char fill );        // realloc array with fill
     138        T * alloc_set( T ptr[], size_t dim, T fill );           // realloc array with fill
    114139} // distribution
    115140
     
    125150        T * alloc_align( T ptr[], size_t align ) {                      // aligned realloc array
    126151                return (T *)(void *)realloc( (void *)ptr, align, sizeof(T) ); // CFA realloc
     152        } // alloc_align
     153
     154        forall( dtype S | sized(S) )
     155        T * alloc_align( S ptr[], size_t align ) {                      // aligned reuse array
     156                return (T *)(void *)resize( (void *)ptr, align, sizeof(T) ); // CFA realloc
    127157        } // alloc_align
    128158
     
    155185
    156186forall( dtype T | sized(T) ) {
     187        T * alloc_align_set( T ptr[], size_t align, char fill ); // aligned realloc with fill
     188        T * alloc_align_set( T ptr[], size_t align, T fill ); // aligned realloc with fill
    157189        T * alloc_align_set( T ptr[], size_t align, size_t dim, char fill ); // aligned realloc array with fill
    158 } // distribution
    159 
    160 static inline forall( dtype T | sized(T) ) {
    161         // data, non-array types
     190        T * alloc_align_set( T ptr[], size_t align, size_t dim, T fill ); // aligned realloc array with fill
     191} // distribution
     192
     193static inline forall( dtype T | sized(T) ) {
     194        // Cforall safe initialization/copy, i.e., implicit size specification, non-array types
    162195        T * memset( T * dest, char fill ) {
    163196                return (T *)memset( dest, fill, sizeof(T) );
     
    170203
    171204static inline forall( dtype T | sized(T) ) {
    172         // data, array types
     205        // Cforall safe initialization/copy, i.e., implicit size specification, array types
    173206        T * amemset( T dest[], char fill, size_t dim ) {
    174207                return (T *)(void *)memset( dest, fill, dim * sizeof(T) ); // C memset
     
    180213} // distribution
    181214
    182 // allocation/deallocation and constructor/destructor, non-array types
     215// Cforall allocation/deallocation and constructor/destructor, non-array types
    183216forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * new( Params p );
    184217forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void delete( T * ptr );
    185218forall( dtype T, ttype Params | sized(T) | { void ^?{}( T & ); void delete( Params ); } ) void delete( T * ptr, Params rest );
    186219
    187 // allocation/deallocation and constructor/destructor, array types
     220// Cforall allocation/deallocation and constructor/destructor, array types
    188221forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * anew( size_t dim, Params p );
    189222forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void adelete( size_t dim, T arr[] );
  • src/AST/Convert.cpp

    rb7d6a36 r6a490b2  
    493493        }
    494494
     495        const ast::Stmt * visit(const ast::SuspendStmt * node ) override final {
     496                if ( inCache( node ) ) return nullptr;
     497                auto stmt = new SuspendStmt();
     498                stmt->then   = get<CompoundStmt>().accept1( node->then   );
     499                switch(node->type) {
     500                        case ast::SuspendStmt::None     : stmt->type = SuspendStmt::None     ; break;
     501                        case ast::SuspendStmt::Coroutine: stmt->type = SuspendStmt::Coroutine; break;
     502                        case ast::SuspendStmt::Generator: stmt->type = SuspendStmt::Generator; break;
     503                }
     504                return stmtPostamble( stmt, node );
     505        }
     506
    495507        const ast::Stmt * visit( const ast::WaitForStmt * node ) override final {
    496508                if ( inCache( node ) ) return nullptr;
     
    18591871        }
    18601872
     1873        virtual void visit( const SuspendStmt * old ) override final {
     1874                if ( inCache( old ) ) return;
     1875                ast::SuspendStmt::Type type;
     1876                switch (old->type) {
     1877                        case SuspendStmt::Coroutine: type = ast::SuspendStmt::Coroutine; break;
     1878                        case SuspendStmt::Generator: type = ast::SuspendStmt::Generator; break;
     1879                        case SuspendStmt::None     : type = ast::SuspendStmt::None     ; break;
     1880                        default: abort();
     1881                }
     1882                this->node = new ast::SuspendStmt(
     1883                        old->location,
     1884                        GET_ACCEPT_1(then  , CompoundStmt),
     1885                        type,
     1886                        GET_LABELS_V(old->labels)
     1887                );
     1888                cache.emplace( old, this->node );
     1889        }
     1890
    18611891        virtual void visit( const WaitForStmt * old ) override final {
    18621892                if ( inCache( old ) ) return;
  • src/AST/Decl.hpp

    rb7d6a36 r6a490b2  
    259259
    260260        bool is_coroutine() { return kind == Coroutine; }
    261         bool is_monitor() { return kind == Monitor; }
    262         bool is_thread() { return kind == Thread; }
     261        bool is_generator() { return kind == Generator; }
     262        bool is_monitor  () { return kind == Monitor  ; }
     263        bool is_thread   () { return kind == Thread   ; }
    263264
    264265        const Decl * accept( Visitor & v ) const override { return v.visit( this ); }
  • src/AST/Fwd.hpp

    rb7d6a36 r6a490b2  
    5353class CatchStmt;
    5454class FinallyStmt;
     55class SuspendStmt;
    5556class WaitForStmt;
    5657class WithStmt;
  • src/AST/Pass.hpp

    rb7d6a36 r6a490b2  
    111111        const ast::Stmt *             visit( const ast::CatchStmt            * ) override final;
    112112        const ast::Stmt *             visit( const ast::FinallyStmt          * ) override final;
     113        const ast::Stmt *             visit( const ast::SuspendStmt          * ) override final;
    113114        const ast::Stmt *             visit( const ast::WaitForStmt          * ) override final;
    114115        const ast::Decl *             visit( const ast::WithStmt             * ) override final;
  • src/AST/Pass.impl.hpp

    rb7d6a36 r6a490b2  
    823823
    824824//--------------------------------------------------------------------------
      825// SuspendStmt
     826template< typename pass_t >
     827const ast::Stmt * ast::Pass< pass_t >::visit( const ast::SuspendStmt * node ) {
     828        VISIT_START( node );
     829
     830        VISIT(
     831                maybe_accept( node, &SuspendStmt::then   );
     832        )
     833
     834        VISIT_END( Stmt, node );
     835}
     836
     837//--------------------------------------------------------------------------
    825838// WaitForStmt
    826839template< typename pass_t >
  • src/AST/Print.cpp

    rb7d6a36 r6a490b2  
    674674                safe_print( node->body );
    675675                --indent;
     676
     677                return node;
     678        }
     679
     680        virtual const ast::Stmt * visit( const ast::SuspendStmt * node ) override final {
     681                os << "Suspend Statement";
     682                switch (node->type) {
     683                        case ast::SuspendStmt::None     : os << " with implicit target"; break;
     684                        case ast::SuspendStmt::Generator: os << " for generator"; break;
     685                        case ast::SuspendStmt::Coroutine: os << " for coroutine"; break;
     686                }
     687                os << endl;
     688
     689                ++indent;
     690                if(node->then) {
     691                        os << indent << " with post statement :" << endl;
     692                        safe_print( node->then );
     693                }
      694                --indent;
    676695
    677696                return node;
  • src/AST/Stmt.hpp

    rb7d6a36 r6a490b2  
    342342};
    343343
     344/// Suspend statement
     345class SuspendStmt final : public Stmt {
     346public:
     347        ptr<CompoundStmt> then;
     348        enum Type { None, Coroutine, Generator } type = None;
     349
     350        SuspendStmt( const CodeLocation & loc, const CompoundStmt * then, Type type, std::vector<Label> && labels = {} )
     351        : Stmt(loc, std::move(labels)), then(then), type(type) {}
     352
     353        const Stmt * accept( Visitor & v ) const override { return v.visit( this ); }
     354private:
     355        SuspendStmt * clone() const override { return new SuspendStmt{ *this }; }
     356        MUTATE_FRIEND
     357};
     358
    344359/// Wait for concurrency statement `when (...) waitfor (... , ...) ... timeout(...) ... else ...`
    345360class WaitForStmt final : public Stmt {
  • src/AST/Visitor.hpp

    rb7d6a36 r6a490b2  
    4747    virtual const ast::Stmt *             visit( const ast::CatchStmt            * ) = 0;
    4848    virtual const ast::Stmt *             visit( const ast::FinallyStmt          * ) = 0;
     49    virtual const ast::Stmt *             visit( const ast::SuspendStmt          * ) = 0;
    4950    virtual const ast::Stmt *             visit( const ast::WaitForStmt          * ) = 0;
    5051    virtual const ast::Decl *             visit( const ast::WithStmt             * ) = 0;
  • src/Common/PassVisitor.h

    rb7d6a36 r6a490b2  
    110110        virtual void visit( FinallyStmt * finallyStmt ) override final;
    111111        virtual void visit( const FinallyStmt * finallyStmt ) override final;
     112        virtual void visit( SuspendStmt * suspendStmt ) override final;
     113        virtual void visit( const SuspendStmt * suspendStmt ) override final;
    112114        virtual void visit( WaitForStmt * waitforStmt ) override final;
    113115        virtual void visit( const WaitForStmt * waitforStmt ) override final;
     
    276278        virtual Statement * mutate( CatchStmt * catchStmt ) override final;
    277279        virtual Statement * mutate( FinallyStmt * finallyStmt ) override final;
     280        virtual Statement * mutate( SuspendStmt * suspendStmt ) override final;
    278281        virtual Statement * mutate( WaitForStmt * waitforStmt ) override final;
    279282        virtual Declaration * mutate( WithStmt * withStmt ) override final;
  • src/Common/PassVisitor.impl.h

    rb7d6a36 r6a490b2  
    15221522
    15231523//--------------------------------------------------------------------------
     1524// SuspendStmt
     1525template< typename pass_type >
     1526void PassVisitor< pass_type >::visit( SuspendStmt * node ) {
     1527        VISIT_START( node );
     1528
     1529        maybeAccept_impl( node->then  , *this );
     1530
     1531        VISIT_END( node );
     1532}
     1533
     1534template< typename pass_type >
     1535void PassVisitor< pass_type >::visit( const SuspendStmt * node ) {
     1536        VISIT_START( node );
     1537
     1538        maybeAccept_impl( node->then  , *this );
     1539
     1540        VISIT_END( node );
     1541}
     1542
     1543template< typename pass_type >
     1544Statement * PassVisitor< pass_type >::mutate( SuspendStmt * node ) {
     1545        MUTATE_START( node );
     1546
     1547        maybeMutate_impl( node->then  , *this );
     1548
     1549        MUTATE_END( Statement, node );
     1550}
     1551
     1552//--------------------------------------------------------------------------
    15241553// WaitForStmt
    15251554template< typename pass_type >
  • src/CompilationState.cc

    rb7d6a36 r6a490b2  
    2727        nopreludep = false,
    2828        genproto = false,
     29        deterministic_output = false,
    2930        nomainp = false,
    3031        parsep = false,
  • src/CompilationState.h

    rb7d6a36 r6a490b2  
    2828        nopreludep,
    2929        genproto,
     30        deterministic_output,
    3031        nomainp,
    3132        parsep,
  • src/Concurrency/Keywords.cc

    rb7d6a36 r6a490b2  
    1616#include "Concurrency/Keywords.h"
    1717
    18 #include <cassert>                 // for assert
    19 #include <string>                  // for string, operator==
    20 
    21 #include "Common/PassVisitor.h"    // for PassVisitor
    22 #include "Common/SemanticError.h"  // for SemanticError
    23 #include "Common/utility.h"        // for deleteAll, map_range
    24 #include "CodeGen/OperatorTable.h" // for isConstructor
    25 #include "InitTweak/InitTweak.h"   // for getPointerBase
    26 #include "SynTree/LinkageSpec.h"   // for Cforall
    27 #include "SynTree/Constant.h"      // for Constant
    28 #include "SynTree/Declaration.h"   // for StructDecl, FunctionDecl, ObjectDecl
    29 #include "SynTree/Expression.h"    // for VariableExpr, ConstantExpr, Untype...
    30 #include "SynTree/Initializer.h"   // for SingleInit, ListInit, Initializer ...
    31 #include "SynTree/Label.h"         // for Label
    32 #include "SynTree/Statement.h"     // for CompoundStmt, DeclStmt, ExprStmt
    33 #include "SynTree/Type.h"          // for StructInstType, Type, PointerType
    34 #include "SynTree/Visitor.h"       // for Visitor, acceptAll
     18#include <cassert>                        // for assert
     19#include <string>                         // for string, operator==
     20
     21#include "Common/PassVisitor.h"           // for PassVisitor
     22#include "Common/SemanticError.h"         // for SemanticError
     23#include "Common/utility.h"               // for deleteAll, map_range
     24#include "CodeGen/OperatorTable.h"        // for isConstructor
      25#include "ControlStruct/LabelGenerator.h" // for LabelGenerator
     26#include "InitTweak/InitTweak.h"          // for getPointerBase
     27#include "SynTree/LinkageSpec.h"          // for Cforall
     28#include "SynTree/Constant.h"             // for Constant
     29#include "SynTree/Declaration.h"          // for StructDecl, FunctionDecl, ObjectDecl
     30#include "SynTree/Expression.h"           // for VariableExpr, ConstantExpr, Untype...
     31#include "SynTree/Initializer.h"          // for SingleInit, ListInit, Initializer ...
     32#include "SynTree/Label.h"                // for Label
     33#include "SynTree/Statement.h"            // for CompoundStmt, DeclStmt, ExprStmt
     34#include "SynTree/Type.h"                 // for StructInstType, Type, PointerType
     35#include "SynTree/Visitor.h"              // for Visitor, acceptAll
    3536
    3637class Attribute;
     
    8889        //      int data;                                  int data;
    8990        //      a_struct_t more_data;                      a_struct_t more_data;
    90         //                                =>             thread_desc __thrd_d;
     91        //                                =>             $thread __thrd_d;
    9192        // };                                        };
    92         //                                           static inline thread_desc * get_thread( MyThread * this ) { return &this->__thrd_d; }
     93        //                                           static inline $thread * get_thread( MyThread * this ) { return &this->__thrd_d; }
    9394        //
    9495        class ThreadKeyword final : public ConcurrentSueKeyword {
     
    9697
    9798                ThreadKeyword() : ConcurrentSueKeyword(
    98                         "thread_desc",
     99                        "$thread",
    99100                        "__thrd",
    100101                        "get_thread",
     
    120121        //      int data;                                  int data;
    121122        //      a_struct_t more_data;                      a_struct_t more_data;
    122         //                                =>             coroutine_desc __cor_d;
     123        //                                =>             $coroutine __cor_d;
    123124        // };                                        };
    124         //                                           static inline coroutine_desc * get_coroutine( MyCoroutine * this ) { return &this->__cor_d; }
     125        //                                           static inline $coroutine * get_coroutine( MyCoroutine * this ) { return &this->__cor_d; }
    125126        //
    126127        class CoroutineKeyword final : public ConcurrentSueKeyword {
     
    128129
    129130                CoroutineKeyword() : ConcurrentSueKeyword(
    130                         "coroutine_desc",
     131                        "$coroutine",
    131132                        "__cor",
    132133                        "get_coroutine",
     
    147148        };
    148149
     150
     151
    149152        //-----------------------------------------------------------------------------
    150153        //Handles monitor type declarations :
     
    152155        //      int data;                                  int data;
    153156        //      a_struct_t more_data;                      a_struct_t more_data;
    154         //                                =>             monitor_desc __mon_d;
     157        //                                =>             $monitor __mon_d;
    155158        // };                                        };
    156         //                                           static inline monitor_desc * get_coroutine( MyMonitor * this ) { return &this->__cor_d; }
     159        //                                           static inline $monitor * get_coroutine( MyMonitor * this ) { return &this->__cor_d; }
    157160        //
    158161        class MonitorKeyword final : public ConcurrentSueKeyword {
     
    160163
    161164                MonitorKeyword() : ConcurrentSueKeyword(
    162                         "monitor_desc",
     165                        "$monitor",
    163166                        "__mon",
    164167                        "get_monitor",
     
    180183
    181184        //-----------------------------------------------------------------------------
     185        //Handles generator type declarations :
     186        // generator MyGenerator {                   struct MyGenerator {
     187        //      int data;                                  int data;
     188        //      a_struct_t more_data;                      a_struct_t more_data;
     189        //                                =>             int __gen_next;
     190        // };                                        };
     191        //
     192        class GeneratorKeyword final : public ConcurrentSueKeyword {
     193          public:
     194
     195                GeneratorKeyword() : ConcurrentSueKeyword(
     196                        "$generator",
     197                        "__generator_state",
     198                        "get_generator",
     199                        "Unable to find builtin type $generator\n",
     200                        true,
     201                        AggregateDecl::Generator
     202                )
     203                {}
     204
     205                virtual ~GeneratorKeyword() {}
     206
     207                virtual bool is_target( StructDecl * decl ) override final { return decl->is_generator(); }
     208
     209                static void implement( std::list< Declaration * > & translationUnit ) {
     210                        PassVisitor< GeneratorKeyword > impl;
     211                        mutateAll( translationUnit, impl );
     212                }
     213        };
     214
     215
     216        //-----------------------------------------------------------------------------
     217        class SuspendKeyword final : public WithStmtsToAdd, public WithGuards {
     218        public:
     219                SuspendKeyword() = default;
     220                virtual ~SuspendKeyword() = default;
     221
     222                void  premutate( FunctionDecl * );
     223                DeclarationWithType * postmutate( FunctionDecl * );
     224
     225                Statement * postmutate( SuspendStmt * );
     226
     227                static void implement( std::list< Declaration * > & translationUnit ) {
     228                        PassVisitor< SuspendKeyword > impl;
     229                        mutateAll( translationUnit, impl );
     230                }
     231
     232        private:
     233                DeclarationWithType * is_main( FunctionDecl * );
     234                bool is_real_suspend( FunctionDecl * );
     235
     236                Statement * make_generator_suspend( SuspendStmt * );
     237                Statement * make_coroutine_suspend( SuspendStmt * );
     238
     239                struct LabelPair {
     240                        Label obj;
     241                        int   idx;
     242                };
     243
     244                LabelPair make_label() {
     245                        labels.push_back( gen.newLabel("generator") );
     246                        return { labels.back(), int(labels.size()) };
     247                }
     248
     249                DeclarationWithType * in_generator = nullptr;
     250                FunctionDecl * decl_suspend = nullptr;
     251                std::vector<Label> labels;
     252                ControlStruct::LabelGenerator & gen = *ControlStruct::LabelGenerator::getGenerator();
     253        };
     254
     255        //-----------------------------------------------------------------------------
    182256        //Handles mutex routines definitions :
    183257        // void foo( A * mutex a, B * mutex b,  int i ) {                  void foo( A * a, B * b,  int i ) {
    184         //                                                                       monitor_desc * __monitors[] = { get_monitor(a), get_monitor(b) };
     258        //                                                                       $monitor * __monitors[] = { get_monitor(a), get_monitor(b) };
    185259        //                                                                       monitor_guard_t __guard = { __monitors, 2 };
    186260        //    /*Some code*/                                       =>           /*Some code*/
     
    221295        //Handles mutex routines definitions :
    222296        // void foo( A * mutex a, B * mutex b,  int i ) {                  void foo( A * a, B * b,  int i ) {
    223         //                                                                       monitor_desc * __monitors[] = { get_monitor(a), get_monitor(b) };
     297        //                                                                       $monitor * __monitors[] = { get_monitor(a), get_monitor(b) };
    224298        //                                                                       monitor_guard_t __guard = { __monitors, 2 };
    225299        //    /*Some code*/                                       =>           /*Some code*/
     
    251325                CoroutineKeyword        ::implement( translationUnit );
    252326                MonitorKeyword  ::implement( translationUnit );
     327                GeneratorKeyword  ::implement( translationUnit );
     328                SuspendKeyword    ::implement( translationUnit );
    253329        }
    254330
     
    306382        Expression * ConcurrentSueKeyword::postmutate( KeywordCastExpr * cast ) {
    307383                if ( cast_target == cast->target ) {
    308                         // convert (thread &)t to (thread_desc &)*get_thread(t), etc.
     384                        // convert (thread &)t to ($thread &)*get_thread(t), etc.
    309385                        if( !type_decl ) SemanticError( cast, context_error );
    310386                        if( !dtor_decl ) SemanticError( cast, context_error );
     
    377453                        get_type,
    378454                        nullptr,
    379                         noAttributes,
     455                        { new Attribute("const") },
    380456                        Type::Inline
    381457                );
     
    446522
    447523                declsToAddAfter.push_back( get_decl );
    448 
    449                 // get_decl->fixUniqueId();
    450         }
     524        }
     525
     526        //=============================================================================================
     527        // Suspend keyword implementation
     528        //=============================================================================================
     529        DeclarationWithType * SuspendKeyword::is_main( FunctionDecl * func) {
     530                if(func->name != "main") return nullptr;
     531                if(func->type->parameters.size() != 1) return nullptr;
     532
     533                auto param = func->type->parameters.front();
     534
     535                auto type  = dynamic_cast<ReferenceType * >(param->get_type());
     536                if(!type) return nullptr;
     537
     538                auto obj   = dynamic_cast<StructInstType *>(type->base);
     539                if(!obj) return nullptr;
     540
     541                if(!obj->baseStruct->is_generator()) return nullptr;
     542
     543                return param;
     544        }
     545
     546        bool SuspendKeyword::is_real_suspend( FunctionDecl * func ) {
     547                if(isMangled(func->linkage)) return false; // the real suspend isn't mangled
     548                if(func->name != "__cfactx_suspend") return false; // the real suspend has a specific name
     549                if(func->type->parameters.size() != 0) return false; // Too many parameters
     550                if(func->type->returnVals.size() != 0) return false; // Too many return values
     551
     552                return true;
     553        }
     554
     555        void SuspendKeyword::premutate( FunctionDecl * func ) {
     556                GuardValue(in_generator);
     557                in_generator = nullptr;
     558
     559                // Is this the real suspend?
     560                if(is_real_suspend(func)) {
     561                        decl_suspend = decl_suspend ? decl_suspend : func;
     562                        return;
     563                }
     564
     565                // Is this the main of a generator?
     566                auto param = is_main( func );
     567                if(!param) return;
     568
     569                if(func->type->returnVals.size() != 0) SemanticError(func->location, "Generator main must return void");
     570
     571                in_generator = param;
     572                GuardValue(labels);
     573                labels.clear();
     574        }
     575
     576        DeclarationWithType * SuspendKeyword::postmutate( FunctionDecl * func ) {
     577                if( !func->statements ) return func; // Not the actual definition, don't do anything
     578                if( !in_generator     ) return func; // Not in a generator, don't do anything
     579                if( labels.empty()    ) return func; // Generator has no states, nothing to do, could throw a warning
     580
     581                // This is a generator main, we need to add the following code to the top
     582                // static void * __generator_labels[] = {&&s0, &&s1, ...};
     583                // goto * __generator_labels[gen.__generator_state];
     584                const auto & loc = func->location;
     585
     586                const auto first_label = gen.newLabel("generator");
     587
     588                // for each label add to declaration
     589                std::list<Initializer*> inits = { new SingleInit( new LabelAddressExpr( first_label ) ) };
     590                for(const auto & label : labels) {
     591                        inits.push_back(
     592                                new SingleInit(
     593                                        new LabelAddressExpr( label )
     594                                )
     595                        );
     596                }
     597                auto init = new ListInit(std::move(inits), noDesignators, true);
     598                labels.clear();
     599
     600                // create decl
     601                auto decl = new ObjectDecl(
     602                        "__generator_labels",
     603                        Type::StorageClasses( Type::Static ),
     604                        LinkageSpec::AutoGen,
     605                        nullptr,
     606                        new ArrayType(
     607                                Type::Qualifiers(),
     608                                new PointerType(
     609                                        Type::Qualifiers(),
     610                                        new VoidType( Type::Qualifiers() )
     611                                ),
     612                                nullptr,
     613                                false, false
     614                        ),
     615                        init
     616                );
     617
     618                // create the goto
     619                assert(in_generator);
     620
     621                auto go_decl = new ObjectDecl(
     622                        "__generator_label",
     623                        noStorageClasses,
     624                        LinkageSpec::AutoGen,
     625                        nullptr,
     626                        new PointerType(
     627                                Type::Qualifiers(),
     628                                new VoidType( Type::Qualifiers() )
     629                        ),
     630                        new SingleInit(
     631                                new UntypedExpr(
     632                                        new NameExpr("?[?]"),
     633                                        {
     634                                                new NameExpr("__generator_labels"),
     635                                                new UntypedMemberExpr(
     636                                                        new NameExpr("__generator_state"),
     637                                                        new VariableExpr( in_generator )
     638                                                )
     639                                        }
     640                                )
     641                        )
     642                );
     643                go_decl->location = loc;
     644
     645                auto go = new BranchStmt(
     646                        new VariableExpr( go_decl ),
     647                        BranchStmt::Goto
     648                );
     649                go->location = loc;
     650                go->computedTarget->location = loc;
     651
     652                auto noop = new NullStmt({ first_label });
     653                noop->location = loc;
     654
     655                // wrap everything in a nice compound
     656                auto body = new CompoundStmt({
     657                        new DeclStmt( decl ),
     658                        new DeclStmt( go_decl ),
     659                        go,
     660                        noop,
     661                        func->statements
     662                });
     663                body->location   = loc;
     664                func->statements = body;
     665
     666                return func;
     667        }
     668
     669        Statement * SuspendKeyword::postmutate( SuspendStmt * stmt ) {
     670                SuspendStmt::Type type = stmt->type;
     671                if(type == SuspendStmt::None) {
      672                        // This suspend has an implicit target, find it
     673                        type = in_generator ? SuspendStmt::Generator : SuspendStmt::Coroutine;
     674                }
     675
     676                // Check that the target makes sense
     677                if(!in_generator && type == SuspendStmt::Generator) SemanticError( stmt->location, "'suspend generator' must be used inside main of generator type.");
     678
     679                // Act appropriately
     680                switch(type) {
     681                        case SuspendStmt::Generator: return make_generator_suspend(stmt);
     682                        case SuspendStmt::Coroutine: return make_coroutine_suspend(stmt);
     683                        default: abort();
     684                }
     685        }
     686
     687        Statement * SuspendKeyword::make_generator_suspend( SuspendStmt * stmt ) {
     688                assert(in_generator);
     689                // Target code is :
     690                //   gen.__generator_state = X;
     691                //   { THEN }
     692                //   return;
     693                //   __gen_X:;
     694
     695                // Save the location and delete the old statement, we only need the location from this point on
     696                auto loc = stmt->location;
     697
     698                // Build the label and get its index
     699                auto label = make_label();
     700
     701                // Create the context saving statement
     702                auto save = new ExprStmt( new UntypedExpr(
     703                        new NameExpr( "?=?" ),
     704                        {
     705                                new UntypedMemberExpr(
     706                                        new NameExpr("__generator_state"),
     707                                        new VariableExpr( in_generator )
     708                                ),
     709                                new ConstantExpr(
     710                                        Constant::from_int( label.idx )
     711                                )
     712                        }
     713                ));
     714                assert(save->expr);
     715                save->location = loc;
     716                stmtsToAddBefore.push_back( save );
     717
     718                // if we have a then add it here
     719                auto then = stmt->then;
     720                stmt->then = nullptr;
     721                delete stmt;
     722                if(then) stmtsToAddBefore.push_back( then );
     723
     724                // Create the return statement
     725                auto ret = new ReturnStmt( nullptr );
     726                ret->location = loc;
     727                stmtsToAddBefore.push_back( ret );
     728
     729                // Create the null statement with the created label
     730                auto noop = new NullStmt({ label.obj });
     731                noop->location = loc;
     732
     733                // Return the null statement to take the place of the previous statement
     734                return noop;
     735        }
     736
     737        Statement * SuspendKeyword::make_coroutine_suspend( SuspendStmt * stmt ) {
     738                if(stmt->then) SemanticError( stmt->location, "Compound statement following coroutines is not implemented.");
     739
     740                // Save the location and delete the old statement, we only need the location from this point on
     741                auto loc = stmt->location;
     742                delete stmt;
     743
     744                // Create the call expression
     745                if(!decl_suspend) SemanticError( loc, "suspend keyword applied to coroutines requires coroutines to be in scope, add #include <coroutine.hfa>\n");
     746                auto expr = new UntypedExpr( VariableExpr::functionPointer( decl_suspend ) );
     747                expr->location = loc;
     748
     749                // Change this statement into a regular expr
     750                assert(expr);
     751                auto nstmt = new ExprStmt( expr );
     752                nstmt->location = loc;
     753                return nstmt;
     754        }
     755
    451756
    452757        //=============================================================================================
     
    516821        void MutexKeyword::postvisit(StructDecl* decl) {
    517822
    518                 if( decl->name == "monitor_desc" && decl->body ) {
     823                if( decl->name == "$monitor" && decl->body ) {
    519824                        assert( !monitor_decl );
    520825                        monitor_decl = decl;
     
    612917                );
    613918
    614                 //monitor_desc * __monitors[] = { get_monitor(a), get_monitor(b) };
     919                //$monitor * __monitors[] = { get_monitor(a), get_monitor(b) };
    615920                body->push_front( new DeclStmt( monitors) );
    616921        }
     
    673978                );
    674979
    675                 //monitor_desc * __monitors[] = { get_monitor(a), get_monitor(b) };
     980                //$monitor * __monitors[] = { get_monitor(a), get_monitor(b) };
    676981                body->push_front( new DeclStmt( monitors) );
    677982        }
     
    681986        //=============================================================================================
    682987        void ThreadStarter::previsit( StructDecl * decl ) {
    683                 if( decl->name == "thread_desc" && decl->body ) {
     988                if( decl->name == "$thread" && decl->body ) {
    684989                        assert( !thread_decl );
    685990                        thread_decl = decl;
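
The comment blocks in GeneratorKeyword and SuspendKeyword above (the __generator_labels array, the computed goto on entry, and the state-save / return / labelled null statement emitted for each suspend) describe the code this pass generates for a generator main. A hand-written, standalone sketch of that shape, using the GNU labels-as-values extension and illustrative names rather than the pass's generated identifiers, looks like this:

// Hand-written sketch of the state machine the SuspendKeyword pass emits for
// a generator main (requires the GNU &&label / computed-goto extension).
#include <cstdio>

struct Counter { int state = 0; int value = 0; };       // plays the role of the generator frame

static void counter_main( Counter & gen ) {
        // static void * __generator_labels[] = { &&s0, &&s1, ... };
        static void * labels[] = { &&s0, &&s1, &&s2 };
        goto *labels[gen.state];                        // resume at the saved state
  s0:
        gen.value = 1;
        gen.state = 1; return;                          // first 'suspend'
  s1:
        gen.value = 2;
        gen.state = 2; return;                          // second 'suspend'
  s2:
        gen.value = 0;                                  // generator is done
}

int main() {
        Counter c;
        for ( int i = 0; i < 3; i += 1 ) {
                counter_main( c );
                std::printf( "%d\n", c.value );         // prints 1, 2, 0
        }
}
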
  • src/Concurrency/Waitfor.cc

    rb7d6a36 r6a490b2  
    244244                        decl_mask = decl;
    245245                }
    246                 else if( decl->name == "monitor_desc" ) {
     246                else if( decl->name == "$monitor" ) {
    247247                        assert( !decl_monitor );
    248248                        decl_monitor = decl;
  • src/ControlStruct/ExceptTranslate.cc

    rb7d6a36 r6a490b2  
    99// Author           : Andrew Beach
    1010// Created On       : Wed Jun 14 16:49:00 2017
    11 // Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Dec 13 23:40:15 2019
    13 // Update Count     : 12
     11// Last Modified By : Andrew Beach
     12// Last Modified On : Fri Mar 27 11:58:00 2020
     13// Update Count     : 13
    1414//
    1515
     
    211211                        ThrowStmt *throwStmt ) {
    212212                // __throw_terminate( `throwStmt->get_name()` ); }
    213                 return create_given_throw( "__cfaabi_ehm__throw_terminate", throwStmt );
     213                return create_given_throw( "__cfaehm_throw_terminate", throwStmt );
    214214        }
    215215
     
    232232                        ) ) );
    233233                result->push_back( new ExprStmt(
    234                         new UntypedExpr( new NameExpr( "__cfaabi_ehm__rethrow_terminate" ) )
     234                        new UntypedExpr( new NameExpr( "__cfaehm_rethrow_terminate" ) )
    235235                        ) );
    236236                delete throwStmt;
     
    241241                        ThrowStmt *throwStmt ) {
    242242                // __throw_resume( `throwStmt->get_name` );
    243                 return create_given_throw( "__cfaabi_ehm__throw_resume", throwStmt );
     243                return create_given_throw( "__cfaehm_throw_resume", throwStmt );
    244244        }
    245245
     
    309309                        local_except->get_attributes().push_back( new Attribute(
    310310                                "cleanup",
    311                                 { new NameExpr( "__cfaabi_ehm__cleanup_terminate" ) }
     311                                { new NameExpr( "__cfaehm_cleanup_terminate" ) }
    312312                                ) );
    313313
     
    429429                        FunctionDecl * terminate_catch,
    430430                        FunctionDecl * terminate_match ) {
    431                 // { __cfaabi_ehm__try_terminate(`try`, `catch`, `match`); }
     431                // { __cfaehm_try_terminate(`try`, `catch`, `match`); }
    432432
    433433                UntypedExpr * caller = new UntypedExpr( new NameExpr(
    434                         "__cfaabi_ehm__try_terminate" ) );
     434                        "__cfaehm_try_terminate" ) );
    435435                std::list<Expression *>& args = caller->get_args();
    436436                args.push_back( nameOf( try_wrapper ) );
     
    486486
    487487                // struct __try_resume_node __resume_node
    488                 //      __attribute__((cleanup( __cfaabi_ehm__try_resume_cleanup )));
     488                //      __attribute__((cleanup( __cfaehm_try_resume_cleanup )));
    489489                // ** unwinding of the stack here could cause problems **
    490490                // ** however I don't think that can happen currently **
    491                 // __cfaabi_ehm__try_resume_setup( &__resume_node, resume_handler );
     491                // __cfaehm_try_resume_setup( &__resume_node, resume_handler );
    492492
    493493                std::list< Attribute * > attributes;
     
    495495                        std::list< Expression * > attr_params;
    496496                        attr_params.push_back( new NameExpr(
    497                                 "__cfaabi_ehm__try_resume_cleanup" ) );
     497                                "__cfaehm_try_resume_cleanup" ) );
    498498                        attributes.push_back( new Attribute( "cleanup", attr_params ) );
    499499                }
     
    514514
    515515                UntypedExpr *setup = new UntypedExpr( new NameExpr(
    516                         "__cfaabi_ehm__try_resume_setup" ) );
     516                        "__cfaehm_try_resume_setup" ) );
    517517                setup->get_args().push_back( new AddressExpr( nameOf( obj ) ) );
    518518                setup->get_args().push_back( nameOf( resume_handler ) );
     
    539539        ObjectDecl * ExceptionMutatorCore::create_finally_hook(
    540540                        FunctionDecl * finally_wrapper ) {
    541                 // struct __cfaabi_ehm__cleanup_hook __finally_hook
     541                // struct __cfaehm_cleanup_hook __finally_hook
    542542                //      __attribute__((cleanup( finally_wrapper )));
    543543
     
    593593                        // Skip children?
    594594                        return;
    595                 } else if ( structDecl->get_name() == "__cfaabi_ehm__base_exception_t" ) {
     595                } else if ( structDecl->get_name() == "__cfaehm_base_exception_t" ) {
    596596                        assert( nullptr == except_decl );
    597597                        except_decl = structDecl;
    598598                        init_func_types();
    599                 } else if ( structDecl->get_name() == "__cfaabi_ehm__try_resume_node" ) {
     599                } else if ( structDecl->get_name() == "__cfaehm_try_resume_node" ) {
    600600                        assert( nullptr == node_decl );
    601601                        node_decl = structDecl;
    602                 } else if ( structDecl->get_name() == "__cfaabi_ehm__cleanup_hook" ) {
     602                } else if ( structDecl->get_name() == "__cfaehm_cleanup_hook" ) {
    603603                        assert( nullptr == hook_decl );
    604604                        hook_decl = structDecl;
  • src/Parser/ParseNode.h

    rb7d6a36 r6a490b2  
    428428Statement * build_asm( bool voltile, Expression * instruction, ExpressionNode * output = nullptr, ExpressionNode * input = nullptr, ExpressionNode * clobber = nullptr, LabelNode * gotolabels = nullptr );
    429429Statement * build_directive( std::string * directive );
     430SuspendStmt * build_suspend( StatementNode *, SuspendStmt::Type = SuspendStmt::None);
    430431WaitForStmt * build_waitfor( ExpressionNode * target, StatementNode * stmt, ExpressionNode * when );
    431432WaitForStmt * build_waitfor( ExpressionNode * target, StatementNode * stmt, ExpressionNode * when, WaitForStmt * existing );
  • src/Parser/StatementNode.cc

    rb7d6a36 r6a490b2  
    249249} // build_finally
    250250
     251SuspendStmt * build_suspend( StatementNode * then, SuspendStmt::Type type ) {
     252        auto node = new SuspendStmt();
     253
     254        node->type = type;
     255
     256        std::list< Statement * > stmts;
     257        buildMoveList< Statement, StatementNode >( then, stmts );
     258        if(!stmts.empty()) {
     259                assert( stmts.size() == 1 );
     260                node->then = dynamic_cast< CompoundStmt * >( stmts.front() );
     261        }
     262
     263        return node;
     264}
     265
    251266WaitForStmt * build_waitfor( ExpressionNode * targetExpr, StatementNode * stmt, ExpressionNode * when ) {
    252267        auto node = new WaitForStmt();
  • src/Parser/TypeData.cc

    rb7d6a36 r6a490b2  
    769769          case AggregateDecl::Struct:
    770770          case AggregateDecl::Coroutine:
     771          case AggregateDecl::Generator:
    771772          case AggregateDecl::Monitor:
    772773          case AggregateDecl::Thread:
  • src/Parser/lex.ll

    rb7d6a36 r6a490b2  
    6565#define FLOATXX(v) KEYWORD_RETURN(v);
    6666#else
    67 #define FLOATXX(v) IDENTIFIER_RETURN(); 
     67#define FLOATXX(v) IDENTIFIER_RETURN();
    6868#endif // HAVE_KEYWORDS_FLOATXX
    6969
     
    301301_Static_assert  { KEYWORD_RETURN(STATICASSERT); }               // C11
    302302struct                  { KEYWORD_RETURN(STRUCT); }
    303         /* suspend                      { KEYWORD_RETURN(SUSPEND); }                    // CFA */
     303suspend                 { KEYWORD_RETURN(SUSPEND); }                    // CFA
    304304switch                  { KEYWORD_RETURN(SWITCH); }
    305305thread                  { KEYWORD_RETURN(THREAD); }                             // C11
  • src/Parser/parser.yy

    rb7d6a36 r6a490b2  
    1010// Created On       : Sat Sep  1 20:22:55 2001
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sun Feb 16 08:22:14 2020
    13 // Update Count     : 4461
     12// Last Modified On : Mon Apr 27 12:25:42 2020
     13// Update Count     : 4483
    1414//
    1515
     
    278278%token OTYPE FTYPE DTYPE TTYPE TRAIT                                    // CFA
    279279%token SIZEOF OFFSETOF
    280 // %token SUSPEND RESUME                                                                        // CFA
     280// %token RESUME                                                                        // CFA
     281%token SUSPEND                                                                  // CFA
    281282%token ATTRIBUTE EXTENSION                                                              // GCC
    282283%token IF ELSE SWITCH CASE DEFAULT DO WHILE FOR BREAK CONTINUE GOTO RETURN
     
    918919        conditional_expression
    919920        | unary_expression assignment_operator assignment_expression
    920                 { $$ = new ExpressionNode( build_binary_val( $2, $1, $3 ) ); }
     921                {
     922                        if ( $2 == OperKinds::AtAssn ) {
     923                                SemanticError( yylloc, "C @= assignment is currently unimplemented." ); $$ = nullptr;
     924                        } else {
     925                                $$ = new ExpressionNode( build_binary_val( $2, $1, $3 ) );
     926                        } // if
     927                }
    921928        | unary_expression '=' '{' initializer_list_opt comma_opt '}'
    922929                { SemanticError( yylloc, "Initializer assignment is currently unimplemented." ); $$ = nullptr; }
     
    959966
    960967tuple_expression_list:
    961         assignment_expression_opt
    962         | tuple_expression_list ',' assignment_expression_opt
     968        assignment_expression
     969        | '@'                                                                                           // CFA
     970                { SemanticError( yylloc, "Eliding tuple element with '@' is currently unimplemented." ); $$ = nullptr; }
     971        | tuple_expression_list ',' assignment_expression
    963972                { $$ = (ExpressionNode *)($1->set_last( $3 )); }
     973        | tuple_expression_list ',' '@'
     974                { SemanticError( yylloc, "Eliding tuple element with '@' is currently unimplemented." ); $$ = nullptr; }
    964975        ;
    965976
     
    12591270        | RETURN '{' initializer_list_opt comma_opt '}' ';'
    12601271                { SemanticError( yylloc, "Initializer return is currently unimplemented." ); $$ = nullptr; }
    1261         // | SUSPEND ';'
    1262         //      { SemanticError( yylloc, "Suspend expression is currently unimplemented." ); $$ = nullptr; }
    1263         // | SUSPEND compound_statement ';'
    1264         //      { SemanticError( yylloc, "Suspend expression is currently unimplemented." ); $$ = nullptr; }
     1272        | SUSPEND ';'
     1273                { $$ = new StatementNode( build_suspend( nullptr ) ); }
     1274        | SUSPEND compound_statement
     1275                { $$ = new StatementNode( build_suspend( $2 ) ); }
     1276        | SUSPEND COROUTINE ';'
     1277                { $$ = new StatementNode( build_suspend( nullptr, SuspendStmt::Coroutine ) ); }
     1278        | SUSPEND COROUTINE compound_statement
     1279                { $$ = new StatementNode( build_suspend( $3, SuspendStmt::Coroutine ) ); }
     1280        | SUSPEND GENERATOR ';'
     1281                { $$ = new StatementNode( build_suspend( nullptr, SuspendStmt::Generator ) ); }
     1282        | SUSPEND GENERATOR compound_statement
     1283                { $$ = new StatementNode( build_suspend( $3, SuspendStmt::Generator ) ); }
    12651284        | THROW assignment_expression_opt ';'                           // handles rethrow
    12661285                { $$ = new StatementNode( build_throw( $2 ) ); }
     
    15891608                // type_specifier can resolve to just TYPEDEFname (e.g., typedef int T; int f( T );). Therefore this must be
    15901609                // flattened to allow lookahead to the '(' without having to reduce identifier_or_type_name.
    1591         cfa_abstract_tuple identifier_or_type_name '(' push cfa_parameter_ellipsis_list_opt pop ')'
     1610        cfa_abstract_tuple identifier_or_type_name '(' push cfa_parameter_ellipsis_list_opt pop ')' attribute_list_opt
    15921611                // To obtain LR(1 ), this rule must be factored out from function return type (see cfa_abstract_declarator).
    1593                 { $$ = DeclarationNode::newFunction( $2, $1, $5, 0 ); }
    1594         | cfa_function_return identifier_or_type_name '(' push cfa_parameter_ellipsis_list_opt pop ')'
    1595                 { $$ = DeclarationNode::newFunction( $2, $1, $5, 0 ); }
     1612                { $$ = DeclarationNode::newFunction( $2, $1, $5, 0 )->addQualifiers( $8 ); }
     1613        | cfa_function_return identifier_or_type_name '(' push cfa_parameter_ellipsis_list_opt pop ')' attribute_list_opt
     1614                { $$ = DeclarationNode::newFunction( $2, $1, $5, 0 )->addQualifiers( $8 ); }
    15961615        ;
    15971616
     
    20772096aggregate_control:                                                                              // CFA
    20782097        GENERATOR
    2079                 { yyy = true; $$ = AggregateDecl::Coroutine; }
     2098                { yyy = true; $$ = AggregateDecl::Generator; }
     2099        | MONITOR GENERATOR
     2100                { SemanticError( yylloc, "monitor generator is currently unimplemented." ); $$ = AggregateDecl::NoAggregate; }
    20802101        | COROUTINE
    20812102                { yyy = true; $$ = AggregateDecl::Coroutine; }
    20822103        | MONITOR
    20832104                { yyy = true; $$ = AggregateDecl::Monitor; }
     2105        | MONITOR COROUTINE
     2106                { SemanticError( yylloc, "monitor coroutine is currently unimplemented." ); $$ = AggregateDecl::NoAggregate; }
    20842107        | THREAD
    20852108                { yyy = true; $$ = AggregateDecl::Thread; }
     2109        | MONITOR THREAD
     2110                { SemanticError( yylloc, "monitor thread is currently unimplemented." ); $$ = AggregateDecl::NoAggregate; }
    20862111        ;
    20872112
  • src/ResolvExpr/AlternativeFinder.cc

    rb7d6a36 r6a490b2  
    12921292
    12931293                try {
    1294                         // Attempt 1 : turn (thread&)X into (thread_desc&)X.__thrd
     1294                        // Attempt 1 : turn (thread&)X into ($thread&)X.__thrd
    12951295                        // Clone is purely for memory management
    12961296                        std::unique_ptr<Expression> tech1 { new UntypedMemberExpr(new NameExpr(castExpr->concrete_target.field), castExpr->arg->clone()) };
     
    13031303                } catch(SemanticErrorException & ) {}
    13041304
    1305                 // Fallback : turn (thread&)X into (thread_desc&)get_thread(X)
     1305                // Fallback : turn (thread&)X into ($thread&)get_thread(X)
    13061306                std::unique_ptr<Expression> fallback { UntypedExpr::createDeref( new UntypedExpr(new NameExpr(castExpr->concrete_target.getter), { castExpr->arg->clone() })) };
    13071307                // don't prune here, since it's guaranteed all alternatives will have the same type
  • src/ResolvExpr/Resolver.cc

    rb7d6a36 r6a490b2  
    99// Author           : Aaron B. Moss
    1010// Created On       : Sun May 17 12:17:01 2015
    11 // Last Modified By : Aaron B. Moss
    12 // Last Modified On : Wed May 29 11:00:00 2019
    13 // Update Count     : 241
     11// Last Modified By : Andrew Beach
     12// Last Modified On : Fri Mar 27 11:58:00 2020
     13// Update Count     : 242
    1414//
    1515
     
    560560                // TODO: Replace *exception type with &exception type.
    561561                if ( throwStmt->get_expr() ) {
    562                         const StructDecl * exception_decl = indexer.lookupStruct( "__cfaabi_ehm__base_exception_t" );
     562                        const StructDecl * exception_decl = indexer.lookupStruct( "__cfaehm_base_exception_t" );
    563563                        assert( exception_decl );
    564564                        Type * exceptType = new PointerType( noQualifiers, new StructInstType( noQualifiers, const_cast<StructDecl *>(exception_decl) ) );
     
    14771477                if ( throwStmt->expr ) {
    14781478                        const ast::StructDecl * exceptionDecl =
    1479                                 symtab.lookupStruct( "__cfaabi_ehm__base_exception_t" );
     1479                                symtab.lookupStruct( "__cfaehm_base_exception_t" );
    14801480                        assert( exceptionDecl );
    14811481                        ast::ptr< ast::Type > exceptType =
  • src/ResolvExpr/TypeEnvironment.cc

    rb7d6a36 r6a490b2  
    2020#include <utility>                     // for pair, move
    2121
     22#include "CompilationState.h"          // for deterministic_output
    2223#include "Common/utility.h"            // for maybeClone
    2324#include "SynTree/Type.h"              // for Type, FunctionType, Type::Fora...
     
    106107
    107108        void EqvClass::print( std::ostream &os, Indenter indent ) const {
    108                 os << "( ";
    109                 std::copy( vars.begin(), vars.end(), std::ostream_iterator< std::string >( os, " " ) );
    110                 os << ")";
     109                if( !deterministic_output ) {
     110                        os << "( ";
     111                        std::copy( vars.begin(), vars.end(), std::ostream_iterator< std::string >( os, " " ) );
     112                        os << ")";
     113                }
    111114                if ( type ) {
    112115                        os << " -> ";
     
    235238                // check safely bindable
    236239                if ( r.type && occursIn( r.type, s.vars.begin(), s.vars.end(), *this ) ) return false;
    237                
     240
    238241                // merge classes in
    239242                r.vars.insert( s.vars.begin(), s.vars.end() );
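
The EqvClass::print change above is a consumer of the new deterministic_output flag: the run-to-run varying list of type-variable names is skipped when the flag is set, so expected test output stays stable. A minimal standalone sketch of that guard pattern, with illustrative names rather than the resolver's types:

// Sketch of the guard pattern: skip output that varies between runs (here a
// set of generated type-variable names) when deterministic output is requested.
#include <iostream>
#include <set>
#include <string>

static bool deterministic_output = false;               // set from a -d style flag

static void print_class( const std::set<std::string> & vars, const std::string & type ) {
        if ( !deterministic_output ) {
                std::cout << "( ";
                for ( const auto & v : vars ) std::cout << v << ' ';
                std::cout << ")";
        }
        if ( !type.empty() ) std::cout << " -> " << type;
        std::cout << '\n';
}

int main() {
        print_class( { "_T0", "_T1" }, "int" );         // full dump
        deterministic_output = true;
        print_class( { "_T0", "_T1" }, "int" );         // stable dump for expect files
}
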
  • src/SynTree/Declaration.h

    rb7d6a36 r6a490b2  
    302302
    303303        bool is_coroutine() { return kind == Coroutine; }
    304         bool is_monitor() { return kind == Monitor; }
    305         bool is_thread() { return kind == Thread; }
     304        bool is_generator() { return kind == Generator; }
     305        bool is_monitor  () { return kind == Monitor  ; }
     306        bool is_thread   () { return kind == Thread   ; }
    306307
    307308        virtual StructDecl * clone() const override { return new StructDecl( *this ); }
  • src/SynTree/LinkageSpec.cc

    rb7d6a36 r6a490b2  
    99// Author           : Rodolfo G. Esteves
    1010// Created On       : Sat May 16 13:22:09 2015
    11 // Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Dec 16 15:02:29 2019
    13 // Update Count     : 28
     11// Last Modified By : Andrew Beach
     12// Last Modified On : Mon Mar  2 16:13:00 2020
     13// Update Count     : 29
    1414//
    1515
     
    2020
    2121#include "LinkageSpec.h"
     22#include "Common/CodeLocation.h"
    2223#include "Common/SemanticError.h"
    2324
  • src/SynTree/LinkageSpec.h

    rb7d6a36 r6a490b2  
    99// Author           : Rodolfo G. Esteves
    1010// Created On       : Sat May 16 13:24:28 2015
    11 // Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Dec 16 15:03:43 2019
    13 // Update Count     : 20
     11// Last Modified By : Andrew Beach
     12// Last Modified On : Mon Mar  2 16:13:00 2020
     13// Update Count     : 21
    1414//
    1515
     
    1818#include <string>
    1919
    20 #include "Common/CodeLocation.h"
     20struct CodeLocation;
    2121
    2222namespace LinkageSpec {
  • src/SynTree/Mutator.h

    rb7d6a36 r6a490b2  
    5151        virtual Statement * mutate( CatchStmt * catchStmt ) = 0;
    5252        virtual Statement * mutate( FinallyStmt * catchStmt ) = 0;
     53        virtual Statement * mutate( SuspendStmt * suspendStmt ) = 0;
    5354        virtual Statement * mutate( WaitForStmt * waitforStmt ) = 0;
    5455        virtual Declaration * mutate( WithStmt * withStmt ) = 0;
  • src/SynTree/Statement.cc

    rb7d6a36 r6a490b2  
    420420}
    421421
     422SuspendStmt::SuspendStmt( const SuspendStmt & other )
     423        : Statement( other )
     424        , then( maybeClone(other.then) )
     425{}
     426
     427SuspendStmt::~SuspendStmt() {
     428        delete then;
     429}
     430
     431void SuspendStmt::print( std::ostream & os, Indenter indent ) const {
     432        os << "Suspend Statement";
     433        switch (type) {
     434                case None     : os << " with implicit target"; break;
     435                case Generator: os << " for generator"       ; break;
     436                case Coroutine: os << " for coroutine"       ; break;
     437        }
     438        os << endl;
     439        indent += 1;
     440
     441        if(then) {
     442                os << indent << " with post statement :" << endl;
     443                then->print( os, indent + 1);
     444        }
     445}
     446
    422447WaitForStmt::WaitForStmt() : Statement() {
    423448        timeout.time      = nullptr;
  • src/SynTree/Statement.h

    rb7d6a36 r6a490b2  
    422422};
    423423
     424class SuspendStmt : public Statement {
     425  public:
     426        CompoundStmt * then = nullptr;
     427        enum Type { None, Coroutine, Generator } type = None;
     428
     429        SuspendStmt() = default;
     430        SuspendStmt( const SuspendStmt & );
     431        virtual ~SuspendStmt();
     432
     433        virtual SuspendStmt * clone() const override { return new SuspendStmt( *this ); }
     434        virtual void accept( Visitor & v ) override { v.visit( this ); }
     435        virtual void accept( Visitor & v ) const override { v.visit( this ); }
     436        virtual Statement * acceptMutator( Mutator & m )  override { return m.mutate( this ); }
     437        virtual void print( std::ostream & os, Indenter indent = {} ) const override;
     438};
     439
    424440class WaitForStmt : public Statement {
    425441  public:
  • src/SynTree/SynTree.h

    rb7d6a36 r6a490b2  
    5454class CatchStmt;
    5555class FinallyStmt;
     56class SuspendStmt;
    5657class WaitForStmt;
    5758class WithStmt;
  • src/SynTree/Visitor.h

    rb7d6a36 r6a490b2  
    7878        virtual void visit( FinallyStmt * node ) { visit( const_cast<const FinallyStmt *>(node) ); }
    7979        virtual void visit( const FinallyStmt * finallyStmt ) = 0;
     80        virtual void visit( SuspendStmt * node ) { visit( const_cast<const SuspendStmt *>(node) ); }
     81        virtual void visit( const SuspendStmt * suspendStmt ) = 0;
    8082        virtual void visit( WaitForStmt * node ) { visit( const_cast<const WaitForStmt *>(node) ); }
    8183        virtual void visit( const WaitForStmt * waitforStmt ) = 0;
  • src/main.cc

    rb7d6a36 r6a490b2  
    443443
    444444
    445 static const char optstring[] = ":c:ghlLmNnpP:S:twW:D:";
     445static const char optstring[] = ":c:ghlLmNnpdP:S:twW:D:";
    446446
    447447enum { PreludeDir = 128 };
     
    456456        { "no-prelude", no_argument, nullptr, 'n' },
    457457        { "prototypes", no_argument, nullptr, 'p' },
     458        { "deterministic-out", no_argument, nullptr, 'd' },
    458459        { "print", required_argument, nullptr, 'P' },
    459460        { "prelude-dir", required_argument, nullptr, PreludeDir },
     
    476477        "do not read prelude",                                // -n
    477478        "generate prototypes for prelude functions",            // -p
     479        "don't print output that isn't deterministic",        // -d
    478480        "print",                                              // -P
    479481        "<directory> prelude directory for debug/nodebug",      // no flag
     
    580582                        genproto = true;
    581583                        break;
     584                  case 'd':                                     // don't print non-deterministic output
     585                    deterministic_output = true;
     586                        break;
    582587                  case 'P':                                                                             // print options
    583588                        for ( int i = 0;; i += 1 ) {
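
The main.cc hunks above add the producer side of the flag: a 'd' entry in optstring, a matching --deterministic-out row in the long-option table, a help string, and a case that sets deterministic_output. A standalone sketch of that getopt_long wiring (an illustrative program, not cfa-cpp's actual option table):

// Sketch of adding a boolean option the way main.cc does: short flag in
// optstring, long_options entry, and a case in the getopt_long loop.
#include <getopt.h>
#include <cstdio>

static bool deterministic_output = false;

int main( int argc, char * argv[] ) {
        static const char optstring[] = ":d";           // 'd' takes no argument
        static const struct option long_opts[] = {
                { "deterministic-out", no_argument, nullptr, 'd' },
                { nullptr, 0, nullptr, 0 }
        };

        int c;
        while ( (c = getopt_long( argc, argv, optstring, long_opts, nullptr )) != -1 ) {
                switch ( c ) {
                  case 'd':                             // don't print non-deterministic output
                        deterministic_output = true;
                        break;
                  default:
                        std::fprintf( stderr, "unknown option\n" );
                        return 1;
                }
        }
        std::printf( "deterministic_output = %d\n", deterministic_output );
}
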
  • tests/.expect/alloc-ERROR.txt

    rb7d6a36 r6a490b2  
    1 alloc.cfa:310:1 error: No reasonable alternatives for expression Applying untyped:
     1alloc.cfa:362:1 error: No reasonable alternatives for expression Applying untyped:
    22  Name: ?=?
    33...to:
    4   Name: p
     4  Name: ip
    55  Applying untyped:
    66    Name: realloc
     
    1919
    2020
    21 alloc.cfa:311:1 error: No reasonable alternatives for expression Applying untyped:
     21alloc.cfa:363:1 error: No reasonable alternatives for expression Applying untyped:
    2222  Name: ?=?
    2323...to:
    24   Name: p
    25   Applying untyped:
    26     Name: alloc
    27   ...to:
    28     Name: stp
    29     Applying untyped:
    30       Name: ?*?
    31     ...to:
    32       Name: dim
    33       Sizeof Expression on: Applying untyped:
    34           Name: *?
    35         ...to:
    36           Name: stp
    37 
    38 
    39 
    40 
    41 alloc.cfa:312:1 error: No reasonable alternatives for expression Applying untyped:
    42   Name: ?=?
    43 ...to:
    44   Name: p
     24  Name: ip
    4525  Applying untyped:
    4626    Name: memset
     
    5030
    5131
    52 alloc.cfa:313:1 error: No reasonable alternatives for expression Applying untyped:
     32alloc.cfa:364:1 error: No reasonable alternatives for expression Applying untyped:
    5333  Name: ?=?
    5434...to:
    55   Name: p
     35  Name: ip
    5636  Applying untyped:
    5737    Name: memcpy
  • tests/.expect/alloc.txt

    rb7d6a36 r6a490b2  
    23230xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101
    2424
    25 CFA resize array alloc
     25CFA realloc array alloc
    26260xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef
    27 CFA resize array alloc
     27CFA realloc array alloc
    28280xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101
    29 CFA resize array alloc
     29CFA realloc array alloc
    30300xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef
    31 CFA resize array alloc
     31CFA realloc array alloc, fill
    32320xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede
    33 CFA resize array alloc
     33CFA realloc array alloc, fill
    34340xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef
    35 CFA resize array alloc, fill
     35CFA realloc array alloc, fill
    36360xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede
    3737
  • tests/Makefile.am

    rb7d6a36 r6a490b2  
    4141        -quiet @CFA_FLAGS@ \
    4242        -DIN_DIR="${abs_srcdir}/.in/"
     43
     44AM_CFAFLAGS = -XCFA --deterministic-out
    4345
    4446# get the desired cfa to test
  • tests/Makefile.in

    rb7d6a36 r6a490b2  
    408408        -DIN_DIR="${abs_srcdir}/.in/"
    409409
     410AM_CFAFLAGS = -XCFA --deterministic-out
    410411
    411412# get the desired cfa to test
  • tests/alloc.cfa

    rb7d6a36 r6a490b2  
    1010// Created On       : Wed Feb  3 07:56:22 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sun Feb 16 09:21:13 2020
    13 // Update Count     : 405
     12// Last Modified On : Mon Apr  6 21:08:23 2020
     13// Update Count     : 428
    1414//
    1515
     
    2828        size_t dim = 10;
    2929        char fill = '\xde';
    30         int * p, * p1;
     30        int * ip, * ip1;
    3131
    3232        // allocation, non-array types
    3333
    34         p = (int *)malloc( sizeof(*p) );                                        // C malloc, type unsafe
    35         *p = 0xdeadbeef;
    36         printf( "C   malloc %#x\n", *p );
    37         free( p );
    38 
    39         p = malloc();                                       // CFA malloc, type safe
    40         *p = 0xdeadbeef;
    41         printf( "CFA malloc %#x\n", *p );
    42         free( p );
    43 
    44         p = alloc();                                        // CFA alloc, type safe
    45         *p = 0xdeadbeef;
    46         printf( "CFA alloc %#x\n", *p );
    47         free( p );
    48 
    49         p = alloc_set( fill );                                                          // CFA alloc, fill
    50         printf( "CFA alloc, fill %08x\n", *p );
    51         free( p );
    52 
    53         p = alloc_set( 3 );                                                                     // CFA alloc, fill
    54         printf( "CFA alloc, fill %d\n", *p );
    55         free( p );
     34        ip = (int *)malloc( sizeof(*ip) );                                      // C malloc, type unsafe
     35        *ip = 0xdeadbeef;
     36        printf( "C   malloc %#x\n", *ip );
     37        free( ip );
     38
     39        ip = malloc();                                                                          // CFA malloc, type safe
     40        *ip = 0xdeadbeef;
     41        printf( "CFA malloc %#x\n", *ip );
     42        free( ip );
     43
     44        ip = alloc();                                                                           // CFA alloc, type safe
     45        *ip = 0xdeadbeef;
     46        printf( "CFA alloc %#x\n", *ip );
     47        free( ip );
     48
     49        ip = alloc_set( fill );                                                         // CFA alloc, fill
     50        printf( "CFA alloc, fill %08x\n", *ip );
     51        free( ip );
     52
     53        ip = alloc_set( 3 );                                                            // CFA alloc, fill
     54        printf( "CFA alloc, fill %d\n", *ip );
     55        free( ip );
    5656
    5757
     
    5959        printf( "\n" );
    6060
    61         p = (int *)calloc( dim, sizeof( *p ) );                         // C array calloc, type unsafe
     61        ip = (int *)calloc( dim, sizeof( *ip ) );                       // C array calloc, type unsafe
    6262        printf( "C   array calloc, fill 0\n" );
    63         for ( i; dim ) { printf( "%#x ", p[i] ); }
    64         printf( "\n" );
    65         free( p );
    66 
    67         p = calloc( dim );                                  // CFA array calloc, type safe
     63        for ( i; dim ) { printf( "%#x ", ip[i] ); }
     64        printf( "\n" );
     65        free( ip );
     66
     67        ip = calloc( dim );                                                                     // CFA array calloc, type safe
    6868        printf( "CFA array calloc, fill 0\n" );
    69         for ( i; dim ) { printf( "%#x ", p[i] ); }
    70         printf( "\n" );
    71         free( p );
    72 
    73         p = alloc( dim );                                   // CFA array alloc, type safe
    74         for ( i; dim ) { p[i] = 0xdeadbeef; }
     69        for ( i; dim ) { printf( "%#x ", ip[i] ); }
     70        printf( "\n" );
     71        free( ip );
     72
     73        ip = alloc( dim );                                                                      // CFA array alloc, type safe
     74        for ( i; dim ) { ip[i] = 0xdeadbeef; }
    7575        printf( "CFA array alloc, no fill\n" );
    76         for ( i; dim ) { printf( "%#x ", p[i] ); }
    77         printf( "\n" );
    78         free( p );
    79 
    80         p = alloc_set( 2 * dim, fill );                                         // CFA array alloc, fill
     76        for ( i; dim ) { printf( "%#x ", ip[i] ); }
     77        printf( "\n" );
     78        free( ip );
     79
     80        ip = alloc_set( 2 * dim, fill );                                        // CFA array alloc, fill
    8181        printf( "CFA array alloc, fill %#hhx\n", fill );
    82         for ( i; 2 * dim ) { printf( "%#x ", p[i] ); }
    83         printf( "\n" );
    84         free( p );
    85 
    86         p = alloc_set( 2 * dim, 0xdeadbeef );                           // CFA array alloc, fill
     82        for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
     83        printf( "\n" );
     84        free( ip );
     85
     86        ip = alloc_set( 2 * dim, 0xdeadbeef );                          // CFA array alloc, fill
    8787        printf( "CFA array alloc, fill %#hhx\n", 0xdeadbeef );
    88         for ( i; 2 * dim ) { printf( "%#x ", p[i] ); }
    89         printf( "\n" );
    90         // do not free
    91 
    92         p1 = alloc_set( 2 * dim, p );                                           // CFA array alloc, fill
     88        for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
     89        printf( "\n" );
     90        // do not free
     91
     92        ip1 = alloc_set( 2 * dim, ip );                                         // CFA array alloc, fill
    9393        printf( "CFA array alloc, fill from array\n" );
    94         for ( i; 2 * dim ) { printf( "%#x %#x, ", p[i], p1[i] ); }
    95         free( p1 );
    96         printf( "\n" );
    97 
     94        for ( i; 2 * dim ) { printf( "%#x %#x, ", ip[i], ip1[i] ); }
     95        free( ip1 );
     96        printf( "\n" );
     97
     98
     99        // realloc, non-array types
     100        printf( "\n" );
     101
     102        ip = (int *)realloc( ip, dim * sizeof(*ip) );           // C realloc
     103        printf( "C realloc\n" );
     104        for ( i; dim ) { printf( "%#x ", ip[i] ); }
     105        printf( "\n" );
     106        // do not free
     107
     108        ip = realloc( ip, 2 * dim * sizeof(*ip) );                      // CFA realloc
     109        for ( i; dim ~ 2 * dim ) { ip[i] = 0x1010101; }
     110        printf( "CFA realloc\n" );
     111        for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
     112        printf( "\n" );
     113        // do not free
     114
     115
     116        // realloc, array types
     117        printf( "\n" );
     118
     119        ip = alloc( ip, dim );                                                          // CFA realloc array alloc
     120        for ( i; dim ) { ip[i] = 0xdeadbeef; }
     121        printf( "CFA realloc array alloc\n" );
     122        for ( i; dim ) { printf( "%#x ", ip[i] ); }
     123        printf( "\n" );
     124        // do not free
     125
     126        ip = alloc( ip, 2 * dim );                                                      // CFA realloc array alloc
     127        for ( i; dim ~ 2 * dim ) { ip[i] = 0x1010101; }         // fill upper part
     128        printf( "CFA realloc array alloc\n" );
     129        for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
     130        printf( "\n" );
     131        // do not free
     132
     133        ip = alloc( ip, dim );                                                          // CFA realloc array alloc
     134        printf( "CFA realloc array alloc\n" );
     135        for ( i; dim ) { printf( "%#x ", ip[i] ); }
     136        printf( "\n" );
     137        // do not free
     138
     139        ip = alloc_set( ip, 3 * dim, fill );                            // CFA realloc array alloc, fill
     140        printf( "CFA realloc array alloc, fill\n" );
     141        for ( i; 3 * dim ) { printf( "%#x ", ip[i] ); }
     142        printf( "\n" );
     143        // do not free
     144
     145        ip = alloc_set( ip, dim, fill );                                        // CFA realloc array alloc, fill
     146        printf( "CFA realloc array alloc, fill\n" );
     147        for ( i; dim ) { printf( "%#x ", ip[i] ); }
     148        printf( "\n" );
     149        // do not free
     150
     151        ip = alloc_set( ip, 3 * dim, fill );                            // CFA realloc array alloc, fill
     152        printf( "CFA realloc array alloc, fill\n" );
     153        for ( i; 3 * dim ) { printf( "%#x ", ip[i] ); }
     154        printf( "\n" );
     155        // do not free
     156#if 0 // FIX ME
     157        ip = alloc_set( ip, 5 * dim, 5 );                                       // CFA realloc array alloc, 5
     158        printf( "CFA realloc array alloc, 5\n" );
     159        for ( i; 5 * dim ) { printf( "%#x ", ip[i] ); }
     160        printf( "\n" );
     161        // do not free
     162
     163        ip = alloc_set( ip, dim, 5 );                                           // CFA realloc array alloc, 5
     164        printf( "CFA realloc array alloc, 5\n" );
     165        for ( i; dim ) { printf( "%#x ", ip[i] ); }
     166        printf( "\n" );
     167        // do not free
     168
     169        ip = alloc_set( ip, 5 * dim, 5 );                                       // CFA realloc array alloc, 5
     170        printf( "CFA realloc array alloc, 5\n" );
     171        for ( i; 5 * dim ) { printf( "%#x ", ip[i] ); }
     172        printf( "\n" );
     173#endif // 0
     174        free( ip );
    98175
    99176        // resize, non-array types
    100         printf( "\n" );
    101 
    102         p = (int *)realloc( p, dim * sizeof(*p) );                      // C realloc
    103         printf( "C realloc\n" );
    104         for ( i; dim ) { printf( "%#x ", p[i] ); }
    105         printf( "\n" );
    106         // do not free
    107 
    108         p = realloc( p, 2 * dim * sizeof(*p) );             // CFA realloc
    109         for ( i; dim ~ 2 * dim ) { p[i] = 0x1010101; }
    110         printf( "CFA realloc\n" );
    111         for ( i; 2 * dim ) { printf( "%#x ", p[i] ); }
    112         printf( "\n" );
    113         // do not free
     177
     178        struct S {
     179                int a[5];
     180        };
     181
     182    ip = alloc();
     183        *ip = 5;
     184    double * dp = alloc( ip );
     185        *dp = 5.5;
     186    S * sp = alloc( dp );
     187        *sp = (S){ {0, 1, 2, 3, 4} };
     188    ip = alloc( sp );
     189        *ip = 3;
     190    free( ip );
    114191
    115192
    116193        // resize, array types
    117         printf( "\n" );
    118 
    119         p = alloc( p, dim );                                // CFA resize array alloc
    120         for ( i; dim ) { p[i] = 0xdeadbeef; }
    121         printf( "CFA resize array alloc\n" );
    122         for ( i; dim ) { printf( "%#x ", p[i] ); }
    123         printf( "\n" );
    124         // do not free
    125 
    126         p = alloc( p, 2 * dim );                            // CFA resize array alloc
    127         for ( i; dim ~ 2 * dim ) { p[i] = 0x1010101; }          // fill upper part
    128         printf( "CFA resize array alloc\n" );
    129         for ( i; 2 * dim ) { printf( "%#x ", p[i] ); }
    130         printf( "\n" );
    131         // do not free
    132 
    133         p = alloc( p, dim );                                // CFA resize array alloc
    134         printf( "CFA resize array alloc\n" );
    135         for ( i; dim ) { printf( "%#x ", p[i] ); }
    136         printf( "\n" );
    137         // do not free
    138 
    139         p = alloc_set( p, 3 * dim, fill );                                      // CFA resize array alloc, fill
    140         printf( "CFA resize array alloc\n" );
    141         for ( i; 3 * dim ) { printf( "%#x ", p[i] ); }
    142         printf( "\n" );
    143         // do not free
    144 
    145         p = alloc_set( p, dim, fill );                                          // CFA resize array alloc, fill
    146         printf( "CFA resize array alloc\n" );
    147         for ( i; dim ) { printf( "%#x ", p[i] ); }
    148         printf( "\n" );
    149         // do not free
    150 
    151         p = alloc_set( p, 3 * dim, fill );                                      // CFA resize array alloc, fill
    152         printf( "CFA resize array alloc, fill\n" );
    153         for ( i; 3 * dim ) { printf( "%#x ", p[i] );; }
    154         printf( "\n" );
    155         free( p );
     194
     195    ip = alloc( 5 );
     196        for ( i; 5 ) { ip[i] = 5; }
     197    dp = alloc( ip, 5 );
     198        for ( i; 5 ) { dp[i] = 5.5; }
     199    sp = alloc( dp, 5 );
     200        for ( i; 5 ) { sp[i] = (S){ {0, 1, 2, 3, 4} }; }
     201    ip = alloc( sp, 3 );
     202        for ( i; 3 ) { ip[i] = 3; }
     203    ip = alloc( ip, 7 );
     204        for ( i; 7 ) { ip[i] = 7; }
     205    ip = alloc( ip, 7, false );
     206        for ( i; 7 ) { ip[i] = 7; }
     207    free( ip );
    156208
    157209
     
    168220        free( stp );
    169221
    170         stp = &(*memalign( Alignment )){ 42, 42.5 };          // CFA memalign
     222        stp = &(*memalign( Alignment )){ 42, 42.5 };            // CFA memalign
    171223        assert( (uintptr_t)stp % Alignment == 0 );
    172224        printf( "CFA memalign %d %g\n", stp->x, stp->y );
     
    300352        free( fp - 1 );
    301353
    302         p = foo( bar( baz( malloc(), 0 ), 0 ), 0 );
    303         *p = 0xdeadbeef;
    304         printf( "CFA deep malloc %#x\n", *p );
    305         free( p );
     354        ip = foo( bar( baz( malloc(), 0 ), 0 ), 0 );
     355        *ip = 0xdeadbeef;
     356        printf( "CFA deep malloc %#x\n", *ip );
     357        free( ip );
    306358
    307359#ifdef ERR1
    308360        stp = malloc();
    309361        printf( "\nSHOULD FAIL\n" );
    310         p = realloc( stp, dim * sizeof( *stp ) );
    311         p = alloc( stp, dim * sizeof( *stp ) );
    312         p = memset( stp, 10 );
    313         p = memcpy( &st1, &st );
     362        ip = realloc( stp, dim * sizeof( *stp ) );
     363        ip = memset( stp, 10 );
     364        ip = memcpy( &st1, &st );
    314365#endif
    315366} // main
  • tests/concurrent/.expect/monitor.txt

    rb7d6a36 r6a490b2  
    1 4000000
     13000000
  • tests/concurrent/coroutineYield.cfa

    rb7d6a36 r6a490b2  
    3333                        sout | "Coroutine 2";
    3434                #endif
    35                 suspend();
     35                suspend;
    3636        }
    3737}
  • tests/concurrent/monitor.cfa

    rb7d6a36 r6a490b2  
    2929
    3030void main( MyThread & this ) {
    31         for(int i = 0; i < 1_000_000; i++) {
     31        for(int i = 0; i < 750_000; i++) {
    3232                increment( global );
    3333        }
  • tests/concurrent/multi-monitor.cfa

    rb7d6a36 r6a490b2  
    1111
    1212void increment( monitor_t & mutex p1, monitor_t & mutex p2, int & value ) {
     13        assert(active_thread() == get_monitor(p1)->owner);
     14        assert(active_thread() == get_monitor(p2)->owner);
    1315        value += 1;
     16        assert(active_thread() == get_monitor(p1)->owner);
     17        assert(active_thread() == get_monitor(p2)->owner);
    1418}
    1519
  • tests/concurrent/signal/block.cfa

    rb7d6a36 r6a490b2  
    3333
    3434monitor global_data_t {
    35         thread_desc * last_thread;
    36         thread_desc * last_signaller;
     35        $thread * last_thread;
     36        $thread * last_signaller;
    3737};
    3838
     
    8282        if( !is_empty( cond ) ) {
    8383
    84                 thread_desc * next = front( cond );
     84                $thread * next = front( cond );
    8585
    8686                if( ! signal_block( cond ) ) {
  • tests/concurrent/suspend_then.cfa

    rb7d6a36 r6a490b2  
    11#include <fstream.hfa>
    22#include <kernel.hfa>
    3 #include <monitor.hfa>
    43#include <thread.hfa>
    54#include <time.hfa>
     
    109#include "long_tests.hfa"
    1110
    12 #ifndef PREEMPTION_RATE
    13 #define PREEMPTION_RATE 10`ms
    14 #endif
    15 
    1611Duration default_preemption() {
    17         return PREEMPTION_RATE;
     12        return 0;
    1813}
    1914
     
    2621#if !defined(TEST_FOREVER)
    2722        static inline void print(const char * const text ) {
    28                 write( STDERR_FILENO, text, strlen(text) );
     23                write( STDOUT_FILENO, text, strlen(text) );
    2924        }
    3025#else
     
    3227#endif
    3328
    34 coroutine Coroutine {};
     29generator Coroutine { int i; };
    3530
    3631volatile bool done = false;
     
    4944
    5045void main(Coroutine& this) {
    51         suspend();
    52         for(int i = 0; TEST(i < N); i++) {
     46        this.i = 0;
     47        suspend;
     48        for(;TEST(this.i < N); this.i++) {
    5349
    54                 print("C - Suspending");
    55                 void publish() {
    56                         print("C - Publishing");
     50                print("C - Suspending\n");
     51                suspend{
     52                        print("C - Publishing\n");
    5753                        assert(!the_cor);
    5854                        store( this );
    5955                }
    60                 suspend_then(publish);
    6156                assert(!the_cor);
    62                 print("Coroutine 2");
     57                print("C - Back\n");
    6358                KICK_WATCHDOG;
    6459                yield();
    6560        }
    6661        done = true;
    67         suspend();
     62        suspend;
    6863}
    6964
     
    7772                if(!mine) continue;
    7873
    79                 print("T - took");
     74                print("T - took\n");
    8075                resume(*mine);
    81                 print("T - back");
    8276        }
    8377}
  • tests/coroutine/.expect/fmtLines.txt

    rb7d6a36 r6a490b2  
    4848{                                                         // f  or n  ewli 
    4949ne c  hara  cter  s                                     su 
    50 spen  d();                                      if   ( fm 
    51 t.ch   !=   '\n'   ) b  reak 
    52 ;               /  / ig  nore   new  line 
    53                                   } //   for                              sout 
    54  | f  mt.c  h;                                                  //  
    55 prin  t ch  arac  ter                   }  
    56 // f  or                        sou  t |   "  " 
    57 ;                                                               //   prin  t bl 
    58 ock   sepa  rato  r             }   //  
    59 for             sou  t |   nl;                                   
    60                                   // p  rint   gro  up s 
    61 epar  ator      } /  / fo  r} / 
    62 / ma  invo  id p  rt(   Form 
    63 at &   fmt  , ch  ar c  h )   
    64 {      fmt  .ch   = ch  ;    
    65  res  ume(   fmt   );}   //  
    66 prti  nt m  ain(  ) {     Form 
    67 at f  mt;         char   ch;    for 
    68  ( ;  ; )   {           s  in |   ch; 
    69                                                                                 //   rea  d on 
    70 e ch  arac  ter     if   ( e 
    71 of(   sin   ) )   brea  k;               
    72                                         //   eof   ?            p  rt(  
    73 fmt,   ch   );  }   //   for} 
    74  //   main  // L  ocal   Var 
    75 iabl  es:   ////   tab  -wid 
    76 th:   4 //  // c  ompi  le-c 
    77 omma  nd:   "cfa   fmt  Line 
    78 s.cf  a" /  ///   End:   //
     50spen  d;                                        i  f (   fmt. 
     51ch !  = '\  n' )   bre  ak;      
     52        //   igno  re n  ewli  ne                
     53                }   // f  or                            so  ut | 
     54 fmt  .ch;                                                      /  / pr 
     55int   char  acte  r                       } // 
     56 for                    s  out   | "    ";       
     57                                                        /  / pr  int   bloc 
     58k se  para  tor         } /  / fo 
     59r               s  out   | nl  ;                                                         
     60                //   pri  nt g  roup   sep 
     61arat  or        }   //   for}   //  
     62main  void   prt  ( Fo  rmat 
     63 & f  mt,   char   ch   ) {   
     64   f  mt.c  h =   ch;      r 
     65esum  e( f  mt )  ;} /  / pr 
     66tint   mai  n()   {     Fo  rmat 
     67 fmt  ; ch  ar c  h;    f  or ( 
     68 ;;   ) {               sin   | c  h;            
     69                                                                  // r  ead   one  
     70char  acte  r       if (   eof 
     71( si  n )   ) br  eak;                                   
     72                        /  / eo  f ?            prt  ( fm 
     73t, c  h );      } /  / fo  r} / 
     74/ ma  in//   Loc  al V  aria 
     75bles  : //  // t  ab-w  idth 
     76: 4   ////   com  pile  -com 
     77mand  : "c  fa f  mtLi  nes. 
     78cfa"   ///  / En  d: /  /
  • tests/coroutine/.in/fmtLines.txt

    rb7d6a36 r6a490b2  
    3535                        for ( fmt.b = 0; fmt.b < 4; fmt.b += 1 ) {      // blocks of 4 characters
    3636                                for ( ;; ) {                                                    // for newline characters
    37                                         suspend();
     37                                        suspend;
    3838                                        if ( fmt.ch != '\n' ) break;            // ignore newline
    3939                                } // for
  • tests/coroutine/cntparens.cfa

    rb7d6a36 r6a490b2  
    1 // 
     1//
    22// Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
    33//
    44// The contents of this file are covered under the licence agreement in the
    55// file "LICENCE" distributed with Cforall.
    6 // 
     6//
    77// cntparens.cfa -- match left/right parenthesis
    8 // 
     8//
    99// Author           : Peter A. Buhr
    1010// Created On       : Sat Apr 20 11:04:45 2019
     
    1212// Last Modified On : Sat Apr 20 11:06:21 2019
    1313// Update Count     : 1
    14 // 
     14//
    1515
    1616#include <fstream.hfa>
     
    2626void main( CntParens & cpns ) with( cpns ) {
    2727        for ( ; ch == '('; cnt += 1 ) {                                         // left parenthesis
    28                 suspend();
     28                suspend;
    2929        }
    3030        for ( ; ch == ')' && cnt > 1; cnt -= 1 ) {                      // right parenthesis
    31                 suspend();
     31                suspend;
    3232        }
    3333        status = ch == ')' ? Match : Error;
    3434} // main
    35        
     35
    3636void ?{}( CntParens & cpns ) with( cpns ) { status = Cont; cnt = 0; }
    3737
  • tests/coroutine/devicedriver.cfa

    rb7d6a36 r6a490b2  
    1 // 
     1//
    22// Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
    33//
    44// The contents of this file are covered under the licence agreement in the
    55// file "LICENCE" distributed with Cforall.
    6 // 
    7 // devicedriver.cfa -- 
    8 // 
     6//
     7// devicedriver.cfa --
     8//
    99// Author           : Peter A. Buhr
    1010// Created On       : Sat Mar 16 15:30:34 2019
     
    1212// Last Modified On : Sat Apr 20 09:07:19 2019
    1313// Update Count     : 90
    14 // 
     14//
    1515
    1616#include <fstream.hfa>
     
    2929
    3030void checkCRC( Driver & d, unsigned int sum ) with( d ) {
    31         suspend();
     31        suspend;
    3232        unsigned short int crc = byte << 8;                                     // sign extension over written
    33         suspend();
     33        suspend;
    3434        // prevent sign extension for signed char
    3535        status = (crc | (unsigned char)byte) == sum ? MSG : ECRC;
     
    4141                status = CONT;
    4242                unsigned int lnth = 0, sum = 0;
    43                 while ( byte != STX ) suspend();
     43                while ( byte != STX ) suspend;
    4444          emsg: for () {
    45                         suspend();
     45                        suspend;
    4646                        choose ( byte ) {                                                       // process byte
    4747                          case STX:
    48                                 status = ESTX; suspend(); continue msg;
     48                                status = ESTX; suspend; continue msg;
    4949                          case ETX:
    5050                                break emsg;
    5151                          case ESC:
    52                                 suspend();
     52                                suspend;
    5353                        } // choose
    5454                        if ( lnth >= MaxMsg ) {                                         // buffer full ?
    55                                 status = ELNTH; suspend(); continue msg;
     55                                status = ELNTH; suspend; continue msg;
    5656                        } // if
    5757                        msg[lnth++] = byte;
     
    6060                msg[lnth] = '\0';                                                               // terminate string
    6161                checkCRC( d, sum );                                                             // refactor CRC check
    62                 suspend();
     62                suspend;
    6363        } // for
    6464} // main
  • tests/coroutine/fibonacci.cfa

    rb7d6a36 r6a490b2  
    2222        int fn1, fn2;                                                                           // retained between resumes
    2323        fn = 0;  fn1 = fn;                                                                      // 1st case
    24         suspend();                                                                                      // restart last resume
     24        suspend;                                                                                        // restart last resume
    2525        fn = 1;  fn2 = fn1;  fn1 = fn;                                          // 2nd case
    26         suspend();                                                                                      // restart last resume
     26        suspend;                                                                                        // restart last resume
    2727        for () {
    2828                fn = fn1 + fn2;  fn2 = fn1;  fn1 = fn;                  // general case
    29                 suspend();                                                                              // restart last resume
     29                suspend;                                                                                // restart last resume
    3030        } // for
    3131}
  • tests/coroutine/fibonacci_1.cfa

    rb7d6a36 r6a490b2  
    1212// Last Modified On : Thu Mar 21 08:10:45 2019
    1313// Update Count     : 25
    14 // 
     14//
    1515
    1616#include <fstream.hfa>
     
    2323        [fn1, fn] = [0, 1];                                                                     // precompute first two states
    2424        for () {
    25                 suspend();                                                                              // restart last resume
     25                suspend;                                                                                // restart last resume
    2626                [fn1, fn] = [fn, fn1 + fn];                                             // general case
    2727        } // for
  • tests/coroutine/fmtLines.cfa

    rb7d6a36 r6a490b2  
    2727                        for ( b = 0; b < 4; b += 1 ) {                          // blocks of 4 characters
    2828                                for () {                                                                // for newline characters
    29                                         suspend();
     29                                        suspend;
    3030                                  if ( ch != '\n' ) break;                              // ignore newline
    3131                                } // for
  • tests/coroutine/raii.cfa

    rb7d6a36 r6a490b2  
    3939        Raii raii = { "Coroutine" };
    4040        sout | "Before Suspend";
    41         suspend();
     41        suspend;
    4242        sout | "After Suspend";
    4343}
  • tests/coroutine/runningTotal.cfa

    rb7d6a36 r6a490b2  
    2525void update( RunTotal & rntl, int input ) with( rntl ) { // helper
    2626        total += input;                                                                         // remember between activations
    27         suspend();                                                                                      // inactivate on stack
     27        suspend;                                                                                        // inactivate on stack
    2828}
    2929
  • tests/coroutine/suspend_then.cfa

    rb7d6a36 r6a490b2  
    1515
    1616#include <fstream.hfa>
    17 #include <coroutine.hfa>
    1817
    19 void then() {
    20         sout | "Then!";
    21 }
    22 
    23 coroutine Fibonacci { int fn; };                                                // used for communication
     18generator Fibonacci {
     19        int fn;                                                                         // used for communication
     20        int fn1, fn2;                                                           // retained between resumes
     21};
    2422
    2523void main( Fibonacci & fib ) with( fib ) {                              // called on first resume
    26         int fn1, fn2;                                                           // retained between resumes
    2724        fn = 0;  fn1 = fn;                                                      // 1st case
    28         suspend_then(then);                                                     // restart last resume
     25        suspend { sout | "Then!"; }                                             // restart last resume
    2926        fn = 1;  fn2 = fn1;  fn1 = fn;                                  // 2nd case
    30         suspend_then(then);                                                     // restart last resume
     27        suspend { sout | "Then!"; }                                             // restart last resume
    3128        for () {
    3229                fn = fn1 + fn2;  fn2 = fn1;  fn1 = fn;                  // general case
    33                 suspend_then(then);                                             // restart last resume
     30                suspend { sout | "Then!"; }                                     // restart last resume
    3431        } // for
    3532}
  • tests/errors/.expect/completeType.txt

    rb7d6a36 r6a490b2  
    2727    void
    2828  )
    29   Environment:( _83_4_DT ) -> instance of struct A with body 0 (no widening)
     29  Environment: -> instance of struct A with body 0 (no widening)
    3030
    3131
     
    5050    void
    5151  )
    52   Environment:( _83_4_DT ) -> instance of struct B with body 1 (no widening)
     52  Environment: -> instance of struct B with body 1 (no widening)
    5353
    5454
     
    127127          void
    128128        )
    129         Environment:( _102_0_T ) -> instance of type T (not function type) (no widening)
     129        Environment: -> instance of type T (not function type) (no widening)
    130130
    131131      Could not satisfy assertion:
    132132?=?: pointer to function
    133133        ... with parameters
    134           reference to instance of type _102_0_T (not function type)
    135           instance of type _102_0_T (not function type)
     134          reference to instance of type _104_0_T (not function type)
     135          instance of type _104_0_T (not function type)
    136136        ... returning
    137           _retval__operator_assign: instance of type _102_0_T (not function type)
     137          _retval__operator_assign: instance of type _104_0_T (not function type)
    138138          ... with attributes:
    139139            Attribute with name: unused
  • tests/manipulatorsOutput1.cfa

    rb7d6a36 r6a490b2  
    77// Created On       : Sat Jun  8 18:04:11 2019
    88// Last Modified By : Peter A. Buhr
    9 // Last Modified On : Mon Jun 10 12:37:28 2019
    10 // Update Count     : 8
     9// Last Modified On : Fri May  1 11:51:44 2020
     10// Update Count     : 9
    1111//
    1212
     
    1717        signed char sc = -12;
    1818        printf( "%hhd %2hhd %5.2hhd %-5.2hhd %hho %#hho %hhx %#hhx %#8hhx %#8.10hhx %#8.3hhX %+-8.3hhd %08hhd\n", sc, sc, sc, sc, sc, sc, sc, sc, sc, sc, sc, sc, sc );
    19         sout | sc | wd(2,sc) | wd(5,2,sc) | left(wd(5,2,sc)) | nobase(oct(sc)) | oct(sc) | nobase(hex(sc)) | hex(sc) | wd(8,hex(sc)) | wd(8,10,hex(sc)) | upcase(wd(8,3,hex(sc))) | left(sign(upcase(wd(8,3,sc)))) | pad0(wd(8,sc));
     19        sout | sc | wd(2,sc) | wd(5,2,sc) | left(wd(5,2,sc)) | nobase(oct(sc)) | oct(sc) | nonl;
     20        sout | nobase(hex(sc)) | hex(sc) | wd(8,hex(sc)) | wd(8,10,hex(sc)) | upcase(wd(8,3,hex(sc))) | nonl;
     21        sout | left(sign(upcase(wd(8,3,sc)))) | pad0(wd(8,sc));
    2022
    2123        sout | "unsigned char";
    2224        unsigned char usc = 12;
    2325        printf( "%hhu %2hhu %5.2hhu %-5.2hhu %hho %#hho %hhx %#hhx %#8hhx %#8.10hhx %#8.3hhX %-8.3hhu %08hhu\n", usc, usc, usc, usc, usc, usc, usc, usc, usc, usc, usc, usc, usc );
    24         sout | usc | wd(2,usc) | wd(5,2,usc) | left(wd(5,2,usc)) | nobase(oct(usc)) | oct(usc) | nobase(hex(usc)) | hex(usc) | wd(8,hex(usc)) | wd(8,10,hex(usc)) | upcase(wd(8,3,hex(usc))) | left(upcase(wd(8,3,usc))) | pad0(wd(8,usc));
     26        sout | usc | wd(2,usc) | wd(5,2,usc) | left(wd(5,2,usc)) | nobase(oct(usc)) | oct(usc) | nonl;
     27        sout | nobase(hex(usc)) | hex(usc) | wd(8,hex(usc)) | wd(8,10,hex(usc)) | upcase(wd(8,3,hex(usc))) | nonl;
     28        sout | left(upcase(wd(8,3,usc))) | pad0(wd(8,usc));
    2529
    2630        sout | "signed short int";
    2731        signed short int si = -12;
    2832        printf( "%hd %2hd %5.2hd %-5.2hd %ho %#ho %hx %#hx %#8hx %#8.10hx %#8.3hX %+-8.3hd %08hd\n", si, si, si, si, si, si, si, si, si, si, si, si, si );
    29         sout | si | wd(2,si) | wd(5,2,si) | left(wd(5,2,si)) | nobase(oct(si)) | oct(si) | nobase(hex(si)) | hex(si) | wd(8,hex(si)) | wd(8,10,hex(si)) | upcase(wd(8,3,hex(si))) | left(sign(upcase(wd(8,3,si)))) | pad0(wd(8,si));
     33        sout | si | wd(2,si) | wd(5,2,si) | left(wd(5,2,si)) | nobase(oct(si)) | oct(si) | nonl;
     34        sout | nobase(hex(si)) | hex(si) | wd(8,hex(si)) | wd(8,10,hex(si)) | upcase(wd(8,3,hex(si))) | nonl;
     35        sout | left(sign(upcase(wd(8,3,si)))) | pad0(wd(8,si));
    3036
    3137        sout | "unsigned short int";
    3238        unsigned short int usi = 12;
    3339        printf( "%hu %2hu %5.2hu %-5.2hu %ho %#ho %hx %#hx %#8hx %#8.10hx %#8.3hX %-8.3hu %08hu\n", usi, usi, usi, usi, usi, usi, usi, usi, usi, usi, usi, usi, usi );
    34         sout | usi | wd(2,usi) | wd(5,2,usi) | left(wd(5,2,usi)) | nobase(oct(usi)) | oct(usi) | nobase(hex(usi)) | hex(usi) | wd(8,hex(usi)) | wd(8,10,hex(usi)) | upcase(wd(8,3,hex(usi))) | left(upcase(wd(8,3,usi))) | pad0(wd(8,usi));
     40        sout | usi | wd(2,usi) | wd(5,2,usi) | left(wd(5,2,usi)) | nobase(oct(usi)) | oct(usi) | nonl;
     41        sout | nobase(hex(usi)) | hex(usi) | wd(8,hex(usi)) | wd(8,10,hex(usi)) | upcase(wd(8,3,hex(usi))) | nonl;
     42        sout | left(upcase(wd(8,3,usi))) | pad0(wd(8,usi));
    3543
    3644        sout | "signed int";
    3745        signed int i = -12;
    3846        printf( "%d %2d %5.2d %-5.2d %o %#o %x %#x %#8x %#8.10x %#8.3X %+-8.3d %08d\n", i, i, i, i, i, i, i, i, i, i, i, i, i );
    39         sout | i | wd(2,i) | wd(5,2,i) | left(wd(5,2,i)) | nobase(oct(i)) | oct(i) | nobase(hex(i)) | hex(i) | wd(8,hex(i)) | wd(8,10,hex(i)) | upcase(wd(8,3,hex(i))) | left(sign(upcase(wd(8,3,i)))) | pad0(wd(8,i));
     47        sout | i | wd(2,i) | wd(5,2,i) | left(wd(5,2,i)) | nobase(oct(i)) | oct(i) | nonl;
     48        sout | nobase(hex(i)) | hex(i) | wd(8,hex(i)) | wd(8,10,hex(i)) | upcase(wd(8,3,hex(i))) | nonl;
     49        sout | left(sign(upcase(wd(8,3,i)))) | pad0(wd(8,i));
    4050
    4151        sout | "unsigned int";
    4252        unsigned int ui = 12;
    4353        printf( "%u %2u %5.2u %-5.2u %o %#o %x %#x %#8x %#8.10x %#8.3X %-8.3u %08u\n", ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui );
    44         sout | ui | wd(2,ui) | wd(5,2,ui) | left(wd(5,2,ui)) | nobase(oct(ui)) | oct(ui) | nobase(hex(ui)) | hex(ui) | wd(8,hex(ui)) | wd(8,10,hex(ui)) | upcase(wd(8,3,hex(ui))) | left(upcase(wd(8,3,ui))) | pad0(wd(8,ui));
     54        sout | ui | wd(2,ui) | wd(5,2,ui) | left(wd(5,2,ui)) | nobase(oct(ui)) | oct(ui) | nonl;
     55        sout | nobase(hex(ui)) | hex(ui) | wd(8,hex(ui)) | wd(8,10,hex(ui)) | upcase(wd(8,3,hex(ui))) | nonl;
     56        sout | left(upcase(wd(8,3,ui))) | pad0(wd(8,ui));
    4557
    4658        sout | "signed long long int";
    4759        signed long long int lli = -12;
    4860        printf( "%lld %2lld %5.2lld %-5.2lld %llo %#llo %llx %#llx %#8llx %#8.10llx %#8.3llX %+-8.3lld %08lld\n", lli, lli, lli, lli, lli, lli, lli, lli, lli, lli, lli, lli, lli );
    49         sout | lli | wd(2,lli) | wd(5,2,lli) | left(wd(5,2,lli)) | nobase(oct(lli)) | oct(lli) | nobase(hex(lli)) | hex(lli) | wd(8,hex(lli)) | wd(8,10,hex(lli)) | upcase(wd(8,3,hex(lli))) | left(sign(upcase(wd(8,3,lli)))) | pad0(wd(8,lli));
     61        sout | lli | wd(2,lli) | wd(5,2,lli) | left(wd(5,2,lli)) | nobase(oct(lli)) | oct(lli) | nonl;
     62        sout | nobase(hex(lli)) | hex(lli) | wd(8,hex(lli)) | wd(8,10,hex(lli)) | upcase(wd(8,3,hex(lli))) | nonl;
     63        sout | left(sign(upcase(wd(8,3,lli)))) | pad0(wd(8,lli));
    5064
    5165        sout | "unsigned long long int";
    5266        unsigned long long int ulli = 12;
    5367        printf( "%llu %2llu %5.2llu %-5.2llu %llo %#llo %llx %#llx %#8llx %#8.10llx %#8.3llX %-8.3llu %08llu\n", ulli, ulli, ulli, ulli, ulli, ulli, ulli, ulli, ulli, ulli, ulli, ulli, ulli );
    54         sout | ulli | wd(2,ulli) | wd(5,2,ulli) | left(wd(5,2,ulli)) | nobase(oct(ulli)) | oct(ulli) | nobase(hex(ulli)) | hex(ulli) | wd(8,hex(ulli)) | wd(8,10,hex(ulli)) | upcase(wd(8,3,hex(ulli))) | left(upcase(wd(8,3,ulli))) | pad0(wd(8,ulli));
     68        sout | ulli | wd(2,ulli) | wd(5,2,ulli) | left(wd(5,2,ulli)) | nobase(oct(ulli)) | oct(ulli) | nonl;
     69        sout | nobase(hex(ulli)) | hex(ulli) | wd(8,hex(ulli)) | wd(8,10,hex(ulli)) | upcase(wd(8,3,hex(ulli))) | nonl;
     70        sout | left(upcase(wd(8,3,ulli))) | pad0(wd(8,ulli));
    5571
    5672        sout | nl | "binary integral";
    57         sout | bin(0) | bin(13) | upcase(bin(13)) | nobase(bin(13)) | left(wd(8,bin(13))) | wd(8,bin(13)) | pad0(left(wd(8,bin(13)))) | pad0(wd(8,bin(13))) | pad0(wd(8,10,bin(13))) | pad0(wd(8,6,bin(13)));
     73        sout | bin(0) | bin(13) | upcase(bin(13)) | nobase(bin(13)) | left(wd(8,bin(13))) | wd(8,bin(13)) | nonl;
     74        sout | pad0(left(wd(8,bin(13)))) | pad0(wd(8,bin(13))) | pad0(wd(8,10,bin(13))) | pad0(wd(8,6,bin(13)));
    5875
    5976
     
    6279        printf( "%g  %8g %#8g %g %8g %8.0g %#8.0g %8.2g %#8.2g %-8.2g %-8.2g %-#8.2g %-+8.2g %-+#8.2g %08.2g %8.2E %8.2a %#8.2A %#8.2e\n",
    6380                    0.0,3.0F,3.0F, f,  f,    f,     f,    f,     f,  3.0F,      f,      f,      f,       f,     f,    f,    f,     f,     f );
    64         sout | 0.0 | wd(8, 3.0F) | nodp(wd(8, 3.0F)) | f | wd(8, f) | ws(8,0, f) | nodp(ws(8,0, f)) | ws(8,2, f) | nodp(ws(8,2, f)) | left(ws(8,2, 3.0F)) | left(ws(8,2, f)) | left(nodp(ws(8,2, f))) | left(sign(ws(8,2, f))) | left(sign(nodp(ws(8,2, f)))) | pad0(ws(8,2, f)) | upcase(wd(8,2, sci(f))) | wd(8,2, hex(f)) | upcase(wd(8,2, hex(f))) | nodp(wd(8,2, sci(f)));
     81        sout | 0.0 | wd(8, 3.0F) | nodp(wd(8, 3.0F)) | f | wd(8, f) | ws(8,0, f) | nodp(ws(8,0, f)) | ws(8,2, f) | nodp(ws(8,2, f)) | nonl;
     82        sout | left(ws(8,2, 3.0F)) | left(ws(8,2, f)) | left(nodp(ws(8,2, f))) | left(sign(ws(8,2, f))) | left(sign(nodp(ws(8,2, f)))) | nonl;
     83        sout | pad0(ws(8,2, f)) | upcase(wd(8,2, sci(f))) | wd(8,2, hex(f)) | upcase(wd(8,2, hex(f))) | nodp(wd(8,2, sci(f)));
    6584
    6685        sout | "double";
     
    6887        printf( "%g  %#8f %g %8f %#8.0f %8.0f %8.2f %-8.2f %-+#8.2f %08.2F %8.2E %8.2a %8.2A %8.2e\n",
    6988                        0.0,  3.0, d,  d,     d,    d,    d,     d,       d,     d,    d,    d,    d,    d );
    70         sout | 0.0 | wd(8, 3.0) | d | wd(8, d) | nodp(wd(8,0, d)) | wd(8,0, d) | wd(8,2, d) | left(wd(8,2, d)) | left(sign(wd(8,2, d))) | pad0(upcase(wd(8,2, d))) | upcase(wd(8,2, sci(d))) | wd(8,2, hex(d)) | upcase(wd(8,2, hex(d))) | wd(8,2, sci(d));
     89        sout | 0.0 | wd(8, 3.0) | d | wd(8, d) | nodp(wd(8,0, d)) | wd(8,0, d) | wd(8,2, d) | nonl;
     90        sout | left(wd(8,2, d)) | left(sign(wd(8,2, d))) | pad0(upcase(wd(8,2, d))) | upcase(wd(8,2, sci(d))) | wd(8,2, hex(d)) | upcase(wd(8,2, hex(d))) | wd(8,2, sci(d));
    7191
    7292        sout | "long double";
     
    7494        printf( "%Lg  %#8Lf %Lg %8Lf %#8.0Lf %8.0Lf %8.2Lf %-8.2Lf %-+#8.2Lf %08.2LF %8.2LE %8.2La %8.2LA %8.2Le\n",
    7595                        0.0L,  3.0L, ld,  ld,     ld,    ld,    ld,     ld,       ld,     ld,    ld,    ld,    ld,    ld );
    76         sout | 0.0L | wd(8, 3.0L) | ld | wd(8, ld) | nodp(wd(8,0, ld)) | wd(8,0, ld) | wd(8,2, ld) | left(wd(8,2, ld)) | left(sign(wd(8,2, ld))) | pad0(upcase(wd(8,2, ld))) | upcase(wd(8,2, sci(ld))) | wd(8,2, hex(ld)) | upcase(wd(8,2, hex(ld))) | wd(8,2, sci(ld));
     96        sout | 0.0L | wd(8, 3.0L) | ld | wd(8, ld) | nodp(wd(8,0, ld)) | wd(8,0, ld) | wd(8,2, ld) | nonl;
     97        sout | left(wd(8,2, ld)) | left(sign(wd(8,2, ld))) | pad0(upcase(wd(8,2, ld))) | upcase(wd(8,2, sci(ld))) | wd(8,2, hex(ld)) | upcase(wd(8,2, hex(ld))) | wd(8,2, sci(ld));
    7798
    7899
     
    80101        char c = 'a';
    81102        printf( "%c %2c %5c %-5c %hho %#hho %hhx %#hhx %#8hhx %#8hhX %-8c %8c\n", c, c, c, c, c, c, c, c, c, c, c, c );
    82         sout | c | ' ' | wd(2,c) | wd(5,c) | left(wd(5,c)) | nobase(oct(c)) | oct(c) | nobase(hex(c)) | hex(c) | wd(8,hex(c)) | upcase(wd(8,hex(c))) | left(wd(8,c)) | wd(8,c);
     103        sout | c | ' ' | wd(2,c) | wd(5,c) | left(wd(5,c)) | nobase(oct(c)) | oct(c) | nonl;
     104        sout | nobase(hex(c)) | hex(c) | wd(8,hex(c)) | upcase(wd(8,hex(c))) | left(wd(8,c)) | wd(8,c);
    83105
    84106        sout | nl | "string";
  • tests/pybin/settings.py

    rb7d6a36 r6a490b2  
    2323class Architecture:
    2424        KnownArchitectures = {
    25                 'x64'           : 'x64',
    26                 'x86-64'        : 'x64',
    27                 'x86_64'        : 'x64',
    28                 'x86'           : 'x86',
    29                 'aarch64'       : 'arm',
    30                 'i386'          : 'x86',
    31                 'i486'          : 'x86',
    32                 'i686'          : 'x86',
    33                 'Intel 80386'   : 'x86',
    34                 'arm'           : 'arm',
    35                 'ARM'           : 'arm',
     25                'x64'         : 'x64',
     26                'x86-64'      : 'x64',
     27                'x86_64'      : 'x64',
     28                'x86'         : 'x86',
     29                'aarch64'     : 'arm',
     30                'i386'        : 'x86',
     31                'i486'        : 'x86',
     32                'i686'        : 'x86',
     33                'Intel 80386' : 'x86',
     34                'arm'         : 'arm',
     35                'ARM'         : 'arm',
    3636        }
    3737
     
    7777                        print("updated to %s" % self.target)
    7878
    79         def match(self, arch):
     79        def filter(self, tests):
     80                return [test for test in tests if not test.arch or self.target == test.arch]
    8081                return True if not arch else self.target == arch
    8182
    82         @classmethod
    83         def make_canonical(_, arch):
     83        @staticmethod
     84        def make_canonical(arch):
    8485                return Architecture.KnownArchitectures[arch]
    8586
     
    104105                self.total  = Timeouts.check(tg)
    105106
    106         @classmethod
    107         def check(_, value):
     107        @staticmethod
     108        def check(value):
    108109                if value < 1:
    109110                        print("Timeouts must be at least 1 second", file=sys.stderr)
     
    113114
    114115def init( options ):
     116        global all_arch
     117        global all_debug
     118        global all_install
    115119        global arch
    116120        global archive
     121        global continue_
    117122        global debug
    118         global distcc
    119123        global dry_run
    120124        global generating
     
    123127        global output_width
    124128        global timeout
     129        global timeout2gdb
    125130
    126         arch         = Architecture(options.arch)
     131        all_arch     = [Architecture(o) for o in list(dict.fromkeys(options.arch   ))] if options.arch else [Architecture(None)]
     132        all_debug    = [Debug(o)        for o in list(dict.fromkeys(options.debug  ))]
     133        all_install  = [Install(o)      for o in list(dict.fromkeys(options.install))]
    127134        archive      = os.path.abspath(os.path.join(original_path, options.archive_errors)) if options.archive_errors else None
    128         debug        = Debug(options.debug)
     135        continue_    = options.continue_
    129136        dry_run      = options.dry_run # must be called before tools.config_hash()
    130         distcc       = "DISTCC_CFA_PATH=~/.cfadistcc/%s/cfa" % tools.config_hash()
    131137        generating   = options.regenerate_expected
    132         install      = Install(options.install)
    133138        make         = ['make']
    134139        output_width = 24
    135140        timeout      = Timeouts(options.timeout, options.global_timeout)
     141        timeout2gdb  = options.timeout_with_gdb
    136142
    137143        # if we distribute, distcc errors will fail tests, use log file for distcc
     
    146152
    147153def validate():
     154        """Validate the current configuration and update globals"""
     155
     156        global distcc
     157        distcc       = "DISTCC_CFA_PATH=~/.cfadistcc/%s/cfa" % tools.config_hash()
    148158        errf = os.path.join(BUILDDIR, ".validate.err")
    149159        make_ret, out = tools.make( ".validate", error_file = errf, output_file=subprocess.DEVNULL, error=subprocess.DEVNULL )
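
The settings.py changes above replace the per-test Architecture.match() with a filter() that keeps cross-platform tests plus the ones matching the target architecture, and turn make_canonical into a plain @staticmethod with no placeholder parameter. A minimal standalone sketch of that pattern, assuming a simplified Test stand-in and sample data rather than the real pybin classes:

    # Sketch of the Architecture.filter()/@staticmethod pattern; Test and the
    # sample data are hypothetical stand-ins, not the real pybin objects.
    from dataclasses import dataclass

    @dataclass
    class Test:
        name: str
        arch: str = ''                    # '' means the test is cross-platform

    class Architecture:
        KnownArchitectures = { 'x86_64' : 'x64', 'aarch64' : 'arm', 'i686' : 'x86' }

        def __init__(self, target):
            self.target = Architecture.make_canonical(target)

        @staticmethod                     # no unused cls/_ parameter needed
        def make_canonical(arch):
            return Architecture.KnownArchitectures[arch]

        def filter(self, tests):
            # keep tests with no declared architecture plus those matching the target
            return [t for t in tests if not t.arch or t.arch == self.target]

    if __name__ == '__main__':
        arch  = Architecture('x86_64')
        tests = [Test('alloc'), Test('gmp', arch='x64'), Test('neon', arch='arm')]
        print([t.name for t in arch.filter(tests)])   # prints ['alloc', 'gmp']
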
  • tests/pybin/test_run.py

    rb7d6a36 r6a490b2  
    4040                return os.path.normpath( os.path.join(settings.BUILDDIR, self.path, self.name) )
    4141
    42         @classmethod
    43         def valid_name(_, name):
     42        @staticmethod
     43        def valid_name(name):
    4444                return not name.endswith( ('.c', '.cc', '.cpp', '.cfa') )
    4545
    46         @classmethod
    47         def from_target(_, target):
     46        @staticmethod
     47        def new_target(target, arch):
    4848                test = Test()
    4949                test.name = os.path.basename(target)
    5050                test.path = os.path.relpath (os.path.dirname(target), settings.SRCDIR)
    51                 test.arch = settings.arch.target if settings.arch.cross_compile else ''
     51                test.arch = arch.target if arch else ''
    5252                return test
    5353
     
    7272                return text
    7373
    74         @classmethod
    75         def fmtDur( cls, duration ):
     74        @staticmethod
     75        def fmtDur( duration ):
    7676                if duration :
    7777                        hours, rem = divmod(duration, 3600)
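
The test_run.py hunk above converts valid_name, new_target and fmtDur into @staticmethods; fmtDur starts from divmod(duration, 3600) to split off whole hours. A small sketch of a duration formatter along those lines; the minutes/seconds handling and the exact format string are illustrative assumptions, not copied from the file:

    # Sketch of a divmod-based duration formatter in the spirit of Test.fmtDur.
    def fmt_dur(duration):
        if not duration:
            return ''
        hours, rem       = divmod(duration, 3600)   # whole hours, as in the hunk above
        minutes, seconds = divmod(rem, 60)          # assumed continuation
        return '%d:%02d:%05.2f' % (hours, minutes, seconds)

    if __name__ == '__main__':
        print(fmt_dur(3723.5))   # 1:02:03.50
        print(fmt_dur(0))        # empty string when there is no duration
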
  • tests/pybin/tools.py

    rb7d6a36 r6a490b2  
    7575                                        return proc.returncode, out.decode("utf-8") if out else None
    7676                                except subprocess.TimeoutExpired:
    77                                         proc.send_signal(signal.SIGABRT)
    78                                         proc.communicate()
    79                                         return 124, str(None)
     77                                        if settings.timeout2gdb:
     78                                                print("Process {} timeout".format(proc.pid))
     79                                                proc.communicate()
     80                                                return 124, str(None)
     81                                        else:
     82                                                proc.send_signal(signal.SIGABRT)
     83                                                proc.communicate()
     84                                                return 124, str(None)
    8085
    8186        except Exception as ex:
     
    322327        raise argparse.ArgumentTypeError(msg)
    323328
     329# Convert a function that converts a string to one that converts comma separated string.
     330def comma_separated(elements):
     331    return lambda string: [elements(part) for part in string.split(',')]
     332
    324333def fancy_print(text):
    325334        column = which('column')
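
The comma_separated() helper added to tools.py above wraps a single-value converter so one command-line flag can accept several comma-separated values; test.py then uses it as the argparse type for --debug, --install and --arch. A self-contained sketch of that wiring, where yes_no is a simplified stand-in for the real converter:

    # Sketch of the comma_separated() argparse converter; yes_no is a stand-in.
    import argparse

    def yes_no(string):
        if string in ('yes', 'y', 'true'):  return True
        if string in ('no', 'n', 'false'):  return False
        raise argparse.ArgumentTypeError('%r is not yes/no' % string)

    def comma_separated(elements):
        # turn a converter for one value into one accepting "a,b,c"
        return lambda string: [elements(part) for part in string.split(',')]

    if __name__ == '__main__':
        parser = argparse.ArgumentParser()
        parser.add_argument('--arch',  type=comma_separated(str),    default=None)
        parser.add_argument('--debug', type=comma_separated(yes_no), default='yes')
        opts = parser.parse_args(['--arch', 'x64,arm', '--debug', 'yes,no'])
        print(opts.arch)    # ['x64', 'arm']
        print(opts.debug)   # [True, False]
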
  • tests/test.py

    rb7d6a36 r6a490b2  
    66
    77import argparse
     8import itertools
    89import re
    910import sys
     
    2930                        test.path = match.group(1)
    3031                        test.arch = match.group(3)[1:] if match.group(3) else None
    31                         if settings.arch.match(test.arch):
    32                                 expected.append(test)
     32                        expected.append(test)
    3333
    3434        path_walk( match_test )
     
    5353                ]
    5454
     55        # sort the test alphabetically for convenience
     56        test_list.sort(key=lambda t: ('~' if t.arch else '') + t.target() + (t.arch if t.arch else ''))
     57
    5558        return test_list
    5659
     
    6467                for testname in options.tests :
    6568                        testname = canonical_path( testname )
     69                        # first check if this is a valid name to regenerate
    6670                        if Test.valid_name(testname):
     71                                # this is a valid name, let's check if it already exists
    6772                                found = [test for test in all_tests if canonical_path( test.target() ) == testname]
    68                                 tests.append( found[0] if len(found) == 1 else Test.from_target(testname) )
     73                                if not found:
     74                                        # it's a new name, create it according to the name and specified architecture
     75                                        if options.arch:
     76                                                # user specified one or multiple architectures, assume the tests will have architecture specific results
     77                                                tests.extend( [Test.new_target(testname, arch) for arch in settings.all_arch] )
     78                                        else:
     79                                                # user didn't specify an architecture, just create a cross platform test
     80                                                tests.append( Test.new_target( testname, None ) )
     81                                elif len(found) == 1 and not found[0].arch:
     82                                        # we found a single test, the user better be wanting to create a cross platform test
     83                                        if options.arch:
     84                                                print('ERROR: "%s", test has no specified architecture but --arch was specified, ignoring it' % testname, file=sys.stderr)
     85                                        else:
     86                                                tests.append( found[0] )
     87                                else:
     88                                        # this test is already cross platform, just add a test for each platform the user asked
     89                                        tests.extend( [Test.new_target(testname, arch) for arch in settings.all_arch] )
     90
      91                                        # print a warning if the user didn't ask for a specific architecture

     92                                        if not options.arch:
     93                                                print('WARNING: "%s", test has architecture specific expected files but --arch was not specified, regenerating only for current host' % testname, file=sys.stderr)
     94
    6995                        else :
    7096                                print('ERROR: "%s", tests are not allowed to end with a C/C++/CFA extension, ignoring it' % testname, file=sys.stderr)
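
The regeneration logic above chooses, for each requested test name, between one cross-platform Test and one Test per architecture in settings.all_arch. A compressed sketch of that decision as a pure function; tests are reduced to (name, arch) pairs and the error/warning prints are omitted:

    # Compressed sketch of the per-architecture regeneration choice above;
    # tests are reduced to (name, arch) tuples, not the real pybin Test objects.
    def targets_for(testname, found, requested_archs, all_arch):
        if not found:
            # brand-new test: one entry per requested architecture, or one generic entry
            return [(testname, a) for a in all_arch] if requested_archs else [(testname, None)]
        if len(found) == 1 and not found[0][1]:
            # existing cross-platform test: only regenerated as-is when --arch is absent
            return [] if requested_archs else [found[0]]
        # already architecture-specific: regenerate one entry per architecture
        return [(testname, a) for a in all_arch]

    if __name__ == '__main__':
        print(targets_for('io/new', [],                ['x64', 'arm'], ['x64', 'arm']))
        print(targets_for('alloc',  [('alloc', None)], [],             ['x64']))
        print(targets_for('gmp',    [('gmp', 'x64')],  [],             ['x64', 'arm']))
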
     
    76102
    77103                        if test :
    78                                 tests.append( test[0] )
     104                                tests.extend( test )
    79105                        else :
    80106                                print('ERROR: No expected file for test %s, ignoring it' % testname, file=sys.stderr)
     
    86112        # create a parser with the arguments for the tests script
    87113        parser = argparse.ArgumentParser(description='Script which runs cforall tests')
    88         parser.add_argument('--debug', help='Run all tests in debug or release', type=yes_no, default='yes')
    89         parser.add_argument('--install', help='Run all tests based on installed binaries or tree binaries', type=yes_no, default='no')
    90         parser.add_argument('--arch', help='Test for specific architecture', type=str, default='')
     114        parser.add_argument('--debug', help='Run all tests in debug or release', type=comma_separated(yes_no), default='yes')
     115        parser.add_argument('--install', help='Run all tests based on installed binaries or tree binaries', type=comma_separated(yes_no), default='no')
     116        parser.add_argument('--arch', help='Test for specific architecture', type=comma_separated(str), default=None)
     117        parser.add_argument('--continue', help='When multiple specifications are passed (debug/install/arch), sets whether or not to continue if the last specification failed', type=yes_no, default='yes', dest='continue_')
    91118        parser.add_argument('--timeout', help='Maximum duration in seconds after a single test is considered to have timed out', type=int, default=60)
    92119        parser.add_argument('--global-timeout', help='Maximum cumulative duration in seconds after the ALL tests are considered to have timed out', type=int, default=7200)
     120        parser.add_argument('--timeout-with-gdb', help='Instead of killing the command when it times out, orphan it and print process id to allow gdb to attach', type=yes_no, default="no")
    93121        parser.add_argument('--dry-run', help='Don\'t run the tests, only output the commands', action='store_true')
    94122        parser.add_argument('--list', help='List all test available', action='store_true')
     
    178206
    179207                else:
    180                         with open (out_file, "r") as myfile:
    181                                 error = myfile.read()
     208                        if os.stat(out_file).st_size < 1048576:
     209                                with open (out_file, "r") as myfile:
     210                                        error = myfile.read()
     211                        else:
     212                                error = "Output log can't be read, file is bigger than 1MB, see {} for actual error\n".format(out_file)
    182213
    183214                        ret, info = core_info(exe_file)
     
    215246                return False, ""
    216247        except Exception as ex:
    217                 print("Unexpected error in worker thread: %s" % ex, file=sys.stderr)
     248                print("Unexpected error in worker thread running {}: {}".format(t.target(), ex), file=sys.stderr)
    218249                sys.stderr.flush()
    219250                return False, ""
     
    278309        make('clean', output_file=subprocess.DEVNULL, error=subprocess.DEVNULL)
    279310
    280         return 1 if failed else 0
     311        return failed
    281312
    282313
     
    292323        settings.init( options )
    293324
    294         # fetch the liest of all valid tests
    295         all_tests = list_tests( options.include, options.exclude )
    296 
    297 
    298         # if user wants all tests than no other treatement of the test list is required
    299         if options.all or options.list or options.list_comp or options.include :
    300                 tests = all_tests
    301 
    302         #otherwise we need to validate that the test list that was entered is valid
    303         else :
    304                 tests = valid_tests( options )
    305 
    306         # make sure we have at least some test to run
    307         if not tests :
    308                 print('ERROR: No valid test to run', file=sys.stderr)
    309                 sys.exit(1)
    310 
    311 
    312         # sort the test alphabetically for convenience
    313         tests.sort(key=lambda t: (t.arch if t.arch else '') + t.target())
    314 
    315325        # users may want to simply list the tests
    316326        if options.list_comp :
    317                 print("-h --help --debug --dry-run --list --arch --all --regenerate-expected --archive-errors --install --timeout --global-timeout -j --jobs ", end='')
     327                # fetch the list of all valid tests
     328                tests = list_tests( None, None )
     329
     330                # print the possible options
     331                print("-h --help --debug --dry-run --list --arch --all --regenerate-expected --archive-errors --install --timeout --global-timeout --timeout-with-gdb -j --jobs -I --include -E --exclude --continue ", end='')
    318332                print(" ".join(map(lambda t: "%s" % (t.target()), tests)))
    319333
    320334        elif options.list :
    321                 print("Listing for %s:%s"% (settings.arch.string, settings.debug.string))
     335                # fetch the list of all valid tests
     336                tests = list_tests( options.include, options.exclude )
     337
     338                # print the available tests
    322339                fancy_print("\n".join(map(lambda t: t.toString(), tests)))
    323340
    324341        else :
    325                 # check the build configuration works
     342                # fetch the list of all valid tests
     343                all_tests = list_tests( options.include, options.exclude )
     344
     345                # if the user wants all tests then no other treatment of the test list is required
     346                if options.all or options.include :
     347                        tests = all_tests
     348
     349                # otherwise we need to validate the test list that was entered
     350                else :
     351                        tests = valid_tests( options )
     352
     353                # make sure we have at least one test to run
     354                if not tests :
     355                        print('ERROR: No valid test to run', file=sys.stderr)
     356                        sys.exit(1)
     357
     358                # prepare state that is invariant across configurations
    326359                settings.prep_output(tests)
    327                 settings.validate()
    328 
    329                 options.jobs, forceJobs = job_count( options, tests )
    330                 settings.update_make_cmd(forceJobs, options.jobs)
    331 
    332                 print('%s %i tests on %i cores (%s:%s)' % (
    333                         'Regenerating' if settings.generating else 'Running',
    334                         len(tests),
    335                         options.jobs,
    336                         settings.arch.string,
    337                         settings.debug.string
    338                 ))
    339 
    340                 # otherwise run all tests and make sure to return the correct error code
    341                 sys.exit( run_tests(tests, options.jobs) )
     360                failed = 0
     361
     362                # for each build configuration, run the tests
     363                for arch, debug, install in itertools.product(settings.all_arch, settings.all_debug, settings.all_install):
     364                        settings.arch    = arch
     365                        settings.debug   = debug
     366                        settings.install = install
     367
     368                        # filter out tests meant for a different architecture
     369                        # (the test list is the same across debug/install)
     370                        local_tests = settings.arch.filter( tests )
     371                        options.jobs, forceJobs = job_count( options, local_tests )
     372                        settings.update_make_cmd(forceJobs, options.jobs)
     373
     374                        # check the build configuration works
     375                        settings.validate()
     376
     377                        # print configuration
     378                        print('%s %i tests on %i cores (%s:%s)' % (
     379                                'Regenerating' if settings.generating else 'Running',
     380                                len(local_tests),
     381                                options.jobs,
     382                                settings.arch.string,
     383                                settings.debug.string
     384                        ))
     385
     386                        # run all tests and make sure to return the correct error code
     387                        # a failure in any configuration must survive to the final exit code
     388                        if run_tests(local_tests, options.jobs):
     389                                failed = 1
     390                                if not settings.continue_:
     391                                        break
     392
     393
     394                sys.exit( failed )
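
Aside: the structure of the new driver loop, reduced to a runnable sketch. itertools.product enumerates every arch/debug/install combination, and any failure must be remembered so the process exit code stays non-zero even when a later configuration passes (run_one here is a hypothetical stand-in for the script's run_tests call):

    import itertools

    def run_matrix(run_one, all_arch, all_debug, all_install, continue_on_failure=True):
            # run_one(arch, debug, install) returns a truthy value on failure
            failed = 0
            for arch, debug, install in itertools.product(all_arch, all_debug, all_install):
                    if run_one(arch, debug, install):
                            failed = 1
                            if not continue_on_failure:
                                    break
            return failed    # suitable for sys.exit(...)

For example, run_matrix(lambda a, d, i: 0, ['x64', 'x86'], [True, False], [False]) returns 0 after visiting all four arch/debug combinations.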
  • tests/vector.cfa

    rb7d6a36 r6a490b2  
    1414//
    1515
     16#include <vector.hfa>
    1617#include <fstream.hfa>
    17 #include <vector.hfa>
    1818
    1919#undef assert
     
    2828int main() {
    2929        vector( int ) iv;
     30
     31        assert( ((uintptr_t)&iv.storage.storage ) == (((uintptr_t)&iv)) );
     32        assert( ((uintptr_t)&iv.storage.capacity) == (((uintptr_t)&iv) + sizeof(void *)) );
     33        assert( ((uintptr_t)&iv.size            ) == (((uintptr_t)&iv) + sizeof(void *) + sizeof(size_t)) );
    3034
    3135        assert( empty( &iv ) );
  • tools/build/push2dist.sh

    rb7d6a36 r6a490b2  
    1919# echo "Copying to machines : ${hosts} (hash=${hash})"
    2020
    21 files="../../../driver/cfa ../../../driver/cfa-cpp ../../../driver/cc1 ../../../driver/as $(find . -name '*.c*' | tr '\n' ' ')"
     21files="../../../driver/cfa ../../../driver/cfa-cpp ../../../driver/cc1 ../../../driver/as defines.hfa $(find . -name '*.c*' | tr '\n' ' ')"
    2222# echo "Files ${files}"
    2323
  • tools/cfa.nanorc

    rb7d6a36 r6a490b2  
    1414
    1515# Declarations
    16 color brightgreen "\<(struct|union|typedef|trait|coroutine|monitor|thread)\>"
    17 color brightgreen "\<(with)\>"
     16color brightgreen "\<(struct|union|typedef|trait|coroutine|generator)\>"
     17color brightgreen "\<(monitor|thread|with)\>"
    1818
    1919# Control Flow Structures
    2020color brightyellow "\<(if|else|while|do|for|switch|choose|case|default)\>"
    21 color brightyellow "\<(disable|enable|waitfor|when|timeout)\>"
     21color brightyellow "\<(disable|enable|waitfor|when|timeout|suspend)\>"
    2222color brightyellow "\<(try|catch(Resume)?|finally)\>"
    2323
     
    2626
    2727# Escaped Keywords, now Identifiers.
    28 color white "`\w+`"
     28color white "``\w+"
    2929
    3030# Operator Names
     
    3737## Update/Redistribute
    3838# GCC builtins
    39 color cyan "__attribute__[[:space:]]*\(\([^()]*(\([^()]*\)[^()]*)*\)\)"
     39color cyan "__attribute__[[:space:]]*\(\(([^)]|[^)]\))*\)\)"
    4040##color cyan "__(aligned|asm|builtin|hidden|inline|packed|restrict|section|typeof|weak)__"
    4141
  • tools/vscode/uwaterloo.cforall-0.1.0/package.json

    rb7d6a36 r6a490b2  
    22        "name": "cforall",
    33        "version": "0.1.0",
    4         "displayName": "Cforall Language Support",
     4        "displayName": "Cāˆ€ (C-for-all) Language Support",
    55        "description": "Cforall - colorizer, grammar and snippets.",
    66        "publisher": "uwaterloo",
     
    99                "vscode": "^1.5.0"
    1010        },
    11         "icon": "images/icon.svg",
     11        "icon": "images/icon.png",
    1212        "categories": [
    13                 "Languages",
     13                "Programming Languages",
    1414                "Linters",
    1515                "Other"
    1616        ],
     17        "activationEvents": [
     18                "onLanguage:cforall"
     19        ],
     20        "main": "./client/main.js",
    1721        "contributes": {
    1822                "languages": [
     
    2125                                "aliases": [
    2226                                        "Cāˆ€",
     27                                        "CForAll",
    2328                                        "Cforall",
    24                                         "CForAll",
    2529                                        "cforall"
    2630                                ],
    2731                                "extensions": [
    28                                         ".cf"
     32                                        ".cfa",
     33                                        ".hfa",
     34                                        ".ifa"
    2935                                ],
    3036                                "configuration": "./cforall.configuration.json"
     
    3440                        {
    3541                                "language": "cforall",
    36                                 "scopeName": "source.cf",
    37                                 "path": "./syntaxes/cfa.tmLanguage"
     42                                "scopeName": "source.cfa",
     43                                "path": "./syntaxes/cfa.tmLanguage.json"
    3844                        }
    39                 ]
     45                ],
     46                "configuration": {
     47                        "type": "object",
     48                        "title": "Example configuration",
     49                        "properties": {
     50                                "cforall.maxNumberOfProblems": {
     51                                        "scope": "resource",
     52                                        "type": "number",
     53                                        "default": 100,
     54                                        "description": "Controls the maximum number of problems produced by the server."
     55                                },
     56                                "cforall.trace.server": {
     57                                        "scope": "window",
     58                                        "type": "string",
     59                                        "enum": [
     60                                                "off",
     61                                                "messages",
     62                                                "verbose"
     63                                        ],
     64                                        "default": "off",
     65                                        "description": "Traces the communication between VS Code and the language server."
     66                                }
     67                        }
     68                }
     69        },
     70        "dependencies": {
     71                "vscode-languageclient": "^4.1.4"
     72        },
     73        "devDependencies": {
     74                "vscode-languageclient": "^4.1.4"
    4075        }
    4176}