Changes in / [6a490b2:b7d6a36]


Ignore:
Files:
11 added
96 deleted
137 edited

Legend:

Unmodified
Added
Removed
  • Jenkinsfile

    r6a490b2 rb7d6a36  
    126126                        }
    127127
    128                         sh "${SrcDir}/configure CXX=${Settings.Compiler.CXX} CC=${Settings.Compiler.CC} ${Settings.Architecture.flags} AR=gcc-ar RANLIB=gcc-ranlib ${targets} --quiet --prefix=${BuildDir}"
     128                        sh "${SrcDir}/configure CXX=${Settings.Compiler.CXX} CC=${Settings.Compiler.CC} ${Settings.Architecture.flags} ${targets} --quiet"
    129129
    130130                        // Configure libcfa
     
    155155                dir (BuildDir) {
    156156                        sh "make -j 8 --no-print-directory -C libcfa/${Settings.Architecture.name}-nodebug"
    157                 }
    158         }
    159 
    160         build_stage('Build : install', true) {
    161                 // Build outside of the src tree to ease cleaning
    162                 dir (BuildDir) {
    163                         sh "make -j 8 --no-print-directory install"
    164157                }
    165158        }
     
    186179                echo "Archiving core dumps"
    187180                dir (BuildDir) {
    188                         archiveArtifacts artifacts: "tests/crashes/**/*,lib/**/lib*.so*", fingerprint: true
     181                        archiveArtifacts artifacts: "tests/crashes/**/*", fingerprint: true
    189182                }
    190183                throw err
     
    332325        public String CXX
    333326        public String CC
    334         public String lto
    335 
    336         CC_Desc(String name, String CXX, String CC, String lto) {
     327
     328        CC_Desc(String name, String CXX, String CC) {
    337329                this.name = name
    338330                this.CXX = CXX
    339                 this.CC  = CC
    340                 this.lto = lto
     331                this.CC = CC
    341332        }
    342333}
     
    373364                switch( param.Compiler ) {
    374365                        case 'gcc-9':
    375                                 this.Compiler = new CC_Desc('gcc-9', 'g++-9', 'gcc-9', '-flto=auto')
     366                                this.Compiler = new CC_Desc('gcc-9', 'g++-9', 'gcc-9')
    376367                        break
    377368                        case 'gcc-8':
    378                                 this.Compiler = new CC_Desc('gcc-8', 'g++-8', 'gcc-8', '-flto=auto')
     369                                this.Compiler = new CC_Desc('gcc-8', 'g++-8', 'gcc-8')
    379370                        break
    380371                        case 'gcc-7':
    381                                 this.Compiler = new CC_Desc('gcc-7', 'g++-7', 'gcc-7', '-flto=auto')
     372                                this.Compiler = new CC_Desc('gcc-7', 'g++-7', 'gcc-7')
    382373                        break
    383374                        case 'gcc-6':
    384                                 this.Compiler = new CC_Desc('gcc-6', 'g++-6', 'gcc-6', '-flto=auto')
     375                                this.Compiler = new CC_Desc('gcc-6', 'g++-6', 'gcc-6')
    385376                        break
    386377                        case 'gcc-5':
    387                                 this.Compiler = new CC_Desc('gcc-5', 'g++-5', 'gcc-5', '-flto=auto')
     378                                this.Compiler = new CC_Desc('gcc-5', 'g++-5', 'gcc-5')
    388379                        break
    389380                        case 'gcc-4.9':
    390                                 this.Compiler = new CC_Desc('gcc-4.9', 'g++-4.9', 'gcc-4.9', '-flto=auto')
     381                                this.Compiler = new CC_Desc('gcc-4.9', 'g++-4.9', 'gcc-4.9')
    391382                        break
    392383                        case 'clang':
    393                                 this.Compiler = new CC_Desc('clang', 'clang++-6.0', 'gcc-6', '-flto=thin -flto-jobs=0')
     384                                this.Compiler = new CC_Desc('clang', 'clang++-6.0', 'gcc-6')
    394385                        break
    395386                        default :
     
    448439        // prepare the properties
    449440        properties ([                                                                                                   \
    450                 buildDiscarder(logRotator(                                                                              \
    451                         artifactDaysToKeepStr: '',                                                                      \
    452                         artifactNumToKeepStr: '',                                                                       \
    453                         daysToKeepStr: '730',                                                                           \
    454                         numToKeepStr: '1000'                                                                            \
    455                 )),                                                                                                             \
    456441                [$class: 'ParametersDefinitionProperty',                                                                \
    457442                        parameterDefinitions: [                                                                         \
  • benchmark/Makefile.am

    r6a490b2 rb7d6a36  
    1111## Created On       : Sun May 31 09:08:15 2015
    1212## Last Modified By : Peter A. Buhr
    13 ## Last Modified On : Tue Mar 10 11:41:18 2020
    14 ## Update Count     : 258
     13## Last Modified On : Sat Jan 25 09:20:44 2020
     14## Update Count     : 255
    1515###############################################################################
    1616
     
    108108creation_cfa_coroutine_DURATION = 100000000
    109109creation_cfa_coroutine_eager_DURATION = 10000000
    110 creation_cfa_generator_DURATION = 1000000000
    111110creation_upp_coroutine_DURATION = ${creation_cfa_coroutine_eager_DURATION}
    112111creation_cfa_thread_DURATION = 10000000
     
    514513compile-typeof$(EXEEXT):
    515514        $(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
    516 
    517 ## =========================================================================================================
    518 
    519 size$(EXEEXT) : size-cfa.runquiet
    520 
    521 size-cfa$(EXEEXT):
    522         $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/size/size.cfa
  • benchmark/Makefile.in

    r6a490b2 rb7d6a36  
    447447creation_cfa_coroutine_DURATION = 100000000
    448448creation_cfa_coroutine_eager_DURATION = 10000000
    449 creation_cfa_generator_DURATION = 1000000000
    450449creation_upp_coroutine_DURATION = ${creation_cfa_coroutine_eager_DURATION}
    451450creation_cfa_thread_DURATION = 10000000
     
    11481147        $(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
    11491148
    1150 size$(EXEEXT) : size-cfa.runquiet
    1151 
    1152 size-cfa$(EXEEXT):
    1153         $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/size/size.cfa
    1154 
    11551149# Tell versions [3.59,3.63) of GNU make to not export all variables.
    11561150# Otherwise a system limit (for SysV at least) may be exceeded.
  • benchmark/creation/cfa_gen.cfa

    r6a490b2 rb7d6a36  
    1 #include "../bench.h"
     1#include "bench.h"
    22
    3 generator G {
     3struct C {
    44        volatile int restart; // ensure compiler does not optimize away all the code
    55};
    6 void ?{}( G & g ) { g.restart = 0; }
    7 void main( G & ) {}
     6void ?{}( C & c ) { c.restart = 0; }
     7void main( C & ) {}
    88
    99int main( int argc, char * argv[] ) {
     
    1111        BENCH(
    1212                for ( times ) {
    13                          G g;
     13                         C c;
    1414                },
    1515                result
  • benchmark/ctxswitch/cfa_cor.cfa

    r6a490b2 rb7d6a36  
    22#include <thread.hfa>
    33
    4 #include "../bench.h"
     4#include "bench.h"
    55
    6 coroutine C {};
     6coroutine C {} c;
    77void main( __attribute__((unused)) C & ) {
    8         for () {
    9                 suspend;
     8        while () {
     9                suspend();
    1010        }
    1111}
    1212int main( int argc, char * argv[] ) {
    13         C c;
    1413        BENCH_START()
    1514        BENCH(
  • benchmark/ctxswitch/cfa_gen.cfa

    r6a490b2 rb7d6a36  
    11#include "../bench.h"
    22
    3 generator G {};
    4 void main( G & ) {
     3typedef struct {
     4        void * next;
     5} C;
     6
     7void comain( C * c ) {
     8        if ( __builtin_expect(c->next != 0, 1) ) goto *(c->next);
     9        c->next = &&s1;
    510        for () {
    6                 suspend;
     11                return;
     12          s1: ;
    713        }
    814}
    915
    1016int main( int argc, char * argv[] ) {
    11         G g;
    1217        BENCH_START()
     18        C c = { 0 };
    1319        BENCH(
    1420                for ( times ) {
    15                         resume( g );
     21                        comain( &c );
    1622                },
    1723                result
  • configure

    r6a490b2 rb7d6a36  
    25572557# don't use the default CFLAGS as they unconditonnaly add -O2
    25582558: ${CFLAGS=""}
    2559 : ${CXXFLAGS=""}
    25602559
    25612560am__api_version='1.15'
  • configure.ac

    r6a490b2 rb7d6a36  
    1414# don't use the default CFLAGS as they unconditonnaly add -O2
    1515: ${CFLAGS=""}
    16 : ${CXXFLAGS=""}
    1716
    1817AM_INIT_AUTOMAKE([subdir-objects])
  • doc/bibliography/pl.bib

    r6a490b2 rb7d6a36  
    99%    Predefined journal names:
    1010%  acmcs: Computing Surveys             acta: Acta Infomatica
     11@string{acta="Acta Infomatica"}
    1112%  cacm: Communications of the ACM
    1213%  ibmjrd: IBM J. Research & Development ibmsj: IBM Systems Journal
     
    2122%  tcs: Theoretical Computer Science
    2223
    23 @string{acta="Acta Infomatica"}
    2424string{ieeepds="IEEE Transactions on Parallel and Distributed Systems"}
    2525@string{ieeepds="IEEE Trans. Parallel Distrib. Syst."}
     
    124124    series      = {ACM Distinguished Dissertations},
    125125    year        = 1983,
    126 }
    127 
    128 @article{Zhang19,
    129     keywords    = {Algebraic effects, dynamic scoping, exceptions, parametricity, type systems},
    130     author      = {Zhang, Yizhou and Myers, Andrew C.},
    131     title       = {Abstraction-safe Effect Handlers via Tunneling},
    132     journal     = {Proc. ACM Program. Lang.},
    133     issue_date  = {January 2019},
    134     volume      = {3},
    135     number      = {POPL},
    136     month       = jan,
    137     year        = {2019},
    138     issn        = {2475-1421},
    139     pages       = {5:1--5:29},
    140     articleno   = {5},
    141     publisher   = {ACM},
    142     address     = {New York, NY, USA},
    143 }
    144 
    145 @inproceedings{Zhang16,
    146     keywords    = {Exception tunneling, Genus, exception handling},
    147     author      = {Zhang, Yizhou and Salvaneschi, Guido and Beightol, Quinn and Liskov, Barbara and Myers, Andrew C.},
    148     title       = {Accepting Blame for Safe Tunneled Exceptions},
    149     booktitle   = {Proceedings of the 37th ACM SIGPLAN Conference on Programming Language Design and Implementation},
    150     series      = {PLDI'16},
    151     year        = {2016},
    152     location    = {Santa Barbara, CA, USA},
    153     pages       = {281--295},
    154     publisher   = {ACM},
    155     address     = {New York, NY, USA},
    156126}
    157127
     
    428398    journal     = sigplan,
    429399    year        = 1981,
    430     month       = feb,
    431     volume      = 16,
    432     number      = 2,
    433     pages       = {48-52},
     400    month       = feb, volume = 16, number = 2, pages = {48-52},
    434401    comment     = {
    435402        A one-pass, top-down algorithm for overload resolution.  Input is a
     
    510477    title       = {An Alternative to Subclassing},
    511478    journal     = sigplan,
    512     volume      = {21},
    513     number      = {11},
     479    volume      = {21},    number = {11},
    514480    pages       = {424-428},
    515     month       = nov,
    516     year        = 1986,
     481    month       = nov, year = 1986,
    517482    comment     = {
    518483        The Smalltalk class hierarchy has three uses: factoring out code;
     
    568533    isbn        = {3-540-66538-2},
    569534    location    = {Toulouse, France},
     535    doi         = {http://doi.acm.org/10.1145/318773.319251},
    570536    publisher   = {Springer},
    571537    address     = {London, UK},
     
    665631    year        = 2010,
    666632    pages       = {39--50},
     633    numpages    = {12},
    667634    publisher   = {IEEE Computer Society},
    668635    address     = {Washington, DC, USA},
     
    955922}
    956923
    957 @manual{C99,
    958     keywords    = {ISO/IEC C 9899},
    959     contributer = {pabuhr@plg},
    960     key         = {C99},
    961     title       = {C Programming Language {ISO/IEC} 9899:1999(E)},
    962     edition     = {2nd},
    963     publisher   = {International Standard Organization},
    964     address     = {\href{https://webstore.ansi.org/Standards/INCITS/INCITSISOIEC98991999R2005}{https://webstore.ansi.org/\-Standards/\-INCITS/\-INCITSISOIEC98991999R2005}},
    965     year        = 1999,
    966 }
    967 
    968924@manual{C11,
    969925    keywords    = {ISO/IEC C 11},
     
    13491305    location    = {London, United Kingdom},
    13501306    pages       = {41--53},
     1307    numpages    = {13},
     1308    url         = {http://doi.acm.org/10.1145/360204.360207},
     1309    doi         = {10.1145/360204.360207},
     1310    acmid       = {360207},
    13511311    publisher   = {ACM},
    13521312    address     = {New York, NY, USA},
     
    24482408    year        = 1993,
    24492409    pages       = {201--208},
     2410    url         = {http://doi.acm.org/10.1145/155360.155580},
    24502411    publisher   = {ACM},
    24512412    address     = {New York, NY, USA},
     
    26452606    location    = {Boulder, Colorado, USA},
    26462607    pages       = {91--97},
     2608    numpages    = {7},
    26472609    publisher   = {ACM},
    26482610    address     = {New York, NY, USA},
     
    26752637    issn        = {0004-5411},
    26762638    pages       = {215--225},
     2639    numpages    = {11},
     2640    url         = {http://doi.acm.org/10.1145/321879.321884},
     2641    doi         = {10.1145/321879.321884},
     2642    acmid       = {321884},
    26772643    publisher   = {ACM},
    26782644    address     = {New York, NY, USA},
     
    27422708}
    27432709
    2744 @misc{Drepper13,
    2745     keywords    = {thread-local storage},
    2746     contributer = {pabuhr@plg},
    2747     author      = {Ulrich Drepper},
    2748     title       = {{ELF} Handling For Thread-Local Storage},
    2749     year        = 2013,
    2750     month       = aug,
    2751     note        = {WikipediA},
    2752     howpublished= {\href{http://www.akkadia.org/drepper/tls.pdf}
    2753                   {http://\-www.akkadia.org/\-drepper/\-tls.pdf}},
    2754 }
    2755 
    27562710@misc{Turley99,
    27572711    keywords    = {embedded system, micrprocessor},
     
    27642718    howpublished= {\href{https://www.eetimes.com/author.asp?sectionid=36&doc_id=1287712}
    27652719                  {https://\-www.eetimes.com/\-author.asp?sectionid=\-36&doc_id=1287712}},
    2766 }
    2767 
    2768 @article{Xiao19,
    2769     keywords    = {bug classification, fault trigger, Linux operating system, regression bug},
    2770     contributer = {pabuhr@plg},
    2771     author      = {Guanping Xiao and Zheng Zheng and Beibei Yin and Kishor S. Trivedi and Xiaoting Du and Kai-Yuan Cai},
    2772     title       = {An Empirical Study of Fault Triggers in the Linux Operating System: An Evolutionary Perspective},
    2773     journal     = {IEEE Transactions on Reliability},
    2774     month       = dec,
    2775     year        = 2019,
    2776     volume      = 68,
    2777     number      = 4,
    2778     pages       = {1356-1383},
    27792720}
    27802721
     
    31963137}
    31973138
    3198 @inproceedings{Palix11,
    3199     keywords    = {Linux, fault-finding tools},
    3200     contributer = {pabuhr@plg},
    3201     author      = {Nicolas Palix and Ga\"el Thomas and Suman Saha and Christophe Calv\`es and Julia Lawall and Gilles Muller},
    3202     title       = {Faults in Linux: Ten Years Later},
    3203     booktitle   = {Proc. of the 16 International Conf. on Arch. Support for Prog. Lang. and Oper. Sys.},
    3204     series      = {ASPLOS'11},
    3205     month       = mar,
    3206     year        = 2011,
    3207     location    = {Newport Beach, California, USA},
    3208     pages       = {305-318},
    3209     publisher   = {ACM},
    3210     address     = {New York, NY, USA},
    3211 }
    3212 
    32133139@article{Lamport87,
    32143140    keywords    = {software solutions, mutual exclusion, fast},
     
    33323258    issn        = {0001-0782},
    33333259    pages       = {107--115},
     3260    numpages    = {9},
     3261    url         = {http://doi.acm.org/10.1145/1538788.1538814},
     3262    doi         = {10.1145/1538788.1538814},
     3263    acmid       = {1538814},
    33343264    publisher   = {ACM},
    33353265    address     = {New York, NY, USA},
     
    37343664}
    37353665
    3736 @mastersthesis{Radhakrishnan19,
    3737     author      = {Srihari Radhakrishnan},
    3738     title       = {High Performance Web Servers: A Study In Concurrent Programming Models},
    3739     school      = {School of Computer Sc., University of Waterloo},
    3740     year        = 2019,
    3741     optaddress  = {Waterloo, Ontario, Canada, N2L 3G1},
    3742     note        = {\href{https://uwspace.uwaterloo.ca/handle/10012/14706}{https://\-uwspace.uwaterloo.ca/\-handle/\-10012/\-14706}},
    3743 }
    3744 
    37453666@article{katzenelson83b,
    37463667    contributer = {gjditchfield@plg},
     
    37763697    pages       = {115-138},
    37773698    year        = 1971,
    3778 }
    3779 
    3780 @inproceedings{Hagersten03,
    3781     keywords    = {cache storage, parallel architectures, performance evaluation, shared memory systems},
    3782     author      = {Zoran Radovi\'{c} and Erik Hagersten},
    3783     title       = {Hierarchical backoff locks for nonuniform communication architectures},
    3784     booktitle   = {Proceedings of the Ninth International Symposium on High-Performance Computer Architecture},
    3785     year        = {2003},
    3786     location    = {Anaheim, CA, USA},
    3787     pages       = {241-252},
    3788     publisher   = {IEEE},
    37893699}
    37903700
     
    44554365}
    44564366
    4457 @misc{gccValueLabels,
    4458     keywords    = {gcc extension, value labels},
    4459     contributer = {pabuhr@plg},
    4460     key         = {Labels as Values},
    4461     author      = {{gcc Extension}},
    4462     title       = {Labels as Values},
    4463     year        = {since gcc-3},
    4464     howpublished= {\href{https://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html}
    4465                   {https:\-//gcc.gnu.org/\-onlinedocs/\-gcc/\-Labels-as-Values.html}},
    4466 }
    4467 
    44684367@mastersthesis{Clarke90,
    44694368    keywords    = {concurrency, postponing requests},
     
    45244423}
    45254424
    4526 @misc{libfibre,
    4527     key         = {libfibre},
    4528     author      = {Martin Karsten},
    4529     title       = {{libfibre:~User-Level Threading Runtime}},
    4530     howpublished= {\href{https://git.uwaterloo.ca/mkarsten/libfibre}
    4531                   {https://\-git.uwaterloo.ca/\-mkarsten/\-libfibre}},
    4532     note        = {[Online; accessed 2020-04-15]},
    4533 }
    4534 
    45354425@article{Linda,
    45364426    keywords    = {Linda, concurrency},
     
    45664456}
    45674457
    4568 @inproceedings{Fang06,
    4569     author      = {Fang, Yi and McMillan, Kenneth L. and Pnueli, Amir and Zuck, Lenore D.},
    4570     editor      = {Najm, Elie and Pradat-Peyre, Jean-Fran{\c{c}}ois and Donzeau-Gouge, V{\'e}ronique Vigui{\'e}},
    4571     title       = {Liveness by Invisible Invariants},
    4572     booktitle   = {Formal Techniques for Networked and Distributed Systems - FORTE 2006},
    4573     year        = 2006,
    4574     publisher   = {Springer Berlin Heidelberg},
    4575     address     = {Berlin, Heidelberg},
    4576     pages       = {356--371},
    4577 }
    4578 
    45794458@article{Pierce00,
    4580     keywords    = {Scala, polymorphism, subtyping, type inference},
     4459    keywords    = {Scala},
    45814460    contributer = {a3moss@uwaterloo.ca},
    45824461    author      = {Pierce, Benjamin C. and Turner, David N.},
     
    45904469    issn        = {0164-0925},
    45914470    pages       = {1--44},
     4471    numpages    = {44},
     4472    url         = {http://doi.acm.org/10.1145/345099.345100},
     4473    doi         = {10.1145/345099.345100},
     4474    acmid       = {345100},
    45924475    publisher   = {ACM},
    45934476    address     = {New York, NY, USA},
     4477    keywords    = {polymorphism, subtyping, type inference},
    45944478}
    4595 
    4596 @article{Dice15,
    4597     keywords    = {Concurrency, NUMA, hierarchical locks, locks, multicore, mutex, mutual exclusion, spin locks},
    4598     author      = {Dice, David and Marathe, Virendra J. and Shavit, Nir},
    4599     title       = {Lock Cohorting: A General Technique for Designing NUMA Locks},
    4600     journal     = {ACM Trans. Parallel Comput.},
    4601     issue_date  = {January 2015},
    4602     volume      = 1,
    4603     number      = 2,
    4604     month       = feb,
    4605     year        = 2015,
    4606     pages       = {13:1--13:42},
    4607     publisher   = {ACM},
    4608     address     = {New York, NY, USA},
    4609 }
    46104479
    46114480@article{Sundell08,
     
    46854554    journal     = sigplan,
    46864555    year        = 1989,
    4687     month       = jun,
    4688     volume      = 24,
    4689     number      = 6,
    4690     pages       = {37-48},
     4556    month       = jun, volume = 24, number = 6, pages = {37-48},
    46914557    abstract    = {
    46924558        This paper describes a scheme we have used to manage a large
     
    47594625    contributer = {pabuhr@plg},
    47604626    author      = {Gregory R. Andrews},
    4761     title       = {A Method for Solving Synchronization Problems},
     4627    title       = {A Method for Solving Synronization Problems},
    47624628    journal     = scp,
    47634629    volume      = 13,
     
    50844950    title       = {Multiple Inheritance for {C}{\kern-.1em\hbox{\large\texttt{+\kern-.25em+}}}},
    50854951    booktitle   = {Proceedings of the Spring '87 EUUG Conference},
    5086     month       = may,
    5087     year        = 1987,
     4952    month       = may, year = 1987
    50884953}
    50894954
     
    51304995    year        = 1986,
    51314996    pages       = {313--326},
     4997    numpages    = {14},
    51324998    publisher   = {ACM},
    51334999    address     = {New York, NY, USA},
     
    51455011    year        = 1986,
    51465012    pages       = {327--348},
     5013    numpages    = {22},
    51475014    publisher   = {ACM},
    51485015    address     = {New York, NY, USA},
     
    53415208    year        = 2005,
    53425209    pages       = {146-196},
     5210    numpages    = {51},
    53435211    publisher   = {ACM},
    53445212    address     = {New York, NY, USA},
     
    54865354    year        = 2000,
    54875355    pages       = {29-46},
    5488     note        = {OOPSLA'00, Oct. 15--19, 2000, Minneapolis, Minn., U.S.A.},
     5356    note        = {OOPSLA'00, Oct. 15--19, 2000, Minneapolis, Minnesota, U.S.A.},
    54895357}
    54905358
     
    56005468    location    = {San Diego, California, USA},
    56015469    pages       = {101--112},
     5470    numpages    = {12},
     5471    url         = {http://doi.acm.org/10.1145/2535838.2535878},
     5472    doi         = {10.1145/2535838.2535878},
     5473    acmid       = {2535878},
    56025474    publisher   = {ACM},
    56035475    address     = {New York, NY, USA},
     
    57035575    issn        = {0362-1340},
    57045576    pages       = {30--42},
     5577    numpages    = {13},
     5578    url         = {http://doi.acm.org/10.1145/947586.947589},
     5579    doi         = {10.1145/947586.947589},
    57055580    publisher   = {ACM},
    57065581    address     = {New York, NY, USA}
     
    62376112    month       = 9,
    62386113    year        = 2005,
    6239 }
    6240 
    6241 @article{Bauer15,
    6242     keywords    = {resumption exceptions, theory},
    6243     contributer = {pabuhr@plg},
    6244     author      = {Andrej Bauer and Matija Pretnar},
    6245     title       = {Programming with Algebraic Effects and Handlers},
    6246     journal     = {Journal of Logical and Algebraic Methods in Programming},
    6247     publisher   = {Elsevier BV},
    6248     volume      = 84,
    6249     number      = 1,
    6250     month       = jan,
    6251     year        = 2015,
    6252     pages       = {108-123},
    62536114}
    62546115
     
    66386499    issn        = {0164-0925},
    66396500    pages       = {429-475},
     6501    url         = {http://doi.acm.org/10.1145/1133651.1133653},
     6502    doi         = {10.1145/1133651.1133653},
     6503    acmid       = {1133653},
    66406504    publisher   = {ACM},
    66416505    address     = {New York, NY, USA},
     
    66656529    address     = {\href{http://docs.paralleluniverse.co/quasar}{http://\-docs.paralleluniverse.co/\-quasar}},
    66666530    year        = 2018,
    6667 }
    6668 
    6669 @article{Aravind09,
    6670     author      = {Alex A. Aravind and Wim H. Hesselink},
    6671     title       = {A Queue Based Mutual Exclusion Algorithm},
    6672     journal     = acta,
    6673     volume      = 46,
    6674     pages       = {73--86},
    6675     year        = 2009,
    66766531}
    66776532
     
    70246879    issn        = {0001-0782},
    70256880    pages       = {565--569},
     6881    numpages    = {5},
     6882    url         = {http://doi.acm.org/10.1145/359545.359566},
     6883    doi         = {10.1145/359545.359566},
     6884    acmid       = {359566},
    70266885    publisher   = {ACM},
    70276886    address     = {New York, NY, USA}
     
    70416900    issn        = {0362-1340},
    70426901    pages       = {145--147},
     6902    numpages    = {3},
     6903    url         = {http://doi.acm.org/10.1145/122598.122614},
     6904    doi         = {10.1145/122598.122614},
     6905    acmid       = {122614},
    70436906    publisher   = {ACM},
    70446907    address     = {New York, NY, USA},
     
    71437006    issn        = {0362-1340},
    71447007    pages       = {82--87},
     7008    numpages    = {6},
     7009    url         = {http://doi.acm.org/10.1145/947680.947688},
     7010    doi         = {10.1145/947680.947688},
    71457011    publisher   = {ACM},
    71467012    address     = {New York, NY, USA},
     
    72877153}
    72887154
    7289 @article{Cascaval08,
    7290     author      = {Cascaval, Calin and Blundell, Colin and Michael, Maged and Cain, Harold W. and Wu, Peng and Chiras, Stefanie and Chatterjee, Siddhartha},
    7291     title       = {Software Transactional Memory: Why Is It Only a Research Toy?},
    7292     journal     = {Queue},
    7293     volume      = {6},
    7294     number      = {5},
    7295     month       = sep,
    7296     year        = {2008},
    7297     pages       = {40:46--40:58},
    7298     publisher   = {ACM},
    7299     address     = {New York, NY, USA},
    7300 }
    7301 
    73027155@article{Dijkstra65a,
    73037156    keywords    = {N-thread software-solution mutual exclusion},
     
    75107363    year        = 1974,
    75117364    pages       = {261-301},
     7365    issn        = {0360-0300},
     7366    doi         = {http://doi.acm.org/10.1145/356635.356640},
    75127367    publisher   = {ACM},
    75137368    address     = {New York, NY, USA},
     
    75997454    publisher   = {ACM Press},
    76007455    address     = {New York, NY, USA},
     7456    doi         = {http://doi.acm.org/10.1145/356586.356588},
    76017457}
    76027458
     
    78997755    howpublished= {\href{https://projects.eclipse.org/proposals/trace-compass}{https://\-projects.eclipse.org/\-proposals/\-trace-compass}},
    79007756}
    7901 
    7902 @inproceedings{Boehm09,
    7903     author      = {Boehm, Hans-J.},
    7904     title       = {Transactional Memory Should Be an Implementation Technique, Not a Programming Interface},
    7905     booktitle   = {Proceedings of the First USENIX Conference on Hot Topics in Parallelism},
    7906     series      = {HotPar'09},
    7907     year        = {2009},
    7908     location    = {Berkeley, California},
    7909     publisher   = {USENIX Association},
    7910     address     = {Berkeley, CA, USA},
    7911 }
    7912 
     7757 
    79137758@article{Leroy00,
    79147759    keywords    = {type-systems, exceptions},
     
    79607805    number      = {2},
    79617806    pages       = {204-214},
    7962     month       = apr,
    7963     year        = 1988,
     7807    month       = apr, year = 1988,
    79647808    comment     = {
    79657809        Extended record types add fields to their base record.  Assignment
     
    80607904}
    80617905
    8062 @article{Karsten20,
    8063     author      = {Karsten, Martin and Barghi, Saman},
    8064     title       = {{User-level Threading: Have Your Cake and Eat It Too}},
    8065     year        = {2020},
    8066     issue_date  = {March 2020},
    8067     publisher   = {Association for Computing Machinery},
    8068     address     = {New York, NY, USA},
    8069     volume      = {4},
    8070     number      = {1},
    8071     url         = {https://doi.org/10.1145/3379483},
    8072     doi         = {10.1145/3379483},
    8073     journal     = {Proc. ACM Meas. Anal. Comput. Syst.},
    8074     month       = mar,
    8075     numpages    = {30},
    8076 }
    8077 
    80787906@techreport{Harmony,
    80797907    keywords    = {messages, concurrency},
     
    80917919    contributer = {gjditchfield@plg},
    80927920    author      = {Henry Lieverman},
    8093     title       = {Using Prototypical Objects to Implement Shared Behavior in Object Oriented Systems},
     7921    title       = {Using Prototypical Objects to Implement Shared Behavior in
     7922                  Object Oriented Systems},
    80947923    journal     = sigplan,
    8095     month       = nov,
    8096     year        = 1986,
    8097     volume      = 21,
    8098     number      = 11,
    8099     pages       = {214-223}
     7924    month       = nov, year = 1986,
     7925    volume      = 21, number = 11, pages = {214-223}
    81007926}
    81017927
     
    82848110    issn        = {0004-5411},
    82858111    pages       = {245--281},
     8112    numpages    = {37},
     8113    url         = {http://doi.acm.org/10.1145/62.2160},
     8114    doi         = {10.1145/62.2160},
     8115    acmid       = {2160},
    82868116    publisher   = {ACM},
    82878117    address     = {New York, NY, USA},
     
    82968126    contributer = {pabuhr@plg},
    82978127    author      = {Boehm, Hans-J. and Adve, Sarita V.},
    8298     title       = {You Don't Know Jack About Shared Variables or Memory Models},
     8128    title       = {You Don't Know Jack About Shared Variables or Memory Models},
    82998129    journal     = cacm,
    83008130    volume      = 55,
  • doc/papers/concurrency/Paper.tex

    r6a490b2 rb7d6a36  
    6161\newcommand{\CCseventeen}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}17\xspace} % C++17 symbolic name
    6262\newcommand{\CCtwenty}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}20\xspace} % C++20 symbolic name
    63 \newcommand{\Csharp}{C\raisebox{-0.7ex}{\large$^\sharp$}\xspace} % C# symbolic name
     63\newcommand{\Csharp}{C\raisebox{-0.7ex}{\Large$^\sharp$}\xspace} % C# symbolic name
    6464
    6565%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     
    127127\newcommand*{\etc}{%
    128128        \@ifnextchar{.}{\ETC}%
    129                 {\ETC.\xspace}%
     129        {\ETC.\xspace}%
    130130}}{}%
    131131\@ifundefined{etal}{
    132132\newcommand{\ETAL}{\abbrevFont{et}~\abbrevFont{al}}
    133133\newcommand*{\etal}{%
    134         \@ifnextchar{.}{\ETAL}%
    135                 {\ETAL.\xspace}%
     134        \@ifnextchar{.}{\protect\ETAL}%
     135                {\protect\ETAL.\xspace}%
    136136}}{}%
    137137\@ifundefined{viz}{
     
    163163                __float80, float80, __float128, float128, forall, ftype, generator, _Generic, _Imaginary, __imag, __imag__,
    164164                inline, __inline, __inline__, __int128, int128, __label__, monitor, mutex, _Noreturn, one_t, or,
    165                 otype, restrict, resume, __restrict, __restrict__, __signed, __signed__, _Static_assert, suspend, thread,
     165                otype, restrict, __restrict, __restrict__, __signed, __signed__, _Static_assert, thread,
    166166                _Thread_local, throw, throwResume, timeout, trait, try, ttype, typeof, __typeof, __typeof__,
    167167                virtual, __volatile, __volatile__, waitfor, when, with, zero_t},
    168168        moredirectives={defined,include_next},
    169169        % replace/adjust listing characters that look bad in sanserif
    170         literate={-}{\makebox[1ex][c]{\raisebox{0.5ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptstyle\land\,$}}1
     170        literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptstyle\land\,$}}1
    171171                {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 % {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
    172172                {<}{\textrm{\textless}}1 {>}{\textrm{\textgreater}}1
     
    197197                _Else, _Enable, _Event, _Finally, _Monitor, _Mutex, _Nomutex, _PeriodicTask, _RealTimeTask,
    198198                _Resume, _Select, _SporadicTask, _Task, _Timeout, _When, _With, _Throw},
     199}
     200\lstdefinelanguage{Golang}{
     201        morekeywords=[1]{package,import,func,type,struct,return,defer,panic,recover,select,var,const,iota,},
     202        morekeywords=[2]{string,uint,uint8,uint16,uint32,uint64,int,int8,int16,int32,int64,
     203                bool,float32,float64,complex64,complex128,byte,rune,uintptr, error,interface},
     204        morekeywords=[3]{map,slice,make,new,nil,len,cap,copy,close,true,false,delete,append,real,imag,complex,chan,},
     205        morekeywords=[4]{for,break,continue,range,goto,switch,case,fallthrough,if,else,default,},
     206        morekeywords=[5]{Println,Printf,Error,},
     207        sensitive=true,
     208        morecomment=[l]{//},
     209        morecomment=[s]{/*}{*/},
     210        morestring=[b]',
     211        morestring=[b]",
     212        morestring=[s]{`}{`},
    199213}
    200214
     
    227241{}
    228242\lstnewenvironment{uC++}[1][]
    229 {\lstset{language=uC++,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
     243{\lstset{#1}}
    230244{}
    231245\lstnewenvironment{Go}[1][]
     
    248262}
    249263
    250 \newsavebox{\myboxA}
    251 \newsavebox{\myboxB}
    252 \newsavebox{\myboxC}
    253 \newsavebox{\myboxD}
     264\newbox\myboxA
     265\newbox\myboxB
     266\newbox\myboxC
     267\newbox\myboxD
    254268
    255269\title{\texorpdfstring{Advanced Control-flow and Concurrency in \protect\CFA}{Advanced Control-flow in Cforall}}
     
    268282\CFA is a polymorphic, non-object-oriented, concurrent, backwards-compatible extension of the C programming language.
    269283This paper discusses the design philosophy and implementation of its advanced control-flow and concurrent/parallel features, along with the supporting runtime written in \CFA.
    270 These features are created from scratch as ISO C has only low-level and/or unimplemented concurrency, so C programmers continue to rely on library approaches like pthreads.
     284These features are created from scratch as ISO C has only low-level and/or unimplemented concurrency, so C programmers continue to rely on library features like pthreads.
    271285\CFA introduces modern language-level control-flow mechanisms, like generators, coroutines, user-level threading, and monitors for mutual exclusion and synchronization.
    272286% Library extension for executors, futures, and actors are built on these basic mechanisms.
     
    281295
    282296\begin{document}
    283 \linenumbers                            % comment out to turn off line numbering
     297\linenumbers                                            % comment out to turn off line numbering
    284298
    285299\maketitle
     
    288302\section{Introduction}
    289303
    290 \CFA~\cite{Moss18,Cforall} is a modern, polymorphic, non-object-oriented\footnote{
    291 \CFA has object-oriented features, such as constructors, destructors, virtuals and simple trait/interface inheritance.
    292 % Go interfaces, Rust traits, Swift Protocols, Haskell Type Classes and Java Interfaces.
    293 % "Trait inheritance" works for me. "Interface inheritance" might also be a good choice, and distinguish clearly from implementation inheritance.
    294 % You'll want to be a little bit careful with terms like "structural" and "nominal" inheritance as well. CFA has structural inheritance (I think Go as well) -- it's inferred based on the structure of the code. Java, Rust, and Haskell (not sure about Swift) have nominal inheritance, where there needs to be a specific statement that "this type inherits from this type".
     304This paper discusses the design philosophy and implementation of advanced language-level control-flow and concurrent/parallel features in \CFA~\cite{Moss18,Cforall} and its runtime, which is written entirely in \CFA.
     305\CFA is a modern, polymorphic, non-object-oriented\footnote{
     306\CFA has features often associated with object-oriented programming languages, such as constructors, destructors, virtuals and simple inheritance.
    295307However, functions \emph{cannot} be nested in structures, so there is no lexical binding between a structure and set of functions (member/method) implemented by an implicit \lstinline@this@ (receiver) parameter.},
    296308backwards-compatible extension of the C programming language.
    297 In many ways, \CFA is to C as Scala~\cite{Scala} is to Java, providing a \emph{research vehicle} for new typing and control-flow capabilities on top of a highly popular programming language\footnote{
    298 The TIOBE index~\cite{TIOBE} for December 2019 ranks the top five \emph{popular} programming languages as Java 17\%, C 16\%, Python 10\%, and \CC 6\%, \Csharp 5\% = 54\%, and over the past 30 years, C has always ranked either first or second in popularity.}
    299 allowing immediate dissemination.
    300 This paper discusses the design philosophy and implementation of advanced language-level control-flow and concurrent/parallel features in \CFA and its runtime, which is written entirely in \CFA.
    301 The \CFA control-flow framework extends ISO \Celeven~\cite{C11} with new call/return and concurrent/parallel control-flow.
    302 
    303 % The call/return extensions retain state between callee and caller versus losing the callee's state on return;
    304 % the concurrency extensions allow high-level management of threads.
    305 
    306 Call/return control-flow with argument/parameter passing appeared in the first programming languages.
    307 Over the past 50 years, call/return has been augmented with features like static/dynamic call, exceptions (multi-level return) and generators/coroutines (retain state between calls).
    308 While \CFA has mechanisms for dynamic call (algebraic effects) and exceptions\footnote{
    309 \CFA exception handling will be presented in a separate paper.
    310 The key feature that dovetails with this paper is nonlocal exceptions allowing exceptions to be raised across stacks, with synchronous exceptions raised among coroutines and asynchronous exceptions raised among threads, similar to that in \uC~\cite[\S~5]{uC++}}, this work only discusses retaining state between calls via generators/coroutines.
    311 \newterm{Coroutining} was introduced by Conway~\cite{Conway63} (1963), discussed by Knuth~\cite[\S~1.4.2]{Knuth73V1}, implemented in Simula67~\cite{Simula67}, formalized by Marlin~\cite{Marlin80}, and is now popular and appears in old and new programming languages: CLU~\cite{CLU}, \Csharp~\cite{Csharp}, Ruby~\cite{Ruby}, Python~\cite{Python}, JavaScript~\cite{JavaScript}, Lua~\cite{Lua}, \CCtwenty~\cite{C++20Coroutine19}.
    312 Coroutining is sequential execution requiring direct handoff among coroutines, \ie only the programmer is controlling execution order.
    313 If coroutines transfer to an internal event-engine for scheduling the next coroutines, the program transitions into the realm of concurrency~\cite[\S~3]{Buhr05a}.
    314 Coroutines are only a stepping stone towards concurrency where the commonality is that coroutines and threads retain state between calls.
    315 
    316 \Celeven/\CCeleven define concurrency~\cite[\S~7.26]{C11}, but it is largely wrappers for a subset of the pthreads library~\cite{Pthreads}.\footnote{Pthreads concurrency is based on simple thread fork/join in a function and mutex/condition locks, which is low-level and error-prone}
    317 Interestingly, almost a decade after the \Celeven standard, neither gcc-9, clang-9 nor msvc-19 (most recent versions) support the \Celeven include @threads.h@, indicating no interest in the C11 concurrency approach (possibly because of the recent effort to add concurrency to \CC).
    318 While the \Celeven standard does not state a threading model, the historical association with pthreads suggests implementations would adopt kernel-level threading (1:1)~\cite{ThreadModel}, as for \CC.
     309In many ways, \CFA is to C as Scala~\cite{Scala} is to Java, providing a \emph{research vehicle} for new typing and control-flow capabilities on top of a highly popular programming language allowing immediate dissemination.
     310Within the \CFA framework, new control-flow features are created from scratch because ISO \Celeven defines only a subset of the \CFA extensions, where the overlapping features are concurrency~\cite[\S~7.26]{C11}.
     311However, \Celeven concurrency is largely wrappers for a subset of the pthreads library~\cite{Butenhof97,Pthreads}, and \Celeven and pthreads concurrency is simple, based on thread fork/join in a function and mutex/condition locks, which is low-level and error-prone;
     312no high-level language concurrency features are defined.
     313Interestingly, almost a decade after publication of the \Celeven standard, neither gcc-8, clang-9 nor msvc-19 (most recent versions) support the \Celeven include @threads.h@, indicating little interest in the C11 concurrency approach (possibly because of the effort to add concurrency to \CC).
     314Finally, while the \Celeven standard does not state a threading model, the historical association with pthreads suggests implementations would adopt kernel-level threading (1:1)~\cite{ThreadModel}.
     315
    319316In contrast, there has been a renewed interest during the past decade in user-level (M:N, green) threading in old and new programming languages.
    320317As multi-core hardware became available in the 1980/90s, both user and kernel threading were examined.
    321318Kernel threading was chosen, largely because of its simplicity and fit with the simpler operating systems and hardware architectures at the time, which gave it a performance advantage~\cite{Drepper03}.
    322319Libraries like pthreads were developed for C, and the Solaris operating-system switched from user (JDK 1.1~\cite{JDK1.1}) to kernel threads.
    323 As a result, many current language implementations adopt the 1:1 kernel-threading model, like Java (Scala), Objective-C~\cite{obj-c-book}, \CCeleven~\cite{C11}, C\#~\cite{Csharp} and Rust~\cite{Rust}, with a variety of presentation mechanisms.
    324 From 2000 onwards, several language implementations have championed the M:N user-threading model, like Go~\cite{Go}, Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, D~\cite{D}, and \uC~\cite{uC++,uC++book}, including putting green threads back into Java~\cite{Quasar}, and many user-threading libraries have appeared~\cite{Qthreads,MPC,Marcel}.
    325 The main argument for user-level threading is that it is lighter weight than kernel threading (locking and context switching do not cross the kernel boundary), so there is less restriction on programming styles that encourage large numbers of threads performing medium-sized work to facilitate load balancing by the runtime~\cite{Verch12}.
     320As a result, languages like Java, Scala, Objective-C~\cite{obj-c-book}, \CCeleven~\cite{C11}, and C\#~\cite{Csharp} adopt the 1:1 kernel-threading model, with a variety of presentation mechanisms.
     321From 2000 onwards, languages like Go~\cite{Go}, Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, D~\cite{D}, and \uC~\cite{uC++,uC++book} have championed the M:N user-threading model, and many user-threading libraries have appeared~\cite{Qthreads,MPC,Marcel}, including putting green threads back into Java~\cite{Quasar}.
     322The main argument for user-level threading is that it is lighter weight than kernel threading (locking and context switching do not cross the kernel boundary), so there is less restriction on programming styles that encourage large numbers of threads performing medium work units to facilitate load balancing by the runtime~\cite{Verch12}.
    326323As well, user-threading facilitates a simpler concurrency approach using thread objects that leverage sequential patterns versus events with call-backs~\cite{Adya02,vonBehren03}.
    327324Finally, performant user-threading implementations (both time and space) meet or exceed direct kernel-threading implementations, while achieving the programming advantages of high concurrency levels and safety.
    328325
    329 A further effort over the past two decades is the development of language memory models to deal with the conflict between language features and compiler/hardware optimizations, \eg some language features are unsafe in the presence of aggressive sequential optimizations~\cite{Buhr95a,Boehm05}.
     326A further effort over the past two decades is the development of language memory models to deal with the conflict between language features and compiler/hardware optimizations, \ie some language features are unsafe in the presence of aggressive sequential optimizations~\cite{Buhr95a,Boehm05}.
    330327The consequence is that a language must provide sufficient tools to program around safety issues, as inline and library code is all sequential to the compiler.
    331328One solution is low-level qualifiers and functions (\eg @volatile@ and atomics) allowing \emph{programmers} to explicitly write safe (race-free~\cite{Boehm12}) programs.
    332 A safer solution is high-level language constructs so the \emph{compiler} knows the concurrency boundaries (where mutual exclusion and synchronization are acquired/released) and provide implicit safety at and across these boundaries.
    333 While the optimization problem is best known with respect to concurrency, it applies to other complex control-flow, like exceptions and coroutines.
    334 As well, language solutions allow matching the language paradigm with the approach, \eg matching the functional paradigm with data-flow programming or the imperative paradigm with thread programming.
    335 
    336 Finally, it is important for a language to provide safety over performance \emph{as the default}, allowing careful reduction of safety (unsafe code) for performance when necessary.
    337 Two concurrency violations of this philosophy are \emph{spurious wakeup} (random wakeup~\cite[\S~9]{Buhr05a}) and \emph{barging}\footnote{
    338 Barging is competitive succession instead of direct handoff, \ie after a lock is released both arriving and preexisting waiter threads compete to acquire the lock.
    339 Hence, an arriving thread can temporally \emph{barge} ahead of threads already waiting for an event, which can repeat indefinitely leading to starvation of waiter threads.
     329A safer solution is high-level language constructs so the \emph{compiler} knows the optimization boundaries, and hence, provides implicit safety.
     330This problem is best known with respect to concurrency, but applies to other complex control-flow, like exceptions\footnote{
     331\CFA exception handling will be presented in a separate paper.
     332The key feature that dovetails with this paper is nonlocal exceptions allowing exceptions to be raised across stacks, with synchronous exceptions raised among coroutines and asynchronous exceptions raised among threads, similar to that in \uC~\cite[\S~5]{uC++}
     333} and coroutines.
     334Finally, language solutions allow matching constructs with language paradigm, \ie imperative and functional languages often have different presentations of the same concept to fit their programming model.
     335
     336Finally, it is important for a language to provide safety over performance \emph{as the default}, allowing careful reduction of safety for performance when necessary.
     337Two concurrency violations of this philosophy are \emph{spurious wakeup} (random wakeup~\cite[\S~8]{Buhr05a}) and \emph{barging}\footnote{
     338The notion of competitive succession instead of direct handoff, \ie a lock owner releases the lock and an arriving thread acquires it ahead of preexisting waiter threads.
    340339} (signals-as-hints~\cite[\S~8]{Buhr05a}), where one is a consequence of the other, \ie once there is spurious wakeup, signals-as-hints follow.
    341 (Author experience teaching concurrency is that students are confused by these semantics.)
    342 However, spurious wakeup is \emph{not} a foundational concurrency property~\cite[\S~9]{Buhr05a};
    343 it is a performance design choice.
    344 We argue removing spurious wakeup and signals-as-hints makes concurrent programming simpler and safer as there is less local non-determinism to manage.
    345 If barging acquisition is allowed, its specialized performance advantage should be available as an option not the default.
    346 
    347 \CFA embraces language extensions for advanced control-flow, user-level threading, and safety as the default.
    348 We present comparative examples to support our argument that the \CFA control-flow extensions are as expressive and safe as those in other concurrent imperative programming languages, and perform experiments to show the \CFA runtime is competitive with other similar mechanisms.
     340However, spurious wakeup is \emph{not} a foundational concurrency property~\cite[\S~8]{Buhr05a}, it is a performance design choice.
     341Similarly, signals-as-hints are often a performance decision.
     342We argue removing spurious wakeup and signals-as-hints makes concurrent programming significantly safer because it removes local non-determinism and matches with programmer expectation.
     343(Author experience teaching concurrency is that students are highly confused by these semantics.)
     344Clawing back performance, when local non-determinism is unimportant, should be an option not the default.
     345
     346\begin{comment}
     347Most augmented traditional (Fortran 18~\cite{Fortran18}, Cobol 14~\cite{Cobol14}, Ada 12~\cite{Ada12}, Java 11~\cite{Java11}) and new languages (Go~\cite{Go}, Rust~\cite{Rust}, and D~\cite{D}), except \CC, diverge from C with different syntax and semantics, only interoperate indirectly with C, and are not systems languages, for those with managed memory.
     348As a result, there is a significant learning curve to move to these languages, and C legacy-code must be rewritten.
     349While \CC, like \CFA, takes an evolutionary approach to extend C, \CC's constantly growing complex and interdependent features-set (\eg objects, inheritance, templates, etc.) mean idiomatic \CC code is difficult to use from C, and C programmers must expend significant effort learning \CC.
     350Hence, rewriting and retraining costs for these languages, even \CC, are prohibitive for companies with a large C software-base.
     351\CFA with its orthogonal feature-set, its high-performance runtime, and direct access to all existing C libraries circumvents these problems.
     352\end{comment}
     353
     354\CFA embraces user-level threading, language extensions for advanced control-flow, and safety as the default.
     355We present comparative examples so the reader can judge if the \CFA control-flow extensions are better and safer than those in other concurrent, imperative programming languages, and perform experiments to show the \CFA runtime is competitive with other similar mechanisms.
    349356The main contributions of this work are:
    350 \begin{itemize}[topsep=3pt,itemsep=0pt]
     357\begin{itemize}[topsep=3pt,itemsep=1pt]
    351358\item
    352 a set of fundamental execution properties that dictate which language-level control-flow features need to be supported,
    353 
     359language-level generators, coroutines and user-level threading, which respect the expectations of C programmers.
    354360\item
    355 integration of these language-level control-flow features, while respecting the style and expectations of C programmers,
    356 
     361monitor synchronization without barging, and the ability to safely acquire multiple monitors \emph{simultaneously} (deadlock free), while seamlessly integrating these capabilities with all monitor synchronization mechanisms.
    357362\item
    358 monitor synchronization without barging, and the ability to safely acquire multiple monitors \emph{simultaneously} (deadlock free), while seamlessly integrating these capabilities with all monitor synchronization mechanisms,
    359 
    360 \item
    361 providing statically type-safe interfaces that integrate with the \CFA polymorphic type-system and other language features,
    362 
     363providing statically type-safe interfaces that integrate with the \CFA polymorphic type-system and other language features.
    363364% \item
    364365% library extensions for executors, futures, and actors built on the basic mechanisms.
    365 
    366366\item
    367 a runtime system without spurious wake-up and no performance loss,
    368 
     367a runtime system with no spurious wakeup.
    369368\item
    370 a dynamic partitioning mechanism to segregate groups of executing user and kernel threads performing specialized work (\eg web-server or compute engine) or requiring different scheduling (\eg NUMA or real-time).
    371 
     369a dynamic partitioning mechanism to segregate the execution environment for specialized requirements.
    372370% \item
    373371% a non-blocking I/O library
    374 
    375372\item
    376 experimental results showing comparable performance of the \CFA features with similar mechanisms in other languages.
     373experimental results showing comparable performance of the new features with similar mechanisms in other programming languages.
    377374\end{itemize}
    378375
    379 Section~\ref{s:FundamentalExecutionProperties} presents the compositional hierarchy of execution properties directing the design of control-flow features in \CFA.
    380 Section~\ref{s:StatefulFunction} begins advanced control by introducing sequential functions that retain data and execution state between calls producing constructs @generator@ and @coroutine@.
    381 Section~\ref{s:Concurrency} begins concurrency, or how to create (fork) and destroy (join) a thread producing the @thread@ construct.
     376Section~\ref{s:StatefulFunction} begins advanced control by introducing sequential functions that retain data and execution state between calls, which produces constructs @generator@ and @coroutine@.
     377Section~\ref{s:Concurrency} begins concurrency, or how to create (fork) and destroy (join) a thread, which produces the @thread@ construct.
    382378Section~\ref{s:MutualExclusionSynchronization} discusses the two mechanisms to restrict nondeterminism when controlling shared access to resources (mutual exclusion) and timing relationships among threads (synchronization).
    383379Section~\ref{s:Monitor} shows how both mutual exclusion and synchronization are safely embedded in the @monitor@ and @thread@ constructs.
    384380Section~\ref{s:CFARuntimeStructure} describes the large-scale mechanism to structure (cluster) threads and virtual processors (kernel threads).
    385 Section~\ref{s:Performance} uses a series of microbenchmarks to compare \CFA threading with pthreads, Java 11.0.6, Go 1.12.6, Rust 1.37.0, Python 3.7.6, Node.js 12.14.1, and \uC 7.0.0.
    386 
    387 
    388 \section{Fundamental Execution Properties}
    389 \label{s:FundamentalExecutionProperties}
    390 
    391 The features in a programming language should be composed from a set of fundamental properties rather than an ad hoc collection chosen by the designers.
    392 To this end, the control-flow features created for \CFA are based on the fundamental properties of any language with function-stack control-flow (see also \uC~\cite[pp.~140-142]{uC++}).
    393 The fundamental properties are execution state, thread, and mutual-exclusion/synchronization (MES).
    394 These independent properties can be used alone, in pairs, or in triplets to compose different language features, forming a compositional hierarchy where the most advanced feature has all the properties (state/thread/MES).
    395 While it is possible for a language to only support the most advanced feature~\cite{Hermes90}, this unnecessarily complicates and makes inefficient solutions to certain classes of problems.
    396 As is shown, each of the (non-rejected) composed features solves a particular set of problems, and hence, has a defensible position in a programming language.
    397 If a compositional feature is missing, a programmer has too few/many fundamental properties resulting in a complex and/or inefficient solution.
    398 
    399 In detail, the fundamental properties are:
    400 \begin{description}[leftmargin=\parindent,topsep=3pt,parsep=0pt]
    401 \item[\newterm{execution state}:]
    402 is the state information needed by a control-flow feature to initialize, manage compute data and execution location(s), and de-initialize.
    403 State is retained in fixed-sized aggregate structures and dynamic-sized stack(s), often allocated in the heap(s) managed by the runtime system.
    404 The lifetime of the state varies with the control-flow feature, where longer life-time and dynamic size provide greater power but also increase usage complexity and cost.
    406 Control-flow transfers among execution states occur in multiple ways, such as function call, context switch, asynchronous await, etc.
    406 Because the programming language determines what constitutes an execution state, implicitly manages this state, and defines movement mechanisms among states, execution state is an elementary property of the semantics of a programming language.
    407 % An execution-state is related to the notion of a process continuation \cite{Hieb90}.
    408 
    409 \item[\newterm{threading}:]
    410 is execution of code that occurs independently of other execution, \ie the execution resulting from a thread is sequential.
    411 Multiple threads provide \emph{concurrent execution};
    412 concurrent execution becomes parallel when run on multiple processing units (hyper-threading, cores, sockets).
    413 There must be language mechanisms to create, block/unblock, and join with a thread.
    414 
    415 \item[\newterm{MES}:]
    416 is the concurrency mechanisms to perform an action without interruption and establish timing relationships among multiple threads.
    417 These two properties are independent, \ie mutual exclusion cannot provide synchronization and vice versa without introducing additional threads~\cite[\S~4]{Buhr05a}.
    418 Limiting MES, \eg no access to shared data, results in contrived solutions and inefficiency on multi-core von Neumann computers where shared memory is a foundational aspect of its design.
    419 \end{description}
    420 These properties are fundamental because they cannot be built from existing language features, \eg a basic programming language like C99~\cite{C99} cannot create new control-flow features, concurrency, or provide MES using atomic hardware mechanisms.
    421 
    422 
    423 \subsection{Execution Properties}
    424 
    425 Table~\ref{t:ExecutionPropertyComposition} shows how the three fundamental execution properties: state, thread, and mutual exclusion compose a hierarchy of control-flow features needed in a programming language.
    426 (When doing case analysis, not all combinations are meaningful.)
    427 Note, basic von Neumann execution requires at least one thread and an execution state providing some form of call stack.
    428 For table entries missing these minimal components, the property is borrowed from the invoker (caller).
    429 
    430 Case 1 is a function that borrows storage for its state (stack frame/activation) and a thread from its invoker and retains this state across \emph{callees}, \ie function local-variables are retained on the stack across calls.
    431 Case 2 is case 1 with access to shared state so callers are restricted during update (mutual exclusion) and scheduling for other threads (synchronization).
    432 Case 3 is a stateful function supporting resume/suspend along with call/return to retain state across \emph{callers}, but has some restrictions because the function's state is stackless.
    433 Note, stackless functions still borrow the caller's stack and thread, where the stack is used to preserve state across its callees.
    434 Case 4 is cases 2 and 3 with protection to shared state for stackless functions.
    435 Cases 5 and 6 are the same as 3 and 4 but only the thread is borrowed as the function state is stackful, so resume/suspend is a context switch from the caller's to the function's stack.
    436 Cases 7 and 8 are rejected because a function that is given a new thread must have its own stack where the thread begins and stack frames are stored for calls, \ie there is no stack to borrow.
    437 Cases 9 and 10 are rejected because a thread with a fixed state (no stack) cannot accept calls, make calls, block, or be preempted, all of which require an unknown amount of additional dynamic state.
    438 Hence, once started, this kind of thread must execute to completion, \ie computation only, which severely restricts runtime management.
    439 Cases 11 and 12 have a stackful thread with and without safe access to shared state.
    440 Execution properties increase the cost of creation and execution along with complexity of usage.
    441 
    442 \begin{table}
    443 \caption{Execution property composition}
    444 \centering
    445 \label{t:ExecutionPropertyComposition}
    446 \renewcommand{\arraystretch}{1.25}
    447 %\setlength{\tabcolsep}{5pt}
    448 \begin{tabular}{c|c||l|l}
    449 \multicolumn{2}{c||}{execution properties} & \multicolumn{2}{c}{mutual exclusion / synchronization} \\
    450 \hline
    451 stateful                        & thread        & \multicolumn{1}{c|}{No} & \multicolumn{1}{c}{Yes} \\
    452 \hline   
    453 \hline   
    454 No                                      & No            & \textbf{1}\ \ \ function                              & \textbf{2}\ \ \ @monitor@ function    \\
    455 \hline   
    456 Yes (stackless)         & No            & \textbf{3}\ \ \ @generator@                   & \textbf{4}\ \ \ @monitor@ @generator@ \\
    457 \hline   
    458 Yes (stackful)          & No            & \textbf{5}\ \ \ @coroutine@                   & \textbf{6}\ \ \ @monitor@ @coroutine@ \\
    459 \hline   
    460 No                                      & Yes           & \textbf{7}\ \ \ {\color{red}rejected} & \textbf{8}\ \ \ {\color{red}rejected} \\
    461 \hline   
    462 Yes (stackless)         & Yes           & \textbf{9}\ \ \ {\color{red}rejected} & \textbf{10}\ \ \ {\color{red}rejected} \\
    463 \hline   
    464 Yes (stackful)          & Yes           & \textbf{11}\ \ \ @thread@                             & \textbf{12}\ \ @monitor@ @thread@             \\
    465 \end{tabular}
    466 \end{table}
    467 
    468 Given the execution-properties taxonomy, programmers can now answer three basic questions: is state necessary across calls and how much, is a separate thread necessary, is access to shared state necessary.
    469 The answers define the optimal language feature needed for implementing a programming problem.
    470 The next sections discuss how \CFA fills in the table with language features, while other programming languages may only provide a subset of the table.
    471 
    472 
    473 \subsection{Design Requirements}
    474 
    475 The following design requirements largely stem from building \CFA on top of C.
    476 \begin{itemize}[topsep=3pt,parsep=0pt]
    477 \item
    478 All communication must be statically type checkable for early detection of errors and efficient code generation.
    479 This requirement is consistent with the fact that C is a statically-typed programming-language.
    480 
    481 \item
    482 Direct interaction among language features must be possible allowing any feature to be selected without restricting comm\-unication.
    483 For example, many concurrent languages do not provide direct communication (calls) among threads, \ie threads only communicate indirectly through monitors, channels, messages, and/or futures.
    484 Indirect communication increases the number of objects, consuming more resources, and requires additional synchronization and possibly data transfer.
    485 
    486 \item
    487 All communication is performed using function calls, \ie data is transmitted from argument to parameter and results are returned from function calls.
    488 Alternative forms of communication, such as call-backs, message passing, channels, or communication ports, step outside of C's normal form of communication.
    489 
    490 \item
    491 All stateful features must follow the same declaration scopes and lifetimes as other language data.
    492 For C that means at program startup, during block and function activation, and on demand using dynamic allocation.
    493 
    494 \item
    495 MES must be available implicitly in language constructs as well as explicitly for specialized requirements, because requiring programmers to build MES using low-level locks often leads to incorrect programs.
    496 Furthermore, reducing synchronization scope by encapsulating it within language constructs further reduces errors in concurrent programs.
    497 
    498 \item
    499 Both synchronous and asynchronous communication are needed.
    500 However, we believe the best way to provide asynchrony, such as call-buffering/chaining and/or returning futures~\cite{multilisp}, is building it from expressive synchronous features.
    501 
    502 \item
    503 Synchronization must be able to control the service order of requests including prioritizing selection from different kinds of outstanding requests, and postponing a request for an unspecified time while continuing to accept new requests.
    504 Otherwise, certain concurrency problems are difficult, e.g.\ web server, disk scheduling, and the amount of concurrency is inhibited~\cite{Gentleman81}.
    505 \end{itemize}
    506 We have satisfied these requirements in \CFA while maintaining backwards compatibility with the huge body of legacy C programs.
    507 % In contrast, other new programming languages must still access C programs (\eg operating-system service routines), but do so through fragile C interfaces.
    508 
    509 
    510 \subsection{Asynchronous Await / Call}
    511 
    512 Asynchronous await/call is a caller mechanism for structuring programs and/or increasing concurrency, where the caller (client) postpones an action into the future, which is subsequently executed by a callee (server).
    513 The caller detects the action's completion through a \newterm{future}/\newterm{promise}.
    514 The benefit is asynchronous caller execution with respect to the callee until future resolution.
    515 For single-threaded languages like JavaScript, an asynchronous call passes a callee action, which is queued in the event-engine, and continues execution with a promise.
    516 When the caller needs the promise to be fulfilled, it executes @await@.
    517 A promise-completion call-back can be part of the callee action or the caller is rescheduled;
    518 in either case, the call-back is executed after the promise is fulfilled.
    519 While asynchronous calls generate new callee (server) events, we contend this mechanism is insufficient for advanced control-flow mechanisms like generators or coroutines (which are discussed next).
    520 Specifically, control between caller and callee occurs indirectly through the event-engine precluding direct handoff and cycling among events, and requires complex resolution of a control promise and data.
    521 Note, @async-await@ is just syntactic-sugar over the event engine so it does not solve these deficiencies.
    522 For multi-threaded languages like Java, the asynchronous call queues a callee action with an executor (server), which subsequently executes the work by a thread in the executor thread-pool.
    523 The problem is when concurrent work-units need to interact and/or block as this affects the executor, \eg stops threads.
    524 While it is possible to extend this approach to support the necessary mechanisms, \eg message passing in Actors, we show monitors and threads provide an equally competitive approach that does not deviate from normal call communication and can be used to build asynchronous call, as is done in Java.
     381Section~\ref{s:Performance} uses a series of microbenchmarks to compare \CFA threading with pthreads, Java OpenJDK-9, Go 1.12.6 and \uC 7.0.0.
    525382
    526383
     
    528385\label{s:StatefulFunction}
    529386
    530 A \emph{stateful function} has the ability to remember state between calls, where state can be either data or execution, \eg plugin, device driver, finite-state machine (FSM).
    531 A simple technique to retain data state between calls is @static@ declarations within a function, which is often implemented by hoisting the declarations to the global scope but hiding the names within the function using name mangling.
    532 However, each call starts the function at the top making it difficult to determine the last point of execution in an algorithm, and requiring multiple flag variables and testing to reestablish the continuation point.
    533 Hence, the next step of generalizing function state is implicitly remembering the return point between calls and reentering the function at this point rather than the top, called \emph{generators}\,/\,\emph{iterators} or \emph{stackless coroutines}.
    534 For example, a Fibonacci generator retains data and execution state allowing it to remember prior values needed to generate the next value and the location in the algorithm to compute that value.
    535 The next step of generalization is instantiating the function to allow multiple named instances, \eg multiple Fibonacci generators, where each instance has its own state, and hence, can generate an independent sequence of values.
    536 Note, a subset of generator state is a function \emph{closure}, \ie the technique of capturing lexical references when returning a nested function.
    537 A further generalization is adding a stack to a generator's state, called a \emph{coroutine}, so it can suspend outside of itself, \eg call helper functions to arbitrary depth before suspending back to its resumer without unwinding these calls.
    538 For example, a coroutine iterator for a binary tree can stop the traversal at the visit point (pre, infix, post traversal), return the node value to the caller, and then continue the recursive traversal from the current node on the next call.
    539 
    540 There are two styles of activating a stateful function, \emph{asymmetric} or \emph{symmetric}, identified by resume/suspend (no cycles) and resume/resume (cycles).
    541 These styles \emph{do not} cause incremental stack growth, \eg a million resume/suspend or resume/resume cycles do not remember each cycle just the last resumer for each cycle.
    542 Selecting between stackless/stackful semantics and asymmetric/symmetric style is a tradeoff between programming requirements, performance, and design, where stackless is faster and smaller (modified call/return between closures), stackful is more general but slower and larger (context switching between distinct stacks), and asymmetric is simpler control-flow than symmetric.
    543 Additionally, storage management for the closure/stack (especially in unmanaged languages, \ie no garbage collection) must be factored into design and performance.
    544 Note, creation cost (closure/stack) is amortized across usage, so activation cost (resume/suspend) is usually the dominant factor.
    545 
    546 % The stateful function is an old idea~\cite{Conway63,Marlin80} that is new again~\cite{C++20Coroutine19}, where execution is temporarily suspended and later resumed, \eg plugin, device driver, finite-state machine.
    547 % Hence, a stateful function may not end when it returns to its caller, allowing it to be restarted with the data and execution location present at the point of suspension.
    548 % If the closure is fixed size, we call it a \emph{generator} (or \emph{stackless}), and its control flow is restricted, \eg suspending outside the generator is prohibited.
    549 % If the closure is variable size, we call it a \emph{coroutine} (or \emph{stackful}), and as the name implies, often implemented with a separate stack with no programming restrictions.
    550 % Hence, refactoring a stackless coroutine may require changing it to stackful.
    551 % A foundational property of all \emph{stateful functions} is that resume/suspend \emph{do not} cause incremental stack growth, \ie resume/suspend operations are remembered through the closure not the stack.
    552 % As well, activating a stateful function is \emph{asymmetric} or \emph{symmetric}, identified by resume/suspend (no cycles) and resume/resume (cycles).
    553 % A fixed closure activated by modified call/return is faster than a variable closure activated by context switching.
    554 % Additionally, any storage management for the closure (especially in unmanaged languages, \ie no garbage collection) must also be factored into design and performance.
    555 % Therefore, selecting between stackless and stackful semantics is a tradeoff between programming requirements and performance, where stackless is faster and stackful is more general.
    556 % Note, creation cost is amortized across usage, so activation cost is usually the dominant factor.
    557 
    558 For example, Python presents asymmetric generators as a function object, \uC presents symmetric coroutines as a \lstinline[language=C++]|class|-like object, and many languages present threading using function pointers, @pthreads@~\cite{Butenhof97}, \Csharp~\cite{Csharp}, Go~\cite{Go}, and Scala~\cite{Scala}.
    559 \begin{center}
    560 \begin{tabular}{@{}l|l|l@{}}
    561 \multicolumn{1}{@{}c|}{Python asymmetric generator} & \multicolumn{1}{c|}{\uC symmetric coroutine} & \multicolumn{1}{c@{}}{Pthreads thread} \\
    562 \hline
    563 \begin{python}
    564 `def Gen():` $\LstCommentStyle{\color{red}// function}$
    565         ... yield val ...
    566 gen = Gen()
    567 for i in range( 10 ):
    568         print( next( gen ) )
    569 \end{python}
    570 &
    571 \begin{uC++}
    572 `_Coroutine Cycle {` $\LstCommentStyle{\color{red}// class}$
    573         Cycle * p;
    574         void main() { p->cycle(); }
    575         void cycle() { resume(); }  `};`
    576 Cycle c1, c2; c1.p=&c2; c2.p=&c1; c1.cycle();
    577 \end{uC++}
    578 &
    579 \begin{cfa}
    580 void * rtn( void * arg ) { ... }
    581 int i = 3, rc;
    582 pthread_t t; $\C{// thread id}$
    583 $\LstCommentStyle{\color{red}// function pointer}$
    584 rc=pthread_create(&t, `rtn`, (void *)i);
    585 \end{cfa}
    586 \end{tabular}
    587 \end{center}
    588 \CFA's preferred presentation model for generators/coroutines/threads is a hybrid of functions and classes, giving an object-oriented flavour.
    589 Essentially, the generator/coroutine/thread function is semantically coupled with a generator/coroutine/thread custom type via the type's name.
    590 The custom type solves several issues, while accessing the underlying mechanisms used by the custom types is still allowed for flexibility reasons.
    591 Each custom type is discussed in detail in the following sections.
    592 
    593 
    594 \subsection{Generator}
    595 
    596 Stackless generators (Table~\ref{t:ExecutionPropertyComposition} case 3) have the potential to be very small and fast, \ie as small and fast as function call/return for both creation and execution.
    597 The \CFA goal is to achieve this performance target, possibly at the cost of some semantic complexity.
    598 A series of different kinds of generators and their implementation demonstrate how this goal is accomplished.\footnote{
    599 The \CFA operator syntax uses \lstinline|?| to denote operands, which allows precise definitions for pre, post, and infix operators, \eg \lstinline|?++|, \lstinline|++?|, and \lstinline|?+?|, in addition \lstinline|?\{\}| denotes a constructor, as in \lstinline|foo `f` = `\{`...`\}`|, \lstinline|^?\{\}| denotes a destructor, and \lstinline|?()| is \CC function call \lstinline|operator()|.
    600 Operator \lstinline+|+ is overloaded for printing, like bit-shift \lstinline|<<| in \CC.
    601 The \CFA \lstinline|with| clause opens an aggregate scope making its fields directly accessible, like Pascal \lstinline|with|, but using parallel semantics;
    602 multiple aggregates may be opened.
    603 \CFA has rebindable references \lstinline|int i, & ip = i, j; `&ip = &j;`| and non-rebindable references \lstinline|int i, & `const` ip = i, j; `&ip = &j;` // disallowed|.
    604 }%
     387The stateful function is an old idea~\cite{Conway63,Marlin80} that is new again~\cite{C++20Coroutine19}, where execution is temporarily suspended and later resumed, \eg plugin, device driver, finite-state machine.
     388Hence, a stateful function may not end when it returns to its caller, allowing it to be restarted with the data and execution location present at the point of suspension.
     389This capability is accomplished by retaining a data/execution \emph{closure} between invocations.
     390If the closure is fixed size, we call it a \emph{generator} (or \emph{stackless}), and its control flow is restricted, \eg suspending outside the generator is prohibited.
     391If the closure is variable size, we call it a \emph{coroutine} (or \emph{stackful}), and as the name implies, often implemented with a separate stack with no programming restrictions.
     392Hence, refactoring a stackless coroutine may require changing it to stackful.
     393A foundational property of all \emph{stateful functions} is that resume/suspend \emph{do not} cause incremental stack growth, \ie resume/suspend operations are remembered through the closure not the stack.
     394As well, activating a stateful function is \emph{asymmetric} or \emph{symmetric}, identified by resume/suspend (no cycles) and resume/resume (cycles).
     395A fixed closure activated by modified call/return is faster than a variable closure activated by context switching.
     396Additionally, any storage management for the closure (especially in unmanaged languages, \ie no garbage collection) must also be factored into design and performance.
     397Therefore, selecting between stackless and stackful semantics is a tradeoff between programming requirements and performance, where stackless is faster and stackful is more general.
     398Note, creation cost is amortized across usage, so activation cost is usually the dominant factor.
    605399
    606400\begin{figure}
     
    616410
    617411
    618 
    619 
    620412        int fn = f->fn; f->fn = f->fn1;
    621413                f->fn1 = f->fn + fn;
    622414        return fn;
     415
    623416}
    624417int main() {
     
    639432void `main(Fib & fib)` with(fib) {
    640433
    641 
    642434        [fn1, fn] = [1, 0];
    643435        for () {
     
    659451\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    660452typedef struct {
    661         int `restart`, fn1, fn;
     453        int fn1, fn;  void * `next`;
    662454} Fib;
    663 #define FibCtor { `0`, 1, 0 }
     455#define FibCtor { 1, 0, NULL }
    664456Fib * comain( Fib * f ) {
    665         `static void * states[] = {&&s0, &&s1};`
    666         `goto *states[f->restart];`
    667   s0: f->`restart` = 1;
     457        if ( f->next ) goto *f->next;
     458        f->next = &&s1;
    668459        for ( ;; ) {
    669460                return f;
    670461          s1:; int fn = f->fn + f->fn1;
    671                 f->fn1 = f->fn; f->fn = fn;
     462                        f->fn1 = f->fn; f->fn = fn;
    672463        }
    673464}
     
    681472\end{lrbox}
    682473
    683 \subfloat[C]{\label{f:CFibonacci}\usebox\myboxA}
     474\subfloat[C asymmetric generator]{\label{f:CFibonacci}\usebox\myboxA}
    684475\hspace{3pt}
    685476\vrule
    686477\hspace{3pt}
    687 \subfloat[\CFA]{\label{f:CFAFibonacciGen}\usebox\myboxB}
     478\subfloat[\CFA asymmetric generator]{\label{f:CFAFibonacciGen}\usebox\myboxB}
    688479\hspace{3pt}
    689480\vrule
    690481\hspace{3pt}
    691 \subfloat[C generated code for \CFA version]{\label{f:CFibonacciSim}\usebox\myboxC}
     482\subfloat[C generator implementation]{\label{f:CFibonacciSim}\usebox\myboxC}
    692483\caption{Fibonacci (output) asymmetric generator}
    693484\label{f:FibonacciAsymmetricGenerator}
     
    702493};
    703494void ?{}( Fmt & fmt ) { `resume(fmt);` } // constructor
    704 void ^?{}( Fmt & f ) with(f) { $\C[2.25in]{// destructor}$
     495void ^?{}( Fmt & f ) with(f) { $\C[1.75in]{// destructor}$
    705496        if ( g != 0 || b != 0 ) sout | nl; }
    706497void `main( Fmt & f )` with(f) {
     
    708499                for ( ; g < 5; g += 1 ) { $\C{// groups}$
    709500                        for ( ; b < 4; b += 1 ) { $\C{// blocks}$
    710                                 do { `suspend;` $\C{// wait for character}$
    711                                 while ( ch == '\n' ); // ignore newline
    712                                 sout | ch;                      $\C{// print character}$
    713                         } sout | " ";  $\C{// block separator}$
    714                 } sout | nl; $\C{// group separator}$
     501                                `suspend;` $\C{// wait for character}$
     502                                while ( ch == '\n' ) `suspend;` // ignore
     503                                sout | ch;                                              // newline
     504                        } sout | " ";  // block spacer
     505                } sout | nl; // group newline
    715506        }
    716507}
     
    730521\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    731522typedef struct {
    732         int `restart`, g, b;
     523        void * next;
    733524        char ch;
     525        int g, b;
    734526} Fmt;
    735527void comain( Fmt * f ) {
    736         `static void * states[] = {&&s0, &&s1};`
    737         `goto *states[f->restart];`
    738   s0: f->`restart` = 1;
     528        if ( f->next ) goto *f->next;
     529        f->next = &&s1;
    739530        for ( ;; ) {
    740531                for ( f->g = 0; f->g < 5; f->g += 1 ) {
    741532                        for ( f->b = 0; f->b < 4; f->b += 1 ) {
    742                                 do { return;  s1: ;
    743                                 } while ( f->ch == '\n' );
     533                                return;
     534                          s1:;  while ( f->ch == '\n' ) return;
    744535                                printf( "%c", f->ch );
    745536                        } printf( " " );
     
    748539}
    749540int main() {
    750         Fmt fmt = { `0` };  comain( &fmt ); // prime
     541        Fmt fmt = { NULL };  comain( &fmt ); // prime
    751542        for ( ;; ) {
    752543                scanf( "%c", &fmt.ch );
     
    759550\end{lrbox}
    760551
    761 \subfloat[\CFA]{\label{f:CFAFormatGen}\usebox\myboxA}
    762 \hspace{35pt}
     552\subfloat[\CFA asymmetric generator]{\label{f:CFAFormatGen}\usebox\myboxA}
     553\hspace{3pt}
    763554\vrule
    764555\hspace{3pt}
    765 \subfloat[C generated code for \CFA version]{\label{f:CFormatGenImpl}\usebox\myboxB}
     556\subfloat[C generator simulation]{\label{f:CFormatSim}\usebox\myboxB}
    766557\hspace{3pt}
    767558\caption{Formatter (input) asymmetric generator}
     
    769560\end{figure}
    770561
    771 Figure~\ref{f:FibonacciAsymmetricGenerator} shows an unbounded asymmetric generator for an infinite sequence of Fibonacci numbers written (left to right) in C, \CFA, and showing the underlying C implementation for the \CFA version.
     562Stateful functions appear as generators, coroutines, and threads, where presentations are based on function objects or pointers~\cite{Butenhof97, C++14, MS:VisualC++, BoostCoroutines15}.
     563For example, Python presents generators as a function object:
     564\begin{python}
     565def Gen():
     566        ... `yield val` ...
     567gen = Gen()
     568for i in range( 10 ):
     569        print( next( gen ) )
     570\end{python}
     571Boost presents coroutines in terms of four functor object-types:
     572\begin{cfa}
     573asymmetric_coroutine<>::pull_type
     574asymmetric_coroutine<>::push_type
     575symmetric_coroutine<>::call_type
     576symmetric_coroutine<>::yield_type
     577\end{cfa}
     578and many languages present threading using function pointers, @pthreads@~\cite{Butenhof97}, \Csharp~\cite{Csharp}, Go~\cite{Go}, and Scala~\cite{Scala}, \eg pthreads:
     579\begin{cfa}
     580void * rtn( void * arg ) { ... }
     581int i = 3, rc;
     582pthread_t t; $\C{// thread id}$
     583`rc = pthread_create( &t, rtn, (void *)i );` $\C{// create and initialized task, type-unsafe input parameter}$
     584\end{cfa}
     585% void mycor( pthread_t cid, void * arg ) {
     586%       int * value = (int *)arg;                               $\C{// type unsafe, pointer-size only}$
     587%       // thread body
     588% }
     589% int main() {
     590%       int input = 0, output;
     591%       coroutine_t cid = coroutine_create( &mycor, (void *)&input ); $\C{// type unsafe, pointer-size only}$
     592%       coroutine_resume( cid, (void *)input, (void **)&output ); $\C{// type unsafe, pointer-size only}$
     593% }
     594\CFA's preferred presentation model for generators/coroutines/threads is a hybrid of objects and functions, with an object-oriented flavour.
     595Essentially, the generator/coroutine/thread function is semantically coupled with a generator/coroutine/thread custom type.
     596The custom type solves several issues, while accessing the underlying mechanisms used by the custom types is still allowed.
     597
     598
     599\subsection{Generator}
     600
     601Stackless generators have the potential to be very small and fast, \ie as small and fast as function call/return for both creation and execution.
     602The \CFA goal is to achieve this performance target, possibly at the cost of some semantic complexity.
     603A series of different kinds of generators and their implementation demonstrate how this goal is accomplished.
     604
     605Figure~\ref{f:FibonacciAsymmetricGenerator} shows an unbounded asymmetric generator for an infinite sequence of Fibonacci numbers written in C and \CFA, with a simple C implementation for the \CFA version.
    772606This generator is an \emph{output generator}, producing a new result on each resumption.
    773607To compute Fibonacci, the previous two values in the sequence are retained to generate the next value, \ie @fn1@ and @fn@, plus the execution location where control restarts when the generator is resumed, \ie top or middle.
     
    777611The C version only has the middle execution state because the top execution state is declaration initialization.
    778612Figure~\ref{f:CFAFibonacciGen} shows the \CFA approach, which also has a manual closure, but replaces the structure with a custom \CFA @generator@ type.
    779 Each generator type must have a function named \lstinline|main|,
    780 % \footnote{
    781 % The name \lstinline|main| has special meaning in C, specifically the function where a program starts execution.
    782 % Leveraging starting semantics to this name for generator/coroutine/thread is a logical extension.}
    783 called a \emph{generator main} (leveraging the starting semantics for program @main@ in C), which is connected to the generator type via its single reference parameter.
     613This generator type is then connected to a function that \emph{must be named \lstinline|main|},\footnote{
     614The name \lstinline|main| has special meaning in C, specifically the function where a program starts execution.
     615Hence, overloading this name for other starting points (generator/coroutine/thread) is a logical extension.}
     616called a \emph{generator main}, which takes as its only parameter a reference to the generator type.
    784617The generator main contains @suspend@ statements that suspend execution without ending the generator versus @return@.
    785 For the Fibonacci generator-main,
     618For the Fibonacci generator-main,\footnote{
     619The \CFA \lstinline|with| opens an aggregate scope making its fields directly accessible, like Pascal \lstinline|with|, but using parallel semantics.
     620Multiple aggregates may be opened.}
    786621the top initialization state appears at the start and the middle execution state is denoted by statement @suspend@.
    787622Any local variables in @main@ \emph{are not retained} between calls;
     
    792627Resuming an ended (returned) generator is undefined.
    793628Function @resume@ returns its argument generator so it can be cascaded in an expression, in this case to print the next Fibonacci value @fn@ computed in the generator instance.
    794 Figure~\ref{f:CFibonacciSim} shows the C implementation of the \CFA asymmetric generator.
    795 Only one execution-state field, @restart@, is needed to subscript the suspension points in the generator.
    796 At the start of the generator main, the @static@ declaration, @states@, is initialized to the N suspend points in the generator (where operator @&&@ dereferences/references a label~\cite{gccValueLabels}).
    797 Next, the computed @goto@ selects the last suspend point and branches to it.
    798 The  cost of setting @restart@ and branching via the computed @goto@ adds very little cost to the suspend/resume calls.
    799 
    800 An advantage of the \CFA explicit generator type is the ability to allow multiple type-safe interface functions taking and returning arbitrary types.
     629Figure~\ref{f:CFibonacciSim} shows the C implementation of the \CFA generator only needs one additional field, @next@, to handle retention of execution state.
     630The computed @goto@ at the start of the generator main, which branches after the previous suspend, adds very little cost to the resume call.
     631Finally, an explicit generator type provides both design and performance benefits, such as multiple type-safe interface functions taking and returning arbitrary types.\footnote{
     632The \CFA operator syntax uses \lstinline|?| to denote operands, which allows precise definitions for pre, post, and infix operators, \eg \lstinline|++?|, \lstinline|?++|, and \lstinline|?+?|, in addition \lstinline|?\{\}| denotes a constructor, as in \lstinline|foo `f` = `\{`...`\}`|, \lstinline|^?\{\}| denotes a destructor, and \lstinline|?()| is \CC function call \lstinline|operator()|.
     633}%
    801634\begin{cfa}
    802635int ?()( Fib & fib ) { return `resume( fib )`.fn; } $\C[3.9in]{// function-call interface}$
    803 int ?()( Fib & fib, int N ) { for ( N - 1 ) `fib()`; return `fib()`; } $\C{// add parameter to skip N values}$
    804 double ?()( Fib & fib ) { return (int)`fib()` / 3.14159; } $\C{// different return type, cast prevents recursive call}$
    805 Fib f;  int i;  double d;
    806 i = f();  i = f( 2 );  d = f();                                         $\C{// alternative interfaces}\CRT$
     636int ?()( Fib & fib, int N ) { for ( N - 1 ) `fib()`; return `fib()`; } $\C{// use function-call interface to skip N values}$
     637double ?()( Fib & fib ) { return (int)`fib()` / 3.14159; } $\C{// different return type, cast prevents recursive call}\CRT$
     638sout | (int)f1() | (double)f1() | f2( 2 ); // alternative interface, cast selects call based on return type, step 2 values
    807639\end{cfa}
    808640Now, the generator can be a separately compiled opaque-type only accessed through its interface functions.
    809641For contrast, Figure~\ref{f:PythonFibonacci} shows the equivalent Python Fibonacci generator, which does not use a generator type, and hence only has a single interface, but an implicit closure.
    810642
    811 \begin{figure}
    812 %\centering
    813 \newbox\myboxA
    814 \begin{lrbox}{\myboxA}
    815 \begin{python}[aboveskip=0pt,belowskip=0pt]
    816 def Fib():
    817         fn1, fn = 0, 1
    818         while True:
    819                 `yield fn1`
    820                 fn1, fn = fn, fn1 + fn
    821 f1 = Fib()
    822 f2 = Fib()
    823 for i in range( 10 ):
    824         print( next( f1 ), next( f2 ) )
    825 
    826 
    827 
    828 
    829 
    830 
    831 
    832 
    833 
    834 
    835 \end{python}
    836 \end{lrbox}
    837 
    838 \newbox\myboxB
    839 \begin{lrbox}{\myboxB}
    840 \begin{python}[aboveskip=0pt,belowskip=0pt]
    841 def Fmt():
    842         try:
    843                 while True:                                             $\C[2.5in]{\# until destructor call}$
    844                         for g in range( 5 ):            $\C{\# groups}$
    845                                 for b in range( 4 ):    $\C{\# blocks}$
    846                                         while True:
    847                                                 ch = (yield)    $\C{\# receive from send}$
    848                                                 if '\n' not in ch: $\C{\# ignore newline}$
    849                                                         break
    850                                         print( ch, end='' )     $\C{\# print character}$
    851                                 print( '  ', end='' )   $\C{\# block separator}$
    852                         print()                                         $\C{\# group separator}$
    853         except GeneratorExit:                           $\C{\# destructor}$
    854                 if g != 0 | b != 0:                             $\C{\# special case}$
    855                         print()
    856 fmt = Fmt()
    857 `next( fmt )`                                                   $\C{\# prime, next prewritten}$
    858 for i in range( 41 ):
    859         `fmt.send( 'a' );`                                      $\C{\# send to yield}$
    860 \end{python}
    861 \end{lrbox}
    862 
    863 \hspace{30pt}
    864 \subfloat[Fibonacci]{\label{f:PythonFibonacci}\usebox\myboxA}
    865 \hspace{3pt}
    866 \vrule
    867 \hspace{3pt}
    868 \subfloat[Formatter]{\label{f:PythonFormatter}\usebox\myboxB}
    869 \caption{Python generator}
    870 \label{f:PythonGenerator}
    871 \end{figure}
    872 
    873 Having to manually create the generator closure by moving local-state variables into the generator type is an additional programmer burden (removed by the coroutine in Section~\ref{s:Coroutine}).
    874 This manual requirement follows from the generality of allowing variable-size local-state, \eg local state with a variable-length array requires dynamic allocation as the array size is unknown at compile time.
     643Having to manually create the generator closure by moving local-state variables into the generator type is an additional programmer burden.
     644(This restriction is removed by the coroutine in Section~\ref{s:Coroutine}.)
     645This requirement follows from the generality of variable-size local-state, \eg local state with a variable-length array requires dynamic allocation because the array size is unknown at compile time.
    875646However, dynamic allocation significantly increases the cost of generator creation/destruction and is a showstopper for embedded real-time programming.
    876647But more importantly, the size of the generator type is tied to the local state in the generator main, which precludes separate compilation of the generator main, \ie a generator must be inlined or local state must be dynamically allocated.
    877 With respect to safety, we believe static analysis can discriminate persistent generator state from temporary generator-main state and raise a compile-time error for temporary usage spanning suspend points.
    878 Our experience using generators is that the problems have simple data state, including local state, but complex execution state, so the burden of creating the generator type is small.
     648With respect to safety, we believe static analysis can discriminate local state from temporary variables in a generator, \ie variable usage spanning @suspend@, and generate a compile-time error.
     649Finally, our current experience is that most generator problems have simple data state, including local state, but complex execution state, so the burden of creating the generator type is small.
    879650As well, C programmers are not afraid of this kind of semantic programming requirement, if it results in very small, fast generators.
    880651
     
    898669The example takes advantage of resuming a generator in the constructor to prime the loops so the first character sent for formatting appears inside the nested loops.
    899670The destructor provides a newline, if formatted text ends with a full line.
    900 Figure~\ref{f:CFormatGenImpl} shows the C implementation of the \CFA input generator with one additional field and the computed @goto@.
    901 For contrast, Figure~\ref{f:PythonFormatter} shows the equivalent Python format generator with the same properties as the format generator.
    902 
    903 % https://dl-acm-org.proxy.lib.uwaterloo.ca/
    904 
    905 Figure~\ref{f:DeviceDriverGen} shows an important application for an asymmetric generator, a device-driver, because device drivers are a significant source of operating-system errors: 85\% in Windows XP~\cite[p.~78]{Swift05} and 51.6\% in Linux~\cite[p.~1358,]{Xiao19}. %\cite{Palix11}
    906 Swift \etal~\cite[p.~86]{Swift05} restructure device drivers using the Extension Procedure Call (XPC) within the kernel via functions @nooks_driver_call@ and @nooks_kernel_call@, which have coroutine properties context switching to separate stacks with explicit hand-off calls;
    907 however, the calls do not retain execution state, and hence always start from the top.
    908 The alternative approach for implementing device drivers is using stack-ripping.
    909 However, Adya \etal~\cite{Adya02} argue against stack ripping in Section 3.2 and suggest a hybrid approach in Section 4 using cooperatively scheduled \emph{fibers}, which is coroutining.
    910 
    911 As an example, the following protocol:
     671Figure~\ref{f:CFormatSim} shows the C implementation of the \CFA input generator with one additional field and the computed @goto@.
     672For contrast, Figure~\ref{f:PythonFormatter} shows the equivalent Python format generator with the same properties as the Fibonacci generator.
     673
     674Figure~\ref{f:DeviceDriverGen} shows a \emph{killer} asymmetric generator, a device-driver, because device drivers caused 70\%-85\% of failures in Windows/Linux~\cite{Swift05}.
     675Device drivers follow the pattern of simple data state but complex execution state, \ie finite state-machine (FSM) parsing a protocol.
     676For example, the following protocol:
    912677\begin{center}
    913678\ldots\, STX \ldots\, message \ldots\, ESC ETX \ldots\, message \ldots\, ETX 2-byte crc \ldots
    914679\end{center}
    915 is for a simple network message beginning with the control character STX, ending with an ETX, and followed by a 2-byte cyclic-redundancy check.
     680is a network message beginning with the control character STX, ending with an ETX, and followed by a 2-byte cyclic-redundancy check.
    916681Control characters may appear in a message if preceded by an ESC.
    917682When a message byte arrives, it triggers an interrupt, and the operating system services the interrupt by calling the device driver with the byte read from a hardware register.
    918 The device driver returns a status code of its current state, and when a complete message is obtained, the operating system read the message accumulated in the supplied buffer.
    919 Hence, the device driver is an input/output generator, where the cost of resuming the device-driver generator is the same as call/return, so performance in an operating-system kernel is excellent.
    920 The key benefits of using a generator are correctness, safety, and maintenance because the execution states are transcribed directly into the programming language rather than table lookup or stack ripping.
    921 The conclusion is that FSMs are complex and occur in important domains, so direct generator support is important in a system programming language.
     683The device driver returns a status code of its current state, and when a complete message is obtained, the operating system knows the message is in the message buffer.
     684Hence, the device driver is an input/output generator.
     685
     686Note, the cost of creating and resuming the device-driver generator, @Driver@, is virtually identical to call/return, so performance in an operating-system kernel is excellent.
     687As well, the data state is small, where variables @byte@ and @msg@ are communication variables for passing in message bytes and returning the message, and variables @lnth@, @crc@, and @sum@ are local variables that must be retained between calls and are manually hoisted into the generator type.
     688% Manually, detecting and hoisting local-state variables is easy when the number is small.
     689In contrast, the execution state is large, with one @resume@ and seven @suspend@s.
     690Hence, the key benefits of the generator are correctness, safety, and maintenance because the execution states are transcribed directly into the programming language rather than using a table-driven approach.
     691Because FSMs can be complex and frequently occur in important domains, direct generator support is important in a system programming language.
    922692
    923693\begin{figure}
    924694\centering
     695\newbox\myboxA
     696\begin{lrbox}{\myboxA}
     697\begin{python}[aboveskip=0pt,belowskip=0pt]
     698def Fib():
     699        fn1, fn = 0, 1
     700        while True:
     701                `yield fn1`
     702                fn1, fn = fn, fn1 + fn
     703f1 = Fib()
     704f2 = Fib()
     705for i in range( 10 ):
     706        print( next( f1 ), next( f2 ) )
     707
     708
     709
     710
     711
     712
     713\end{python}
     714\end{lrbox}
     715
     716\newbox\myboxB
     717\begin{lrbox}{\myboxB}
     718\begin{python}[aboveskip=0pt,belowskip=0pt]
     719def Fmt():
     720        try:
     721                while True:
     722                        for g in range( 5 ):
     723                                for b in range( 4 ):
     724                                        print( `(yield)`, end='' )
     725                                print( '  ', end='' )
     726                        print()
     727        except GeneratorExit:
     728                if g != 0 or b != 0:
     729                        print()
     730fmt = Fmt()
     731`next( fmt )`                    # prime, next prewritten
     732for i in range( 41 ):
     733        `fmt.send( 'a' );`      # send to yield
     734\end{python}
     735\end{lrbox}
     736\subfloat[Fibonacci]{\label{f:PythonFibonacci}\usebox\myboxA}
     737\hspace{3pt}
     738\vrule
     739\hspace{3pt}
     740\subfloat[Formatter]{\label{f:PythonFormatter}\usebox\myboxB}
     741\caption{Python generator}
     742\label{f:PythonGenerator}
     743
     744\bigskip
     745
    925746\begin{tabular}{@{}l|l@{}}
    926747\begin{cfa}[aboveskip=0pt,belowskip=0pt]
     
    929750`generator` Driver {
    930751        Status status;
    931         char byte, * msg; // communication
    932         int lnth, sum;      // local state
    933         short int crc;
     752        unsigned char byte, * msg; // communication
     753        unsigned int lnth, sum;      // local state
     754        unsigned short int crc;
    934755};
    935756void ?{}( Driver & d, char * m ) { d.msg = m; }
     
    979800(The trivial cycle is a generator resuming itself.)
    980801This control flow is similar to recursion for functions but without stack growth.
    981 Figure~\ref{f:PingPongFullCoroutineSteps} shows the steps for symmetric control-flow are creating, executing, and terminating the cycle.
     802The steps for symmetric control-flow are creating, executing, and terminating the cycle.
    982803Constructing the cycle must deal with definition-before-use to close the cycle, \ie, the first generator must know about the last generator, which is not within scope.
    983804(This issue occurs for any cyclic data structure.)
    984 The example creates the generators, @ping@/@pong@, and then assigns the partners that form the cycle.
    985 % (Alternatively, the constructor can assign the partners as they are declared, except the first, and the first-generator partner is set after the last generator declaration to close the cycle.)
    986 Once the cycle is formed, the program main resumes one of the generators, @ping@, and the generators can then traverse an arbitrary cycle using @resume@ to activate partner generator(s).
     805% The example creates all the generators and then assigns the partners that form the cycle.
     806% Alternatively, the constructor can assign the partners as they are declared, except the first, and the first-generator partner is set after the last generator declaration to close the cycle.
     807Once the cycle is formed, the program main resumes one of the generators, and the generators can then traverse an arbitrary cycle using @resume@ to activate partner generator(s).
    987808Terminating the cycle is accomplished by @suspend@ or @return@, both of which go back to the stack frame that started the cycle (program main in the example).
    988 Note, the creator and starter may be different, \eg if the creator calls another function that starts the cycle.
    989809The starting stack-frame is below the last active generator because the resume/resume cycle does not grow the stack.
    990 Also, since local variables are not retained in the generator function, there are no objects with destructors to be called, so the cost is the same as a function return.
    991 Destructor cost occurs when the generator instance is deallocated by the creator.
     810Also, since local variables are not retained in the generator function, it does not contain any objects with destructors that must be called, so the cost is the same as a function return.
     811Destructor cost occurs when the generator instance is deallocated, which is easily controlled by the programmer.
     812
     813Figure~\ref{f:CPingPongSim} shows the implementation of the symmetric generator, where the complexity is the @resume@, which needs an extension to the calling convention to perform a forward rather than backward jump.
     814This jump-starts at the top of the next generator main to re-execute the normal calling convention to make space on the stack for its local variables.
     815However, before the jump, the caller must reset its stack (and any registers) equivalent to a @return@, but subsequently jump forward.
     816This semantics is basically a tail-call optimization, which compilers already perform.
     817The example shows the assembly code to undo the generator's entry code before the direct jump.
     818This assembly code depends on what entry code is generated, specifically if there are local variables and the level of optimization.
     819To provide this new calling convention requires a mechanism built into the compiler, which is beyond the scope of \CFA at this time.
     820Nevertheless, it is possible to hand generate any symmetric generators for proof of concept and performance testing.
     821A compiler could also eliminate other artifacts in the generator simulation to further increase performance, \eg LLVM has various coroutine support~\cite{CoroutineTS}, and \CFA can leverage this support should it fork @clang@.
    992822
    993823\begin{figure}
     
    996826\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    997827`generator PingPong` {
    998         int N, i;                               // local state
    999828        const char * name;
     829        int N;
     830        int i;                          // local state
    1000831        PingPong & partner; // rebindable reference
    1001832};
    1002833
    1003834void `main( PingPong & pp )` with(pp) {
    1004 
    1005 
    1006835        for ( ; i < N; i += 1 ) {
    1007836                sout | name | i;
     
    1021850\begin{cfa}[escapechar={},aboveskip=0pt,belowskip=0pt]
    1022851typedef struct PingPong {
    1023         int restart, N, i;
    1024852        const char * name;
     853        int N, i;
    1025854        struct PingPong * partner;
     855        void * next;
    1026856} PingPong;
    1027 #define PPCtor(name, N) {0, N, 0, name, NULL}
     857#define PPCtor(name, N) {name,N,0,NULL,NULL}
    1028858void comain( PingPong * pp ) {
    1029         static void * states[] = {&&s0, &&s1};
    1030         goto *states[pp->restart];
    1031   s0: pp->restart = 1;
     859        if ( pp->next ) goto *pp->next;
     860        pp->next = &&cycle;
    1032861        for ( ; pp->i < pp->N; pp->i += 1 ) {
    1033862                printf( "%s %d\n", pp->name, pp->i );
    1034863                asm( "mov  %0,%%rdi" : "=m" (pp->partner) );
    1035864                asm( "mov  %rdi,%rax" );
    1036                 asm( "add  $16, %rsp" );
    1037                 asm( "popq %rbp" );
     865                asm( "popq %rbx" );
    1038866                asm( "jmp  comain" );
    1039           s1: ;
     867          cycle: ;
    1040868        }
    1041869}
     
    1053881\end{figure}
    1054882
    1055 \begin{figure}
    1056 \centering
    1057 \input{FullCoroutinePhases.pstex_t}
    1058 \vspace*{-10pt}
    1059 \caption{Symmetric coroutine steps: Ping / Pong}
    1060 \label{f:PingPongFullCoroutineSteps}
    1061 \end{figure}
    1062 
    1063 Figure~\ref{f:CPingPongSim} shows the C implementation of the \CFA symmetric generator, where there is still only one additional field, @restart@, but @resume@ is more complex because it does a forward rather than backward jump.
    1064 Before the jump, the parameter for the next call @partner@ is placed into the register used for the first parameter, @rdi@, and the remaining registers are reset for a return.
    1065 The @jmp comain@ restarts the function but with a different parameter, so the new call's behaviour depends on the state of the coroutine type, i.e., branch to restart location with different data state.
    1066 While the semantics of call forward is a tail-call optimization, which compilers perform, the generator state is different on each call rather a common state for a tail-recursive function (i.e., the parameter to the function never changes during the forward calls.
    1067 However, this assembler code depends on what entry code is generated, specifically if there are local variables and the level of optimization.
    1068 Hence, internal compiler support is necessary for any forward call (or backwards return), \eg LLVM has various coroutine support~\cite{CoroutineTS}, and \CFA can leverage this support should it eventually fork @clang@.
    1069 For this reason, \CFA does not support general symmetric generators at this time, but, it is possible to hand generate any symmetric generators (as in Figure~\ref{f:CPingPongSim}) for proof of concept and performance testing.
    1070 
    1071 Finally, part of this generator work was inspired by the recent \CCtwenty coroutine proposal~\cite{C++20Coroutine19}, which uses the general term coroutine to mean generator.
     883Finally, part of this generator work was inspired by the recent \CCtwenty generator proposal~\cite{C++20Coroutine19} (which they call coroutines).
    1072884Our work provides the same high-performance asymmetric generators as \CCtwenty, and extends their work with symmetric generators.
    1073885An additional \CCtwenty generator feature allows @suspend@ and @resume@ to be followed by a restricted compound statement that is executed after the current generator has reset its stack but before calling the next generator, specified with \CFA syntax:
     
    1084896\label{s:Coroutine}
    1085897
    1086 Stackful coroutines (Table~\ref{t:ExecutionPropertyComposition} case 5) extend generator semantics, \ie there is an implicit closure and @suspend@ may appear in a helper function called from the coroutine main.
     898Stackful coroutines extend generator semantics, \ie there is an implicit closure and @suspend@ may appear in a helper function called from the coroutine main.
    1087899A coroutine is specified by replacing @generator@ with @coroutine@ for the type.
    1088 Coroutine generality results in higher cost for creation, due to dynamic stack allocation, for execution, due to context switching among stacks, and for terminating, due to possible stack unwinding and dynamic stack deallocation.
     900Coroutine generality results in higher cost for creation, due to dynamic stack allocation, execution, due to context switching among stacks, and terminating, due to possible stack unwinding and dynamic stack deallocation.
    1089901A series of different kinds of coroutines and their implementations demonstrate how coroutines extend generators.
    1090902
    1091903First, the previous generator examples are converted to their coroutine counterparts, allowing local-state variables to be moved from the generator type into the coroutine main.
    1092 \begin{center}
    1093 \begin{tabular}{@{}l|l|l|l@{}}
    1094 \multicolumn{1}{c|}{Fibonacci} & \multicolumn{1}{c|}{Formatter} & \multicolumn{1}{c|}{Device Driver} & \multicolumn{1}{c}{PingPong} \\
    1095 \hline
     904\begin{description}
     905\item[Fibonacci]
     906Move the declaration of @fn1@ to the start of coroutine main.
    1096907\begin{cfa}[xleftmargin=0pt]
    1097 void main( Fib & fib ) ...
     908void main( Fib & fib ) with(fib) {
    1098909        `int fn1;`
    1099 
    1100 
    1101 \end{cfa}
    1102 &
     910\end{cfa}
     911\item[Formatter]
     912Move the declaration of @g@ and @b@ to the for loops in the coroutine main.
    1103913\begin{cfa}[xleftmargin=0pt]
    1104914for ( `g`; 5 ) {
    1105915        for ( `b`; 4 ) {
    1106 
    1107 
    1108 \end{cfa}
    1109 &
     916\end{cfa}
     917\item[Device Driver]
     918Move the declaration of @lnth@ and @sum@ to their points of initialization.
    1110919\begin{cfa}[xleftmargin=0pt]
    1111 status = CONT;
    1112 `int lnth = 0, sum = 0;`
    1113 ...
    1114 `short int crc = byte << 8;`
    1115 \end{cfa}
    1116 &
     920        status = CONT;
     921        `unsigned int lnth = 0, sum = 0;`
     922        ...
     923        `unsigned short int crc = byte << 8;`
     924\end{cfa}
     925\item[PingPong]
     926Move the declaration of @i@ to the for loop in the coroutine main.
    1117927\begin{cfa}[xleftmargin=0pt]
    1118 void main( PingPong & pp ) ...
     928void main( PingPong & pp ) with(pp) {
    1119929        for ( `i`; N ) {
    1120 
    1121 
    1122 \end{cfa}
    1123 \end{tabular}
    1124 \end{center}
     930\end{cfa}
     931\end{description}
    1125932It is also possible to refactor code containing local-state and @suspend@ statements into a helper function, like the computation of the CRC for the device driver.
    1126933\begin{cfa}
    1127 int Crc() {
     934unsigned int Crc() {
    1128935        `suspend;`
    1129         short int crc = byte << 8;
     936        unsigned short int crc = byte << 8;
    1130937        `suspend;`
    1131938        status = (crc | byte) == sum ? MSG : ECRC;
     
    1138945
    1139946\begin{comment}
    1140 Figure~\ref{f:Coroutine3States} creates a @coroutine@ type, @`coroutine` Fib { int fn; }@, which provides communication, @fn@, for the \newterm{coroutine main}, @main@, which runs on the coroutine stack, and possibly multiple interface functions, \eg @restart@.
     947Figure~\ref{f:Coroutine3States} creates a @coroutine@ type, @`coroutine` Fib { int fn; }@, which provides communication, @fn@, for the \newterm{coroutine main}, @main@, which runs on the coroutine stack, and possibly multiple interface functions, \eg @next@.
    1141948Like the structure in Figure~\ref{f:ExternalState}, the coroutine type allows multiple instances, where instances of this type are passed to the (overloaded) coroutine main.
    1142949The coroutine main's stack holds the state for the next generation, @f1@ and @f2@, and the code represents the three states in the Fibonacci formula via the three suspend points, to context switch back to the caller's @resume@.
    1143 The interface function @restart@, takes a Fibonacci instance and context switches to it using @resume@;
     950The interface function @next@, takes a Fibonacci instance and context switches to it using @resume@;
    1144951on restart, the Fibonacci field, @fn@, contains the next value in the sequence, which is returned.
    1145952The first @resume@ is special because it allocates the coroutine stack and cocalls its coroutine main on that stack;
     
    13071114\begin{figure}
    13081115\centering
     1116\lstset{language=CFA,escapechar={},moredelim=**[is][\protect\color{red}]{`}{`}}% allow $
    13091117\begin{tabular}{@{}l@{\hspace{2\parindentlnth}}l@{}}
    13101118\begin{cfa}
    13111119`coroutine` Prod {
    1312         Cons & c;                       $\C[1.5in]{// communication}$
     1120        Cons & c;                       // communication
    13131121        int N, money, receipt;
    13141122};
    13151123void main( Prod & prod ) with( prod ) {
    1316         for ( i; N ) {          $\C{// 1st resume}\CRT$
     1124        // 1st resume starts here
     1125        for ( i; N ) {
    13171126                int p1 = random( 100 ), p2 = random( 100 );
     1127                sout | p1 | " " | p2;
    13181128                int status = delivery( c, p1, p2 );
     1129                sout | " $" | money | nl | status;
    13191130                receipt += 1;
    13201131        }
    13211132        stop( c );
     1133        sout | "prod stops";
    13221134}
    13231135int payment( Prod & prod, int money ) {
     
    13401152\begin{cfa}
    13411153`coroutine` Cons {
    1342         Prod & p;                       $\C[1.5in]{// communication}$
     1154        Prod & p;                       // communication
    13431155        int p1, p2, status;
    13441156        bool done;
    13451157};
    13461158void ?{}( Cons & cons, Prod & p ) {
    1347         &cons.p = &p;           $\C{// reassignable reference}$
     1159        &cons.p = &p; // reassignable reference
    13481160        cons.[status, done ] = [0, false];
    13491161}
    13501162void main( Cons & cons ) with( cons ) {
    1351         int money = 1, receipt; $\C{// 1st resume}\CRT$
     1163        // 1st resume starts here
     1164        int money = 1, receipt;
    13521165        for ( ; ! done; ) {
     1166                sout | p1 | " " | p2 | nl | " $" | money;
    13531167                status += 1;
    13541168                receipt = payment( p, money );
     1169                sout | " #" | receipt;
    13551170                money += 1;
    13561171        }
     1172        sout | "cons stops";
    13571173}
    13581174int delivery( Cons & cons, int p1, int p2 ) {
     
    13751191This example is illustrative because both producer/consumer have two interface functions with @resume@s that suspend execution in these interface (helper) functions.
    13761192The program main creates the producer coroutine, passes it to the consumer coroutine in its initialization, and closes the cycle at the call to @start@ along with the number of items to be produced.
    1377 The call to @start@ is the first @resume@ of @prod@, which remembers the program main as the starter and creates @prod@'s stack with a frame for @prod@'s coroutine main at the top, and context switches to it.
    1378 @prod@'s coroutine main starts, creates local-state variables that are retained between coroutine activations, and executes $N$ iterations, each generating two random values, calling the consumer's @deliver@ function to transfer the values, and printing the status returned from the consumer.
     1193The first @resume@ of @prod@ creates @prod@'s stack with a frame for @prod@'s coroutine main at the top, and context switches to it.
     1194@prod@'s coroutine main starts, creates local-state variables that are retained between coroutine activations, and executes $N$ iterations, each generating two random values, calling the consumer to deliver the values, and printing the status returned from the consumer.
     1195
    13791196The producer call to @delivery@ transfers values into the consumer's communication variables, resumes the consumer, and returns the consumer status.
    1380 Similarly on the first resume, @cons@'s stack is created and initialized, holding local-state variables retained between subsequent activations of the coroutine.
    1381 The symmetric coroutine cycle forms when the consumer calls the producer's @payment@ function, which resumes the producer in the consumer's delivery function.
    1382 When the producer calls @delivery@ again, it resumes the consumer in the @payment@ function.
    1383 Both interface function than return to the their corresponding coroutine-main functions for the next cycle.
     1197On the first resume, @cons@'s stack is created and initialized, holding local-state variables retained between subsequent activations of the coroutine.
     1198The consumer iterates until the @done@ flag is set, prints the values delivered by the producer, increments status, and calls back to the producer via @payment@, and on return from @payment@, prints the receipt from the producer and increments @money@ (inflation).
     1199The call from the consumer to @payment@ introduces the cycle between producer and consumer.
     1200When @payment@ is called, the consumer copies values into the producer's communication variable and a resume is executed.
     1201The context switch restarts the producer at the point where it last context switched, so it continues in @delivery@ after the resume.
     1202@delivery@ returns the status value in @prod@'s coroutine main, where the status is printed.
     1203The loop then repeats calling @delivery@, where each call resumes the consumer coroutine.
     1204The context switch to the consumer continues in @payment@.
     1205The consumer increments and returns the receipt to the call in @cons@'s coroutine main.
     1206The loop then repeats calling @payment@, where each call resumes the producer coroutine.
    13841207Figure~\ref{f:ProdConsRuntimeStacks} shows the runtime stacks of the program main, and the coroutine mains for @prod@ and @cons@ during the cycling.
    1385 As a consequence of a coroutine retaining its last resumer for suspending back, these reverse pointers allow @suspend@ to cycle \emph{backwards} around a symmetric coroutine cycle.
    13861208
    13871209\begin{figure}
     
    13921214\caption{Producer / consumer runtime stacks}
    13931215\label{f:ProdConsRuntimeStacks}
     1216
     1217\medskip
     1218
     1219\begin{center}
     1220\input{FullCoroutinePhases.pstex_t}
     1221\end{center}
     1222\vspace*{-10pt}
     1223\caption{Ping / Pong coroutine steps}
     1224\label{f:PingPongFullCoroutineSteps}
    13941225\end{figure}
    13951226
    13961227Terminating a coroutine cycle is more complex than a generator cycle, because it requires context switching to the program main's \emph{stack} to shutdown the program, whereas generators started by the program main run on its stack.
    1397 Furthermore, each deallocated coroutine must execute all destructors for object allocated in the coroutine type \emph{and} allocated on the coroutine's stack at the point of suspension, which can be arbitrarily deep.
    1398 In the example, termination begins with the producer's loop stopping after N iterations and calling the consumer's @stop@ function, which sets the @done@ flag, resumes the consumer in function @payment@, terminating the call, and the consumer's loop in its coroutine main.
    1399 % (Not shown is having @prod@ raise a nonlocal @stop@ exception at @cons@ after it finishes generating values and suspend back to @cons@, which catches the @stop@ exception to terminate its loop.)
    1400 When the consumer's main ends, its stack is already unwound so any stack allocated objects with destructors are finalized.
    1401 The question now is where does control continue?
    1402 
     1228Furthermore, each deallocated coroutine must guarantee all destructors are run for objects allocated in the coroutine type \emph{and} allocated on the coroutine's stack at the point of suspension, which can be arbitrarily deep.
     1229When a coroutine's main ends, its stack is already unwound so any stack allocated objects with destructors have been finalized.
    14031230The na\"{i}ve semantics for coroutine-cycle termination is to context switch to the last resumer, like executing a @suspend@/@return@ in a generator.
    14041231However, for coroutines, the last resumer is \emph{not} implicitly below the current stack frame, as for generators, because each coroutine's stack is independent.
    14051232Unfortunately, it is impossible to determine statically if a coroutine is in a cycle and unrealistic to check dynamically (graph-cycle problem).
    14061233Hence, a compromise solution is necessary that works for asymmetric (acyclic) and symmetric (cyclic) coroutines.
    1407 Our solution is to retain a coroutine's starter (first resumer), and context switch back to the starter when the coroutine ends.
    1408 Hence, the consumer restarts its first resumer, @prod@, in @stop@, and when the producer ends, it restarts its first resumer, program main, in @start@ (see dashed lines from the end of the coroutine mains in Figure~\ref{f:ProdConsRuntimeStacks}).
     1234
     1235Our solution is to context switch back to the first resumer (starter) once the coroutine ends.
    14091236This semantics works well for the most common asymmetric and symmetric coroutine usage patterns.
    1410 For asymmetric coroutines, it is common for the first resumer (starter) coroutine to be the only resumer;
    1411 for symmetric coroutines, it is common for the cycle creator to persist for the lifetime of the cycle.
     1237For asymmetric coroutines, it is common for the first resumer (starter) coroutine to be the only resumer.
     1238All previous generators converted to coroutines have this property.
     1239For symmetric coroutines, it is common for the cycle creator to persist for the lifetime of the cycle.
     1240Hence, the starter coroutine is remembered on the first resume and ending the coroutine resumes the starter.
     1241Figure~\ref{f:ProdConsRuntimeStacks} shows this semantic by the dashed lines from the end of the coroutine mains: @prod@ starts @cons@ so @cons@ resumes @prod@ at the end, and the program main starts @prod@ so @prod@ resumes the program main at the end.
    14121242For other scenarios, it is always possible to devise a solution with additional programming effort, such as forcing the cycle forward (backward) to a safe point before starting termination.
    14131243
    1414 Note, the producer/consumer example does not illustrate the full power of the starter semantics because @cons@ always ends first.
    1415 Assume generator @PingPong@ in Figure~\ref{f:PingPongSymmetricGenerator} is converted to a coroutine.
    1416 Unlike generators, coroutines have a starter structure with multiple levels, where the program main starts @ping@ and @ping@ starts @pong@.
    1417 By adjusting $N$ for either @ping@/@pong@, it is possible to have either finish first.
    1418 If @pong@ ends first, it resumes its starter @ping@ in its coroutine main, then @ping@ ends and resumes its starter the program main on return;
    1419 if @ping@ ends first, it resumes its starter the program main on return.
    1420 Regardless of the cycle complexity, the starter structure always leads back to the program main, but the path can be entered at an arbitrary point.
    1421 Once back at the program main (creator), coroutines @ping@ and @pong@ are deallocated, running any destructors for objects within the coroutine and possibly deallocating any coroutine stacks for non-terminated coroutines, where stack deallocation implies stack unwinding to find destructors for allocated objects on the stack.
    1422 Hence, the \CFA termination semantics for the generator and coroutine ensure correct deallocation semantics, regardless of the coroutine's state (terminated or active), like any other aggregate object.
     1244The producer/consumer example does not illustrate the full power of the starter semantics because @cons@ always ends first.
     1245Assume generator @PingPong@ is converted to a coroutine.
     1246Figure~\ref{f:PingPongFullCoroutineSteps} shows the creation, starter, and cyclic execution steps of the coroutine version.
     1247The program main creates (declares) coroutine instances @ping@ and @pong@.
     1248Next, program main resumes @ping@, making it @ping@'s starter, and @ping@'s main resumes @pong@'s main, making it @pong@'s starter.
     1249Execution forms a cycle when @pong@ resumes @ping@, and cycles $N$ times.
     1250By adjusting $N$ for either @ping@/@pong@, it is possible to have either one finish first, instead of @pong@ always ending first.
     1251If @pong@ ends first, it resumes its starter @ping@ in its coroutine main, then @ping@ ends and resumes its starter the program main in function @start@.
     1252If @ping@ ends first, it resumes its starter the program main in function @start@.
     1253Regardless of the cycle complexity, the starter stack always leads back to the program main, but the stack can be entered at an arbitrary point.
     1254Once back at the program main, coroutines @ping@ and @pong@ are deallocated.
     1255For generators, deallocation runs the destructors for all objects in the generator type.
     1256For coroutines, deallocation deals with objects in the coroutine type and must also run the destructors for any objects pending on the coroutine's stack for any unterminated coroutine.
     1257Hence, if a coroutine's destructor detects the coroutine is not ended, it implicitly raises a cancellation exception (uncatchable exception) at the coroutine and resumes it so the cancellation exception can propagate to the root of the coroutine's stack destroying all local variables on the stack.
     1258So the \CFA semantics for the generator and coroutine, ensure both can be safely deallocated at any time, regardless of their current state, like any other aggregate object.
     1259Explicitly raising normal exceptions at another coroutine can replace flag variables, like @stop@, \eg @prod@ raises a @stop@ exception at @cons@ after it finishes generating values and resumes @cons@, which catches the @stop@ exception to terminate its loop.
     1260
     1261Finally, there is an interesting effect for @suspend@ with symmetric coroutines.
     1262A coroutine must retain its last resumer to suspend back because the resumer is on a different stack.
     1263These reverse pointers allow @suspend@ to cycle \emph{backwards}, which may be useful in certain cases.
     1264However, there is an anomaly if a coroutine resumes itself, because it overwrites its last resumer with itself, losing the ability to resume the last external resumer.
     1265To prevent losing this information, a self-resume does not overwrite the last resumer.
    14231266
    14241267
     
    14511294Users wanting to extend custom types or build their own can only do so in ways offered by the language.
    14521295Furthermore, implementing custom types without language support may display the power of a programming language.
    1453 \CFA blends the two approaches, providing custom type for idiomatic \CFA code, while extending and building new custom types is still possible, similar to Java concurrency with builtin and library (@java.util.concurrent@) monitors.
     1296\CFA blends the two approaches, providing custom type for idiomatic \CFA code, while extending and building new custom types is still possible, similar to Java concurrency with builtin and library monitors.
    14541297
    14551298Part of the mechanism to generalize custom types is the \CFA trait~\cite[\S~2.3]{Moss18}, \eg the definition for custom-type @coroutine@ is anything satisfying the trait @is_coroutine@, and this trait both enforces and restricts the coroutine-interface functions.
     
    14611304forall( `dtype` T | is_coroutine(T) ) void $suspend$( T & ), resume( T & );
    14621305\end{cfa}
    1463 Note, copying generators/coroutines/threads is undefined because multiple objects cannot execute on a shared stack and stack copying does not work in unmanaged languages (no garbage collection), like C, because the stack may contain pointers to objects within it that require updating for the copy.
     1306Note, copying generators/coroutines/threads is not meaningful.
     1307For example, both the resumer and suspender descriptors can have bidirectional pointers;
     1308copying these coroutines does not update the internal pointers so behaviour of both copies would be difficult to understand.
     1309Furthermore, two coroutines cannot logically execute on the same stack.
     1310A deep coroutine copy, which copies the stack, is also meaningless in an unmanaged language (no garbage collection), like C, because the stack may contain pointers to objects within it that require updating for the copy.
    14641311The \CFA @dtype@ property provides no \emph{implicit} copying operations and the @is_coroutine@ trait provides no \emph{explicit} copying operations, so all coroutines must be passed by reference (pointer).
    14651312The function definitions ensure there is a statically typed @main@ function that is the starting point (first stack frame) of a coroutine, and a mechanism to get (read) the coroutine descriptor from its handle.
     
    15051352The combination of custom types and fundamental @trait@ description of these types allows a concise specification for programmers and tools, while more advanced programmers can have tighter control over memory layout and initialization.
    15061353
    1507 Figure~\ref{f:CoroutineMemoryLayout} shows different memory-layout options for a coroutine (where a thread is similar).
     1354Figure~\ref{f:CoroutineMemoryLayout} shows different memory-layout options for a coroutine (where a task is similar).
    15081355The coroutine handle is the @coroutine@ instance containing programmer specified type global/communication variables across interface functions.
    15091356The coroutine descriptor contains all implicit declarations needed by the runtime, \eg @suspend@/@resume@, and can be part of the coroutine handle or separate.
    15101357The coroutine stack can appear in a number of locations and be fixed or variable sized.
    1511 Hence, the coroutine's stack could be a variable-length structure (VLS)\footnote{
    1512 We are examining VLSs, where fields can be variable-sized structures or arrays.
     1358Hence, the coroutine's stack could be a VLS\footnote{
     1359We are examining variable-sized structures (VLS), where fields can be variable-sized structures or arrays.
    15131360Once allocated, a VLS is fixed sized.}
    15141361on the allocating stack, provided the allocating stack is large enough.
    15151362For a VLS stack allocation/deallocation is an inexpensive adjustment of the stack pointer, modulo any stack constructor costs (\eg initial frame setup).
    1516 For stack allocation in the heap, allocation/deallocation is an expensive allocation, where the heap can be a shared resource, modulo any stack constructor costs.
    1517 It is also possible to use a split (segmented) stack calling convention, available with gcc and clang, allowing a variable-sized stack via a set of connected blocks in the heap.
     1363For heap stack allocation, allocation/deallocation is an expensive heap allocation (where the heap can be a shared resource), modulo any stack constructor costs.
     1364With heap stack allocation, it is also possible to use a split (segmented) stack calling convention, available with gcc and clang, so the stack is variable sized.
    15181365Currently, \CFA supports stack/heap allocated descriptors but only fixed-sized heap allocated stacks.
    15191366In \CFA debug-mode, the fixed-sized stack is terminated with a write-only page, which catches most stack overflows.
    15201367Experience teaching concurrency with \uC~\cite{CS343} shows fixed-sized stacks are rarely an issue for students.
    1521 Split-stack allocation is under development but requires recompilation of legacy code, which is not always possible.
     1368Split-stack allocation is under development but requires recompilation of legacy code, which may be impossible.
    15221369
    15231370\begin{figure}
     
    15331380
    15341381Concurrency is nondeterministic scheduling of independent sequential execution paths (threads), where each thread has its own stack.
    1535 A single thread with multiple stacks, \ie coroutining, does \emph{not} imply concurrency~\cite[\S~3]{Buhr05a}.
    1536 Coroutining self-schedules the thread across stacks so execution is deterministic.
     1382A single thread with multiple call stacks, \newterm{coroutining}~\cite{Conway63,Marlin80}, does \emph{not} imply concurrency~\cite[\S~2]{Buhr05a}.
     1383In coroutining, coroutines self-schedule the thread across stacks so execution is deterministic.
    15371384(It is \emph{impossible} to generate a concurrency error when coroutining.)
    1538 
    1539 The transition to concurrency, even for a single thread with multiple stacks, occurs when coroutines context switch to a \newterm{scheduling coroutine}, introducing non-determinism from the coroutine perspective~\cite[\S~3]{Buhr05a}.
     1385However, coroutines are a stepping stone towards concurrency.
     1386
     1387The transition to concurrency, even for a single thread with multiple stacks, occurs when coroutines context switch to a \newterm{scheduling coroutine}, introducing non-determinism from the coroutine perspective~\cite[\S~3]{Buhr05a}.
    15401388Therefore, a minimal concurrency system requires coroutines \emph{in conjunction with a nondeterministic scheduler}.
    1541 The resulting execution system now follows a cooperative threading-model~\cite{Adya02,libdill} because context-switching points to the scheduler (blocking) are known, but the next unblocking point is unknown due to the scheduler.
    1542 Adding \newterm{preemption} introduces \newterm{non-cooperative} or \newterm{preemptive} scheduling, where context switching points to the scheduler are unknown as they can occur randomly between any two instructions often based on a timer interrupt.
     1389The resulting execution system now follows a cooperative threading model~\cite{Adya02,libdill}, called \newterm{non-preemptive scheduling}.
     1390Adding \newterm{preemption} introduces non-cooperative scheduling, where context switching occurs randomly between any two instructions often based on a timer interrupt, called \newterm{preemptive scheduling}.
     1391While a scheduler introduces uncertain execution among explicit context switches, preemption introduces uncertainty by introducing implicit context switches.
    15431392Uncertainty gives the illusion of parallelism on a single processor and provides a mechanism to access and increase performance on multiple processors.
    1544 The reason is that the scheduler/runtime have complete knowledge about resources and how to best utilize them.
    1545 However, the introduction of unrestricted nondeterminism results in the need for \newterm{mutual exclusion} and \newterm{synchronization}~\cite[\S~4]{Buhr05a}, which restrict nondeterminism for correctness;
     1394However, the introduction of unrestricted nondeterminism results in the need for \newterm{mutual exclusion} and \newterm{synchronization}, which restrict nondeterminism for correctness;
    15461395otherwise, it is impossible to write meaningful concurrent programs.
    15471396Optimal concurrent performance is often obtained by having as much nondeterminism as mutual exclusion and synchronization correctness allow.
    15481397
    1549 A scheduler can also be stackless or stackful.
     1398A scheduler can be either stackless or stackful.
    15501399For stackless, the scheduler performs scheduling on the stack of the current coroutine and switches directly to the next coroutine, so there is one context switch.
    15511400For stackful, the current coroutine switches to the scheduler, which performs scheduling, and it then switches to the next coroutine, so there are two context switches.
     
    15561405\label{s:threads}
    15571406
    1558 Threading (Table~\ref{t:ExecutionPropertyComposition} case 11) needs the ability to start a thread and wait for its completion.
     1407Threading needs the ability to start a thread and wait for its completion.
    15591408A common API for this ability is @fork@ and @join@.
    1560 \vspace{4pt}
    1561 \par\noindent
    1562 \begin{tabular}{@{}l|l|l@{}}
    1563 \multicolumn{1}{c|}{\textbf{Java}} & \multicolumn{1}{c|}{\textbf{\Celeven}} & \multicolumn{1}{c}{\textbf{pthreads}} \\
    1564 \hline
    1565 \begin{cfa}
    1566 class MyThread extends Thread {...}
    1567 mythread t = new MyThread(...);
     1409\begin{cquote}
     1410\begin{tabular}{@{}lll@{}}
     1411\multicolumn{1}{c}{\textbf{Java}} & \multicolumn{1}{c}{\textbf{\Celeven}} & \multicolumn{1}{c}{\textbf{pthreads}} \\
     1412\begin{cfa}
     1413class MyTask extends Thread {...}
     1414mytask t = new MyTask(...);
    15681415`t.start();` // start
    15691416// concurrency
     
    15721419&
    15731420\begin{cfa}
    1574 class MyThread { ... } // functor
    1575 MyThread mythread;
    1576 `thread t( mythread, ... );` // start
     1421class MyTask { ... } // functor
     1422MyTask mytask;
     1423`thread t( mytask, ... );` // start
    15771424// concurrency
    15781425`t.join();` // wait
     
    15871434\end{cfa}
    15881435\end{tabular}
    1589 \vspace{1pt}
    1590 \par\noindent
     1436\end{cquote}
    15911437\CFA has a simpler approach using a custom @thread@ type and leveraging declaration semantics (allocation/deallocation), where threads implicitly @fork@ after construction and @join@ before destruction.
    15921438\begin{cfa}
    1593 thread MyThread {};
    1594 void main( MyThread & this ) { ... }
     1439thread MyTask {};
     1440void main( MyTask & this ) { ... }
    15951441int main() {
    1596         MyThread team`[10]`; $\C[2.5in]{// allocate stack-based threads, implicit start after construction}$
     1442        MyTask team`[10]`; $\C[2.5in]{// allocate stack-based threads, implicit start after construction}$
    15971443        // concurrency
    15981444} $\C{// deallocate stack-based threads, implicit joins before destruction}$
     
    16021448Arbitrary topologies are possible using dynamic allocation, allowing threads to outlive their declaration scope, identical to normal dynamic allocation.
    16031449\begin{cfa}
    1604 MyThread * factory( int N ) { ... return `anew( N )`; } $\C{// allocate heap-based threads, implicit start after construction}$
     1450MyTask * factory( int N ) { ... return `anew( N )`; } $\C{// allocate heap-based threads, implicit start after construction}$
    16051451int main() {
    1606         MyThread * team = factory( 10 );
     1452        MyTask * team = factory( 10 );
    16071453        // concurrency
    16081454        `delete( team );` $\C{// deallocate heap-based threads, implicit joins before destruction}\CRT$
     
    16501496
    16511497Threads in \CFA are user level run by runtime kernel threads (see Section~\ref{s:CFARuntimeStructure}), where user threads provide concurrency and kernel threads provide parallelism.
    1652 Like coroutines, and for the same design reasons, \CFA provides a custom @thread@ type and a @trait@ to enforce and restrict the thread-interface functions.
     1498Like coroutines, and for the same design reasons, \CFA provides a custom @thread@ type and a @trait@ to enforce and restrict the task-interface functions.
    16531499\begin{cquote}
    16541500\begin{tabular}{@{}c@{\hspace{3\parindentlnth}}c@{}}
     
    16811527\label{s:MutualExclusionSynchronization}
    16821528
    1683 Unrestricted nondeterminism is meaningless as there is no way to know when a result is completed and safe to access.
     1529Unrestricted nondeterminism is meaningless as there is no way to know when the result is completed without synchronization.
    16841530To produce meaningful execution requires clawing back some determinism using mutual exclusion and synchronization, where mutual exclusion provides access control for threads using shared data, and synchronization is a timing relationship among threads~\cite[\S~4]{Buhr05a}.
    1685 The shared data protected by mutual exclusion is called a \newterm{critical section}~\cite{Dijkstra65}, and the protection can be simple (only 1 thread) or complex (only N kinds of threads, \eg group~\cite{Joung00} or readers/writer~\cite{Courtois71}).
    1686 Without synchronization control in a critical section, an arriving thread can barge ahead of preexisting waiter threads resulting in short/long-term starvation, staleness/freshness problems, and/or incorrect transfer of data.
    1687 Preventing or detecting barging is a challenge with low-level locks, but made easier through higher-level constructs.
    1688 This challenge is often split into two different approaches: barging \emph{avoidance} and \emph{prevention}.
    1689 Approaches that unconditionally release a lock for competing threads to acquire must use barging avoidance with flag/counter variable(s) to force barging threads to wait;
    1690 approaches that conditionally hold locks during synchronization, \eg baton-passing~\cite{Andrews89}, prevent barging completely.
    1691 
    1692 At the lowest level, concurrent control is provided by atomic operations, upon which different kinds of locking mechanisms are constructed, \eg spin locks, semaphores~\cite{Dijkstra68b}, barriers, and path expressions~\cite{Campbell74}.
    1693 However, for productivity it is always desirable to use the highest-level construct that provides the necessary efficiency~\cite{Hochstein05}.
    1694 A significant challenge with locks is composability because it takes careful organization for multiple locks to be used while preventing deadlock.
    1695 Easing composability is another feature higher-level mutual-exclusion mechanisms can offer.
    1696 Some concurrent systems eliminate mutable shared-state by switching to non-shared communication like message passing~\cite{Thoth,Harmony,V-Kernel,MPI} (Erlang, MPI), channels~\cite{CSP} (CSP,Go), actors~\cite{Akka} (Akka, Scala), or functional techniques (Haskell).
     1531Some concurrent systems eliminate mutable shared-state by switching to stateless communication like message passing~\cite{Thoth,Harmony,V-Kernel,MPI} (Erlang, MPI), channels~\cite{CSP} (CSP,Go), actors~\cite{Akka} (Akka, Scala), or functional techniques (Haskell).
    16971532However, these approaches introduce a new communication mechanism for concurrency different from the standard communication using function call/return.
    16981533Hence, a programmer must learn and manipulate two sets of design/programming patterns.
    16991534While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.
    1700 In contrast, approaches based on shared-state models more closely resemble the standard call/return programming model, resulting in a single programming paradigm.
    1701 Finally, a newer approach for restricting non-determinism is transactional memory~\cite{Herlihy93}.
    1702 While this approach is pursued in hardware~\cite{Nakaike15} and system languages, like \CC~\cite{Cpp-Transactions}, the performance and feature set is still too restrictive~\cite{Cascaval08,Boehm09} to be the main concurrency paradigm for system languages.
     1535In contrast, approaches based on stateful models more closely resemble the standard call/return programming model, resulting in a single programming paradigm.
     1536
     1537At the lowest level, concurrent control is implemented by atomic operations, upon which different kinds of locking mechanisms are constructed, \eg semaphores~\cite{Dijkstra68b}, barriers, and path expressions~\cite{Campbell74}.
     1538However, for productivity it is always desirable to use the highest-level construct that provides the necessary efficiency~\cite{Hochstein05}.
     1539A newer approach for restricting non-determinism is transactional memory~\cite{Herlihy93}.
     1540While this approach is pursued in hardware~\cite{Nakaike15} and system languages, like \CC~\cite{Cpp-Transactions}, the performance and feature set is still too restrictive to be the main concurrency paradigm for system languages, which is why it is rejected as the core paradigm for concurrency in \CFA.
     1541
     1542One of the most natural, elegant, and efficient mechanisms for mutual exclusion and synchronization for shared-memory systems is the \emph{monitor}.
     1543First proposed by Brinch Hansen~\cite{Hansen73} and later described and extended by C.A.R.~Hoare~\cite{Hoare74}, many concurrent programming languages provide monitors as an explicit language construct: \eg Concurrent Pascal~\cite{ConcurrentPascal}, Mesa~\cite{Mesa}, Modula~\cite{Modula-2}, Turing~\cite{Turing:old}, Modula-3~\cite{Modula-3}, NeWS~\cite{NeWS}, Emerald~\cite{Emerald}, \uC~\cite{Buhr92a} and Java~\cite{Java}.
     1544In addition, operating-system kernels and device drivers have a monitor-like structure, although they often use lower-level primitives such as mutex locks or semaphores to simulate monitors.
     1545For these reasons, \CFA selected monitors as the core high-level concurrency construct, upon which higher-level approaches can be easily constructed.
     1546
     1547
     1548\subsection{Mutual Exclusion}
     1549
     1550A group of instructions manipulating a specific instance of shared data that must be performed atomically is called a \newterm{critical section}~\cite{Dijkstra65}, which is enforced by \newterm{simple mutual-exclusion}.
     1551The generalization is called a \newterm{group critical-section}~\cite{Joung00}, where multiple tasks with the same session use the resource simultaneously and different sessions are segregated, which is enforced by \newterm{complex mutual-exclusion} providing the correct kind and number of threads using a group critical-section.
     1552The readers/writer problem~\cite{Courtois71} is an instance of a group critical-section, where readers share a session but writers have a unique session.
     1553
     1554However, many solutions exist for mutual exclusion, which vary in terms of performance, flexibility and ease of use.
     1555Methods range from low-level locks, which are fast and flexible but require significant attention for correctness, to higher-level concurrency techniques, which sacrifice some performance to improve ease of use.
     1556Ease of use comes by either guaranteeing some problems cannot occur, \eg deadlock free, or by offering a more explicit coupling between shared data and critical section.
     1557For example, the \CC @std::atomic<T>@ offers an easy way to express mutual-exclusion on a restricted set of operations, \eg reading/writing, for numerical types.
     1558However, a significant challenge with locks is composability because it takes careful organization for multiple locks to be used while preventing deadlock.
     1559Easing composability is another feature higher-level mutual-exclusion mechanisms can offer.
     1560
     1561
     1562\subsection{Synchronization}
     1563
     1564Synchronization enforces relative ordering of execution, and synchronization tools provide numerous mechanisms to establish these timing relationships.
     1565Low-level synchronization primitives offer good performance and flexibility at the cost of ease of use;
     1566higher-level mechanisms often simplify usage by adding better coupling between synchronization and data, \eg receive-specific versus receive-any thread in message passing or offering specialized solutions, \eg barrier lock.
     1567Often synchronization is used to order access to a critical section, \eg ensuring a waiting writer thread enters the critical section before a calling reader thread.
     1568If the calling reader is scheduled before the waiting writer, the reader has barged.
     1569Barging can result in staleness/freshness problems, where a reader barges ahead of a writer and reads temporally stale data, or a writer barges ahead of another writer overwriting data with a fresh value preventing the previous value from ever being read (lost computation).
     1570Preventing or detecting barging is an involved challenge with low-level locks, which is made easier through higher-level constructs.
     1571This challenge is often split into two different approaches: barging avoidance and prevention.
     1572Algorithms that unconditionally release a lock for competing threads to acquire use barging avoidance during synchronization to force a barging thread to wait;
     1573algorithms that conditionally hold locks during synchronization, \eg baton-passing~\cite{Andrews89}, prevent barging completely.
    17031574
    17041575
     
    17061577\label{s:Monitor}
    17071578
    1708 One of the most natural, elegant, efficient, high-level mechanisms for mutual exclusion and synchronization for shared-memory systems is the \emph{monitor} (Table~\ref{t:ExecutionPropertyComposition} case 2).
    1709 First proposed by Brinch Hansen~\cite{Hansen73} and later described and extended by C.A.R.~Hoare~\cite{Hoare74}, many concurrent programming languages provide monitors as an explicit language construct: \eg Concurrent Pascal~\cite{ConcurrentPascal}, Mesa~\cite{Mesa}, Modula~\cite{Modula-2}, Turing~\cite{Turing:old}, Modula-3~\cite{Modula-3}, NeWS~\cite{NeWS}, Emerald~\cite{Emerald}, \uC~\cite{Buhr92a} and Java~\cite{Java}.
    1710 In addition, operating-system kernels and device drivers have a monitor-like structure, although they often use lower-level primitives such as mutex locks or semaphores to manually implement a monitor.
    1711 For these reasons, \CFA selected monitors as the core high-level concurrency construct, upon which higher-level approaches can be easily constructed.
    1712 
    1713 Specifically, a \textbf{monitor} is a set of functions that ensure mutual exclusion when accessing shared state.
    1714 More precisely, a monitor is a programming technique that implicitly binds mutual exclusion to static function scope by call/return, as opposed to locks, where mutual-exclusion is defined by acquire/release calls, independent of lexical context (analogous to block and heap storage allocation).
     1579A \textbf{monitor} is a set of functions that ensure mutual exclusion when accessing shared state.
     1580More precisely, a monitor is a programming technique that implicitly binds mutual exclusion to static function scope, as opposed to locks, where mutual-exclusion is defined by acquire/release calls, independent of lexical context (analogous to block and heap storage allocation).
    17151581Restricting acquire/release points eases programming, comprehension, and maintenance, at a slight cost in flexibility and efficiency.
    17161582\CFA uses a custom @monitor@ type and leverages declaration semantics (deallocation) to protect active or waiting threads in a monitor.
    17171583
    17181584The following is a \CFA monitor implementation of an atomic counter.
    1719 \begin{cfa}
     1585\begin{cfa}[morekeywords=nomutex]
    17201586`monitor` Aint { int cnt; }; $\C[4.25in]{// atomic integer counter}$
    1721 int ++?( Aint & `mutex` this ) with( this ) { return ++cnt; } $\C{// increment}$
    1722 int ?=?( Aint & `mutex` lhs, int rhs ) with( lhs ) { cnt = rhs; } $\C{// conversions with int, mutex optional}\CRT$
    1723 int ?=?( int & lhs, Aint & `mutex` rhs ) with( rhs ) { lhs = cnt; }
    1724 \end{cfa}
    1725 The operators use the parameter-only declaration type-qualifier @mutex@ to mark which parameters require locking during function execution to protect from race conditions.
    1726 The assignment operators provide bidirectional conversion between an atomic and normal integer without accessing field @cnt@.
    1727 (These operations only need @mutex@, if reading/writing the implementation type is not atomic.)
    1728 The atomic counter is used without any explicit mutual-exclusion and provides thread-safe semantics.
     1587int ++?( Aint & `mutex`$\(_{opt}\)$ this ) with( this ) { return ++cnt; } $\C{// increment}$
     1588int ?=?( Aint & `mutex`$\(_{opt}\)$ lhs, int rhs ) with( lhs ) { cnt = rhs; } $\C{// conversions with int}\CRT$
     1589int ?=?( int & lhs, Aint & `mutex`$\(_{opt}\)$ rhs ) with( rhs ) { lhs = cnt; }
     1590\end{cfa}
     1591% The @Aint@ constructor, @?{}@, uses the \lstinline[morekeywords=nomutex]@nomutex@ qualifier indicating mutual exclusion is unnecessary during construction because an object is inaccessible (private) until after it is initialized.
     1592% (While a constructor may publish its address into a global variable, doing so generates a race-condition.)
     1593The prefix increment operation, @++?@, is normally @mutex@, indicating mutual exclusion is necessary during function execution, to protect the incrementing from race conditions, unless there is an atomic increment instruction for the implementation type.
     1594The assignment operators provide bidirectional conversion between an atomic and normal integer without accessing field @cnt@;
     1595these operations only need @mutex@, if reading/writing the implementation type is not atomic.
     1596The atomic counter is used without any explicit mutual-exclusion and provides thread-safe semantics, which is similar to the \CC template @std::atomic@.
    17291597\begin{cfa}
    17301598int i = 0, j = 0, k = 5;
     
    17341602i = x; j = y; k = z;
    17351603\end{cfa}
    1736 Note, like other concurrent programming languages, \CFA has specializations for the basic types using atomic instructions for performance and a general trait similar to the \CC template @std::atomic@.
    17371604
    17381605\CFA monitors have \newterm{multi-acquire} semantics so the thread in the monitor may acquire it multiple times without deadlock, allowing recursion and calling other interface functions.
    1739 \newpage
    17401606\begin{cfa}
    17411607monitor M { ... } m;
     
    17461612\end{cfa}
    17471613\CFA monitors also ensure the monitor lock is released regardless of how an acquiring function ends (normal or exceptional), and returning a shared variable is safe via copying before the lock is released.
    1748 Similar safety is offered by \emph{explicit} opt-in disciplines like \CC RAII versus the monitor \emph{implicit} language-enforced safety guarantee ensuring no programmer usage errors.
     1614Similar safety is offered by \emph{explicit} mechanisms like \CC RAII;
     1615monitor \emph{implicit} safety ensures no programmer usage errors.
    17491616Furthermore, RAII mechanisms cannot handle complex synchronization within a monitor, where the monitor lock may not be released on function exit because it is passed to an unblocking thread;
    17501617RAII is purely a mutual-exclusion mechanism (see Section~\ref{s:Scheduling}).
     
    17721639\end{cquote}
    17731640The @dtype@ property prevents \emph{implicit} copy operations and the @is_monitor@ trait provides no \emph{explicit} copy operations, so monitors must be passed by reference (pointer).
     1641% Copying a lock is insecure because it is possible to copy an open lock and then use the open copy when the original lock is closed to simultaneously access the shared data.
     1642% Copying a monitor is secure because both the lock and shared data are copies, but copying the shared data is meaningless because it no longer represents a unique entity.
    17741643Similarly, the function definitions ensure there is a mechanism to get (read) the monitor descriptor from its handle, and a special destructor to prevent deallocation if a thread is using the shared data.
    17751644The custom monitor type also inserts any locks needed to implement the mutual exclusion semantics.
     
    17831652For example, a monitor may be passed through multiple helper functions before it is necessary to acquire the monitor's mutual exclusion.
    17841653
    1785 \CFA requires programmers to identify the kind of parameter with the @mutex@ keyword and uses no keyword to mean \lstinline[morekeywords=nomutex]@nomutex@, because @mutex@ parameters are rare and no keyword is the \emph{normal} parameter semantics.
    1786 Hence, @mutex@ parameters are documentation, at the function and its prototype, to both programmer and compiler, without other redundant keywords.
    1787 Furthermore, \CFA relies heavily on traits as an abstraction mechanism, so the @mutex@ qualifier prevents coincidentally matching of a monitor trait with a type that is not a monitor, similar to coincidental inheritance where a shape and playing card can both be drawable.
     1654The benefit of mandatory monitor qualifiers is self-documentation, but requiring both @mutex@ and \lstinline[morekeywords=nomutex]@nomutex@ for all monitor parameters is redundant.
     1655Instead, the semantics has one qualifier as the default and the other required.
     1656For example, make the safe @mutex@ qualifier the default because assuming \lstinline[morekeywords=nomutex]@nomutex@ may cause subtle errors.
     1657Alternatively, make the unsafe \lstinline[morekeywords=nomutex]@nomutex@ qualifier the default because it is the \emph{normal} parameter semantics while @mutex@ parameters are rare.
     1658Providing a default qualifier implies knowing whether a parameter is a monitor.
     1659Since \CFA relies heavily on traits as an abstraction mechanism, types can coincidentally match the monitor trait but not be a monitor, similar to inheritance where a shape and playing card can both be drawable.
     1660For this reason, \CFA requires programmers to identify the kind of parameter with the @mutex@ keyword and uses no keyword to mean \lstinline[morekeywords=nomutex]@nomutex@.
    17881661
    17891662The next semantic decision is establishing which parameter \emph{types} may be qualified with @mutex@.
     
    17991672Function @f3@ has a multiple object matrix, and @f4@ a multiple object data structure.
    18001673While shown shortly, multiple object acquisition is possible, but the number of objects must be statically known.
    1801 Therefore, \CFA only acquires one monitor per parameter with exactly one level of indirection, and excludes pointer types to unknown-sized arrays.
     1674Therefore, \CFA only acquires one monitor per parameter with at most one level of indirection, excluding pointers as it is impossible to statically determine the size.
    18021675
    18031676For object-oriented monitors, \eg Java, calling a mutex member \emph{implicitly} acquires mutual exclusion of the receiver object, @`rec`.foo(...)@.
     
    18061679While object-oriented monitors can be extended with a mutex qualifier for multiple-monitor members, no prior example of this feature could be found.}
    18071680called \newterm{bulk acquire}.
    1808 \CFA guarantees bulk acquisition order is consistent across calls to @mutex@ functions using the same monitors as arguments, so acquiring multiple monitors in a bulk acquire is safe from deadlock.
     1681\CFA guarantees acquisition order is consistent across calls to @mutex@ functions using the same monitors as arguments, so acquiring multiple monitors is safe from deadlock.
    18091682Figure~\ref{f:BankTransfer} shows a trivial solution to the bank transfer problem~\cite{BankTransfer}, where two resources must be locked simultaneously, using \CFA monitors with implicit locking and \CC with explicit locking.
    18101683A \CFA programmer only has to manage when to acquire mutual exclusion;
     
    18261699void transfer( BankAccount & `mutex` my,
    18271700        BankAccount & `mutex` your, int me2you ) {
    1828         // bulk acquire
     1701
    18291702        deposit( my, -me2you ); // debit
    18301703        deposit( your, me2you ); // credit
     
    18561729void transfer( BankAccount & my,
    18571730                        BankAccount & your, int me2you ) {
    1858         `scoped_lock lock( my.m, your.m );` // bulk acquire
     1731        `scoped_lock lock( my.m, your.m );`
    18591732        deposit( my, -me2you ); // debit
    18601733        deposit( your, me2you ); // credit
     
    18841757\end{figure}
    18851758
    1886 Users can still force the acquiring order by using or not using @mutex@.
     1759Users can still force the acquiring order by using @mutex@/\lstinline[morekeywords=nomutex]@nomutex@.
    18871760\begin{cfa}
    18881761void foo( M & mutex m1, M & mutex m2 ); $\C{// acquire m1 and m2}$
    1889 void bar( M & mutex m1, M & m2 ) { $\C{// only acquire m1}$
     1762void bar( M & mutex m1, M & /* nomutex */ m2 ) { $\C{// acquire m1}$
    18901763        ... foo( m1, m2 ); ... $\C{// acquire m2}$
    18911764}
    1892 void baz( M & m1, M & mutex m2 ) { $\C{// only acquire m2}$
     1765void baz( M & /* nomutex */ m1, M & mutex m2 ) { $\C{// acquire m2}$
    18931766        ... foo( m1, m2 ); ... $\C{// acquire m1}$
    18941767}
     
    19331806% There are many aspects of scheduling in a concurrency system, all related to resource utilization by waiting threads, \ie which thread gets the resource next.
    19341807% Different forms of scheduling include access to processors by threads (see Section~\ref{s:RuntimeStructureCluster}), another is access to a shared resource by a lock or monitor.
    1935 This section discusses scheduling for waiting threads eligible for monitor entry, \ie which user thread gets the shared resource next. (See Section~\ref{s:RuntimeStructureCluster} for scheduling kernel threads on virtual processors.)
    1936 While monitor mutual-exclusion provides safe access to its shared data, the data may indicate a thread cannot proceed, \eg a bounded buffer may be full/\-empty so producer/consumer threads must block.
    1937 Leaving the monitor and retrying (busy waiting) is impractical for high-level programming.
    1938 
    1939 Monitors eliminate busy waiting by providing synchronization within the monitor critical-section to schedule threads needing access to the shared data, where threads block versus spin.
     1808This section discusses monitor scheduling for waiting threads eligible for entry, \ie which thread gets the shared resource next. (See Section~\ref{s:RuntimeStructureCluster} for scheduling threads on virtual processors.)
     1809While monitor mutual-exclusion provides safe access to shared data, the monitor data may indicate that a thread accessing it cannot proceed, \eg a bounded buffer may be full/empty so producer/consumer threads must block.
     1810Leaving the monitor and trying again (busy waiting) is impractical for high-level programming.
     1811Monitors eliminate busy waiting by providing synchronization to schedule threads needing access to the shared data, where threads block versus spinning.
    19401812Synchronization is generally achieved with internal~\cite{Hoare74} or external~\cite[\S~2.9.2]{uC++} scheduling.
    1941 \newterm{Internal} (largely) schedules threads located \emph{inside} the monitor and is accomplished using condition variables with signal and wait.
    1942 \newterm{External} (largely) schedules threads located \emph{outside} the monitor and is accomplished with the @waitfor@ statement.
    1943 Note, internal scheduling has a small amount of external scheduling and vice versa, so the naming denotes where the majority of the blocked threads reside (inside or outside) for scheduling.
    1944 For complex scheduling, the approaches can be combined, so there can be an equal number of threads waiting inside and outside.
    1945 
    1946 \CFA monitors do not allow calling threads to barge ahead of signalled threads (via barging prevention), which simplifies synchronization among threads in the monitor and increases correctness.
    1947 A direct consequence of this semantics is that unblocked waiting threads are not required to recheck the waiting condition, \ie waits are not in a starvation-prone busy-loop as required by the signals-as-hints style with barging.
    1948 Preventing barging comes directly from Hoare's semantics in the seminal paper on monitors~\cite[p.~550]{Hoare74}.
     1813\newterm{Internal scheduling} is characterized by each thread entering the monitor and making an individual decision about proceeding or blocking, while \newterm{external scheduling} is characterized by an entering thread making a decision about proceeding for itself and on behalf of other threads attempting entry.
     1814Finally, \CFA monitors do not allow calling threads to barge ahead of signalled threads, which simplifies synchronization among threads in the monitor and increases correctness.
     1815If barging is allowed, synchronization between a signaller and signallee is difficult, often requiring additional flags and multiple unblock/block cycles.
     1816In fact, signals-as-hints is completely opposite from that proposed by Hoare in the seminal paper on monitors~\cite[p.~550]{Hoare74}.
    19491817% \begin{cquote}
    19501818% However, we decree that a signal operation be followed immediately by resumption of a waiting program, without possibility of an intervening procedure call from yet a third program.
    19511819% It is only in this way that a waiting program has an absolute guarantee that it can acquire the resource just released by the signalling program without any danger that a third program will interpose a monitor entry and seize the resource instead.~\cite[p.~550]{Hoare74}
    19521820% \end{cquote}
    1953 Furthermore, \CFA concurrency has no spurious wakeup~\cite[\S~9]{Buhr05a}, which eliminates an implicit form of self barging.
    1954 
    1955 Monitor mutual-exclusion means signalling cannot have the signaller and signalled thread in the monitor simultaneously, so only the signaller or signallee can proceed.
    1956 Figure~\ref{f:MonitorScheduling} shows internal/external scheduling for the bounded-buffer examples in Figure~\ref{f:GenericBoundedBuffer}.
    1957 For internal scheduling in Figure~\ref{f:BBInt}, the @signal@ moves the signallee (front thread of the specified condition queue) to urgent and the signaller continues (solid line).
    1958 Multiple signals move multiple signallees to urgent until the condition queue is empty.
    1959 When the signaller exits or waits, a thread is implicitly unblocked from urgent (if available) before unblocking a calling thread to prevent barging.
     1821Furthermore, \CFA concurrency has no spurious wakeup~\cite[\S~9]{Buhr05a}, which eliminates an implicit form of self barging.
     1822Hence, a \CFA @wait@ statement is not enclosed in a @while@ loop retesting a blocking predicate, which can cause thread starvation due to barging.
     1823
     1824Figure~\ref{f:MonitorScheduling} shows general internal/external scheduling (for the bounded-buffer example in Figure~\ref{f:InternalExternalScheduling}).
     1825External calling threads block on the calling queue, if the monitor is occupied, otherwise they enter in FIFO order.
     1826Internal threads block on condition queues via @wait@ and reenter from the condition in FIFO order.
     1827Alternatively, internal threads block on urgent from the @signal_block@ or @waitfor@, and reenter implicitly when the monitor becomes empty, \ie, the thread in the monitor exits or waits.
     1828
     1829There are three signalling mechanisms to unblock waiting threads to enter the monitor.
     1830Note, signalling cannot have the signaller and signalled thread in the monitor simultaneously because of the mutual exclusion, so either the signaller or signallee can proceed.
     1831For internal scheduling, threads are unblocked from condition queues using @signal@, where the signallee is moved to urgent and the signaller continues (solid line).
     1832Multiple signals move multiple signallees to urgent until the condition is empty.
     1833When the signaller exits or waits, a thread blocked on urgent is processed before calling threads to prevent barging.
    19601834(Java conceptually moves the signalled thread to the calling queue, and hence, allows barging.)
    1961 Signal is used when the signaller is providing the cooperation needed by the signallee (\eg creating an empty slot in a buffer for a producer) and the signaller immediately exits the monitor to run concurrently (consume the buffer element) and passes control of the monitor to the signalled thread, which can immediately take advantage of the state change.
    1962 Specifically, the @wait@ function atomically blocks the calling thread and implicitly releases the monitor lock(s) for all monitors in the function's parameter list.
    1963 Signalling is unconditional because signalling an empty condition queue does nothing.
    1964 It is common to declare condition queues as monitor fields to prevent shared access, hence no locking is required for access as the queues are protected by the monitor lock.
    1965 In \CFA, a condition queue can be created/stored independently.
     1835The alternative unblock is in the opposite order using @signal_block@, where the signaller is moved to urgent and the signallee continues (dashed line), and is implicitly unblocked from urgent when the signallee exits or waits.
     1836
     1837For external scheduling, the condition queues are not used;
     1838instead threads are unblocked directly from the calling queue using @waitfor@ based on function names requesting mutual exclusion.
     1839(The linear search through the calling queue to locate a particular call can be reduced to $O(1)$.)
     1840The @waitfor@ has the same semantics as @signal_block@, where the signalled thread executes before the signallee, which waits on urgent.
     1841Executing multiple @waitfor@s from different signalled functions causes the calling threads to move to urgent.
     1842External scheduling requires urgent to be a stack, because the signaller expects to execute immediately after the specified monitor call has exited or waited.
     1843Internal scheduling behaves the same for an urgent stack or queue, except for multiple signalling, where the threads unblock from urgent in reverse order from signalling.
     1844If the restart order is important, multiple signalling by a signalling thread can be transformed into daisy-chain signalling among threads, where each thread signals the next thread.
     1845We tried both a stack for @waitfor@ and queue for signalling, but that resulted in complex semantics about which thread enters next.
     1846Hence, \CFA uses a single urgent stack to correctly handle @waitfor@ and adequately support both forms of signalling.
    19661847
    19671848\begin{figure}
     
    19811862\end{figure}
    19821863
     1864Figure~\ref{f:BBInt} shows a \CFA generic bounded-buffer with internal scheduling, where producers/consumers enter the monitor, detect the buffer is full/empty, and block on an appropriate condition variable, @full@/@empty@.
     1865The @wait@ function atomically blocks the calling thread and implicitly releases the monitor lock(s) for all monitors in the function's parameter list.
     1866The appropriate condition variable is signalled to unblock an opposite kind of thread after an element is inserted/removed from the buffer.
     1867Signalling is unconditional, because signalling an empty condition variable does nothing.
     1868It is common to declare condition variables as monitor fields to prevent shared access, hence no locking is required for access as the conditions are protected by the monitor lock.
     1869In \CFA, a condition variable can be created/stored independently.
     1870% To still prevent expensive locking on access, a condition variable is tied to a \emph{group} of monitors on first use, called \newterm{branding}, resulting in a low-cost boolean test to detect sharing from other monitors.
     1871
     1872% Signalling semantics cannot have the signaller and signalled thread in the monitor simultaneously, which means:
     1873% \begin{enumerate}
     1874% \item
     1875% The signalling thread returns immediately and the signalled thread continues.
     1876% \item
     1877% The signalling thread continues and the signalled thread is marked for urgent unblocking at the next scheduling point (exit/wait).
     1878% \item
     1879% The signalling thread blocks but is marked for urgent unblocking at the next scheduling point and the signalled thread continues.
     1880% \end{enumerate}
     1881% The first approach is too restrictive, as it precludes solving a reasonable class of problems, \eg dating service (see Figure~\ref{f:DatingService}).
     1882% \CFA supports the next two semantics as both are useful.
     1883
    19831884\begin{figure}
    19841885\centering
     
    19921893                T elements[10];
    19931894        };
    1994         void ?{}( Buffer(T) & buf ) with(buf) {
     1895        void ?{}( Buffer(T) & buffer ) with(buffer) {
    19951896                front = back = count = 0;
    19961897        }
    1997 
    1998         void insert(Buffer(T) & mutex buf, T elm) with(buf){
    1999                 if ( count == 10 ) `wait( empty )`; // full ?
    2000                 // insert elm into buf
     1898        void insert( Buffer(T) & mutex buffer, T elem )
     1899                                with(buffer) {
     1900                if ( count == 10 ) `wait( empty )`;
     1901                // insert elem into buffer
    20011902                `signal( full )`;
    20021903        }
    2003         T remove( Buffer(T) & mutex buf ) with(buf) {
    2004                 if ( count == 0 ) `wait( full )`; // empty ?
    2005                 // remove elm from buf
     1904        T remove( Buffer(T) & mutex buffer ) with(buffer) {
     1905                if ( count == 0 ) `wait( full )`;
     1906                // remove elem from buffer
    20061907                `signal( empty )`;
    2007                 return elm;
     1908                return elem;
    20081909        }
    20091910}
    20101911\end{cfa}
    20111912\end{lrbox}
     1913
     1914% \newbox\myboxB
     1915% \begin{lrbox}{\myboxB}
     1916% \begin{cfa}[aboveskip=0pt,belowskip=0pt]
     1917% forall( otype T ) { // distribute forall
     1918%       monitor Buffer {
     1919%
     1920%               int front, back, count;
     1921%               T elements[10];
     1922%       };
     1923%       void ?{}( Buffer(T) & buffer ) with(buffer) {
     1924%               [front, back, count] = 0;
     1925%       }
     1926%       T remove( Buffer(T) & mutex buffer ); // forward
     1927%       void insert( Buffer(T) & mutex buffer, T elem )
     1928%                               with(buffer) {
     1929%               if ( count == 10 ) `waitfor( remove, buffer )`;
     1930%               // insert elem into buffer
     1931%
     1932%       }
     1933%       T remove( Buffer(T) & mutex buffer ) with(buffer) {
     1934%               if ( count == 0 ) `waitfor( insert, buffer )`;
     1935%               // remove elem from buffer
     1936%
     1937%               return elem;
     1938%       }
     1939% }
     1940% \end{cfa}
     1941% \end{lrbox}
    20121942
    20131943\newbox\myboxB
    20141944\begin{lrbox}{\myboxB}
    20151945\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    2016 forall( otype T ) { // distribute forall
    2017         monitor Buffer {
    2018 
    2019                 int front, back, count;
    2020                 T elements[10];
    2021         };
    2022         void ?{}( Buffer(T) & buf ) with(buf) {
    2023                 front = back = count = 0;
    2024         }
    2025         T remove( Buffer(T) & mutex buf ); // forward
    2026         void insert(Buffer(T) & mutex buf, T elm) with(buf){
    2027                 if ( count == 10 ) `waitfor( remove : buf )`;
    2028                 // insert elm into buf
    2029 
    2030         }
    2031         T remove( Buffer(T) & mutex buf ) with(buf) {
    2032                 if ( count == 0 ) `waitfor( insert : buf )`;
    2033                 // remove elm from buf
    2034 
    2035                 return elm;
    2036         }
    2037 }
    2038 \end{cfa}
    2039 \end{lrbox}
    2040 
    2041 \subfloat[Internal scheduling]{\label{f:BBInt}\usebox\myboxA}
    2042 \hspace{1pt}
    2043 \vrule
    2044 \hspace{3pt}
    2045 \subfloat[External scheduling]{\label{f:BBExt}\usebox\myboxB}
    2046 
    2047 \caption{Generic bounded buffer}
    2048 \label{f:GenericBoundedBuffer}
    2049 \end{figure}
    2050 
    2051 The @signal_block@ provides the opposite unblocking order, where the signaller is moved to urgent and the signallee continues and a thread is implicitly unblocked from urgent when the signallee exits or waits (dashed line).
    2052 Signal block is used when the signallee is providing the cooperation needed by the signaller (\eg if the buffer is removed and a producer hands off an item to a consumer, as in Figure~\ref{f:DatingSignalBlock}) so the signaller must wait until the signallee unblocks, provides the cooperation, exits the monitor to run concurrently, and passes control of the monitor to the signaller, which can immediately take advantage of the state change.
    2053 Using @signal@ or @signal_block@ can be a dynamic decision based on whether the thread providing the cooperation arrives before or after the thread needing the cooperation.
    2054 
    2055 External scheduling in Figure~\ref{f:BBExt} simplifies internal scheduling by eliminating condition queues and @signal@/@wait@ (cases where it cannot are discussed shortly), and has existed in the programming language Ada for almost 40 years with variants in other languages~\cite{SR,ConcurrentC++,uC++}.
    2056 While prior languages use external scheduling solely for thread interaction, \CFA generalizes it to both monitors and threads.
    2057 External scheduling allows waiting for events from other threads while restricting unrelated events, that would otherwise have to wait on condition queues in the monitor.
    2058 Scheduling is controlled by the @waitfor@ statement, which atomically blocks the calling thread, releases the monitor lock, and restricts the function calls that can next acquire mutual exclusion.
    2059 Specifically, a thread calling the monitor is unblocked directly from the calling queue based on function names that can fulfill the cooperation required by the signaller.
    2060 (The linear search through the calling queue to locate a particular call can be reduced to $O(1)$.)
    2061 Hence, the @waitfor@ has the same semantics as @signal_block@, where the signallee thread from the calling queue executes before the signaller, which waits on urgent.
    2062 Now when a producer/consumer detects a full/empty buffer, the necessary cooperation for continuation is specified by indicating the next function call that can occur.
    2063 For example, a producer detecting a full buffer must have cooperation from a consumer to remove an item so function @remove@ is accepted, which prevents producers from entering the monitor, and after a consumer calls @remove@, the producer waiting on urgent is \emph{implicitly} unblocked because it can now continue its insert operation.
    2064 Hence, this mechanism is done in terms of control flow, next call, versus in terms of data, channels, as in Go/Rust @select@.
    2065 While both mechanisms have strengths and weaknesses, \CFA uses the control-flow mechanism to be consistent with other language features.
    2066 
    2067 Figure~\ref{f:ReadersWriterLock} shows internal/external scheduling for a readers/writer lock with no barging and threads are serviced in FIFO order to eliminate staleness/freshness among the reader/writer threads.
    2068 For internal scheduling in Figure~\ref{f:RWInt}, the readers and writers wait on the same condition queue in FIFO order, making it impossible to tell if a waiting thread is a reader or writer.
    2069 To recover the kind of thread, a \CFA condition can store user data in the node for a blocking thread at the @wait@, \ie whether the thread is a @READER@ or @WRITER@.
    2070 An unblocked reader thread checks if the thread at the front of the queue is a reader and unblocks it, \ie the readers daisy-chain signal the next group of readers demarcated by the next writer or end of the queue.
    2071 For external scheduling in Figure~\ref{f:RWExt}, a waiting reader checks if a writer is using the resource, and if so, restricts further calls until the writer exits by calling @EndWrite@.
    2072 The writer does a similar action for each reader or writer using the resource.
    2073 Note, no new calls to @StartRead@/@StartWrite@ may occur when waiting for the call to @EndRead@/@EndWrite@.
    2074 
    2075 \begin{figure}
    2076 \centering
    2077 \newbox\myboxA
    2078 \begin{lrbox}{\myboxA}
    2079 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
    2080 enum RW { READER, WRITER };
    20811946monitor ReadersWriter {
    2082         int rcnt, wcnt; // readers/writer using resource
    2083         `condition RWers;`
     1947        int rcnt, wcnt; // readers/writer using resource
    20841948};
    20851949void ?{}( ReadersWriter & rw ) with(rw) {
     
    20881952void EndRead( ReadersWriter & mutex rw ) with(rw) {
    20891953        rcnt -= 1;
    2090         if ( rcnt == 0 ) `signal( RWers )`;
    20911954}
    20921955void EndWrite( ReadersWriter & mutex rw ) with(rw) {
    20931956        wcnt = 0;
    2094         `signal( RWers );`
    20951957}
    20961958void StartRead( ReadersWriter & mutex rw ) with(rw) {
    2097         if ( wcnt !=0 || ! empty( RWers ) )
    2098                 `wait( RWers, READER )`;
     1959        if ( wcnt > 0 ) `waitfor( EndWrite, rw );`
    20991960        rcnt += 1;
    2100         if ( ! empty(RWers) && `front(RWers) == READER` )
    2101                 `signal( RWers )`;  // daisy-chain signalling
    21021961}
    21031962void StartWrite( ReadersWriter & mutex rw ) with(rw) {
    2104         if ( wcnt != 0 || rcnt != 0 ) `wait( RWers, WRITER )`;
    2105 
     1963        if ( wcnt > 0 ) `waitfor( EndWrite, rw );`
     1964        else while ( rcnt > 0 ) `waitfor( EndRead, rw );`
    21061965        wcnt = 1;
    21071966}
     1967
    21081968\end{cfa}
    21091969\end{lrbox}
    21101970
    2111 \newbox\myboxB
    2112 \begin{lrbox}{\myboxB}
    2113 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
    2114 
    2115 monitor ReadersWriter {
    2116         int rcnt, wcnt; // readers/writer using resource
    2117 
    2118 };
    2119 void ?{}( ReadersWriter & rw ) with(rw) {
    2120         rcnt = wcnt = 0;
    2121 }
    2122 void EndRead( ReadersWriter & mutex rw ) with(rw) {
    2123         rcnt -= 1;
    2124 
    2125 }
    2126 void EndWrite( ReadersWriter & mutex rw ) with(rw) {
    2127         wcnt = 0;
    2128 
    2129 }
    2130 void StartRead( ReadersWriter & mutex rw ) with(rw) {
    2131         if ( wcnt > 0 ) `waitfor( EndWrite : rw );`
    2132 
    2133         rcnt += 1;
    2134 
    2135 
    2136 }
    2137 void StartWrite( ReadersWriter & mutex rw ) with(rw) {
    2138         if ( wcnt > 0 ) `waitfor( EndWrite : rw );`
    2139         else while ( rcnt > 0 ) `waitfor( EndRead : rw );`
    2140         wcnt = 1;
    2141 }
    2142 \end{cfa}
    2143 \end{lrbox}
    2144 
    2145 \subfloat[Internal scheduling]{\label{f:RWInt}\usebox\myboxA}
    2146 \hspace{1pt}
     1971\subfloat[Generic bounded buffer, internal scheduling]{\label{f:BBInt}\usebox\myboxA}
     1972\hspace{3pt}
    21471973\vrule
    21481974\hspace{3pt}
    2149 \subfloat[External scheduling]{\label{f:RWExt}\usebox\myboxB}
    2150 
    2151 \caption{Readers / writer lock}
    2152 \label{f:ReadersWriterLock}
     1975\subfloat[Readers / writer lock, external scheduling]{\label{f:RWExt}\usebox\myboxB}
     1976
     1977\caption{Internal / external scheduling}
     1978\label{f:InternalExternalScheduling}
    21531979\end{figure}
    21541980
    2155 Finally, external scheduling requires urgent to be a stack, because the signaller expects to execute immediately after the specified monitor call has exited or waited.
     2156 Internal scheduling performing multiple signalling results in unblocking from urgent in the reverse order from signalling.
    2157 It is rare for the unblocking order to be important as an unblocked thread can be time-sliced immediately after leaving the monitor.
    2158 If the unblocking order is important, multiple signalling can be restructured into daisy-chain signalling, where each thread signals the next thread.
    2159 Hence, \CFA uses a single urgent stack to correctly handle @waitfor@ and adequately support both forms of signalling.
    2160 (Advanced @waitfor@ features are discussed in Section~\ref{s:ExtendedWaitfor}.)
     1981Figure~\ref{f:BBInt} can be transformed into external scheduling by removing the condition variables and signals/waits, and adding the following lines at the locations of the current @wait@s in @insert@/@remove@, respectively.
     1982\begin{cfa}[aboveskip=2pt,belowskip=1pt]
     1983if ( count == 10 ) `waitfor( remove, buffer )`;       |      if ( count == 0 ) `waitfor( insert, buffer )`;
     1984\end{cfa}
      1985Here, the producers/consumers detect a full/\-empty buffer and prevent more producers/consumers from entering the monitor until there is a free/empty slot in the buffer.
     1986External scheduling is controlled by the @waitfor@ statement, which atomically blocks the calling thread, releases the monitor lock, and restricts the function calls that can next acquire mutual exclusion.
     1987If the buffer is full, only calls to @remove@ can acquire the buffer, and if the buffer is empty, only calls to @insert@ can acquire the buffer.
     1988Threads calling excluded functions block outside of (external to) the monitor on the calling queue, versus blocking on condition queues inside of (internal to) the monitor.
     1989Figure~\ref{f:RWExt} shows a readers/writer lock written using external scheduling, where a waiting reader detects a writer using the resource and restricts further calls until the writer exits by calling @EndWrite@.
     1990The writer does a similar action for each reader or writer using the resource.
      1991Note, no new calls to @StartRead@/@StartWrite@ may occur when waiting for the call to @EndRead@/@EndWrite@.
     1992External scheduling allows waiting for events from other threads while restricting unrelated events, that would otherwise have to wait on conditions in the monitor.
      1993The mechanism can be done in terms of control flow, \eg Ada @accept@ or \uC @_Accept@, or in terms of data, \eg Go @select@ on channels.
     1994While both mechanisms have strengths and weaknesses, this project uses the control-flow mechanism to be consistent with other language features.
     1995% Two challenges specific to \CFA for external scheduling are loose object-definitions (see Section~\ref{s:LooseObjectDefinitions}) and multiple-monitor functions (see Section~\ref{s:Multi-MonitorScheduling}).
     1996
     1997Figure~\ref{f:DatingService} shows a dating service demonstrating non-blocking and blocking signalling.
     1998The dating service matches girl and boy threads with matching compatibility codes so they can exchange phone numbers.
     1999A thread blocks until an appropriate partner arrives.
     2000The complexity is exchanging phone numbers in the monitor because of the mutual-exclusion property.
     2001For signal scheduling, the @exchange@ condition is necessary to block the thread finding the match, while the matcher unblocks to take the opposite number, post its phone number, and unblock the partner.
      2002For signal-block scheduling, the implicit urgent-queue replaces the explicit @exchange@-condition and @signal_block@ puts the finding thread on the urgent condition and unblocks the matcher.
     2003The dating service is an example of a monitor that cannot be written using external scheduling because it requires knowledge of calling parameters to make scheduling decisions, and parameters of waiting threads are unavailable;
     2004as well, an arriving thread may not find a partner and must wait, which requires a condition variable, and condition variables imply internal scheduling.
     2005Furthermore, barging corrupts the dating service during an exchange because a barger may also match and change the phone numbers, invalidating the previous exchange phone number.
     2006Putting loops around the @wait@s does not correct the problem;
     2007the simple solution must be restructured to account for barging.
    21612008
    21622009\begin{figure}
     
    21722019};
    21732020int girl( DS & mutex ds, int phNo, int ccode ) {
    2174         if ( empty( Boys[ccode] ) ) {
     2021        if ( is_empty( Boys[ccode] ) ) {
    21752022                wait( Girls[ccode] );
    21762023                GirlPhNo = phNo;
     
    21992046};
    22002047int girl( DS & mutex ds, int phNo, int ccode ) {
    2201         if ( empty( Boys[ccode] ) ) { // no compatible
     2048        if ( is_empty( Boys[ccode] ) ) { // no compatible
    22022049                wait( Girls[ccode] ); // wait for boy
    22032050                GirlPhNo = phNo; // make phone number available
     
    22192066\qquad
    22202067\subfloat[\lstinline@signal_block@]{\label{f:DatingSignalBlock}\usebox\myboxB}
    2221 \caption{Dating service Monitor}
    2222 \label{f:DatingServiceMonitor}
     2068\caption{Dating service}
     2069\label{f:DatingService}
    22232070\end{figure}
    22242071
    2225 Figure~\ref{f:DatingServiceMonitor} shows a dating service demonstrating non-blocking and blocking signalling.
    2226 The dating service matches girl and boy threads with matching compatibility codes so they can exchange phone numbers.
    2227 A thread blocks until an appropriate partner arrives.
    2228 The complexity is exchanging phone numbers in the monitor because of the mutual-exclusion property.
    2229 For signal scheduling, the @exchange@ condition is necessary to block the thread finding the match, while the matcher unblocks to take the opposite number, post its phone number, and unblock the partner.
    2230 For signal-block scheduling, the implicit urgent-queue replaces the explicit @exchange@-condition and @signal_block@ puts the finding thread on the urgent stack and unblocks the matcher.
    2231 
    2232 The dating service is an important example of a monitor that cannot be written using external scheduling.
    2233 First, because scheduling requires knowledge of calling parameters to make matching decisions, and parameters of calling threads are unavailable within the monitor.
    2234 For example, a girl thread within the monitor cannot examine the @ccode@ of boy threads waiting on the calling queue to determine if there is a matching partner.
    2235 Second, because a scheduling decision may be delayed when there is no immediate match, which requires a condition queue for waiting, and condition queues imply internal scheduling.
    2236 For example, if a girl thread could determine there is no calling boy with the same @ccode@, it must wait until a matching boy arrives.
    2237 Finally, barging corrupts the dating service during an exchange because a barger may also match and change the phone numbers, invalidating the previous exchange phone number.
     2238 This situation shows rechecking the waiting condition and waiting again (signals-as-hints) fails, requiring significant restructuring to account for barging.
     2072In summation, for internal scheduling, non-blocking signalling (as in the producer/consumer example) is used when the signaller is providing the cooperation for a waiting thread;
      2073the signaller enters the monitor and changes state, detects a waiting thread that can use the state, performs a non-blocking signal on the condition queue for the waiting thread, and exits the monitor to run concurrently.
     2074The waiter unblocks next from the urgent queue, uses/takes the state, and exits the monitor.
     2075Blocking signal is the reverse, where the waiter is providing the cooperation for the signalling thread;
     2076the signaller enters the monitor, detects a waiting thread providing the necessary state, performs a blocking signal to place it on the urgent queue and unblock the waiter.
     2077The waiter changes state and exits the monitor, and the signaller unblocks next from the urgent queue to use/take the state.
    22392078
    22402079Both internal and external scheduling extend to multiple monitors in a natural way.
    22412080\begin{cquote}
    2242 \begin{tabular}{@{}l@{\hspace{2\parindentlnth}}l@{}}
     2081\begin{tabular}{@{}l@{\hspace{3\parindentlnth}}l@{}}
    22432082\begin{cfa}
    22442083monitor M { `condition e`; ... };
     
    22512090&
    22522091\begin{cfa}
    2253 void rtn$\(_1\)$( M & mutex m1, M & mutex m2 ); // overload rtn
     2092void rtn$\(_1\)$( M & mutex m1, M & mutex m2 );
    22542093void rtn$\(_2\)$( M & mutex m1 );
    22552094void bar( M & mutex m1, M & mutex m2 ) {
    2256         ... waitfor( `rtn`${\color{red}\(_1\)}$ ); ...       // $\LstCommentStyle{waitfor( rtn\(_1\) : m1, m2 )}$
    2257         ... waitfor( `rtn${\color{red}\(_2\)}$ : m1` ); ...
     2095        ... waitfor( `rtn` ); ...       // $\LstCommentStyle{waitfor( rtn\(_1\), m1, m2 )}$
     2096        ... waitfor( `rtn, m1` ); ... // $\LstCommentStyle{waitfor( rtn\(_2\), m1 )}$
    22582097}
    22592098\end{cfa}
     
    22622101For @wait( e )@, the default semantics is to atomically block the signaller and release all acquired mutex parameters, \ie @wait( e, m1, m2 )@.
    22632102To override the implicit multi-monitor wait, specific mutex parameter(s) can be specified, \eg @wait( e, m1 )@.
    2264 Wait cannot statically verify the released monitors are the acquired mutex-parameters without disallowing separately compiled helper functions calling @wait@.
    2265 While \CC supports bulk locking, @wait@ only accepts a single lock for a condition queue, so bulk locking with condition queues is asymmetric.
      2103Wait cannot statically verify the released monitors are the acquired mutex-parameters without disallowing separately compiled helper functions calling @wait@.
     2104While \CC supports bulk locking, @wait@ only accepts a single lock for a condition variable, so bulk locking with condition variables is asymmetric.
    22662105Finally, a signaller,
    22672106\begin{cfa}
     
    22722111must have acquired at least the same locks as the waiting thread signalled from a condition queue to allow the locks to be passed, and hence, prevent barging.
    22732112
    2274 Similarly, for @waitfor( rtn )@, the default semantics is to atomically block the acceptor and release all acquired mutex parameters, \ie @waitfor( rtn : m1, m2 )@.
    2275 To override the implicit multi-monitor wait, specific mutex parameter(s) can be specified, \eg @waitfor( rtn : m1 )@.
     2113Similarly, for @waitfor( rtn )@, the default semantics is to atomically block the acceptor and release all acquired mutex parameters, \ie @waitfor( rtn, m1, m2 )@.
     2114To override the implicit multi-monitor wait, specific mutex parameter(s) can be specified, \eg @waitfor( rtn, m1 )@.
    22762115@waitfor@ does statically verify the monitor types passed are the same as the acquired mutex-parameters of the given function or function pointer, hence the function (pointer) prototype must be accessible.
    22772116% When an overloaded function appears in an @waitfor@ statement, calls to any function with that name are accepted.
     
    22812120void rtn( M & mutex m );
    22822121`int` rtn( M & mutex m );
    2283 waitfor( (`int` (*)( M & mutex ))rtn : m );
    2284 \end{cfa}
    2285 
    2286 The ability to release a subset of acquired monitors can result in a \newterm{nested monitor}~\cite{Lister77} deadlock (see Section~\ref{s:MutexAcquisition}).
    2287 \newpage
     2122waitfor( (`int` (*)( M & mutex ))rtn, m );
     2123\end{cfa}
     2124
     2125The ability to release a subset of acquired monitors can result in a \newterm{nested monitor}~\cite{Lister77} deadlock.
    22882126\begin{cfa}
    22892127void foo( M & mutex m1, M & mutex m2 ) {
    2290         ... wait( `e, m1` ); ...                                $\C{// release m1, keeping m2 acquired}$
    2291 void bar( M & mutex m1, M & mutex m2 ) {        $\C{// must acquire m1 and m2}$
     2128        ... wait( `e, m1` ); ...                                $\C{// release m1, keeping m2 acquired )}$
     2129void bar( M & mutex m1, M & mutex m2 ) {        $\C{// must acquire m1 and m2 )}$
    22922130        ... signal( `e` ); ...
    22932131\end{cfa}
    22942132The @wait@ only releases @m1@ so the signalling thread cannot acquire @m1@ and @m2@ to enter @bar@ and @signal@ the condition.
    2295 While deadlock can occur with multiple/nesting acquisition, this is a consequence of locks, and by extension monitor locking is not perfectly composable.
     2133While deadlock can occur with multiple/nesting acquisition, this is a consequence of locks, and by extension monitors, not being perfectly composable.
     2134
    22962135
    22972136
    22982137\subsection{\texorpdfstring{Extended \protect\lstinline@waitfor@}{Extended waitfor}}
    2299 \label{s:ExtendedWaitfor}
    23002138
    23012139Figure~\ref{f:ExtendedWaitfor} shows the extended form of the @waitfor@ statement to conditionally accept one of a group of mutex functions, with an optional statement to be performed \emph{after} the mutex function finishes.
     
    23082146Hence, the terminating @else@ clause allows a conditional attempt to accept a call without blocking.
    23092147If both @timeout@ and @else@ clause are present, the @else@ must be conditional, or the @timeout@ is never triggered.
    2310 There is also a traditional future wait queue (not shown) (\eg Microsoft @WaitForMultipleObjects@), to wait for a specified number of future elements in the queue.
    2311 Finally, there is a shorthand for specifying multiple functions using the same set of monitors: @waitfor( f, g, h : m1, m2, m3 )@.
     2148There is also a traditional future wait queue (not shown) (\eg Microsoft (@WaitForMultipleObjects@)), to wait for a specified number of future elements in the queue.
    23122149
    23132150\begin{figure}
     
    23362173The right example accepts either @mem1@ or @mem2@ if @C1@ and @C2@ are true.
    23372174
    2338 An interesting use of @waitfor@ is accepting the @mutex@ destructor to know when an object is deallocated, \eg assume the bounded buffer is restructured from a monitor to a thread with the following @main@.
      2175An interesting use of @waitfor@ is accepting the @mutex@ destructor to know when an object is deallocated, \eg assume the bounded buffer is restructured from a monitor to a thread with the following @main@.
    23392176\begin{cfa}
    23402177void main( Buffer(T) & buffer ) with(buffer) {
    23412178        for () {
    2342                 `waitfor( ^?{} : buffer )` break;
    2343                 or when ( count != 20 ) waitfor( insert : buffer ) { ... }
    2344                 or when ( count != 0 ) waitfor( remove : buffer ) { ... }
     2179                `waitfor( ^?{}, buffer )` break;
     2180                or when ( count != 20 ) waitfor( insert, buffer ) { ... }
     2181                or when ( count != 0 ) waitfor( remove, buffer ) { ... }
    23452182        }
    23462183        // clean up
     
    24342271To support this efficient semantics (and prevent barging), the implementation maintains a list of monitors acquired for each blocked thread.
    24352272When a signaller exits or waits in a monitor function/statement, the front waiter on urgent is unblocked if all its monitors are released.
    2436 Implementing a fast subset check for the necessary released monitors is important and discussed in the following sections.
     2273Implementing a fast subset check for the necessary released monitors is important.
    24372274% The benefit is encapsulating complexity into only two actions: passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met.
    24382275
    24392276
    2440 \subsection{\texorpdfstring{\protect\lstinline@waitfor@ Implementation}{waitfor Implementation}}
    2441 \label{s:waitforImplementation}
    2442 
    2443 In a statically-typed object-oriented programming language, a class has an exhaustive list of members, even when members are added via static inheritance (see Figure~\ref{f:uCinheritance}).
     2444 Knowing all members at compilation (even separate compilation) allows them to be uniquely numbered so the accept-statement implementation can use a fast/compact bit mask with $O(1)$ compare.
    2445 
    2446 \begin{figure}
    2447 \centering
    2448 \begin{lrbox}{\myboxA}
    2449 \begin{uC++}[aboveskip=0pt,belowskip=0pt]
    2450 $\emph{translation unit 1}$
    2451 _Monitor B { // common type in .h file
    2452         _Mutex virtual void `f`( ... );
    2453         _Mutex virtual void `g`( ... );
    2454         _Mutex virtual void w1( ... ) { ... _Accept(`f`, `g`); ... }
    2455 };
    2456 $\emph{translation unit 2}$
    2457 // include B
    2458 _Monitor D : public B { // inherit
    2459         _Mutex void `h`( ... ); // add
    2460         _Mutex void w2( ... ) { ... _Accept(`f`, `h`); ... }
    2461 };
    2462 \end{uC++}
    2463 \end{lrbox}
    2464 
    2465 \begin{lrbox}{\myboxB}
    2466 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
    2467 $\emph{translation unit 1}$
    2468 monitor M { ... }; // common type in .h file
    2469 void `f`( M & mutex m, ... );
    2470 void `g`( M & mutex m, ... );
    2471 void w1( M & mutex m, ... ) { ... waitfor(`f`, `g` : m); ... }
    2472 
    2473 $\emph{translation unit 2}$
    2474 // include M
    2475 extern void `f`( M & mutex m, ... ); // import f but not g
    2476 void `h`( M & mutex m ); // add
    2477 void w2( M & mutex m, ... ) { ... waitfor(`f`, `h` : m); ... }
    2478 
    2479 \end{cfa}
    2480 \end{lrbox}
    2481 
    2482 \subfloat[\uC]{\label{f:uCinheritance}\usebox\myboxA}
    2483 \hspace{3pt}
    2484 \vrule
    2485 \hspace{3pt}
    2486 \subfloat[\CFA]{\label{f:CFinheritance}\usebox\myboxB}
    2487 \caption{Member / Function visibility}
    2488 \label{f:MemberFunctionVisibility}
    2489 \end{figure}
    2490 
    2491 However, the @waitfor@ statement in translation unit 2 (see Figure~\ref{f:CFinheritance}) cannot see function @g@ in translation unit 1 precluding a unique numbering for a bit-mask because the monitor type only carries the protected shared-data.
     2277\subsection{Loose Object Definitions}
     2278\label{s:LooseObjectDefinitions}
     2279
     2280In an object-oriented programming language, a class includes an exhaustive list of operations.
     2281A new class can add members via static inheritance but the subclass still has an exhaustive list of operations.
     2282(Dynamic member adding, \eg JavaScript~\cite{JavaScript}, is not considered.)
     2283In the object-oriented scenario, the type and all its operators are always present at compilation (even separate compilation), so it is possible to number the operations in a bit mask and use an $O(1)$ compare with a similar bit mask created for the operations specified in a @waitfor@.
     2284
     2285However, in \CFA, monitor functions can be statically added/removed in translation units, making a fast subset check difficult.
     2286\begin{cfa}
     2287        monitor M { ... }; // common type, included in .h file
     2288translation unit 1
     2289        void `f`( M & mutex m );
     2290        void g( M & mutex m ) { waitfor( `f`, m ); }
     2291translation unit 2
     2292        void `f`( M & mutex m ); $\C{// replacing f and g for type M in this translation unit}$
     2293        void `g`( M & mutex m );
     2294        void h( M & mutex m ) { waitfor( `f`, m ) or waitfor( `g`, m ); } $\C{// extending type M in this translation unit}$
     2295\end{cfa}
     2296The @waitfor@ statements in each translation unit cannot form a unique bit-mask because the monitor type does not carry that information.
     2297Hence, function pointers are used to identify the functions listed in the @waitfor@ statement, stored in a variable-sized array.
     2298Then, the same implementation approach used for the urgent stack is used for the calling queue.
      2299Each caller has a list of monitors acquired, and the @waitfor@ statement performs a (usually short) linear search matching functions in the @waitfor@ list with called functions, and then verifies the associated mutex locks can be transferred.
    24922300(A possible way to construct a dense mapping is at link or load-time.)
    2493 Hence, function pointers are used to identify the functions listed in the @waitfor@ statement, stored in a variable-sized array.
    2494 Then, the same implementation approach used for the urgent stack (see Section~\ref{s:Scheduling}) is used for the calling queue.
     2495 Each caller has a list of monitors acquired, and the @waitfor@ statement performs a (short) linear search matching functions in the @waitfor@ list with called functions, and then verifies the associated mutex locks can be transferred.
    24962301
    24972302
     
    25082313The solution is for the programmer to disambiguate:
    25092314\begin{cfa}
    2510 waitfor( f : `m2` ); $\C{// wait for call to f with argument m2}$
     2315waitfor( f, `m2` ); $\C{// wait for call to f with argument m2}$
    25112316\end{cfa}
    25122317Both locks are acquired by function @g@, so when function @f@ is called, the lock for monitor @m2@ is passed from @g@ to @f@, while @g@ still holds lock @m1@.
     
    25152320monitor M { ... };
    25162321void f( M & mutex m1, M & mutex m2 );
    2517 void g( M & mutex m1, M & mutex m2 ) { waitfor( f : `m1, m2` ); $\C{// wait for call to f with arguments m1 and m2}$
     2322void g( M & mutex m1, M & mutex m2 ) { waitfor( f, `m1, m2` ); $\C{// wait for call to f with arguments m1 and m2}$
    25182323\end{cfa}
    25192324Again, the set of monitors passed to the @waitfor@ statement must be entirely contained in the set of monitors already acquired by the accepting function.
    2520 % Also, the order of the monitors in a @waitfor@ statement must match the order of the mutex parameters.
    2521 
    2522 Figure~\ref{f:UnmatchedMutexSets} shows internal and external scheduling with multiple monitors that must match exactly with a signalling or accepting thread, \ie partial matching results in waiting.
    2523 In both cases, the set of monitors is disjoint so unblocking is impossible.
     2325Also, the order of the monitors in a @waitfor@ statement is unimportant.
     2326
     2327Figure~\ref{f:UnmatchedMutexSets} shows an example where, for internal and external scheduling with multiple monitors, a signalling or accepting thread must match exactly, \ie partial matching results in waiting.
     2328For both examples, the set of monitors is disjoint so unblocking is impossible.
    25242329
    25252330\begin{figure}
     
    25502355}
    25512356void g( M1 & mutex m1, M2 & mutex m2 ) {
    2552         waitfor( f : m1, m2 );
     2357        waitfor( f, m1, m2 );
    25532358}
    25542359g( `m11`, m2 ); // block on accept
     
    25652370\end{figure}
    25662371
     2372
     2373\subsection{\texorpdfstring{\protect\lstinline@mutex@ Threads}{mutex Threads}}
     2374
     2375Threads in \CFA can also be monitors to allow \emph{direct communication} among threads, \ie threads can have mutex functions that are called by other threads.
     2376Hence, all monitor features are available when using threads.
     2377Figure~\ref{f:DirectCommunication} shows a comparison of direct call communication in \CFA with direct channel communication in Go.
     2378(Ada provides a similar mechanism to the \CFA direct communication.)
     2379The program main in both programs communicates directly with the other thread versus indirect communication where two threads interact through a passive monitor.
      2380Both direct and indirect thread communication are valuable tools in structuring concurrent programs.
     2381
    25672382\begin{figure}
    25682383\centering
     
    25712386
    25722387struct Msg { int i, j; };
    2573 monitor thread GoRtn { int i;  float f;  Msg m; };
     2388thread GoRtn { int i;  float f;  Msg m; };
    25742389void mem1( GoRtn & mutex gortn, int i ) { gortn.i = i; }
    25752390void mem2( GoRtn & mutex gortn, float f ) { gortn.f = f; }
     
    25812396        for () {
    25822397
    2583                 `waitfor( mem1 : gortn )` sout | i;  // wait for calls
    2584                 or `waitfor( mem2 : gortn )` sout | f;
    2585                 or `waitfor( mem3 : gortn )` sout | m.i | m.j;
    2586                 or `waitfor( ^?{} : gortn )` break; // low priority
     2398                `waitfor( mem1, gortn )` sout | i;  // wait for calls
     2399                or `waitfor( mem2, gortn )` sout | f;
     2400                or `waitfor( mem3, gortn )` sout | m.i | m.j;
     2401                or `waitfor( ^?{}, gortn )` break;
    25872402
    25882403        }
     
    26382453\hspace{3pt}
    26392454\subfloat[Go]{\label{f:Gochannel}\usebox\myboxB}
    2640 \caption{Direct versus indirect communication}
    2641 \label{f:DirectCommunicationComparison}
    2642 
    2643 \medskip
    2644 
    2645 \begin{cfa}
    2646 monitor thread DatingService {
    2647         condition Girls[CompCodes], Boys[CompCodes];
    2648         int girlPhoneNo, boyPhoneNo, ccode;
    2649 };
    2650 int girl( DatingService & mutex ds, int phoneno, int code ) with( ds ) {
    2651         girlPhoneNo = phoneno;  ccode = code;
    2652         `wait( Girls[ccode] );`                                                         $\C{// wait for boy}$
    2653         girlPhoneNo = phoneno;  return boyPhoneNo;
    2654 }
    2655 int boy( DatingService & mutex ds, int phoneno, int code ) with( ds ) {
    2656         boyPhoneNo = phoneno;  ccode = code;
    2657         `wait( Boys[ccode] );`                                                          $\C{// wait for girl}$
    2658         boyPhoneNo = phoneno;  return girlPhoneNo;
    2659 }
    2660 void main( DatingService & ds ) with( ds ) {                    $\C{// thread starts, ds defaults to mutex}$
    2661         for () {
    2662                 waitfor( ^?{} ) break;                                                  $\C{// high priority}$
    2663                 or waitfor( girl )                                                              $\C{// girl called, compatible boy ? restart boy then girl}$
    2664                         if ( ! is_empty( Boys[ccode] ) ) { `signal_block( Boys[ccode] );  signal_block( Girls[ccode] );` }
    2665                 or waitfor( boy ) {                                                             $\C{// boy called, compatible girl ? restart girl then boy}$
    2666                         if ( ! is_empty( Girls[ccode] ) ) { `signal_block( Girls[ccode] );  signal_block( Boys[ccode] );` }
    2667         }
    2668 }
    2669 \end{cfa}
    2670 \caption{Direct communication dating service}
    2671 \label{f:DirectCommunicationDatingService}
     2455\caption{Direct communication}
     2456\label{f:DirectCommunication}
    26722457\end{figure}
    26732458
     
    26842469void main( Ping & pi ) {
    26852470        for ( 10 ) {
    2686                 `waitfor( ping : pi );`
     2471                `waitfor( ping, pi );`
    26872472                `pong( po );`
    26882473        }
     
    26972482        for ( 10 ) {
    26982483                `ping( pi );`
    2699                 `waitfor( pong : po );`
     2484                `waitfor( pong, po );`
    27002485        }
    27012486}
     
    27122497
    27132498
    2714 \subsection{\texorpdfstring{\protect\lstinline@monitor@ Generators / Coroutines / Threads}{monitor Generators / Coroutines / Threads}}
    2715 
    2716 \CFA generators, coroutines, and threads can also be monitors (Table~\ref{t:ExecutionPropertyComposition} cases 4, 6, 12) allowing safe \emph{direct communication} with threads, \ie the custom types can have mutex functions that are called by other threads.
    2717 All monitor features are available within these mutex functions.
    2718 For example, if the formatter generator (or coroutine equivalent) in Figure~\ref{f:CFAFormatGen} is extended with the monitor property and this interface function is used to communicate with the formatter:
    2719 \begin{cfa}
    2720 void fmt( Fmt & mutex fmt, char ch ) { fmt.ch = ch; resume( fmt ) }
    2721 \end{cfa}
    2722 multiple threads can safely pass characters for formatting.
    2723 
    2724 Figure~\ref{f:DirectCommunicationComparison} shows a comparison of direct call-communication in \CFA versus indirect channel-communication in Go.
    2725 (Ada has a similar mechanism to \CFA direct communication.)
    2726 The program thread in \CFA @main@ uses the call/return paradigm to directly communicate with the @GoRtn main@, whereas Go switches to the channel paradigm to indirectly communicate with the goroutine.
    2727 Communication by multiple threads is safe for the @gortn@ thread via mutex calls in \CFA or channel assignment in Go.
    2728 
    2729 Figure~\ref{f:DirectCommunicationDatingService} shows the dating-service problem in Figure~\ref{f:DatingServiceMonitor} extended from indirect monitor communication to direct thread communication.
     2731 When converting a monitor to a thread (server), the coding pattern is to move as much code as possible from the accepted members into the thread main so it does as much work as possible.
    2731 Notice, the dating server is postponing requests for an unspecified time while continuing to accept new requests.
    2732 For complex servers (web-servers), there can be hundreds of lines of code in the thread main and safe interaction with clients can be complex.
     2499\subsection{Execution Properties}
     2500
     2501Table~\ref{t:ObjectPropertyComposition} shows how the \CFA high-level constructs cover 3 fundamental execution properties: thread, stateful function, and mutual exclusion.
     2502Case 1 is a basic object, with none of the new execution properties.
     2503Case 2 allows @mutex@ calls to Case 1 to protect shared data.
     2504Case 3 allows stateful functions to suspend/resume but restricts operations because the state is stackless.
     2505Case 4 allows @mutex@ calls to Case 3 to protect shared data.
     2506Cases 5 and 6 are the same as 3 and 4 without restriction because the state is stackful.
     2507Cases 7 and 8 are rejected because a thread cannot execute without a stackful state in a preemptive environment when context switching from the signal handler.
     2508Cases 9 and 10 have a stackful thread without and with @mutex@ calls.
     2509For situations where threads do not require direct communication, case 9 provides faster creation/destruction by eliminating @mutex@ setup.
     2510
     2511\begin{table}
     2512\caption{Object property composition}
     2513\centering
     2514\label{t:ObjectPropertyComposition}
     2515\renewcommand{\arraystretch}{1.25}
     2516%\setlength{\tabcolsep}{5pt}
     2517\begin{tabular}{c|c||l|l}
     2518\multicolumn{2}{c||}{object properties} & \multicolumn{2}{c}{mutual exclusion} \\
     2519\hline
     2520thread  & stateful                              & \multicolumn{1}{c|}{No} & \multicolumn{1}{c}{Yes} \\
     2521\hline
     2522\hline
     2523No              & No                                    & \textbf{1}\ \ \ aggregate type                & \textbf{2}\ \ \ @monitor@ aggregate type \\
     2524\hline
     2525No              & Yes (stackless)               & \textbf{3}\ \ \ @generator@                   & \textbf{4}\ \ \ @monitor@ @generator@ \\
     2526\hline
     2527No              & Yes (stackful)                & \textbf{5}\ \ \ @coroutine@                   & \textbf{6}\ \ \ @monitor@ @coroutine@ \\
     2528\hline
     2529Yes             & No / Yes (stackless)  & \textbf{7}\ \ \ {\color{red}rejected} & \textbf{8}\ \ \ {\color{red}rejected} \\
     2530\hline
     2531Yes             & Yes (stackful)                & \textbf{9}\ \ \ @thread@                              & \textbf{10}\ \ @monitor@ @thread@ \\
     2532\end{tabular}
     2533\end{table}
    27332534
    27342535
     
    27362537
    27372538For completeness and efficiency, \CFA provides a standard set of low-level locks: recursive mutex, condition, semaphore, barrier, \etc, and atomic instructions: @fetchAssign@, @fetchAdd@, @testSet@, @compareSet@, \etc.
    2738 Some of these low-level mechanism are used to build the \CFA runtime, but we always advocate using high-level mechanisms whenever possible.
      2539Some of these low-level mechanisms are used in the \CFA runtime, but we strongly advocate using high-level mechanisms whenever possible.
    27392540
    27402541
     
    27792580\begin{cfa}
    27802581struct Adder {
    2781         int * row, cols;
     2582    int * row, cols;
    27822583};
    27832584int operator()() {
     
    28382639\label{s:RuntimeStructureCluster}
    28392640
    2840 A \newterm{cluster} is a collection of user and kernel threads, where the kernel threads run the user threads from the cluster's ready queue, and the operating system runs the kernel threads on the processors from its ready queue.
    2841 The term \newterm{virtual processor} is introduced as a synonym for kernel thread to disambiguate between user and kernel thread.
    2842 From the language perspective, a virtual processor is an actual processor (core).
    2843 
     2641A \newterm{cluster} is a collection of threads and virtual processors (abstract kernel-thread) that execute the (user) threads from its own ready queue (like an OS executing kernel threads).
    28442642The purpose of a cluster is to control the amount of parallelism that is possible among threads, plus scheduling and other execution defaults.
    28452643The default cluster-scheduler is single-queue multi-server, which provides automatic load-balancing of threads on processors.
     
    28602658Programs may use more virtual processors than hardware processors.
    28612659On a multiprocessor, kernel threads are distributed across the hardware processors resulting in virtual processors executing in parallel.
    2862 (It is possible to use affinity to lock a virtual processor onto a particular hardware processor~\cite{affinityLinux,affinityWindows}, which is used when caching issues occur or for heterogeneous hardware processors.) %, affinityFreebsd, affinityNetbsd, affinityMacosx
     2660(It is possible to use affinity to lock a virtual processor onto a particular hardware processor~\cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx}, which is used when caching issues occur or for heterogeneous hardware processors.)
    28632661The \CFA runtime attempts to block unused processors and unblock processors as the system load increases;
    2864 balancing the workload with processors is difficult because it requires future knowledge, \ie what will the application workload do next.
      2662balancing the workload with processors is difficult because it requires future knowledge, \ie what will the application workload do next.
    28652663Preemption occurs on virtual processors rather than user threads, via operating-system interrupts.
    28662664Thus virtual processors execute user threads, where preemption frequency applies to a virtual processor, so preemption occurs randomly across the executed user threads.
     
    28972695Nondeterministic preemption provides fairness from long-running threads, and forces concurrent programmers to write more robust programs, rather than relying on code between cooperative scheduling to be atomic.
    28982696This atomic reliance can fail on multi-core machines, because execution across cores is nondeterministic.
    2899 A different reason for not supporting preemption is that it significantly complicates the runtime system, \eg Windows runtime does not support interrupts and on Linux systems, interrupts are complex (see below).
     2697A different reason for not supporting preemption is that it significantly complicates the runtime system, \eg Microsoft runtime does not support interrupts and on Linux systems, interrupts are complex (see below).
    29002698Preemption is normally handled by setting a countdown timer on each virtual processor.
    2901 When the timer expires, an interrupt is delivered, and its signal handler resets the countdown timer, and if the virtual processor is executing in user code, the signal handler performs a user-level context-switch, or if executing in the language runtime kernel, the preemption is ignored or rolled forward to the point where the runtime kernel context switches back to user code.
     2699When the timer expires, an interrupt is delivered, and the interrupt handler resets the countdown timer, and if the virtual processor is executing in user code, the signal handler performs a user-level context-switch, or if executing in the language runtime kernel, the preemption is ignored or rolled forward to the point where the runtime kernel context switches back to user code.
    29022700Multiple signal handlers may be pending.
    29032701When control eventually switches back to the signal handler, it returns normally, and execution continues in the interrupted user thread, even though the return from the signal handler may be on a different kernel thread than the one where the signal is delivered.
    29042702The only issue with this approach is that signal masks from one kernel thread may be restored on another as part of returning from the signal handler;
    29052703therefore, the same signal mask is required for all virtual processors in a cluster.
    2906 Because preemption interval is usually long (1 millisecond) performance cost is negligible.
    2907 
    2908 Linux switched a decade ago from specific to arbitrary virtual-processor signal-delivery for applications with multiple kernel threads.
    2909 In the new semantics, a virtual-processor directed signal may be delivered to any virtual processor created by the application that does not have the signal blocked.
      2704Because the preemption interval is usually long (1 millisecond), the performance cost is negligible.
     2705
     2706Linux switched a decade ago from specific to arbitrary process signal-delivery for applications with multiple kernel threads.
     2707\begin{cquote}
     2708A process-directed signal may be delivered to any one of the threads that does not currently have the signal blocked.
     2709If more than one of the threads has the signal unblocked, then the kernel chooses an arbitrary thread to which it will deliver the signal.
     2710SIGNAL(7) - Linux Programmer's Manual
     2711\end{cquote}
    29102712Hence, the timer-expiry signal, which is generated \emph{externally} by the Linux kernel to an application, is delivered to any of its Linux subprocesses (kernel threads).
    29112713To ensure each virtual processor receives a preemption signal, a discrete-event simulation is run on a special virtual processor, and only it sets and receives timer events.
     
    29252727\label{s:Performance}
    29262728
    2927 To test the performance of the \CFA runtime, a series of microbenchmarks are used to compare \CFA with pthreads, Java 11.0.6, Go 1.12.6, Rust 1.37.0, Python 3.7.6, Node.js 12.14.1, and \uC 7.0.0.
     2729To verify the implementation of the \CFA runtime, a series of microbenchmarks are performed comparing \CFA with pthreads, Java OpenJDK-9, Go 1.12.6 and \uC 7.0.0.
     29282730For comparison, the package must be multi-processor (M:N), which excludes libdill/libmil~\cite{libdill} (M:1), and use a shared-memory programming model, \eg not message passing.
    2929 The benchmark computer is an AMD Opteron\texttrademark\ 6380 NUMA 64-core, 8 socket, 2.5 GHz processor, running Ubuntu 16.04.6 LTS, and pthreads/\CFA/\uC are compiled with gcc 9.2.1.
     2731The benchmark computer is an AMD Opteron\texttrademark\ 6380 NUMA 64-core, 8 socket, 2.5 GHz processor, running Ubuntu 16.04.6 LTS, and \CFA/\uC are compiled with gcc 6.5.
    29302732
    29312733All benchmarks are run using the following harness. (The Java harness is augmented to circumvent JIT issues.)
    29322734\begin{cfa}
    2933 #define BENCH( `run` ) uint64_t start = cputime_ns();  `run;`  double result = (double)(cputime_ns() - start) / N;
    2934 \end{cfa}
    2935 where CPU time in nanoseconds is from the appropriate language clock.
    2936 Each benchmark is performed @N@ times, where @N@ is selected so the benchmark runs in the range of 2--20 seconds for the specific programming language.
    2937 The total time is divided by @N@ to obtain the average time for a benchmark.
    2938 Each benchmark experiment is run 13 times and the average appears in the table.
     2735unsigned int N = 10_000_000;
     2736#define BENCH( `run` ) Time before = getTimeNsec();  `run;`  Duration result = (getTimeNsec() - before) / N;
     2737\end{cfa}
     2738The method used to get time is @clock_gettime( CLOCK_REALTIME )@.
     2739Each benchmark is performed @N@ times, where @N@ varies depending on the benchmark;
     2740the total time is divided by @N@ to obtain the average time for a benchmark.
     2741Each benchmark experiment is run 31 times.
    29392742All omitted tests for other languages are functionally identical to the \CFA tests and available online~\cite{CforallBenchMarks}.
    2940 % tar --exclude-ignore=exclude -cvhf benchmark.tar benchmark
    2941 
    2942 \paragraph{Context Switching}
     2743% tar --exclude=.deps --exclude=Makefile --exclude=Makefile.in --exclude=c.c --exclude=cxx.cpp --exclude=fetch_add.c -cvhf benchmark.tar benchmark
     2744
     2745\paragraph{Object Creation}
     2746
     2747Object creation is measured by creating/deleting the specific kind of concurrent object.
     2748Figure~\ref{f:creation} shows the code for \CFA, with results in Table~\ref{tab:creation}.
     2749The only note here is that the call stacks of \CFA coroutines are lazily created, therefore without priming the coroutine to force stack creation, the creation cost is artificially low.
     2750
     2751\begin{multicols}{2}
     2752\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
     2753\begin{cfa}
     2754@thread@ MyThread {};
     2755void @main@( MyThread & ) {}
     2756int main() {
     2757        BENCH( for ( N ) { @MyThread m;@ } )
     2758        sout | result`ns;
     2759}
     2760\end{cfa}
     2761\captionof{figure}{\CFA object-creation benchmark}
     2762\label{f:creation}
     2763
     2764\columnbreak
     2765
     2766\vspace*{-16pt}
     2767\captionof{table}{Object creation comparison (nanoseconds)}
     2768\label{tab:creation}
     2769
     2770\begin{tabular}[t]{@{}r*{3}{D{.}{.}{5.2}}@{}}
     2771\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
     2772\CFA Coroutine Lazy             & 13.2          & 13.1          & 0.44          \\
     2773\CFA Coroutine Eager    & 531.3         & 536.0         & 26.54         \\
     2774\CFA Thread                             & 2074.9        & 2066.5        & 170.76        \\
     2775\uC Coroutine                   & 89.6          & 90.5          & 1.83          \\
     2776\uC Thread                              & 528.2         & 528.5         & 4.94          \\
     2777Goroutine                               & 4068.0        & 4113.1        & 414.55        \\
     2778Java Thread                             & 103848.5      & 104295.4      & 2637.57       \\
     2779Pthreads                                & 33112.6       & 33127.1       & 165.90
     2780\end{tabular}
     2781\end{multicols}
     2782
     2783
     2784\paragraph{Context-Switching}
    29432785
    29442786In procedural programming, the cost of a function call is important as modularization (refactoring) increases.
    2945 (In many cases, a compiler inlines function calls to increase the size and number of basic blocks for optimizing.)
    2946 Similarly, when modularization extends to coroutines/threads, the time for a context switch becomes a relevant factor.
     2787(In many cases, a compiler inlines function calls to eliminate this cost.)
     2788Similarly, when modularization extends to coroutines/tasks, the time for a context switch becomes a relevant factor.
    29472789The coroutine test is from resumer to suspender and from suspender to resumer, which is two context switches.
    2948 %For async-await systems, the test is scheduling and fulfilling @N@ empty promises, where all promises are allocated before versus interleaved with fulfillment to avoid garbage collection.
    2949 For async-await systems, the test measures the cost of the @await@ expression entering the event engine by awaiting @N@ promises, where each created promise is resolved by an immediate event in the engine (using Node.js @setImmediate@).
    29502790The thread test is using yield to enter and return from the runtime kernel, which is two context switches.
    29512791The difference in performance between coroutine and thread context-switch is the cost of scheduling for threads, whereas coroutines are self-scheduling.
    2952 Figure~\ref{f:ctx-switch} shows the \CFA code for a coroutine/thread with results in Table~\ref{t:ctx-switch}.
    2953 
    2954 % From: Gregor Richards <gregor.richards@uwaterloo.ca>
    2955 % To: "Peter A. Buhr" <pabuhr@plg2.cs.uwaterloo.ca>
    2956 % Date: Fri, 24 Jan 2020 13:49:18 -0500
    2957 %
    2958 % I can also verify that the previous version, which just tied a bunch of promises together, *does not* go back to the
    2959 % event loop at all in the current version of Node. Presumably they're taking advantage of the fact that the ordering of
    2960 % events is intentionally undefined to just jump right to the next 'then' in the chain, bypassing event queueing
    2961 % entirely. That's perfectly correct behavior insofar as its difference from the specified behavior isn't observable, but
    2962 % it isn't typical or representative of much anything useful, because most programs wouldn't have whole chains of eager
    2963 % promises. Also, it's not representative of *anything* you can do with async/await, as there's no way to encode such an
    2964 % eager chain that way.
     2792Figure~\ref{f:ctx-switch} only shows the \CFA code for coroutines/threads (other systems are similar) with all results in Table~\ref{tab:ctx-switch}.
    29652793
    29662794\begin{multicols}{2}
    29672795\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    29682796\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    2969 @coroutine@ C {};
    2970 void main( C & ) { for () { @suspend;@ } }
     2797@coroutine@ C {} c;
     2798void main( C & ) { for ( ;; ) { @suspend;@ } }
    29712799int main() { // coroutine test
    2972         C c;
    29732800        BENCH( for ( N ) { @resume( c );@ } )
    2974         sout | result;
    2975 }
    2976 int main() { // thread test
     2801        sout | result`ns;
     2802}
     2803int main() { // task test
    29772804        BENCH( for ( N ) { @yield();@ } )
    2978         sout | result;
     2805        sout | result`ns;
    29792806}
    29802807\end{cfa}
     
    29862813\vspace*{-16pt}
    29872814\captionof{table}{Context switch comparison (nanoseconds)}
    2988 \label{t:ctx-switch}
     2815\label{tab:ctx-switch}
    29892816\begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
    29902817\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    2991 C function                      & 1.8           & 1.8           & 0.0   \\
    2992 \CFA generator          & 1.8           & 2.0           & 0.3   \\
    2993 \CFA coroutine          & 32.5          & 32.9          & 0.8   \\
    2994 \CFA thread                     & 93.8          & 93.6          & 2.2   \\
    2995 \uC coroutine           & 50.3          & 50.3          & 0.2   \\
    2996 \uC thread                      & 97.3          & 97.4          & 1.0   \\
    2997 Python generator        & 40.9          & 41.3          & 1.5   \\
    2998 Node.js generator       & 32.6          & 32.2          & 1.0   \\
    2999 Node.js await           & 1852.2        & 1854.7        & 16.4  \\
    3000 Goroutine thread        & 143.0         & 143.3         & 1.1   \\
    3001 Rust thread                     & 332.0         & 331.4         & 2.4   \\
    3002 Java thread                     & 405.0         & 415.0         & 17.6  \\
    3003 Pthreads thread         & 334.3         & 335.2         & 3.9
     2818C function              & 1.8   & 1.8   & 0.01  \\
     2819\CFA generator  & 2.4   & 2.2   & 0.25  \\
     2820\CFA Coroutine  & 36.2  & 36.2  & 0.25  \\
     2821\CFA Thread             & 93.2  & 93.5  & 2.09  \\
     2822\uC Coroutine   & 52.0  & 52.1  & 0.51  \\
     2823\uC Thread              & 96.2  & 96.3  & 0.58  \\
     2824Goroutine               & 141.0 & 141.3 & 3.39  \\
     2825Java Thread             & 374.0 & 375.8 & 10.38 \\
     2826Pthreads Thread & 361.0 & 365.3 & 13.19
    30042827\end{tabular}
    30052828\end{multicols}
    30062829
    3007 \paragraph{Internal Scheduling}
    3008 
    3009 Internal scheduling is measured using a cycle of two threads signalling and waiting.
    3010 Figure~\ref{f:schedint} shows the code for \CFA, with results in Table~\ref{t:schedint}.
     2830
     2831\paragraph{Mutual-Exclusion}
     2832
      2833Uncontended mutual exclusion, which frequently occurs, is measured by entering/leaving a critical section.
     2834For monitors, entering and leaving a monitor function is measured.
     2835To put the results in context, the cost of entering a non-inline function and the cost of acquiring and releasing a @pthread_mutex@ lock is also measured.
     2836Figure~\ref{f:mutex} shows the code for \CFA with all results in Table~\ref{tab:mutex}.
    30112837Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    3012 Java scheduling is significantly greater because the benchmark explicitly creates multiple thread in order to prevent the JIT from making the program sequential, \ie removing all locking.
    30132838
    30142839\begin{multicols}{2}
    30152840\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    30162841\begin{cfa}
    3017 volatile int go = 0;
    3018 @condition c;@
    30192842@monitor@ M {} m1/*, m2, m3, m4*/;
    3020 void call( M & @mutex p1/*, p2, p3, p4*/@ ) {
    3021         @signal( c );@
    3022 }
    3023 void wait( M & @mutex p1/*, p2, p3, p4*/@ ) {
    3024         go = 1; // continue other thread
    3025         for ( N ) { @wait( c );@ } );
    3026 }
    3027 thread T {};
    3028 void main( T & ) {
    3029         while ( go == 0 ) { yield(); } // waiter must start first
    3030         BENCH( for ( N ) { call( m1/*, m2, m3, m4*/ ); } )
    3031         sout | result;
    3032 }
     2843void __attribute__((noinline))
     2844do_call( M & @mutex m/*, m2, m3, m4*/@ ) {}
    30332845int main() {
    3034         T t;
    3035         wait( m1/*, m2, m3, m4*/ );
    3036 }
    3037 \end{cfa}
    3038 \captionof{figure}{\CFA Internal-scheduling benchmark}
    3039 \label{f:schedint}
     2846        BENCH(
     2847                for( N ) do_call( m1/*, m2, m3, m4*/ );
     2848        )
     2849        sout | result`ns;
     2850}
     2851\end{cfa}
     2852\captionof{figure}{\CFA acquire/release mutex benchmark}
     2853\label{f:mutex}
    30402854
    30412855\columnbreak
    30422856
    30432857\vspace*{-16pt}
    3044 \captionof{table}{Internal-scheduling comparison (nanoseconds)}
    3045 \label{t:schedint}
    3046 \bigskip
    3047 
    3048 \begin{tabular}{@{}r*{3}{D{.}{.}{5.2}}@{}}
    3049 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    3050 \CFA @signal@, 1 monitor        & 364.4         & 364.2         & 4.4           \\
    3051 \CFA @signal@, 2 monitor        & 484.4         & 483.9         & 8.8           \\
    3052 \CFA @signal@, 4 monitor        & 709.1         & 707.7         & 15.0          \\
    3053 \uC @signal@ monitor            & 328.3         & 327.4         & 2.4           \\
    3054 Rust cond. variable                     & 7514.0        & 7437.4        & 397.2         \\
    3055 Java @notify@ monitor           & 9623.0        & 9654.6        & 236.2         \\
    3056 Pthreads cond. variable         & 5553.7        & 5576.1        & 345.6
     2858\captionof{table}{Mutex comparison (nanoseconds)}
     2859\label{tab:mutex}
     2860\begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
     2861\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
     2862test and test-and-test lock             & 19.1  & 18.9  & 0.40  \\
     2863\CFA @mutex@ function, 1 arg.   & 45.9  & 46.6  & 1.45  \\
     2864\CFA @mutex@ function, 2 arg.   & 105.0 & 104.7 & 3.08  \\
     2865\CFA @mutex@ function, 4 arg.   & 165.0 & 167.6 & 5.65  \\
     2866\uC @monitor@ member rtn.               & 54.0  & 53.7  & 0.82  \\
     2867Java synchronized method                & 31.0  & 31.1  & 0.50  \\
     2868Pthreads Mutex Lock                             & 33.6  & 32.6  & 1.14
    30572869\end{tabular}
    30582870\end{multicols}
     
    30622874
    30632875External scheduling is measured using a cycle of two threads calling and accepting the call using the @waitfor@ statement.
    3064 Figure~\ref{f:schedext} shows the code for \CFA with results in Table~\ref{t:schedext}.
     2876Figure~\ref{f:ext-sched} shows the code for \CFA, with results in Table~\ref{tab:ext-sched}.
    30652877Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
    30662878
     
    30692881\vspace*{-16pt}
    30702882\begin{cfa}
    3071 @monitor@ M {} m1/*, m2, m3, m4*/;
    3072 void call( M & @mutex p1/*, p2, p3, p4*/@ ) {}
    3073 void wait( M & @mutex p1/*, p2, p3, p4*/@ ) {
    3074         for ( N ) { @waitfor( call : p1/*, p2, p3, p4*/ );@ }
    3075 }
     2883volatile int go = 0;
     2884@monitor@ M {} m;
    30762885thread T {};
     2886void __attribute__((noinline))
     2887do_call( M & @mutex@ ) {}
    30772888void main( T & ) {
    3078         BENCH( for ( N ) { call( m1/*, m2, m3, m4*/ ); } )
    3079         sout | result;
     2889        while ( go == 0 ) { yield(); }
     2890        while ( go == 1 ) { do_call( m ); }
     2891}
     2892int __attribute__((noinline))
     2893do_wait( M & @mutex@ m ) {
     2894        go = 1; // continue other thread
     2895        BENCH( for ( N ) { @waitfor( do_call, m );@ } )
     2896        go = 0; // stop other thread
     2897        sout | result`ns;
    30802898}
    30812899int main() {
    30822900        T t;
    3083         wait( m1/*, m2, m3, m4*/ );
     2901        do_wait( m );
    30842902}
    30852903\end{cfa}
    30862904\captionof{figure}{\CFA external-scheduling benchmark}
    3087 \label{f:schedext}
     2905\label{f:ext-sched}
    30882906
    30892907\columnbreak
     
    30912909\vspace*{-16pt}
    30922910\captionof{table}{External-scheduling comparison (nanoseconds)}
    3093 \label{t:schedext}
     2911\label{tab:ext-sched}
    30942912\begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
    30952913\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    3096 \CFA @waitfor@, 1 monitor       & 367.1 & 365.3 & 5.0   \\
    3097 \CFA @waitfor@, 2 monitor       & 463.0 & 464.6 & 7.1   \\
    3098 \CFA @waitfor@, 4 monitor       & 689.6 & 696.2 & 21.5  \\
    3099 \uC \lstinline[language=uC++]|_Accept| monitor  & 328.2 & 329.1 & 3.4   \\
    3100 Go \lstinline[language=Golang]|select| channel  & 365.0 & 365.5 & 1.2
     2914\CFA @waitfor@, 1 @monitor@     & 376.4 & 376.8 & 7.63  \\
     2915\CFA @waitfor@, 2 @monitor@     & 491.4 & 492.0 & 13.31 \\
     2916\CFA @waitfor@, 4 @monitor@     & 681.0 & 681.7 & 19.10 \\
     2917\uC @_Accept@                           & 331.1 & 331.4 & 2.66
    31012918\end{tabular}
    31022919\end{multicols}
    31032920
    3104 \paragraph{Mutual-Exclusion}
    3105 
    3106 Uncontented mutual exclusion, which frequently occurs, is measured by entering/leaving a critical section.
    3107 For monitors, entering and leaving a monitor function is measured, otherwise the language-appropriate mutex-lock is measured.
    3108 For comparison, a spinning (versus blocking) test-and-test-set lock is presented.
    3109 Figure~\ref{f:mutex} shows the code for \CFA with results in Table~\ref{t:mutex}.
    3110 Note the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
     2921
     2922\paragraph{Internal Scheduling}
     2923
     2924Internal scheduling is measured using a cycle of two threads signalling and waiting.
     2925Figure~\ref{f:int-sched} shows the code for \CFA, with results in Table~\ref{tab:int-sched}.
     2926Note, the incremental cost of bulk acquire for \CFA, which is largely a fixed cost for small numbers of mutex objects.
      2927Java scheduling is significantly greater because the benchmark explicitly creates multiple threads in order to prevent the JIT from making the program sequential, \ie removing all locking.
    31112928
    31122929\begin{multicols}{2}
    31132930\lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    31142931\begin{cfa}
    3115 @monitor@ M {} m1/*, m2, m3, m4*/;
    3116 call( M & @mutex p1/*, p2, p3, p4*/@ ) {}
     2932volatile int go = 0;
     2933@monitor@ M { @condition c;@ } m;
     2934void __attribute__((noinline))
     2935do_call( M & @mutex@ a1 ) { @signal( c );@ }
     2936thread T {};
     2937void main( T & this ) {
     2938        while ( go == 0 ) { yield(); }
     2939        while ( go == 1 ) { do_call( m ); }
     2940}
     2941int  __attribute__((noinline))
     2942do_wait( M & mutex m ) with(m) {
     2943        go = 1; // continue other thread
     2944        BENCH( for ( N ) { @wait( c );@ } );
     2945        go = 0; // stop other thread
     2946        sout | result`ns;
     2947}
    31172948int main() {
    3118         BENCH( for( N ) call( m1/*, m2, m3, m4*/ ); )
    3119         sout | result;
    3120 }
    3121 \end{cfa}
    3122 \captionof{figure}{\CFA acquire/release mutex benchmark}
    3123 \label{f:mutex}
     2949        T t;
     2950        do_wait( m );
     2951}
     2952\end{cfa}
     2953\captionof{figure}{\CFA Internal-scheduling benchmark}
     2954\label{f:int-sched}
    31242955
    31252956\columnbreak
    31262957
    31272958\vspace*{-16pt}
    3128 \captionof{table}{Mutex comparison (nanoseconds)}
    3129 \label{t:mutex}
    3130 \begin{tabular}{@{}r*{3}{D{.}{.}{3.2}}@{}}
    3131 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} &\multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    3132 test-and-test-set lock                  & 19.1  & 18.9  & 0.4   \\
    3133 \CFA @mutex@ function, 1 arg.   & 48.3  & 47.8  & 0.9   \\
    3134 \CFA @mutex@ function, 2 arg.   & 86.7  & 87.6  & 1.9   \\
    3135 \CFA @mutex@ function, 4 arg.   & 173.4 & 169.4 & 5.9   \\
    3136 \uC @monitor@ member rtn.               & 54.8  & 54.8  & 0.1   \\
    3137 Goroutine mutex lock                    & 34.0  & 34.0  & 0.0   \\
    3138 Rust mutex lock                                 & 33.0  & 33.2  & 0.8   \\
    3139 Java synchronized method                & 31.0  & 31.0  & 0.0   \\
    3140 Pthreads mutex Lock                             & 31.0  & 31.1  & 0.4
     2959\captionof{table}{Internal-scheduling comparison (nanoseconds)}
     2960\label{tab:int-sched}
     2961\bigskip
     2962
     2963\begin{tabular}{@{}r*{3}{D{.}{.}{5.2}}@{}}
     2964\multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
     2965\CFA @signal@, 1 @monitor@      & 372.6         & 374.3         & 14.17         \\
     2966\CFA @signal@, 2 @monitor@      & 492.7         & 494.1         & 12.99         \\
     2967\CFA @signal@, 4 @monitor@      & 749.4         & 750.4         & 24.74         \\
     2968\uC @signal@                            & 320.5         & 321.0         & 3.36          \\
     2969Java @notify@                           & 10160.5       & 10169.4       & 267.71        \\
     2970Pthreads Cond. Variable         & 4949.6        & 5065.2        & 363
    31412971\end{tabular}
    31422972\end{multicols}
    31432973
    3144 \paragraph{Creation}
    3145 
    3146 Creation is measured by creating/deleting a specific kind of control-flow object.
    3147 Figure~\ref{f:creation} shows the code for \CFA with results in Table~\ref{t:creation}.
    3148 Note, the call stacks of \CFA coroutines are lazily created on the first resume, therefore the cost of creation with and without a stack are presented.
    3149 
    3150 \begin{multicols}{2}
    3151 \lstset{language=CFA,moredelim=**[is][\color{red}]{@}{@},deletedelim=**[is][]{`}{`}}
    3152 \begin{cfa}
    3153 @coroutine@ MyCoroutine {};
    3154 void ?{}( MyCoroutine & this ) {
    3155 #ifdef EAGER
    3156         resume( this );
    3157 #endif
    3158 }
    3159 void main( MyCoroutine & ) {}
    3160 int main() {
    3161         BENCH( for ( N ) { @MyCoroutine c;@ } )
    3162         sout | result;
    3163 }
    3164 \end{cfa}
    3165 \captionof{figure}{\CFA creation benchmark}
    3166 \label{f:creation}
    3167 
    3168 \columnbreak
    3169 
    3170 \vspace*{-16pt}
    3171 \captionof{table}{Creation comparison (nanoseconds)}
    3172 \label{t:creation}
    3173 
    3174 \begin{tabular}[t]{@{}r*{3}{D{.}{.}{5.2}}@{}}
    3175 \multicolumn{1}{@{}c}{} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Average} & \multicolumn{1}{c@{}}{Std Dev} \\
    3176 \CFA generator                  & 0.6           & 0.6           & 0.0           \\
    3177 \CFA coroutine lazy             & 13.4          & 13.1          & 0.5           \\
    3178 \CFA coroutine eager    & 144.7         & 143.9         & 1.5           \\
    3179 \CFA thread                             & 466.4         & 468.0         & 11.3          \\
    3180 \uC coroutine                   & 155.6         & 155.7         & 1.7           \\
    3181 \uC thread                              & 523.4         & 523.9         & 7.7           \\
    3182 Python generator                & 123.2         & 124.3         & 4.1           \\
    3183 Node.js generator               & 32.3          & 32.2          & 0.3           \\
    3184 Goroutine thread                & 751.0         & 750.5         & 3.1           \\
    3185 Rust thread                             & 53801.0       & 53896.8       & 274.9         \\
    3186 Java thread                             & 120274.0      & 120722.9      & 2356.7        \\
    3187 Pthreads thread                 & 31465.5       & 31419.5       & 140.4
    3188 \end{tabular}
    3189 \end{multicols}
    3190 
    3191 
    3192 \subsection{Discussion}
    3193 
    3194 Languages using 1:1 threading based on pthreads can at best meet or exceed (due to language overhead) the pthread results.
    3195 Note, pthreads has a fast zero-contention mutex lock checked in user space.
    3196 Languages with M:N threading have better performance than 1:1 because there is no operating-system interactions.
    3197 Languages with stackful coroutines have higher cost than stackless coroutines because of stack allocation and context switching;
    3198 however, stackful \uC and \CFA coroutines have approximately the same performance as stackless Python and Node.js generators.
    3199 The \CFA stackless generator is approximately 25 times faster for suspend/resume and 200 times faster for creation than stackless Python and Node.js generators.
    3200 
    32012974
    32022975\section{Conclusion}
     
    32042977Advanced control-flow will always be difficult, especially when there is temporal ordering and nondeterminism.
    32052978However, many systems exacerbate the difficulty through their presentation mechanisms.
    3206 This paper shows it is possible to understand high-level control-flow using three properties: statefulness, thread, mutual-exclusion/synchronization.
    3207 Combining these properties creates a number of high-level, efficient, and maintainable control-flow types: generator, coroutine, thread, each of which can be a monitor.
    3208 Eliminated from \CFA are barging and spurious wakeup, which are nonintuitive and lead to errors, and having to work with a bewildering set of low-level locks and acquisition techniques.
    3209 \CFA high-level race-free monitors and threads provide the core mechanisms for mutual exclusion and synchronization, without having to resort to magic qualifiers like @volatile@/@atomic@.
     2979This paper shows it is possible to present a hierarchy of control-flow features, generator, coroutine, thread, and monitor, providing an integrated set of high-level, efficient, and maintainable control-flow features.
     2980Eliminated from \CFA are spurious wakeup and barging, which are nonintuitive and lead to errors, and having to work with a bewildering set of low-level locks and acquisition techniques.
     2981\CFA high-level race-free monitors and tasks provide the core mechanisms for mutual exclusion and synchronization, without having to resort to magic qualifiers like @volatile@/@atomic@.
    32102982Extending these mechanisms to handle high-level deadlock-free bulk acquire across both mutual exclusion and synchronization is a unique contribution.
    32112983The \CFA runtime provides concurrency based on a preemptive M:N user-level threading-system, executing in clusters, which encapsulate scheduling of work on multiple kernel threads providing parallelism.
    32122984The M:N model is judged to be efficient and provide greater flexibility than a 1:1 threading model.
    32132985These concepts and the \CFA runtime-system are written in the \CFA language, extensively leveraging the \CFA type-system, which demonstrates the expressiveness of the \CFA language.
    3214 Performance comparisons with other concurrent systems/languages show the \CFA approach is competitive across all basic operations, which translates directly into good performance in well-written applications with advanced control-flow.
    3215 C programmers should feel comfortable using these mechanisms for developing complex control-flow in applications, with the ability to obtain maximum available performance by selecting mechanisms at the appropriate level of need using only calling communication.
     2986Performance comparisons with other concurrent systems/languages show the \CFA approach is competitive across all low-level operations, which translates directly into good performance in well-written concurrent applications.
     2987C programmers should feel comfortable using these mechanisms for developing complex control-flow in applications, with the ability to obtain maximum available performance by selecting mechanisms at the appropriate level of need.
    32162988
    32172989
     
    32333005\label{futur:nbio}
    32343006
    3235 Many modern workloads are not bound by computation but IO operations, common cases being web servers and XaaS~\cite{XaaS} (anything as a service).
     3007Many modern workloads are not bound by computation but IO operations, a common case being web servers and XaaS~\cite{XaaS} (anything as a service).
    32363008These types of workloads require significant engineering to amortizing costs of blocking IO-operations.
    32373009At its core, non-blocking I/O is an operating-system level feature queuing IO operations, \eg network operations, and registering for notifications instead of waiting for requests to complete.
     
    32613033\section{Acknowledgements}
    32623034
    3263 The authors recognize the design assistance of Aaron Moss, Rob Schluntz, Andrew Beach, and Michael Brooks; David Dice for commenting and helping with the Java benchmarks; and Gregor Richards for helping with the Node.js benchmarks.
    3264 This research is funded by a grant from Waterloo-Huawei (\url{http://www.huawei.com}) Joint Innovation Lab. %, and Peter Buhr is partially funded by the Natural Sciences and Engineering Research Council of Canada.
     3035The authors would like to recognize the design assistance of Aaron Moss, Rob Schluntz, Andrew Beach and Michael Brooks on the features described in this paper.
     3036Funding for this project has been provided by Huawei Ltd.\ (\url{http://www.huawei.com}). %, and Peter Buhr is partially funded by the Natural Sciences and Engineering Research Council of Canada.
    32653037
    32663038{%
    3267 \fontsize{9bp}{11.5bp}\selectfont%
     3039\fontsize{9bp}{12bp}\selectfont%
    32683040\bibliography{pl,local}
    32693041}%
  • doc/papers/concurrency/examples/Fib.py

    r6a490b2 rb7d6a36  
    44        while True:
    55                fn = fn1 + fn2; fn2 = fn1; fn1 = fn; yield fn
     6
     7
    68
    79f1 = Fib()
     
    1214# Local Variables: #
    1315# tab-width: 4 #
    14 # compile-command: "python3.7 Fib.py" #
     16# compile-command: "python3.5 Fib.py" #
    1517# End: #
  • doc/papers/concurrency/examples/Fib2.c

    r6a490b2 rb7d6a36  
    11#include <stdio.h>
    22
     3void mary() {
     4        printf( "MARY\n" );
     5}
     6
    37#define FIB_INIT { 0 }
    4 typedef struct { int restart; int fn1, fn2; } Fib;
     8typedef struct { int next; int fn1, fn2; } Fib;
    59int fib( Fib * f ) {
    6         static void * states[] = { &&s0, &&s1, &&s2 };
    7         goto *states[f->restart];
    8   s0:
     10        static void * states[] = { &&s1, &&s2, &&s3 };
     11        goto *states[f->next];
     12  s1:
     13        mary();
    914        f->fn1 = 0;
    10         f->restart = 1;
     15        f->next = 1;
    1116        return f->fn1;
    12   s1:
     17  s2:
     18        mary();
    1319        f->fn2 = f->fn1;
    1420        f->fn1 = 1;
    15         f->restart = 2;
     21        f->next = 2;
    1622        return f->fn1;
    17   s2:;
     23  s3:;
     24        mary();
    1825        int fn = f->fn1 + f->fn2;
    1926        f->fn2 = f->fn1;
  • doc/papers/concurrency/examples/Fib2.py

    r6a490b2 rb7d6a36  
    11def Fib():
    2     fn1, fn = 1, 0
     2    fn1, fn = 0, 1
    33    while True:
    4         yield fn
     4        yield fn1
    55        fn1, fn = fn, fn1 + fn
    66
     
    1212# Local Variables: #
    1313# tab-width: 4 #
    14 # compile-command: "python3.7 Fib2.py" #
     14# compile-command: "python3.5 Fib2.py" #
    1515# End: #
  • doc/papers/concurrency/examples/Fib3.c

    r6a490b2 rb7d6a36  
    22
    33typedef struct {
    4         int restart, fn1, fn;
     4        int fn1, fn;
     5        void * next;
    56} Fib;
    6 #define FibCtor { 0, 1, 0 }
     7#define FibCtor { 1, 0, NULL }
    78
    89Fib * comain( Fib * f ) {
    9         static void * states[] = {&&s0, &&s1};
    10         goto *states[f->restart];
    11   s0: f->restart = 1;
     10        if ( __builtin_expect(f->next != 0, 1) ) goto *f->next;
     11        f->next = &&s1;
    1212        for ( ;; ) {
    1313                return f;
  • doc/papers/concurrency/examples/FibRefactor.py

    r6a490b2 rb7d6a36  
    2222# Local Variables: #
    2323# tab-width: 4 #
    24 # compile-command: "python3.7 FibRefactor.py" #
     24# compile-command: "python3.5 FibRefactor.py" #
    2525# End: #
  • doc/papers/concurrency/examples/Format.c

    r6a490b2 rb7d6a36  
    22
    33typedef struct {
    4         int restart, g, b;
     4        void * next;
    55        char ch;
     6        int g, b;
    67} Fmt;
    78
    89void comain( Fmt * f ) {
    9         static void * states[] = {&&s0, &&s1};
    10         goto *states[f->restart];
    11   s0: f->restart = 1;
     10        if ( __builtin_expect(f->next != 0, 1) ) goto *f->next;
     11        f->next = &&s1;
    1212        for ( ;; ) {
    1313                for ( f->g = 0; f->g < 5; f->g += 1 ) {                 // groups
    1414                        for ( f->b = 0; f->b < 4; f->b += 1 ) {         // blocks
    15                                 do {
    16                                         return;  s1: ;
    17                                 } while ( f->ch == '\n' );                              // ignore
     15                                return;
     16                          s1:;  while ( f->ch == '\n' ) return;         // ignore
    1817                                printf( "%c", f->ch );                                  // print character
    1918                        }
     
    2524
    2625int main() {
    27         Fmt fmt = { 0 };
     26        Fmt fmt = { NULL };
    2827        comain( &fmt );                                                                         // prime
    2928        for ( ;; ) {
  • doc/papers/concurrency/examples/Format.cc

    r6a490b2 rb7d6a36  
    66                        for ( g = 0; g < 5; g += 1 ) { // groups of 5 blocks
    77                                for ( b = 0; b < 4; b += 1 ) { // blocks of 4 characters
    8                                         for ( ;; ) { // for newline characters
     8//                                      for ( ;; ) { // for newline characters
    99                                                suspend();
    10                                                 if ( ch != '\n' ) break; // ignore newline
    11                                         }
     10//                                              if ( ch != '\n' ) break; // ignore newline
     11//                                      }
    1212//                                      cout << ch; // print character
    1313                                }
     
    3131// Local Variables: //
    3232// tab-width: 4 //
    33 // compile-command: "u++-work -O2 -nodebug Format.cc" //
     33// compile-command: "u++-work -O2 -nodebubg Format.cc" //
    3434// End: //
  • doc/papers/concurrency/examples/Format.cfa

    r6a490b2 rb7d6a36  
    1111                for ( g = 0; g < 5; g += 1 ) {          // groups of 5 blocks
    1212                        for ( b = 0; b < 4; b += 1 ) {  // blocks of 4 characters
    13                                 do {
     13//                              do {
    1414                                        suspend();
    15                                 } while ( ch == '\n' || ch == '\t' );
     15//                              } while ( ch == '\n' || ch == '\t' );
    1616                                sout | ch;                                      // print character
    1717                        }
  • doc/papers/concurrency/examples/Format.data

    r6a490b2 rb7d6a36  
    1 abcdefghijklmnop
    2 qrstuvwxyzx
    3 xxxxxxxxxxxxx
     1abcdefghijklmnopqrstuvwxyzxxxxxxxxxxxxxx
  • doc/papers/concurrency/examples/Format.py

    r6a490b2 rb7d6a36  
    44                        for g in range( 5 ):    # groups of 5 blocks
    55                                for b in range( 4 ): # blocks of 4 characters
    6                                         while True:
    7                                                 ch = (yield) # receive from send
    8                                                 if '\n' not in ch:
    9                                                         break
    10                                         print( ch, end='' ) # receive from send
     6                                        print( (yield), end='' ) # receive from send
    117                                print( '  ', end='' ) # block separator
    128                        print()                                 # group separator
     
    1511                        print()
    1612
    17 input = "abcdefghijklmnop\nqrstuvwx\nyzxxxxxxxxxxxxxx\n"
    18 
    1913fmt = Format()
    2014next( fmt )                                                     # prime generator
    21 for i in input:
    22         fmt.send( i );                          # send to yield
     15for i in range( 41 ):
     16        fmt.send( 'a' );                                # send to yield
    2317
    2418# Local Variables: #
    2519# tab-width: 4 #
    26 # compile-command: "python3.7 Format.py" #
     20# compile-command: "python3.5 Format.py" #
    2721# End: #
  • doc/papers/concurrency/examples/Format1.c

    r6a490b2 rb7d6a36  
    22
    33typedef struct {
    4         int restart, g, b;
     4        void * next;
    55        char ch;
     6        int g, b;
    67} Fmt;
    78
    89void format( Fmt * f ) {
    9         static void * states[] = {&&s0, &&s1};
    10         goto *states[f->restart];
    11   s0: f->restart = 1;
     10        if ( __builtin_expect(f->next != 0, 1) ) goto *f->next;
     11        f->next = &&s1;
    1212        for ( ;; ) {
    1313                for ( f->g = 0; f->g < 5; f->g += 1 ) {                 // groups
    1414                        for ( f->b = 0; f->b < 4; f->b += 1 ) {         // blocks
    1515                                return;
    16                           s1: if ( f->ch == '\0' ) goto fini;           // EOF ?
     16                          s1: ;
     17                                if ( f->ch == '\0' ) goto fini;                 // EOF ?
    1718                                while ( f->ch == '\n' ) return;                 // ignore
    18 //                              printf( "%c", f->ch );                                  // print character
     19                                printf( "%c", f->ch );                                  // print character
    1920                        }
    20 //                      printf( " " );                                                          // block separator
     21                        printf( " " );                                                          // block separator
    2122                }
    22 //              printf( "\n" );                                                                 // group separator
     23                printf( "\n" );                                                                 // group separator
    2324        }
    24   fini:;
    25 //      if ( f->g != 0 || f->b != 0 ) printf( "\n" );
     25  fini:
     26        if ( f->g != 0 || f->b != 0 ) printf( "\n" );
    2627}
    2728
    2829int main() {
    29         Fmt fmt = { 0 };
     30        Fmt fmt = { NULL };
    3031        format( &fmt );                                                                         // prime
    31         fmt.ch = 'a';
    32         for ( long int i = 0; i < 1000000000; i += 1 ) {
    33 //              scanf( "%c", &fmt.ch );                                                 // direct read into communication variable
    34 //        if ( feof( stdin ) ) break;
     32        for ( ;; ) {
     33                scanf( "%c", &fmt.ch );                                                 // direct read into communication variable
     34          if ( feof( stdin ) ) break;
    3535                format( &fmt );
    3636        }
    37         fmt.ch = '\0';                                                                          // sentential (EOF)
     37        fmt.ch = '\0';
    3838        format( &fmt );
    3939}
  • doc/papers/concurrency/examples/PingPong.c

    r6a490b2 rb7d6a36  
    22
    33typedef struct PingPong {
    4         int restart;                                                                            // style 1
     4        const char * name;
    55        int N, i;
    6         const char * name;
    76        struct PingPong * partner;
    8         void * next;                                                                            // style 2
     7        void * next;
    98} PingPong;
    10 #define PPCtor( name, N ) { 0, N, 0, name, NULL, NULL }
    11 
     9#define PPCtor( name, N ) { name, N, 0, NULL, NULL }
    1210void comain( PingPong * pp ) __attribute__(( noinline ));
    1311void comain( PingPong * pp ) {
     12        if ( __builtin_expect(pp->next != 0, 1) ) goto *pp->next;
    1413#if 0
    15         if ( __builtin_expect(pp->next != 0, 1) ) goto *pp->next;
     14        pp->next = &&here;
     15                asm( "mov  %0,%%rdi" : "=m" (pp) );
     16                asm( "mov  %rdi,%rax" );
     17#ifndef OPT
     18#ifdef PRINT
     19                asm( "add  $16, %rsp" );
     20#endif // PRINT
     21                asm( "popq %rbp" );
     22#endif // ! OPT
     23
     24#ifdef OPT
     25#ifdef PRINT
     26                asm( "popq %rbx" );
     27#endif // PRINT
     28#endif // OPT
     29                asm( "jmp  comain" );
     30  here: ;
     31#endif // 0
     32
    1633        pp->next = &&cycle;
    1734        for ( ; pp->i < pp->N; pp->i += 1 ) {
     
    3653          cycle: ;
    3754        } // for
    38 #endif // 0
    39 
    40 #if 1
    41         static void * states[] = {&&s0, &&s1};
    42         goto *states[pp->restart];
    43   s0: pp->restart = 1;
    44         for ( ; pp->i < pp->N; pp->i += 1 ) {
    45 #ifdef PRINT
    46                 printf( "%s %d\n", pp->name, pp->i );
    47 #endif // PRINT
    48                 asm( "mov  %0,%%rdi" : "=m" (pp->partner) );
    49                 asm( "mov  %rdi,%rax" );
    50 #ifndef OPT
    51 #ifdef PRINT
    52                 asm( "add  $16, %rsp" );
    53 #endif // PRINT
    54                 asm( "popq %rbp" );
    55 #endif // ! OPT
    56 
    57 #ifdef OPT
    58 #ifdef PRINT
    59                 asm( "popq %rbx" );
    60 #endif // PRINT
    61 #endif // OPT
    62                 asm( "jmp  comain" );
    63           s1: ;
    64         } // for
    65 #endif // 0
    6655}
    6756
     
    8170// Local Variables: //
    8271// tab-width: 4 //
    83 // compile-command: "gcc-9 -g -DPRINT PingPong.c" //
     72// compile-command: "gcc-8 -g -DPRINT PingPong.c" //
    8473// End: //
  • doc/papers/concurrency/examples/Pingpong.py

    r6a490b2 rb7d6a36  
    11def PingPong( name, N ):
    2         partner = yield                         # get partner
    3         yield                                           # resume scheduler
     2        partner = (yield)           # get partner
     3        yield                       # resume scheduler
    44        for i in range( N ):
    55                print( name )
    6                 yield partner                   # execute next
     6                yield partner           # execute next
    77        print( "end", name )
    88
    99def Scheduler():
    10         n = yield                                       # starting coroutine
    11         try:
    12                 while True:
    13                         n = next( n )           # schedule coroutine
    14         except StopIteration:
    15                 pass
     10        n = (yield)                 # starting coroutine
     11        while True:
     12                n = next( n )           # schedule coroutine
    1613
    1714pi = PingPong( "ping", 5 )
    1815po = PingPong( "pong", 5 )
    19 next( pi )                                              # prime
    20 pi.send( po )                                   # send partner
    21 next( po )                                              # prime
    22 po.send( pi )                                   # send partner
     16next( pi )                      # prime
     17pi.send( po )                   # send partner
     18next( po )                      # prime
     19po.send( pi )                   # send partner
    2320
    2421s = Scheduler();
    25 next( s )                                               # prime
     22next( s )                       # prime
    2623try:
    2724        s.send( pi )                            # start cycle
    28 except StopIteration:                   # scheduler stopped
    29         pass
     25except StopIteration:
     26        print( "scheduler stop" )
    3027print( "stop" )
    3128
    3229# Local Variables: #
    3330# tab-width: 4 #
    34 # compile-command: "python3.7 Pingpong.py" #
     31# compile-command: "python3.5 Pingpong.py" #
    3532# End: #
  • doc/papers/concurrency/examples/ProdCons.py

    r6a490b2 rb7d6a36  
    11def Prod( N ):
    2         cons = yield                            # get cons
    3         yield                                           # resume scheduler
     2        cons = (yield)              # get cons
     3        yield                       # resume scheduler
    44        for i in range( N ):
    55                print( "prod" )
    6                 yield cons                              # execute next
     6                yield cons              # execute next
    77        print( "end", "prod" )
    88
    99def Cons( N ):
    10         prod = yield                            # get prod
    11         yield                                           # resume scheduler
     10        prod = (yield)              # get prod
     11        yield                       # resume scheduler
    1212        for i in range( N ):
    1313                print( "cons" )
    14                 yield prod                              # execute next
     14                yield prod              # execute next
    1515        print( "end", "cons" )
    1616
    1717def Scheduler():
    18         n = yield                                       # starting coroutine
    19         try:
    20                 while True:
    21                         n = next( n )           # schedule coroutine
    22         except StopIteration:
    23                 pass
     18        n = (yield)                 # starting coroutine
     19        while True:
     20                n = next( n )           # schedule coroutine
    2421
    2522prod = Prod( 5 )
    2623cons = Cons( 5 )
    27 next( prod )                                    # prime
    28 prod.send( cons )                               # send cons
    29 next( cons )                                    # prime
    30 cons.send( prod )                               # send prod
     24next( prod )                    # prime
     25prod.send( cons )               # send cons
     26next( cons )                    # prime
     27cons.send( prod )               # send prod
    3128
    3229s = Scheduler();
    33 next( s )                                               # prime
     30next( s )                       # prime
    3431try:
    3532        s.send( prod )                          # start cycle
    36 except StopIteration:                   # scheduler stopped
    37         pass
     33except StopIteration:
     34        print( "scheduler stop" )
    3835print( "stop" )
    3936
    4037# Local Variables: #
    4138# tab-width: 4 #
    42 # compile-command: "python3.7 ProdCons.py" #
     39# compile-command: "python3.5 ProdCons.py" #
    4340# End: #
  • doc/papers/concurrency/examples/Refactor.py

    r6a490b2 rb7d6a36  
    2626# Local Variables: #
    2727# tab-width: 4 #
    28 # compile-command: "python3.7 Refactor.py" #
     28# compile-command: "python3.5 Refactor.py" #
    2929# End: #
  • doc/papers/concurrency/figures/FullCoroutinePhases.fig

    r6a490b2 rb7d6a36  
    88-2
    991200 2
    10 5 1 0 1 0 7 100 0 -1 0.000 0 0 1 0 5175.000 2437.500 4875 1875 5175 1800 5475 1875
     105 1 0 1 0 7 100 0 -1 0.000 0 0 1 0 4575.000 2437.500 4275 1875 4575 1800 4875 1875
    1111        1 1 1.00 45.00 90.00
    12 5 1 0 1 0 7 100 0 -1 0.000 0 0 1 0 5175.000 1537.500 5475 2100 5175 2175 4875 2100
     125 1 0 1 0 7 100 0 -1 0.000 0 0 1 0 4575.000 1537.500 4875 2100 4575 2175 4275 2100
    1313        1 1 1.00 45.00 90.00
    14 5 1 0 1 0 7 50 -1 -1 0.000 0 1 1 0 4807.500 1642.500 4725 1425 4575 1650 4800 1875
     145 1 0 1 0 7 50 -1 -1 0.000 0 1 1 0 4207.500 1642.500 4125 1425 3975 1650 4200 1875
    1515        1 1 1.00 45.00 90.00
    16 6 1575 1575 2700 2025
    17162 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
    1817        1 1 1.00 45.00 90.00
     
    2120        1 1 1.00 45.00 90.00
    2221         2175 1575 2400 1800
     222 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
     23        1 1 1.00 45.00 90.00
     24         3300 1575 3300 1800
     252 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
     26        1 1 1.00 45.00 90.00
     27         3300 2025 3300 2250
     284 1 0 100 0 0 10 0.0000 2 105 555 2100 1200 creation\001
    23294 1 0 100 0 4 10 0.0000 2 165 300 1725 1950 ping\001
    24304 1 0 100 0 4 10 0.0000 2 135 360 2475 1950 pong\001
    25 -6
    26 6 3075 1575 4200 2025
    27 6 3075 1575 4200 2025
    28 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
    29         1 1 1.00 45.00 90.00
    30          3525 1575 3300 1800
    31 2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
    32         1 1 1.00 45.00 90.00
    33          3675 1575 3900 1800
    34 4 1 0 100 0 4 10 0.0000 2 165 300 3225 1950 ping\001
    35 4 1 0 100 0 4 10 0.0000 2 135 360 3975 1950 pong\001
    36 -6
    37 -6
     314 1 0 100 0 4 10 0.0000 2 165 300 3300 1950 ping\001
     324 1 0 100 0 4 10 0.0000 2 135 360 3300 2400 pong\001
     334 1 0 100 0 0 10 0.0000 2 105 675 4575 1200 execution\001
     344 1 0 100 0 4 10 0.0000 2 165 300 4275 2025 ping\001
     354 1 0 100 0 4 10 0.0000 2 135 360 4875 2025 pong\001
     364 1 0 100 0 0 10 0.0000 2 90 420 3300 1200 starter\001
    38374 1 0 100 0 4 10 0.0000 2 165 705 2100 1500 pgm main\001
    39 4 1 0 100 0 4 10 0.0000 2 165 705 3600 1500 pgm main\001
    40 4 1 0 100 0 4 10 0.0000 2 165 300 4875 2025 ping\001
    41 4 1 0 100 0 4 10 0.0000 2 135 360 5475 2025 pong\001
    42 4 1 0 100 0 4 10 0.0000 2 165 705 5100 1500 pgm main\001
    43 4 1 0 100 0 2 10 0.0000 2 105 540 2100 1275 creator\001
    44 4 1 0 100 0 2 10 0.0000 2 105 495 3600 1275 starter\001
    45 4 1 0 100 0 2 10 0.0000 2 105 690 5175 1275 execution\001
     384 1 0 100 0 4 10 0.0000 2 165 705 3300 1500 pgm main\001
     394 1 0 100 0 4 10 0.0000 2 165 705 4500 1500 pgm main\001
  • doc/papers/concurrency/figures/RunTimeStructure.fig

    r6a490b2 rb7d6a36  
    36361 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 4500 3600 15 15 4500 3600 4515 3615
    3737-6
    38 6 3225 4125 4650 4425
    39 6 4350 4200 4650 4350
    40 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 4425 4275 15 15 4425 4275 4440 4290
    41 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 4500 4275 15 15 4500 4275 4515 4290
    42 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 4575 4275 15 15 4575 4275 4590 4290
     386 2175 4650 7050 4950
     391 3 0 1 0 0 0 0 0 0.000 1 0.0000 2250 4830 30 30 2250 4830 2280 4860
     401 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4200 4800 150 75 4200 4800 4350 4875
     411 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3275 4800 100 100 3275 4800 3375 4800
     422 2 0 1 -1 -1 0 0 -1 0.000 0 0 0 0 0 5
     43         5400 4950 5400 4725 5175 4725 5175 4950 5400 4950
     442 2 1 1 -1 -1 0 0 -1 3.000 0 0 0 0 0 5
     45         6525 4950 6300 4950 6300 4725 6525 4725 6525 4950
     464 0 -1 0 0 0 10 0.0000 2 105 450 6600 4875 cluster\001
     474 0 -1 0 0 0 10 0.0000 2 105 660 5475 4875 processor\001
     484 0 -1 0 0 0 10 0.0000 2 105 555 4425 4875 monitor\001
     494 0 -1 0 0 0 10 0.0000 2 120 270 3450 4875 task\001
     504 0 -1 0 0 0 10 0.0000 2 105 660 2325 4875 coroutine\001
    4351-6
    44 1 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3450 4275 225 150 3450 4275 3675 4425
    45 1 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4050 4275 225 150 4050 4275 4275 4425
     526 3450 1275 3750 1425
     531 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3525 1350 15 15 3525 1350 3540 1365
     541 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3600 1350 15 15 3600 1350 3615 1365
     551 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3675 1350 15 15 3675 1350 3690 1365
    4656-6
    47 6 6675 4125 7500 4425
    48 6 7200 4200 7500 4350
    49 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 7275 4275 15 15 7275 4275 7290 4290
    50 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 7350 4275 15 15 7350 4275 7365 4290
    51 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 7425 4275 15 15 7425 4275 7440 4290
    52 -6
    53 1 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 6900 4275 225 150 6900 4275 7125 4425
    54 -6
    55 6 6675 3525 8025 3975
    56 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
    57         1 1 1.00 45.00 90.00
    58          6675 3750 6975 3750
    59 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
    60         1 1 1.00 45.00 90.00
    61          7125 3750 7350 3750
    62 2 2 0 1 -1 -1 0 0 -1 0.000 0 0 0 0 0 5
    63          7800 3975 7800 3525 7350 3525 7350 3975 7800 3975
    64 2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
    65         1 1 1.00 45.00 90.00
    66          7800 3750 8025 3750
     576 5550 1275 5850 1425
     581 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 5625 1350 15 15 5625 1350 5640 1365
     591 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 5700 1350 15 15 5700 1350 5715 1365
     601 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 5775 1350 15 15 5775 1350 5790 1365
    6761-6
    68621 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5550 2625 150 150 5550 2625 5700 2625
     
    73671 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4425 2850 150 150 4425 2850 4575 2850
    74681 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4650 2475 150 150 4650 2475 4800 2475
     691 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3525 3600 150 150 3525 3600 3675 3600
    75701 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3975 3600 150 150 3975 3600 4125 3600
    76711 3 0 1 0 0 0 0 0 0.000 1 0.0000 3525 3600 30 30 3525 3600 3555 3630
     
    79741 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3975 2850 150 150 3975 2850 4125 2850
    80751 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 7200 2775 150 150 7200 2775 7350 2775
    81 1 3 0 1 0 0 0 0 0 0.000 1 0.0000 2250 4830 30 30 2250 4830 2280 4860
    82 1 3 0 1 0 0 0 0 0 0.000 1 0.0000 7200 2775 30 30 7200 2775 7230 2805
    83 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3525 3600 150 150 3525 3600 3675 3600
    84 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 3875 4800 100 100 3875 4800 3975 4800
    85 1 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4650 4800 150 75 4650 4800 4800 4875
     761 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4650 1350 225 150 4650 1350 4875 1500
     771 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 5250 1350 225 150 5250 1350 5475 1500
     781 1 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4050 1350 225 150 4050 1350 4275 1500
    86792 2 0 1 -1 -1 0 0 -1 0.000 0 0 0 0 0 5
    8780         2400 4200 2400 3750 1950 3750 1950 4200 2400 4200
     
    1471402 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
    148141        1 1 1.00 45.00 90.00
     142         6675 3975 6975 3975
     1432 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
     144        1 1 1.00 45.00 90.00
    149145         7050 2775 6825 2775
    1501462 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 2
    151          6825 2775 6825 3750
     147         6825 2775 6825 3975
     1482 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
     149        1 1 1.00 45.00 90.00
     150         7125 3975 7350 3975
     1512 2 0 1 -1 -1 0 0 -1 0.000 0 0 0 0 0 5
     152         7800 4200 7800 3750 7350 3750 7350 4200 7800 4200
     1532 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 2
     154        1 1 1.00 45.00 90.00
     155         7800 3975 8025 3975
    1521562 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 1 0 4
    153157        1 1 1.00 45.00 90.00
    154          7875 3750 7875 2325 7200 2325 7200 2550
    155 2 2 0 1 -1 -1 0 0 -1 0.000 0 0 0 0 0 5
    156          5850 4950 5850 4725 5625 4725 5625 4950 5850 4950
    157 2 2 1 1 -1 -1 0 0 -1 3.000 0 0 0 0 0 5
    158          6975 4950 6750 4950 6750 4725 6975 4725 6975 4950
     158         7875 3975 7875 2325 7200 2325 7200 2550
    1591594 1 -1 0 0 0 10 0.0000 2 105 720 5550 4425 Processors\001
    1601604 1 -1 0 0 0 10 0.0000 2 120 1005 4200 3225 Blocked Tasks\001
     
    1651654 1 -1 0 0 0 10 0.0000 2 105 990 2175 3525 Discrete-event\001
    1661664 1 -1 0 0 0 10 0.0000 2 135 795 2175 4350 preemption\001
    167 4 0 -1 0 0 0 10 0.0000 2 150 1290 2325 4875 genrator/coroutine\001
    168 4 0 -1 0 0 0 10 0.0000 2 120 270 4050 4875 task\001
    169 4 0 -1 0 0 0 10 0.0000 2 105 450 7050 4875 cluster\001
    170 4 0 -1 0 0 0 10 0.0000 2 105 660 5925 4875 processor\001
    171 4 0 -1 0 0 0 10 0.0000 2 105 555 4875 4875 monitor\001
  • doc/papers/concurrency/mail2

    r6a490b2 rb7d6a36  
    2222Software: Practice and Experience Editorial Office
    2323
    24 
    25 
    26 Date: Tue, 12 Nov 2019 22:25:17 +0000
    27 From: Richard Jones <onbehalfof@manuscriptcentral.com>
    28 Reply-To: R.E.Jones@kent.ac.uk
    29 To: tdelisle@uwaterloo.ca, pabuhr@uwaterloo.ca
    30 Subject: Software: Practice and Experience - Decision on Manuscript ID
    31  SPE-19-0219
    32 
    33 12-Nov-2019
    34 
    35 Dear Dr Buhr,
    36 
    37 Many thanks for submitting SPE-19-0219 entitled "Advanced Control-flow and Concurrency in Cforall" to Software: Practice and Experience. The paper has now been reviewed and the comments of the referees are included at the bottom of this letter.
    38 
    39 The decision on this paper is that substantial further work is required. The referees have a number of substantial concerns. All the reviewers found the submission very hard to read; two of the reviewers state that it needs very substantial restructuring. These concerns must be addressed before your submission can be considered further.
    40 
    41 A revised version of your manuscript that takes into account the comments of the referees will be reconsidered for publication.
    42 
    43 Please note that submitting a revision of your manuscript does not guarantee eventual acceptance, and that your revision will be subject to re-review by the referees before a decision is rendered.
    44 
    45 You have 90 days from the date of this email to submit your revision. If you are unable to complete the revision within this time, please contact me to request an extension.
    46 
    47 You can upload your revised manuscript and submit it through your Author Center. Log into https://mc.manuscriptcentral.com/spe  and enter your Author Center, where you will find your manuscript title listed under "Manuscripts with Decisions".
    48 
    49 When submitting your revised manuscript, you will be able to respond to the comments made by the referee(s) in the space provided.  You can use this space to document any changes you make to the original manuscript.
    50 
    51 If you feel that your paper could benefit from English language polishing, you may wish to consider having your paper professionally edited for English language by a service such as Wiley's at http://wileyeditingservices.com. Please note that while this service will greatly improve the readability of your paper, it does not guarantee acceptance of your paper by the journal.
    52  
    53 Once again, thank you for submitting your manuscript to Software: Practice and Experience and I look forward to receiving your revision.
    54 
    55 
    56 Sincerely,
    57 
    58 Prof. Richard Jones
    59 Software: Practice and Experience
    60 R.E.Jones@kent.ac.uk
    61 
    62 
    63 Referee(s)' Comments to Author:
    64 
    65 Reviewing: 1
    66 
    67 Comments to the Author
    68 This article presents the design and rationale behind the various
    69 threading and synchronization mechanisms of C-forall, a new low-level
    70 programming language.  This paper is very similar to a companion paper
    71 which I have also received: as the papers are similar, so will these
    72 reviews be --- in particular any general comments from the other
    73 review apply to this paper also.
    74 
    75 As far as I can tell, the article contains three main ideas: an
    76 asynchronous execution / threading model; a model for monitors to
    77 provide mutual exclusion; and an implementation.  The first two ideas
    78 are drawn together in Table 1: unfortunately this is on page 25 of 30
    79 pages of text. Implementation choices and descriptions are scattered
    80 throughout the paper - and the sectioning of the paper seems almost
    81 arbitrary.
    82 
    83 The article is about its contributions.  Simply adding feature X to
    84 language Y isn't by itself a contribution, (when feature X isn't
    85 already a contribution).  The contribution can be in the design: the
    86 motivation, the space of potential design options, the particular
    87 design chosen and the rationale for that choice, or the resulting
    88 performance.  For example: why support two kinds of generators as well
    89 as user-level threads?  Why support both low and high level
    90 synchronization constructs?  Similarly I would have found the article
    91 easier to follow if it was written top down, presenting the design
    92 principles, present the space of language features, justify chosen
    93 language features (and rationale) and those excluded, and then present
    94 implementation, and performance.
    95 
    96 Then the writing of the article is often hard to follow, to say the
    97 least. Two examples: section 3 "stateful functions" - I've some idea
    98 what that is (a function with Algol's "own" or C's "static" variables?
    99 but in fact the paper has a rather more specific idea than that. The
    100 top of page 3 throws a whole lot of definitions at the reader
    101 "generator" "coroutine" "stackful" "stackless" "symmetric"
    102 "asymmetric" without ever stopping to define each one --- but then in
    103 footnote "C" takes the time to explain what C's "main" function is?  I
    104 cannot imagine a reader of this paper who doesn't know what "main" is
    105 in C; especially if they understand the other concepts already
    106 presented in the paper.  The start of section 3 then does the same
    107 thing: putting up a whole lot of definitions, making distinctions and
    108 comparisons, even talking about some runtime details, but the critical
    109 definition of a monitor doesn't appear until three pages later, at the
    110 start of section 5 on p15, lines 29-34 are a good, clear, description
    111 of what a monitor actually is.  That needs to come first, rather than
    112 being buried again after two sections of comparisons, discussions,
    113 implementations, and options that are ungrounded because they haven't
    114 told the reader what they are actually talking about.  First tell the
    115 reader what something is, then how they might use it (as programmers:
    116 what are the rules and restrictions) and only then start comparison
    117 with other things, other approaches, other languages, or
    118 implementations.
    119 
    120 The description of the implementation is similarly lost in the trees
    121 without ever really seeing the wood. Figure 19 is crucial here, but
    122 it's pretty much at the end of the paper, and comments about
    123 implementations are threaded throughout the paper without the context
    124 (fig 19) to understand what's going on.   The protocol for performance
    125 testing may just about suffice for C (although is N constantly ten
    126 million, or does it vary for each benchmark) but such evaluation isn't
    127 appropriate for garbage-collected or JITTed languages like Java or Go.
    128 
    129 other comments working through the paper - these are mostly low level
    130 and are certainly not comprehensive.
    131 
    132 p1 only a subset of C-forall extensions?
    133 
    134 p1 "has features often associated with object-oriented programming
    135 languages, such as constructors, destructors, virtuals and simple
    136 inheritance."   There's no need to quibble about this. Once a language
    137 has inheritance, it's hard to claim it's not object-oriented.
    138 
    139 
    140 p2 barging? signals-as-hints?
    141 
    142 p3 start your discussion of generations with a simple example of a
    143 C-forall generator.  Fig 1(b) might do: but put it inline instead of
    144 the python example - and explain the key rules and restrictions on the
    145 construct.  Then don't even start to compare with coroutines until
    146 you've presented, described and explained your coroutines...
    147 p3 I'd probably leave out the various "C" versions unless there are
    148 key points to make you can't make in C-forall. All the alternatives
    149 are just confusing.
    150 
    151 
    152 p4 but what's that "with" in Fig 1(B)
    153 
    154 p5 start with the high level features of C-forall generators...
    155 
    156 p5 why is the paper explaining networking protocols?
    157 
    158 p7 lines 1-9 (transforming generator to coroutine - why would I do any
    159 of this? Why would I want one instead of the other (do not use "stack"
    160 in your answer!)
    161 
    162 p10 last para "A coroutine must retain its last resumer to suspend
    163 back because the resumer is on a different stack. These reverse
    164 pointers allow suspend to cycle backwards, "  I've no idea what is
    165 going on here?  why should I care?  Shouldn't I just be using threads
    166 instead?  why not?
    167 
    168 p16 for the same reasons - what reasons?
    169 
    170 p17 if the multiple-monitor entry procedure really is novel, write a
    171 paper about that, and only about that.
    172 
    173 p23 "Loose Object Definitions" - no idea what that means.  in that
    174 section: you can't leave out JS-style dynamic properties.  Even in
    175 OOLs that (one way or another) allow separate definitions of methods
    176 (like Objective-C, Swift, Ruby, C#) at any time a runtime class has a
    177 fixed definition.  Quite why the detail about bit mask implementation
    178 is here anyway, I've no idea.
    179 
    180 p25 this cluster isn't a CLU cluster then?
    181 
    182 * conclusion should conclude the paper, not the related.
    183 
    184 
    185 Reviewing: 2
    186 
    187 Comments to the Author
    188 This paper describes the concurrency features of an extension of C (whose name I will write as "C\/" here, for convenience), including much design-level discussion of the coroutine- and monitor-based features and some microbenchmarks exploring the current implementation's performance. The key message of the latter is that the system's concurrency abstractions are much lighter-weight than the threading found in mainstream C or Java implementations.
    189 
    190 There is much description of the system and its details, but nothing about (non-artificial) uses of it. Although the microbenchmark data is encouraging, arguably not enough practical experience with the system has been reported here to say much about either its usability advantages or its performance.
    191 
    192 As such, the main contribution of the paper seem to be to document the existence of the described system and to provide a detailed design rationale and (partial) tutorial. I believe that could be of interest to some readers, so an acceptable manuscript is lurking in here somewhere.
    193 
    194 Unfortunately, at present the writing style is somewhere between unclear and infuriating. It omits to define terms; it uses needlessly many terms for what are apparently (but not clearly) the same things; it interrupts itself rather than deliver the natural consequent of whatever it has just said; and so on. Section 5 is particularly bad in these regards -- see my detailed comments below. Fairly major additional efforts will be needed to turn the present text into a digestible design-and-tutorial document. I suspect that a shorter paper could do this job better than the present manuscript, which is overwrought in parts.
    195 
    196 p2: lines 4--9 are a little sloppy. It is not the languages but their popular implementations which "adopt" the 1:1 kernel threading model.
    197 
    198 line 10: "medium work" -- "medium-sized work"?
    199 
    200 line 18: "is all sequential to the compiler" -- not true in modern compilers, and in 2004 H-J Boehm wrote a tech report describing exactly why ("Threads cannot be implemented as a library", HP Labs).
    201 
    202 line 20: "knows the optimization boundaries" -- I found this vague. What's an example?
    203 
    204 line 31: this paragraph has made a lot of claims. Perhaps forward-reference to the parts of the paper that discuss each one.
    205 
    206 line 33: "so the reader can judge if" -- this reads rather passive-aggressively. Perhaps better: "... to support our argument that..."
    207 
    208 line 41: "a dynamic partitioning mechanism" -- I couldn't tell what this meant
    209 
    210 p3. Presenting concept of a "stateful function" as a new language feature seems odd. In C, functions often have local state thanks to static local variables (or globals, indeed). Of course, that has several limitations. Can you perhaps present your contributions by enumerating these limitations? See also my suggestion below about a possible framing centred on a strawman.
    211 
    212 line 2: "an old idea that is new again" -- this is too oblique
    213 
    214 lines 2--15: I found this to be a word/concept soup. Stacks, closures, generators, stackless stackful, coroutine, symmetric, asymmetric, resume/suspend versus resume/resume... there needs to be a more gradual and structured way to introduce all this, and ideally one that minimises redundancy. Maybe present it as a series of "definitions" each with its own heading, e.g. "A closure is stackless if its local state has statically known fixed size"; "A generator simply means a stackless closure." And so on. Perhaps also strongly introduce the word "activate" as a direct contrast with resume and suspend. These are just a flavour of the sort of changes that might make this paragraph into something readable.
    215 
    216 Continuing the thought: I found it confusing that by these definitions, a stackful closure is not a stack, even though logically the stack *is* a kind of closure (it is a representation of the current thread's continuation).
    217 
    218 lines 24--27: without explaining what the boost functor types mean, I don't think the point here comes across.
    219 
    220 line 34: "semantically coupled" -- I wasn't sure what this meant
    221 
    222 p4: the point of Figure 1 (C) was not immediately clear. It seem to be showing how one might "compile down" Figure 1 (B). Or is that Figure 1 (A)?
    223 
    224 It's right that the incidental language features of the system are not front-and-centre, but I'd appreciate some brief glossing of non-C languages features as they appear. Examples are the square bracket notation, the pipe notation and the constructor syntax. These explanations could go in the caption of the figure which first uses them, perhaps. Overall I found the figure captions to be terse, and a missed opportunity to explain clearly what was going on.
    225 
    226 p5 line 23: "This restriction is removed..." -- give us some up-front summary of your contributions and the elements of the language design that will be talked about, so that this isn't an aside. This will reduce the "twisty passages" feeling that characterises much of the paper.
    227 
    228 line 40: "a killer asymmetric generator" -- this is stylistically odd, and the sentence about failures doesn't convincingly argue that C\/ will help with them. Have you any experience writing device drivers using C\/? Or any argument that the kinds of failures can be traced to the "stack-ripping" style that one is forced to use without coroutines? Also, a typo on line 41: "device drives". And saying "Windows/Linux" is sloppy... what does the cited paper actually say?
    229 
    230 p6 lines 13--23: this paragraph is difficult to understand. It seems to be talking about a control-flow pattern roughly equivalent to tail recursion. What is the high-level point, other than that this is possible?
    231 
    232 line 34: "which they call coroutines" -- a better way to make this point is presumably that the C++20 proposal only provides a specialised kind of coroutine, namely generators, despite its use of the more general word.
    233 
    234 line 47: "... due to dynamic stack allocation, execution..." -- this sentence doesn't scan. I suggest adding "and for" in the relevant places where currently there are only commas.
    235 
    236 p8 / Figure 5 (B) -- the GNU C extension of unary "&&" needs to be explained. The whole figure needs a better explanation, in fact.
    237 
    238 p9, lines 1--10: I wasn't sure this stepping-through really added much value. What are the truly important points to note about this code?
    239 
    240 p10: similarly, lines 3--27 again are somewhere between tedious and confusing. I'm sure the motivation and details of "starter semantics" can both be stated much more pithily.
    241 
    242 line 32: "a self-resume does not overwrite the last resumer" -- is this a hack or a defensible principled decision?
    243 
    244 p11: "a common source of errors" -- among beginners or among production code? Presumably the former.
    245 
    246 line 23: "with builtin and library" -- not sure what this means
    247 
    248 lines 31--36: these can be much briefer. The only important point here seems to be that coroutines cannot be copied.
    249 
    250 p12: line 1: what is a "task"? Does it matter?
    251 
    252 line 7: calling it "heap stack" seems to be a recipe for confusion. "Stack-and-heap" might be better, and contrast with "stack-and-VLS" perhaps. When "VLS" is glossed, suggest actually expanding its initials: say "length" not "size".
    253 
    254 line 21: are you saying "cooperative threading" is the same as "non-preemptive scheduling", or that one is a special case (kind) of the other? Both are defensible, but be clear.
    255 
    256 line 27: "mutual exclusion and synchronization" -- the former is a kind of the latter, so I suggest "and other forms of synchronization".
    257 
    258 line 30: "can either be a stackless or stackful" -- stray "a", but also, this seems to be switching from generic/background terminology to C\/-specific terminology.
    259 
    260 An expositional idea occurs: start the paper with a strawman naive/limited realisation of coroutines -- say, Simon Tatham's popular "Coroutines in C" web page -- and identify point by point what the limitations are and how C\/ overcomes them. Currently the presentation is often flat (lacking motivating contrasts) and backwards (stating solutions before problems). The foregoing approach might fix both of these.
    261 
    262 page 13: line 23: it seems a distraction to mention the Python feature here.
    263 
    264 p14 line 5: it seems odd to describe these as "stateless" just because they lack shared mutable state. It means the code itself is even more stateful. Maybe the "stack ripping" argument could usefully be given here.
    265 
    266 line 16: "too restrictive" -- would be good to have a reference to justify this, or at least give a sense of what the state-of-the-art performance in transactional memory systems is (both software and hardware)
    267 
    268 line 22: "simulate monitors" -- what about just *implementing* monitors? isn't that what these systems do? or is the point more about refining them somehow into something more specialised?
    269 
    270 p15: sections 4.1 and 4.2 seem adrift and misplaced. Split them into basic parts (which go earlier) and more advanced parts (e.g. barging, which can be explained later).
    271 
    272 line 31: "acquire/release" -- misses an opportunity to contrast the monitor's "enter/exit" abstraction with the less structured acquire/release of locks.
    273 
    274 p16 line 12: the "implicit" versus "explicit" point is unclear. Is it perhaps about the contract between an opt-in *discipline* and a language-enforced *guarantee*?
    275 
    276 line 28: no need to spend ages dithering about which one is default and which one is the explicit qualifier. Tell us what you decided, briefly justify it, and move on.
    277 
    278 p17: Figure 11: since the main point seems to be to highlight bulk acquire, include a comment which identifies the line where this is happening.
    279 
    280 line 2: "impossible to statically..." -- or dynamically. Doing it dynamically would be perfectly acceptable (locking is a dynamic operation after all)
    281 
    282 "guarantees acquisition order is consistent" -- assuming it's done in a single bulk acquire.
    283 
    284 p18: section 5.3: the text here is a mess. The explanations of "internal" versus "external" scheduling are unclear, and "signals as hints" is not explained. "... can cause thread starvation" -- means including a while loop, or not doing so? "There are three signalling mechanisms.." but the text does not follow that by telling us what they are. My own scribbled attempt at unpicking the internal/external thing: "threads already in the monitor, albeit waiting, have priority over those trying to enter".
    285 
    286 p19: line 3: "empty condition" -- explain that condition variables don't store anything. So being "empty" means that the queue of waiting threads (threads waiting to be signalled that the condition has become true) is empty.
    287 
    288 line 6: "... can be transformed into external scheduling..." -- OK, but give some motivation.
    289 
    290 p20: line 6: "mechnaism"
    291 
    292 lines 16--20: this is dense and can probably only be made clear with an example
    293 
    294 p21 line 21: clarify that nested monitor deadlock was describe earlier (in 5.2). (Is the repetition necessary?)
    295 
    296 line 27: "locks, and by extension monitors" -- this is true but the "by extension" argument is faulty. It is perfectly possible to use locks as a primitive and build a compositional mechanism out of them, e.g. transactions.
    297 
    298 p22 line 2: should say "restructured"
    299 
    300 line 33: "Implementing a fast subset check..." -- make clear that the following section explains how to do this. Restructuring the sections themselves could do this, or noting in the text.
    301 
    302 p23: line 3: "dynamic member adding, eg, JavaScript" -- needs to say "as permitted in JavaScript", and "dynamically adding members" is stylistically better
    303 
    304 p23: line 18: "urgent stack" -- back-reference to where this was explained before
    305 
    306 p24 line 7: I did not understand what was more "direct" about "direct communication". Also, what is a "passive monitor" -- just a monitor, given that monitors are passive by design?
    307 
    308 line 14 / section 5.9: this table was useful and it (or something like it) could be used much earlier on to set the structure of the rest of the paper. The explanation at present is too brief, e.g. I did not really understand the point about cases 7 and 8.
    309 
    310 p25 line 2: instead of casually dropping in a terse explanation for the newly introduced term "virtual processor", introduce it properly. Presumably the point is to give a less ambiguous meaning to "thread" by reserving it only for C\/'s green threads.
    311 
    312 Table 1: what does "No / Yes" mean?
    313 
    314 p26 line 15: "transforms user threads into fibres" -- a reference is needed to explain what "fibres" means... guessing it's in the sense of Adya et al.
    315 
    316 line 20: "Microsoft runtime" -- means Windows?
    317 
    318 lines 21--26: don't say "interrupt" to mean "signal", especially not without clear introduction. You can use "POSIX signal" to disambiguate from condition variables' "signal".
    319 
    320 p27 line 3: "frequency is usually long" -- that's a "time period" or "interval", not a frequency
    321 
    322 line 5: the lengthy quotation is not really necessary; just paraphrase the first sentence and move on.
    323 
    324 line 20: "to verify the implementation" -- I don't think that means what is intended
    325 
    326 Tables in section 7 -- too many significant figures. How many overall runs are described? What is N in each case?
    327 
    328 p29 line 2: "to eliminate this cost" -- arguably confusing since nowadays on commodity CPUs most of the benefits of inlining are not to do with call overheads, but from later optimizations enabled as a consequence of the inlining
    329 
    330 line 41: "a hierarchy" -- are they a hierarchy? If so, this could be explained earlier. Also, to say these make up "an integrated set... of control-flow features" verges on the tautologous.
    331 
    332 p30 line 15: "a common case being web servers and XaaS" -- that's two cases
    333 
    334 
    335 Reviewing: 3
    336 
    337 Comments to the Author
    338 # Cforall review
    339 
    340 Overall, I quite enjoyed reading the paper. Cforall has some very interesting ideas. I did have some suggestions that I think would be helpful before final publication. I also left notes on various parts of the paper that I find confusing when reading, in hopes that it may be useful to you.
    341 
    342 ## Summary
    343 
    344 * Expand on the motivations for including both generator and coroutines, vs trying to build one atop the other
    345 * Expand on the motivations for having both symmetric and asymmetric coroutines
    346 * Comparison to async-await model adopted by other languages
    347     * C#, JS
    348     * Rust and its async/await model
    349 * Consider performance comparisons against node.js and Rust frameworks
    350 * Discuss performance of monitors vs finer-grained memory models and atomic operations found in other languages
    351 * Why both internal/external scheduling for synchronization?
    352 
    353 ## Generator/coroutines
    354 
    355 In general, this section was clear, but I thought it would be useful to provide a somewhat deeper look into why Cforall opted for the particular combination of features that it offers. I see three main differences from other languages:
    356 
    357 * Generators are not exposed as a "function" that returns a generator object, but rather as a kind of struct, with communication happening via mutable state instead of "return values". That is, the generator must be manually resumed and (if I understood) it is expected to store values that can then later be read (perhaps via methods), instead of having a `yield <Expr>` statement that yields up a value explicitly.
    358 * Both "symmetric" and "asymmetric" generators are supported, instead of only asymmetric.
    359 * Coroutines (multi-frame generators) are an explicit mechanism.
    360 
    361 In most other languages, coroutines are rather built by layering single-frame generators atop one another (e.g., using a mechanism like async-await), and symmetric coroutines are basically not supported. I'd like to see a bit more justification for Cforall including all the above mechanisms -- it seemed like symmetric coroutines were a useful building block for some of the user-space threading and custom scheduler mechanisms that were briefly mentioned later in the paper.
    362 
    363 In the discussion of coroutines, I would have expected a bit more of a comparison to the async-await mechanism offered in other languages. Certainly the semantics of async-await in JavaScript implies significantly more overhead (because each async fn is a distinct heap object). [Rust's approach avoids this overhead][zc], however, and might be worthy of a comparison (see the Performance section).
    364 
    365 ## Locks and threading
    366 
    367 ### Comparison to atomics overlooks performance
    368 
    369 There are several sections in the paper that compare against atomics -- for example, on page 15, the paper shows a simple monitor that encapsulates an integer and compares that to C++ atomics. Later, the paper compares the simplicity of monitors against the `volatile` quantifier from Java. The conclusion in section 8 also revisits this point.
    370 
    371 While I agree that monitors are simpler, they are obviously also significantly different from a performance perspective -- the paper doesn't seem to address this at all. It's plausible that (e.g.) the `Aint` monitor type described in the paper can be compiled and mapped to the specialized instructions offered by hardware, but I didn't see any mention of how this would be done. There is also no mention of the more nuanced memory ordering relations offered by C++11 and how one might achieve similar performance characteristics in Cforall (perhaps the answer is that one simply doesn't need to; I think that's defensible, but worth stating explicitly).
    372 
    373 ### Justification for external scheduling feels lacking
    374 
    375 Cforall includes both internal and external scheduling; I found the explanation for the external scheduling mechanism to be lacking in justification. Why include both mechanisms when most languages seem to make do with only internal scheduling? It would be useful to show some scenarios where external scheduling is truly more powerful.
    376 
    377 I would have liked to see some more discussion of external scheduling and how it  interacts with software engineering best practices. It seems somewhat similar to AOP in certain regards. It seems to add a bit of "extra semantics" to monitor methods, in that any method may now also become a kind of synchronization point. The "open-ended" nature of this feels like it could easily lead to subtle bugs, particularly when code refactoring occurs (which may e.g. split an existing method into two). This seems particularly true if external scheduling can occur across compilation units -- the paper suggested that this is true, but I wasn't entirely clear.
    378 
    379 I would have also appreciated a few more details on how external scheduling is implemented. It seems to me that there must be some sort of "hooks" on mutex methods so that they can detect whether some other function is waiting on them and awaken those blocked threads. I'm not sure how such hooks are inserted, particularly across compilation units. The material in Section 5.6 didn't quite clarify the matter for me. For example, it left me somewhat confused about whether the `f` and `g` functions declared were meant to be local to a translation unit, or shared with other unit.
    380 
    381 ### Presentation of monitors is somewhat confusing
    382 
    383 I found myself confused fairly often in the section on monitors. I'm just going to leave some notes here on places that I got confused in how that it could be useful to you as feedback on writing that might want to be clarified.
    384 
    385 To start, I did not realize that the `mutex_opt` notation was a keyword, I thought it was a type annotation. I think this could be called out more explicitly.
    386 
    387 Later, in section 5.2, the paper discusses `nomutex` annotations, which initially threw me, as they had not been introduced (now I realize that this paragraph is there to justify why there is no such keyword). The paragraph might be rearranged to make that clearer, perhaps by leading with the choice that Cforall made.
    388 
    389 On page 17, the paper states that "acquiring multiple monitors is safe from deadlock", but this could be stated a bit more precisely: acquiring multiple monitors in a bulk-acquire is safe from deadlock (deadlock can still result from nested acquires).
    390 
    391 On page 18, the paper states that wait states do not have to be enclosed in loops, as there is no concern of barging. This seems true but there are also other reasons to use loops (e.g., if there are multiple reasons to notify on the same condition). Thus the statement initially surprised me, as barging is only one of many reasons that I typically employ loops around waits.
    392 
    393 I did not understand the diagram in Figure 12 for some time. Initially, I thought that it was generic to all monitors, and I could not understand the state space. It was only later that I realized it was specific to your example. Updating the caption from "Monitor scheduling" to "Monitor scheduling in the example from Fig 13" might have helped me quite a bit.
    394 
    395 I spent quite some time reading the boy/girl dating example (\*) and I admit I found it somewhat confusing. For example, I couldn't tell whether there were supposed to be many "girl" threads executing at once, or if there was only supposed to be one girl and one boy thread executing in a loop. Are the girl/boy threads supposed to invoke the girl/boy methods or vice versa? Surely there is some easier way to set this up? I believe that when reading the paper I convinced myself of how it was supposed to be working, but I'm writing this review some days later, and I find myself confused all over again and not able to easily figure it out.
    396 
    397 (\*) as an aside, I would consider modifying the example to some other form of matching, like customers and support personnel.
    398 
    399 ## Related work
    400 
    401 The paper offered a number of comparisons to Go, C#, Scala, and so forth, but seems to have overlooked another recent language, Rust. In many ways, Rust seems to be closest in philosophy to Cforall, so it seems like an odd omission. I already mentioned above that Rust is in the process of shipping [async-await syntax][aa], which is definitely an alternative to the generator/coroutine approach in Cforall (though one with clear pros/cons).
    402 
    403 ## Performance
    404 
    405 In the performance section in particular, you might consider comparing against some of the Rust web servers and threading systems. For example, actix is top of the [single query TechEmpower Framework benchmarks][sq], and tokio is near the top of the [plaintext benchmarks][pt] (hyper, the top, is more of an HTTP framework, though it is also written in Rust). It would seem worth trying to compare their "context switching" costs as well -- I believe both actix and tokio have a notion of threads that could be readily compared.
    406 
    407 Another addition that might be worth considering is to compare against node.js promises, although I think the comparison to process creation is not as clean.
    408 
    409 That said, I think that the performance comparison is not a big focus of the paper, so it may not be necessary to add anything to it.
    410 
    411 ## Authorship of this review
    412 
    413 I'm going to sign this review. This review was authored by Nicholas D. Matsakis. In the interest of full disclosure, I'm heavily involved in the Rust project, although I don't think that influenced this review in particular. Feel free to reach out to me for clarifying questions.
    414 
    415 ## Links
    416 
    417 [aa]: https://blog.rust-lang.org/2019/09/30/Async-await-hits-beta.html
    418 [zc]: https://aturon.github.io/blog/2016/08/11/futures/
    419 [sq]: https://www.techempower.com/benchmarks/#section=data-r18&hw=ph&test=db
    420 [pt]: https://www.techempower.com/benchmarks/#section=data-r18&hw=ph&test=plaintext
    421 
    422 
    423 
    424 Subject: Re: manuscript SPE-19-0219
    425 To: "Peter A. Buhr" <pabuhr@uwaterloo.ca>
    426 From: Richard Jones <R.E.Jones@kent.ac.uk>
    427 Date: Tue, 12 Nov 2019 22:43:55 +0000
    428 
    429 Dear Dr Buhr
    430 
    431 You should have received a decision letter on this today. I am sorry that this
    432 has taken so long. Unfortunately SP&E receives a lot of submissions and getting
    433 reviewers is a perennial problem.
    434 
    435 Regards
    436 Richard
    437 
    438 Peter A. Buhr wrote on 11/11/2019 13:10:
    439 >     26-Jun-2019
    440 >     Your manuscript entitled "Advanced Control-flow and Concurrency in Cforall"
    441 >     has been received by Software: Practice and Experience. It will be given
    442 >     full consideration for publication in the journal.
    443 >
    444 > Hi, it has been over 4 months since submission of our manuscript SPE-19-0219
    445 > with no response.
    446 >
    447 > Currently, I am refereeing a paper for IEEE that already cites our prior SP&E
    448 > paper and the Master's thesis forming the bases of the SP&E paper under
    449 > review. Hence our work is apropos and we want to get it disseminates as soon as
    450 > possible.
    451 >
    452 > [3] A. Moss, R. Schluntz, and P. A. Buhr, "Cforall: Adding modern programming
    453 >      language features to C," Software - Practice and Experience, vol. 48,
    454 >      no. 12, pp. 2111-2146, 2018.
    455 >
    456 > [4] T. Delisle, "Concurrency in C for all," Master's thesis, University of
    457 >      Waterloo, 2018.  [Online].  Available:
    458 >      https://uwspace.uwaterloo.ca/bitstream/handle/10012/12888
    459 
    460 
    461 
    462 Date: Mon, 13 Jan 2020 05:33:15 +0000
    463 From: Richard Jones <onbehalfof@manuscriptcentral.com>
    464 Reply-To: R.E.Jones@kent.ac.uk
    465 To: pabuhr@uwaterloo.ca
    466 Subject: Revision reminder - SPE-19-0219
    467 
    468 13-Jan-2020
    469 Dear Dr Buhr
    470 SPE-19-0219
    471 
    472 This is a reminder that your opportunity to revise and re-submit your
    473 manuscript will expire 28 days from now. If you require more time please
    474 contact me directly and I may grant an extension to this deadline, otherwise
    475 the option to submit a revision online, will not be available.
    476 
    477 I look forward to receiving your revision.
    478 
    479 Sincerely,
    480 
    481 Prof. Richard Jones
    482 Editor, Software: Practice and Experience
    483 https://mc.manuscriptcentral.com/spe
    484 
    485 
    486 
    487 Date: Wed, 5 Feb 2020 04:22:18 +0000
    488 From: Aaron Thomas <onbehalfof@manuscriptcentral.com>
    489 Reply-To: speoffice@wiley.com
    490 To: tdelisle@uwaterloo.ca, pabuhr@uwaterloo.ca
    491 Subject: SPE-19-0219.R1 successfully submitted
    492 
    493 04-Feb-2020
    494 
    495 Dear Dr Buhr,
    496 
    497 Your manuscript entitled "Advanced Control-flow and Concurrency in Cforall" has
    498 been successfully submitted online and is presently being given full
    499 consideration for publication in Software: Practice and Experience.
    500 
    501 Your manuscript number is SPE-19-0219.R1.  Please mention this number in all
    502 future correspondence regarding this submission.
    503 
    504 You can view the status of your manuscript at any time by checking your Author
    505 Center after logging into https://mc.manuscriptcentral.com/spe.  If you have
    506 difficulty using this site, please click the 'Get Help Now' link at the top
    507 right corner of the site.
    508 
    509 Thank you for submitting your manuscript to Software: Practice and Experience.
    510 
    511 Sincerely,
    512 Software: Practice and Experience Editorial Office
    513 
  • doc/proposals/vtable.md

    r6a490b2 rb7d6a36  
    237237default is provided or not, the second syntax can be used to pick a
    238238parameter on instantiation.
    239 
    240 ### Extension: Object Access
    241 This requires that the resolution scope (see below) is at the type level or
    242 has explicate points with names. These are the tables and table names used
    243 here.
    244 
    245 The system already knows where to find the virtual table and the object. If
    246 the tables have particular identities, or on the user side names, then it is
    247 meaningful to check if a binding virtual table is the same* as another. The
    248 main use of this is virtual table declarations also give the type they bind
    249 and if a binding table matches a known table then the underlyind object in the
    250 trait object must be of that type.
    251 
    252 * By identity, by value would work and in some senses be more flexiable. But
    253   it would be slower and refering to further away functions would be harder.
    254 
    255 This gives one of the main new features of the hierarchical use of virtual
    256 tables (see below); the ability to recover the underlying object. Or a pointer
    257 of the approprate type it which both reflects the implementation and gives a
    258 convenent way to encode the boolean/conditional aspect of the operation which
    259 is that a different virtual table might be in use.
    260 
    261 There are two general ways to reperent this; a cast or a field access. The
    262 cast is traditional and would definitely fit if a single pointer repersents
    263 a trait object with the virtual table as part of the object. However for a
    264 double pointer field access might be more approprate. By this system though
    265 it is not the type that is used as the identifier but the virtual table. If
    266 there is one table per type than it becomes equivilant again. Otherwise the
    267 table has to be used as the identifier and the type is just a result of that
    268 which seems important for syntax.
    269239
    270240Hierarchy
     
    590560be used in only some of the declarations.
    591561
    592     trait combiner fee = {summation_instance, sum};
     562    trait combiner fee = (summation_instance, sum);
    593563    trait combiner foe = summation_instance;
    594564
  • doc/theses/thierry_delisle_PhD/.gitignore

    r6a490b2 rb7d6a36  
    88
    99comp_II/build/
    10 comp_II/img/*.fig.bak
    1110comp_II/comp_II.pdf
    1211comp_II/comp_II.ps
  • doc/theses/thierry_delisle_PhD/comp_II/Makefile

    r6a490b2 rb7d6a36  
    22
    33Build = build
    4 Figures = img
     4Figures = figures
    55Macros = ../../../LaTeXmacros
    66TeXLIB = .:${Macros}:${Build}:../../../bibliography:
     
    1818
    1919FIGURES = ${addsuffix .tex, \
    20         base \
    21         empty \
    22         emptybit \
    23         emptytree \
    24         emptytls \
    25         resize \
    26         system \
    2720}
    2821
     
    7770        mkdir -p ${Build}
    7871
    79 %.tex : img/%.fig ${Build}
     72%.tex : %.fig ${Build}
    8073        fig2dev -L eepic $< > ${Build}/$@
    8174
    82 %.ps : img/%.fig | ${Build}
     75%.ps : %.fig | ${Build}
    8376        fig2dev -L ps $< > ${Build}/$@
    8477
    85 %.pstex : img/%.fig | ${Build}
     78%.pstex : %.fig | ${Build}
    8679        fig2dev -L pstex $< > ${Build}/$@
    8780        fig2dev -L pstex_t -p ${Build}/$@ $< > ${Build}/$@_t
  • doc/theses/thierry_delisle_PhD/comp_II/comp_II.tex

    r6a490b2 rb7d6a36  
    1 \documentclass[11pt]{article}
    2 \usepackage{fullpage}
     1\documentclass[11pt,fullpage]{article}
    32\usepackage[T1]{fontenc}
    43\usepackage[utf8]{inputenc}
     4\usepackage{listings}           % for code listings
    55\usepackage{xspace}
    66\usepackage{xcolor}
    77\usepackage{graphicx}
    8 \usepackage{epic,eepic}
    9 \usepackage{listings}                   % for code listings
     8\usepackage[hidelinks]{hyperref}
    109\usepackage{glossaries}
    1110\usepackage{textcomp}
     11\usepackage{geometry}
     12
    1213% cfa macros used in the document
    1314\input{common}
    14 
    15 \setlist{topsep=6pt,parsep=0pt}         % global reduce spacing between points
    16 \newcommand{\uC}{$\mu$\CC}
    17 \usepackage[hidelinks]{hyperref}
    18 \setlength{\abovecaptionskip}{5pt plus 3pt minus 2pt}
    19 \lstMakeShortInline$%                   % single-character for \lstinline
    20 %\usepackage[margin=1in]{geometry}
    21 %\usepackage{float}
    22 
    2315\input{glossary}
    2416
     
    3224
    3325\author{
    34         \huge Thierry Delisle \vspace*{5pt} \\
    35         \Large \texttt{tdelisle@uwaterloo.ca} \vspace*{5pt} \\
     26        \huge Thierry Delisle \\
     27        \Large \vspace*{0.1in} \texttt{tdelisle@uwaterloo.ca} \\
    3628        \Large Cheriton School of Computer Science \\
    3729        \Large University of Waterloo
     
    4739
    4840\newcommand{\cit}{\textsuperscript{[Citation Needed]}\xspace}
    49 \newcommand{\TODO}{{\large\bf\color{red} TODO: }\xspace}
     41\newcommand{\TODO}{~\newline{\large\bf\color{red} TODO :}\xspace}
    5042
    5143% ===============================================================================
     
    5951\section{Introduction}
    6052\subsection{\CFA and the \CFA concurrency package}
    61 \CFA\cite{Moss18} is a modern, polymorphic, non-object-oriented, concurrent, backwards-compatible extension of the C programming language.
    62 It aims to add high-productivity features while maintaining the predictable performance of C.
    63 As such, concurrency in \CFA\cite{Delisle19} aims to offer simple and safe high-level tools while still allowing performant code.
    64 \CFA concurrent code is written in the synchronous programming paradigm but uses \glspl{uthrd} in order to achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programing.
    65 As such, the \CFA \newterm{scheduler} is a preemptive user-level scheduler that maps \glspl{uthrd} onto \glspl{kthrd}.
     53\CFA\cit is a modern, polymorphic, non-object-oriented, backwards-compatible extension of the C programming language. It aims to add high productivity features while maintaning the predictible performance of C. As such concurrency in \CFA\cit aims to offer simple and safe high-level tools while still allowing performant code. Concurrent code is written in the syncrhonous programming paradigm but uses \glspl{uthrd} in order to achieve the simplicity and maintainability of synchronous programming without sacrificing the efficiency of asynchronous programing. As such the \CFA scheduler is a user-level scheduler that maps \glspl{uthrd} onto \glspl{kthrd}.
    6654
    67 \newterm{Scheduling} occurs when execution switches from one thread to another, where the second thread is implicitly chosen by the scheduler.
    68 This scheduling is an indirect handoff, as opposed to generators and coroutines which explicitly switch to the next generator and coroutine respectively.
    69 The cost of switching between two threads for an indirect handoff has two components:
    70 \begin{enumerate}
    71 \item
    72 the cost of actually context-switching, \ie changing the relevant registers to move execution from one thread to the other,
    73 \item
    74 and the cost of scheduling, \ie deciding which thread to run next among all the threads ready to run.
    75 \end{enumerate}
    76 The first cost is generally constant and fixed\footnote{Affecting the constant context-switch cost is whether it is done in one step, after the scheduling, or in two steps, context-switching to a fixed third-thread before scheduling.}, while the scheduling cost can vary based on the system state.
    77 Adding multiple \glspl{kthrd} does not fundamentally change the scheduler semantics or requirements, it simply adds new correctness requirements, \ie \newterm{linearizability}\footnote{Meaning however fast the CPU threads run, there is an equivalent sequential order that gives the same result.}, and a new dimension to performance: scalability, where scheduling cost now also depends on contention.
    78 
    79 The more threads switch, the more the administration cost of scheduling becomes noticeable.
    80 It is therefore important to build a scheduler with the lowest possible cost and latency.
    81 Another important consideration is \newterm{fairness}.
    82 In principle, scheduling should give the illusion of perfect fairness, where all threads ready to run are running \emph{simultaneously}.
    83 While the illusion of simultaneity is easier to reason about, it can break down if the scheduler allows too much unfairness.
    84 Therefore, the scheduler should offer as much fairness as needed to guarantee eventual progress, but use unfairness to help performance.
    85 In practice, threads must wait in turn but there can be advantages to unfair scheduling, similar to the the express cash-register at a grocery store.
    86 
    87 The goal of this research is to produce a scheduler that is simple for programmers to understand and offers good performance.
    88 Here understandability does not refer to the API but to how much scheduling concerns programmers need to take into account when writing a \CFA concurrent package.
    89 Therefore, the main goal of this proposal is :
     55The goal of this research is to produce a scheduler that is simple to use and offers acceptable performance in all cases. Here simplicity does not refer to the API but to how much scheduling concerns programmers need to take into account when using the \CFA concurrency package. Therefore, the main goal of this proposal is as follows :
    9056\begin{quote}
    91 The \CFA scheduler should be \emph{viable} for \emph{any} workload.
     57The \CFA scheduler should be \emph{viable} for any workload.
    9258\end{quote}
    9359
    94 For a general purpose scheduler, it is impossible to produce an optimal algorithm as it would require knowledge of the future behaviour of threads.
    95 As such, scheduling performance is generally either defined by the best case scenario, \ie a workload to which the scheduler is tailored, or the worst case scenario, \ie the scheduler behaves no worst than \emph{X}.
    96 For this proposal, the performance is evaluated using the second approach to allow \CFA programmers to rely on scheduling performance.
    97 Because there is no optimal scheduler, ultimately \CFA may allow programmers to write their own scheduler; but that is not the subject of this proposal, which considers only the default scheduler.
    98 As such, it is important that only programmers with exceptionally high performance requirements should need to write their own scheduler and replace the scheduler in this proposal.
    99 
    100 To achieve the \CFA scheduling goal includes:
    101 \begin{enumerate}
    102 \item
    103 producing a scheduling strategy with sufficient fairness guarantees,
    104 \item
    105 creating an abstraction layer over the operating system to handle kernel-threads spinning unnecessarily,
    106 \item
    107 scheduling blocking I/O operations,
    108 \item
    109 and writing sufficient library tools to allow developers to indirectly use the scheduler, either through tuning knobs or replacing the default scheduler.
    110 \end{enumerate}
     60This objective includes producing a scheduling strategy with minimal fairness guarantees, creating an abstraction layer over the operating system to handle kernel-threads spinning unnecessarily and hide blocking I/O operations and, writing sufficient library tools to allow developpers to properly use the scheduler.
    11161
    11262% ===============================================================================
    11363% ===============================================================================
    11464
    115 \section{\CFA Scheduling}
    116 To schedule user-level threads across all workloads, the scheduler has a number of requirements:
    117 
    118 \paragraph{Correctness} As with any other concurrent data structure or algorithm, the correctness requirement is paramount.
    119 The scheduler cannot allow threads to be dropped from the ready queue, \ie scheduled but never run, or be executed multiple times when only being scheduled once.
    120 Since \CFA concurrency has no spurious wakeup, this definition of correctness also means the scheduler should have no spurious wakeup.
    121 The \CFA scheduler must be correct.
    122 
    123 \paragraph{Performance} The performance of a scheduler can generally be measured in terms of scheduling cost, scalability and latency.
    124 \newterm{Scheduling cost} is the cost to switch from one thread to another, as mentioned above.
    125 For simple applications, where a single kernel thread does most of the scheduling, it is generally the dominating cost.
    126 \newterm{Scalability} is the cost of adding multiple kernel threads because it increases the time for context switching because of contention by multiple threads accessing shared resources, \eg the ready queue.
    127 Finally, \newterm{tail latency} is service delay and relates to thread fairness.
    128 Specifically, latency measures how long a thread waits to run once scheduled and is evaluated in the worst case.
    129 The \CFA scheduler should offer good performance for all three metrics.
    130 
    131 \paragraph{Fairness} Like performance, this requirement has several aspect : eventual progress, predictability and performance reliability.
    132 \newterm{Eventual progress} guarantees every scheduled thread is eventually run, \ie prevent starvation.
    133 As a hard requirement, the \CFA scheduler must guarantee eventual progress, otherwise the above mentioned illusion of simultaneous execution is broken and the scheduler becomes much more complex to reason about.
    134 \newterm{Predictability} and \newterm{reliability} means similar workloads achieve similar performance and programmer execution intuition is respected.
    135 For example, a thread that yields aggressively should not run more often then other tasks.
    136 While this is intuitive, it does not hold true for many work-stealing or feedback based schedulers.
    137 The \CFA scheduler must guarantee eventual progress and should be predictable and offer reliable performance.
    138 
    139 \paragraph{Efficiency} Finally, efficient usage of CPU resources is also an important requirement and is discussed in depth towards the end of the proposal.
    140 \newterm{Efficiency} means avoiding using CPU cycles when there are no threads to run, and conversely, use all CPUs available when the workload can benefit from it.
    141 Balancing these two states is where the complexity lies.
    142 The \CFA scheduler should be efficient with respect to the underlying (shared) computer.
    143 
    144 \bigskip To achieve these requirements, I can reject two broad types of scheduling strategies : feedback-based and priority schedulers.
     65\section{Scheduling for \CFA}
     66While the \CFA concurrency package doesn't have any particular scheduling needs beyond those of any concurrency package which uses \glspl{uthrd}, it is important that the default \CFA Scheduler be viable in general. Indeed, since the \CFA Scheduler does not target any specific workloads, it is unrealistic to demand that it use the best scheduling strategy in all cases. However, it should offer a viable ``out of the box'' solution for most scheduling problems so that programmers can quickly write performant concurrent without needed to think about which scheduling strategy is more appropriate for their workload. Indeed, only programmers with exceptionnaly high performance requirements should need to write their own scheduler. More specifically, two broad types of schedulering strategies should be avoided in order to avoid penalizing certain types of workloads : feedback-based and priority schedulers.
    14567
    14668\subsection{Feedback-Based Schedulers}
    147 Many operating systems use schedulers based on feedback in some form, \eg measuring how much CPU a particular thread has used\footnote{Different metrics can be measured but it is not relevant to the discussion.} and schedule threads based on this metric.
    148 These strategies are sensible for operating systems but rely on two assumptions for the workload:
     69Many operating systems use schedulers based on feadback loops in some form, they measure how much CPU a particular thread has used\footnote{Different metrics can be used to here but it is not relevant to the discussion.} and schedule threads based on this metric. These strategies are sensible for operating systems but rely on two assumptions on the workload :
    14970
    15071\begin{enumerate}
    151         \item Threads live long enough for useful feedback information to be to gathered.
    152         \item Threads belong to multiple users so fairness across threads is insufficient.
     72        \item Threads live long enough to be scheduled many times.
     73        \item Cooperation among all threads is not simply infeasible, it is a security risk.
    15374\end{enumerate}
    15475
    155 While these two assumptions generally hold for operating systems, they may not for user-level threading.
    156 Since \CFA has the explicit goal of allowing many smaller threads, this can naturally lead to threads with much shorter lifetimes that are only scheduled a few times.
    157 Scheduling strategies based on feedback cannot be effective in these cases because there is no opportunity to measure the metrics that underlie the algorithm.
    158 Note, the problem of \newterm{feedback convergence} (reacting too slowly to scheduling events) is not specific to short lived threads but can also occur with threads that show drastic changes in scheduling, \eg threads running for long periods of time and then suddenly blocking and unblocking quickly and repeatedly.
     76While these two assumptions generally hold for operating systems, they may not for \CFA programs. In fact, \CFA uses \glspl{uthrd} which have the explicit goal of reducing the cost of threading primitives to allow many smaller threads. This can naturally lead to have threads with much shorter lifetime and only being scheduled a few times. Scheduling strategies based on feadback loops cannot be effective in these cases because they will not have the opportunity to measure the metrics that underlay the algorithm. Note that the problem of feadback loop convergence (reacting too slowly to scheduling events) is not specific to short lived threads but can also occur with threads that show drastic changes in scheduling event, e.g., threads running for long periods of time and then suddenly blocking and unblocking quickly and repeatedly.
    15977
    160 In the context of operating systems, these concerns can be overshadowed by a more pressing concern : security.
    161 When multiple users are involved, it is possible some users are malevolent and try to exploit the scheduling strategy to achieve some nefarious objective.
    162 Security concerns mean more precise and robust fairness metrics must be used to guarantee fairness across processes created by users as well as threads created within a process.
    163 In the case of the \CFA scheduler, every thread runs in the same user space and is controlled by the same user.
    164 Fairness across users is therefore a given and it is then possible to safely ignore the possibility that threads are malevolent.
    165 This approach allows for a much simpler fairness metric and in this proposal \emph{fairness} is defined as: when multiple threads are cycling through the system, the total ordering of threads being scheduled, \ie pushed onto the ready-queue, should not differ much from the total ordering of threads being executed, \ie popped from the ready-queue.
     78In the context of operating systems, these concerns can be overshadowed by a more pressing concern : security. When multiple users are involved, it is possible that some users are malevolent and try to exploit the scheduling strategy in order to achieve some nefarious objective. Security concerns mean that more precise and robust fairness metrics must be used. In the case of the \CFA scheduler, every thread runs in the same user-space and are controlled from the same user. It is then possible to safely ignore the possibility that threads are malevolent and assume that all threads will ignore or cooperate with each other. This allows for a much simpler fairness metric and in this proposal ``fairness'' will be considered as equal opportunities to run once scheduled.
    16679
    167 Since feedback is not necessarily feasible within the lifetime of all threads and a simple fairness metric can be used, the scheduling strategy proposed for the \CFA runtime does not use per-threads feedback.
    168 Feedback in general is not rejected for secondary concerns like idle sleep for kernel threads, but no feedback is used to decide which thread to run next.
     80Since feadback is not necessarily feasible within the lifetime of all threads and a simple fairness metric can be used, the scheduling strategy proposed for the \CFA runtime does not user per-threads feedback. Feedback loops in general are not rejected for secondary concerns like idle sleep, but no feedback loop is used to decide which thread to run next.
    16981
    17082\subsection{Priority Schedulers}
    171 Another broad category of schedulers are priority schedulers.
    172 In these scheduling strategies, threads have priorities and the runtime schedules the threads with the highest priority before scheduling other threads.
    173 Threads with equal priority are scheduled using a secondary strategy, often something simple like round-robin or FIFO.
    174 A consequence of priority is that, as long as there is a thread with a higher priority that desires to run, a thread with a lower priority does not run.
    175 This possible starving of threads can dramatically increase programming complexity since starving threads and priority inversion (prioritizing a lower priority thread) can both lead to serious problems.
     83Another broad category of schedulers are priority schedulers. In these scheduling strategies threads have priorities and the runtime schedules the threads with the highest priority before scheduling other threads. Threads with equal priority are scheduled using a secondary strategy, often something simple like round-robin or FIFO. These priority mean that, as long as there is a thread with a higher priority that desires to run, a thread with a lower priority will not run. This possible starving of threads can dramatically increase programming complexity since starving threads and priority inversion (prioritising a lower priority thread) can both lead to serious problems, leaving programmers between a rock and a hard place.
    17684
    177 An important observation is that threads do not need to have explicit priorities for problems to occur.
    178 Indeed, any system with multiple ready-queues that attempts to exhaust one queue before accessing the other queues, essentially provide implicit priority, which can encounter starvation problems.
    179 For example, a popular scheduling strategy that suffers from implicit priorities is work stealing.
    180 \newterm{Work stealing} is generally presented as follows:
     85An important observation to make is that threads do not need to have explicit priorities for problems to be possible. Indeed, any system with multiple ready-queues and attempts to exhaust one queue before accessing the other queues, could encounter starvation problems. A popular scheduling strategy that suffers from implicit priorities is work-stealing. Work-stealing is generally presented as follows :
     86
     87\begin{itemize}
     88        \item Each processor has a list of threads.
     89\end{itemize}
    18190\begin{enumerate}
    182         \item Each processor has a list of ready threads.
    183         \item Each processor runs threads from its ready queue first.
    184         \item If a processor's ready queue is empty, attempt to run threads from some other processor's ready queue.
     91        \item Run threads from ``this'' processor's list.
     92        \item If ``this'' processor's list is empty, run threads from some other processor's list.
    18593\end{enumerate}
    18694
    187 In a loaded system\footnote{A \newterm{loaded system} is a system where threads are being run at the same rate they are scheduled.}, if a thread does not yield, block, or preempt for an extended period of time, threads on the same processor's list starve if no other processors exhaust their list.
     95In a loaded system\footnote{A loaded system is a system where threads are being run at the same rate they are scheduled}, if a thread does not yield or block for an extended period of time, threads on the same processor list will starve if no other processors can exhaust their list.
    18896
    189 Since priorities can be complex for programmers to incorporate into their execution intuition, the scheduling strategy proposed for the \CFA runtime does not use a strategy with either implicit or explicit thread priorities.
     97Since priorities can be complex to handle for programmers, the scheduling strategy proposed for the \CFA runtime does not use a strategy with either implicit or explicit thread priorities.
    19098
    191 \subsection{Schedulers without feedback or priorities}
    192 This proposal conjectures that it is possible to construct a default scheduler for the \CFA runtime that offers good scalability and a simple fairness guarantee that is easy for programmers to reason about.
    193 The simplest fairness guarantee is FIFO ordering, \ie threads scheduled first run first.
    194 However, enforcing FIFO ordering generally conflicts with scalability across multiple processors because of the additional synchronization.
    195 Thankfully, strict FIFO is not needed for sufficient fairness.
    196 Since concurrency is inherently non-deterministic, fairness concerns in scheduling are only a problem if a thread repeatedly runs before another thread can run.
    197 Some relaxation is possible because non-determinism means programmers already handle ordering problems to produce correct code and hence rely on weak guarantees, \eg that a specific thread will \emph{eventually} run.
    198 Since some reordering does not break correctness, the FIFO fairness guarantee can be significantly relaxed without causing problems.
    199 For this proposal, the target guarantee is that the \CFA scheduler provides \emph{probable} FIFO ordering, which allows reordering but makes it improbable that threads are reordered far from their position in total ordering.
     99\subsection{Schedulers without feedback or priorities}
     100I claim that the ideal default scheduler for the \CFA runtime is a scheduler that offers good scalability and a simple fairness guarantee that is easy for programmers to reason about. The simplest fairness guarantee is to guarantee FIFO ordering, i.e., threads scheduled first will run first. However, enforcing FIFO ordering generally conflicts with scalability across multiple processors because of the additional synchronization. Thankfully, strict FIFO is not needed for scheduling. Since concurrency is inherently non-deterministic, fairness concerns in scheduling are only a problem if a thread repeatedly runs before another thread can run\footnote{This is because the non-determinism means that programmers must already handle ordering problems in order to produce correct code and already must rely on weak guarantees, for example that a specific thread will \emph{eventually} run.}. This need for unfairness to persist before problems occur means that the FIFO fairness guarantee can be significantly relaxed without causing problems. For this proposal, the target guarantee is that the \CFA scheduler guarantees \emph{probable} FIFO ordering, which is defined as follows:
     101\begin{itemize}
     102        \item Given two threads $X$ and $Y$, the odds that thread $X$ runs $N$ times \emph{after} thread $Y$ is scheduled but \emph{before} it is run, decreases exponentially with regards to $N$.
     103\end{itemize}
    200104
    201 The \CFA scheduler fairness is defined as follows:
    202 \begin{itemize}
    203         \item Given two threads $X$ and $Y$, the odds that thread $X$ runs $N$ times \emph{after} thread $Y$ is scheduled but \emph{before} it is run, decreases exponentially with regard to $N$.
    204 \end{itemize}
    205 While this is not a bounded guarantee, the probability that unfairness persists for long periods of time decreases exponentially, making persisting unfairness virtually impossible.
     105While this is not a strong guarantee, the probability that problems persist for long period of times decreases exponentially, making persisting problems virtually impossible.
     106
     107\subsection{Real-Time}
     108While the objective of this proposed scheduler is similar to the objective of real-time scheduling, this proposal is not a proposal for real-time scheduler and as such makes no attempt to offer either soft or hard guarantees on scheduling delays.
    206109
    207110% ===============================================================================
    208111% ===============================================================================
    209 \section{Proposal Details}
     112\section{Proposal}
    210113
    211 \subsection{Central Ready Queue} \label{sec:queue}
    212 A central ready queue can be built from a FIFO queue, where user threads are pushed onto the queue when they are ready to run, and processors (kernel-threads acting as virtual processors) pop the user threads from the queue and execute them.
    213 Alistarh \etal~\cite{alistarh2018relaxed} show it is straightforward to build a relaxed FIFO list that is fast and scalable for loaded or overloaded systems.
    214 The described queue uses an array of underlying strictly FIFO queues as shown in Figure~\ref{fig:base}\footnote{For this section, the number of underlying queues is assumed to be constant.
    215 Section~\ref{sec:resize} discusses resizing the array.}.
    216 Pushing new data is done by selecting one of these underlying queues at random, recording a timestamp for the operation and pushing to the selected queue.
    217 Popping is done by selecting two queues at random and popping from the queue with the oldest timestamp.
    218 A higher number of underlying queues leads to less contention on each queue and therefore better performance.
    219 In a loaded system, it is highly likely the queues are non-empty, \ie several tasks are on each of the underlying queues.
    220 This means that selecting a queue at random to pop from is highly likely to yield a queue with available items.
    221 In Figure~\ref{fig:base}, ignoring the ellipsis, the chances of getting an empty queue is 2/7 per pick, meaning two random picks yield an item approximately 9 times out of 10.
     114\subsection{Ready-Queue}
     115Using Trevor's paper\cit as a basis, it is simple to build a relaxed FIFO list that is fast and scalable for loaded or overloaded systems. The described queue uses an array of underlying strictly FIFO queues. Pushing new data is done by selecting one of these underlying queues at random, recording a timestamp for the push and pushing to the selected queue. Popping is done by selecting two queues at random and popping from the queue for which the head has the oldest timestamp. In loaded or overloaded systems, it is highly likely that the queues are far from empty, i.e., several tasks are on each of the underlying queues. This means that selecting a queue at random to pop from is highly likely to yield a queue that is not empty.
    222116
    223 \begin{figure}
    224         \begin{center}
    225                 \input{base}
    226         \end{center}
    227         \caption{Relaxed FIFO list at the base of the scheduler: an array of strictly FIFO lists.
    228 The timestamp is in all nodes and cell arrays.}
    229         \label{fig:base}
    230 \end{figure}
     117When the ready queue is ``more empty'', i.e., several of the inner queues are empty, selecting a random queue for popping is less likely to yield a valid selection and more attempts need to be made, resulting in a performance degradation. In cases with few elements on the ready queue and few processors running, performance can be improved by adding information to help processors find which inner queues are used. Preliminary performance tests indicate that with few processors, a bitmask can be used to identify which inner queues are currently in use. This is especially effective in the single-thread case, where the bitmask will always be up-to-date. Furthermore, modern x86 CPUs have a BMI2 extension which allows using the bitmask with very little overhead over directly accessing the ready queue, offering decent performance even in cases with many empty inner queues. This technique does not solve the problem completely, it randomly attempts to find a block of 64 queues where at least one is used, instead of attempting to find a used queue. For systems with a large number of cores this does not completely solve the problem, but it is a fixed improvement. The size of the blocks is limited by the maximum size an atomic instruction can operate on, therefore atomic instructions on large words would increase the 64 queues per block limit.
    231118
    232 \begin{figure}
    233         \begin{center}
    234                 \input{empty}
    235         \end{center}
    236         \caption{``More empty'' state of the queue: the array contains many empty cells.}
    237         \label{fig:empty}
    238 \end{figure}
     119\TODO double check the next sentence
     120Preliminary results indicate that the bitmask approach with the BMI2 extension can lead to multi-threaded performance that is contention agnostic in the worst case.
     121This result suggests that the contention penalty and the increased performance from additional threads cancel each other exactly. This may indicate that a relatively small reduction in contention may tip the performance into positive scaling even for the worst case. It can be noted that in cases of high-contention, the use of the bitmask to find queues that are not empty is much less reliable. Indeed, if contention on the bitmask is high, it means it probably changes significantly between the moment it is read and the actual operation on the queues it represents. Furthermore, the objective of the bitmask is to avoid probing queues that are empty. Therefore, in cases where the bitmask is highly contended, it may be preferable to probe queues randomly, either until contention decreases or until a prior prefetch of the bitmask completes. Ideally, the scheduler would be able to observe that the bitmask is highly contended and adjust its behaviour appropriately. However, I am not aware of any mechanism to query whether a cacheline is in cache or to run other instructions until a cacheline is fetched without blocking on the cacheline. As such, an alternative that may have a similar impact would be for each thread to have its own bitmask, which would be updated both after each scheduler action and after a certain number of failed probes. If the bitmask has little contention, the local bitmask will be mostly up-to-date and several threads won't need to contend as much on the global bitmask. If the bitmask has significant contention, then fetching it becomes more expensive and threads may as well probe randomly. This solution claims that probing randomly or against an out-of-date bitmask is equivalent.
    239122
    240 When the ready queue is \emph{more empty}, \ie several of the queues are empty, selecting a random queue for popping is less likely to yield a successful selection and more attempts are needed, resulting in a performance degradation.
    241 Figure~\ref{fig:empty} shows an example with fewer elements, where the chances of getting an empty queue is 5/7 per pick, meaning two random picks yield an item only half the time.
    242 Since the ready queue is not empty, the pop operation \emph{must} find an element before returning and therefore must retry.
    243 Note, the popping kernel thread has no work to do, but CPU cycles are wasted both for available user and kernel threads during the pop operation as the popping thread is using a CPU.
    244 Overall performance is therefore influenced by the contention on the underlying queues and pop performance is influenced by the item density.
     123In cases where this is insufficient, another approach is to use a hierarchical data structure. Creating a tree of nodes to reduce contention has been shown to work in similar cases\cit(SNZI: Scalable NonZero Indicators)\footnote{This particular paper seems to be patented in the US. How does that affect \CFA? Can I use it in my work?}. However, this approach may lead to poorer single-threaded performance due to the inherent pointer chasing; as such, it was not considered as the first approach but as a fallback in case the bitmask approach does not satisfy the performance goals.
    245124
    246 This leads to four performance cases for the centralized ready-queue, as depicted in Table~\ref{tab:perfcases}.
    247 The number of processors (many or few) refers to the number of kernel threads \emph{actively} attempting to pop user threads from the queues, not the total number of kernel threads.
    248 The number of threads (many or few) refers to the number of user threads ready to be run.
    249 Many threads means they outnumber processors significantly and most underlying queues have items, few threads mean there are barely more threads than processors and most underlying queues are empty.
    250 Cases with fewer threads than processors are discussed in Section~\ref{sec:sleep}.
    251 
    252 \begin{table}
    253         \begin{center}
    254                 \begin{tabular}{|r|l|l|}
    255                         \cline{2-3}
    256                         \multicolumn{1}{r|}{} & \multicolumn{1}{c|}{Many Processors} & \multicolumn{1}{c|}{Few Processors} \\
    257                         \hline
    258                         Many Threads & A: good performance & B: good performance \\
    259                         \hline
    260                         Few Threads  & C: worst performance & D: poor performance \\
    261                         \hline
    262                 \end{tabular}
    263         \end{center}
    264         \caption{Expected performance of the relaxed FIFO list in different cases.}
    265         \label{tab:perfcases}
    266 \end{table}
    267 
    268 Performance can be improved in case~D (Table~\ref{tab:perfcases}) by adding information to help processors find which inner queues are used.
    269 This addition aims to avoid the cost of retrying the pop operation but does not affect contention on the underlying queues and can incur some management cost for both push and pop operations.
    270 The approach used to encode this information can vary in density and be either global or local.
    271 \newterm{Density} means the information is either packed in a few cachelines or spread across several cachelines, and \newterm{local information} means each thread uses an independent copy instead of a single global, \ie common, source of information.
    272 
    273 For example, Figure~\ref{fig:emptybit} shows a dense bitmask to identify which inner queues are currently in use.
    274 This approach means processors can often find user threads in constant time, regardless of how many underlying queues are empty.
    275 Furthermore, modern x86 CPUs have extended bit manipulation instructions (BMI2) that allow using the bitmask with very little overhead compared to the randomized selection approach for a filled ready queue, offering good performance even in cases with many empty inner queues.
    276 However, this technique has its limits: with a single word\footnote{Word refers here to however many bits can be written atomically.} bitmask, the total number of underlying queues in the ready queue is limited to the number of bits in the word.
    277 With a multi-word bitmask, this maximum limit can be increased arbitrarily, but it is not possible to check if the queue is empty by reading the bitmask atomically.
    278 
    279 Finally, a dense bitmap, either single or multi-word, causes additional problems in case C (Table 1), because many processors are continuously scanning the bitmask to find the few available threads.
    280 This increased contention on the bitmask(s) reduces performance because of cache misses after updates and the bitmask is updated more frequently by the scanning processors racing to read and/or update that information.
    281 This increased update frequency means the information in the bitmask is more often stale before a processor can use it to find an item, \ie mask read says there are available user threads but none on queue.
    282 
    283 \begin{figure}
    284         \begin{center}
    285                 {\resizebox{0.8\textwidth}{!}{\input{emptybit}}}
    286         \end{center}
    287         \caption{``More empty'' queue with added bitmask to indicate which array cells have items.}
    288         \label{fig:emptybit}
    289 \end{figure}
    290 
    291 Figure~\ref{fig:emptytree} shows another approach using a hierarchical tree data-structure to reduce contention and has been shown to work in similar cases~\cite{ellen2007snzi}\footnote{This particular paper seems to be patented in the US.
    292 How does that affect \CFA? Can I use it in my work?}.
    293 However, this approach may lead to poorer performance in case~B (Table~\ref{tab:perfcases}) due to the inherent pointer chasing cost and already low contention cost in that case.
    294 
    295 \begin{figure}
    296         \begin{center}
    297                 {\resizebox{0.8\textwidth}{!}{\input{emptytree}}}
    298         \end{center}
    299         \caption{``More empty'' queue with added binary search tree indicate which array cells have items.}
    300         \label{fig:emptytree}
    301 \end{figure}
    302 
    303 Finally, a third approach is to use dense information, similar to the bitmap, but have each thread keep its own independent copy of it.
    304 While this approach can offer good scalability \emph{and} low latency, the liveliness of the information can become a problem.
    305 In the simple cases, local copies of which underlying queues are empty can become stale and end-up not being useful for the pop operation.
    306 A more serious problem is that reliable information is necessary for some parts of this algorithm to be correct.
    307 As mentioned in this section, processors must know \emph{reliably} whether the list is empty or not to decide if they can return \texttt{NULL} or if they must keep looking during a pop operation.
    308 Section~\ref{sec:sleep} discusses another case where reliable information is required for the algorithm to be correct.
    309 
    310 \begin{figure}
    311         \begin{center}
    312                 \input{emptytls}
    313         \end{center}
    314         \caption{``More empty'' queue with added per processor bitmask to indicate which array cells have items.}
    315         \label{fig:emptytls}
    316 \end{figure}
    317 
    318 There is a fundamental tradeoff among these approach.
    319 Dense global information about empty underlying queues helps zero-contention cases at the cost of high-contention case.
    320 Sparse global information helps high-contention cases but increases latency in zero-contention-cases, to read and ``aggregate'' the information\footnote{Hierarchical structures, \eg binary search tree, effectively aggregate information but follow pointer chains, learning information at each node.
    321 Similarly, other sparse schemes need to read multiple cachelines to acquire all the information needed.}.
    322 Finally, dense local information has both the advantages of low latency in zero-contention cases and scalability in high-contention cases, however the information can become stale making it difficult to use to ensure correctness.
    323 The fact that these solutions have these fundamental limits suggests to me a better solution that attempts to combine these properties in interesting ways.
    324 Also, the lock discussed in Section~\ref{sec:resize} allows for solutions that adapt to the number of processors, which could also prove useful.
     125Part of this performance relies on contention being low when there are few threads on the readyqueue. However, this can be assumed reliably if the system handles putting idle processors to sleep, which is addressed in section \ref{sleep}.
    325126
    326127\paragraph{Objectives and Existing Work}
     128How much scalability is actually needed is highly debatable; libfibre\cit has compared favorably to other schedulers in webserver tests\cit and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask. As such, the single atomic instruction on a shared cacheline may be sufficiently performant.
    327129
    328 How much scalability is actually needed is highly debatable.
    329 \emph{libfibre}\cite{libfibre} has compared favorably to other schedulers in webserver tests\cite{karstenuser} and uses a single atomic counter in its scheduling algorithm similarly to the proposed bitmask.
    330 As such, the single atomic instruction on a shared cacheline may be sufficiently performant.
     130I have built a prototype of this ready-queue (including the bitmask and BMI2 usage, but not the sharded bitmask) and ran performance experiments on it but it is difficult to compare this prototype to a thread scheduler as the prototype is used as a data-queue. I have also integrated this prototype into the \CFA runtime, but have not yet created performance experiments to compare results. I believe that the bitmask approach is currently one of the larger risks of the proposal, early tests lead me to believe it may work but it is not clear that the contention problem can be overcome. The worst-case scenario is a case where the number of processors and the number of ready threads are similar, yet scheduling events are very frequent. Fewer threads should lead to the Idle Sleep mechanism reducing contention while having many threads ready leads to optimal performance. It is difficult to evaluate the likelihood of this worst-case scenario in real workloads. I believe frequent scheduling events suggest a more ``bursty'' workload where new work is finely divided among many threads which race to completion. This type of workload would only see a peak of contention close to the end of the work, but no sustained contention. Very fine-grained pipelines are less ``bursty'', these may lead to more sustained contention. However, they could also easily benefit from a direct hand-off strategy which would circumvent the problem entirely.
    331131
    332 I have built a prototype of this ready queue in the shape of a data queue, \ie nodes on the queue are structures with a single int representing a thread and intrusive data fields.
    333 Using this prototype I ran preliminary performance experiments that confirm the expected performance in Table~\ref{tab:perfcases}.
    334 However, these experiments only offer a hint at the actual performance of the scheduler since threads form more complex operations than simple integer nodes, \eg threads are not independent of each other, when a thread blocks some other thread must intervene to wake it.
     132\subsection{Dynamic Resizing}
     133The \CFA runtime system currently handles dynamically adding and removing processors from clusters at any time. Since this is part of the existing design, the proposed scheduler must also support this behaviour. However, dynamically resizing the clusters is considered a rare event associated with setup, teardown and major configuration changes. This assumption is made both in the design of the proposed scheduler as well as in the original design of the \CFA runtime system. As such, the proposed scheduler must honor the correctness of this behaviour but does not have any performance objectives with regards to resizing a cluster. How long adding or removing processors take and how much this disrupts the performance of other threads is considered a secondary concern since it should be amortized over long periods of time. This description effectively matches with the description of a Reader-Writer lock, infrequent but invasive updates among frequent (mostly) read operations. In the case of the Ready-Queue described above, read operations are operations that push or pop from the ready-queue but do not invalidate any references to the ready queue data structures. Writes on the other hand would add or remove inner queues, invalidating references to the array of inner queues in the process. Therefore, the current proposed approach to this problem is to add a per-cluster Reader Writer lock around the ready queue to prevent restructuring of the ready-queue data structure while threads are being pushed or popped.
    335134
    336 I have also integrated this prototype into the \CFA runtime, but have not yet created performance experiments to compare results, as creating one-to-one comparisons between the prototype and the \CFA runtime will be complex.
    337 
    338 \subsection{Dynamic Resizing} \label{sec:resize}
    339 
    340 \begin{figure}
    341         \begin{center}
    342                 \input{system}
    343         \end{center}
    344         \caption{Global structure of the \CFA runtime system.}
    345         \label{fig:system}
    346 \end{figure}
    347 
    348 The \CFA runtime system groups processors together as \newterm{clusters}, as shown in Figure~\ref{fig:system}.
    349 Threads on a cluster are always scheduled on one of the processors of the cluster.
    350 Currently, the runtime handles dynamically adding and removing processors from clusters at any time.
    351 Since this is part of the existing design, the proposed scheduler must also support this behaviour.
    352 However, dynamically resizing a cluster is considered a rare event associated with setup, tear down and major configuration changes.
    353 This assumption is made both in the design of the proposed scheduler as well as in the original design of the \CFA runtime system.
    354 As such, the proposed scheduler must honour the correctness of this behaviour but does not have any performance objectives with regard to resizing a cluster.
    355 How long adding or removing processors take and how much this disrupts the performance of other threads is considered a secondary concern since it should be amortized over long period of times.
    356 However, as mentioned in Section~\ref{sec:queue}, contention on the underlying queues can have a direct impact on performance.
    357 The number of underlying queues must therefore be adjusted as the number of processors grows or shrinks.
    358 Since the underlying queues are stored in a dense array, changing the number of queues requires resizing the array and expanding the array requires moving it, which can introduce memory reclamation problems if not done correctly.
    359 
    360 \begin{figure}
    361         \begin{center}
    362                 \input{resize}
    363         \end{center}
    364         \caption{Copy of data structure shown in Figure~\ref{fig:base}.}
    365         \label{fig:base2}
    366 \end{figure}
    367 
    368 It is important to note how the array is used in this case.
    369 While the array cells are modified by every push and pop operation, the array itself, \ie the pointer that would change when resized, is only read during these operations.
    370 Therefore the use of this pointer can be described as frequent reads and infrequent writes.
    371 This description effectively matches with the description of a reader-writer lock, infrequent but invasive updates among frequent read operations.
    372 In the case of the ready queue described above, read operations are operations that push or pop from the ready queue but do not invalidate any references to the ready queue data structures.
    373 Writes on the other hand would add or remove inner queues, invalidating references to the array of inner queues in a process.
    374 Therefore, the current proposed approach to this problem is to add a per-cluster reader-writer lock around the ready queue to prevent restructuring of the ready-queue data-structure while threads are being pushed or popped.
    375 
    376 There are possible alternatives to the reader-writer lock solution.
    377 This problem is effectively a memory reclamation problem and as such there is a large body of research on the subject\cite{michael2004hazard, brown2015reclaiming}.
    377 However, the reader-writer lock solution is simple and can be leveraged to solve other problems (\eg processor ordering and memory reclamation of threads), which makes it an attractive solution.
     135There are possible alternatives to the Reader Writer lock solution. This problem is effectively a memory reclamation problem and as such there is a large body of research on the subject. However, the RWlock solution is simple and can be leveraged to solve other problems (e.g. processor ordering and memory reclamation of threads) which makes it an attractive solution.
    379136
    380137\paragraph{Objectives and Existing Work}
    381 The lock must offer scalability and performance on par with the actual ready-queue in order not to introduce a new bottleneck.
    382 I have already built a lock that fits the desired requirements and preliminary testing show scalability and performance that exceed the target.
    383 As such, I do not consider this lock to be a risk for this project.
     138The lock must offer scalability and performance on par with the actual ready-queue in order not to introduce a new bottleneck. I have already built a lock that fits the desired requirements and preliminary testing shows scalability and performance that exceed the target. As such, I do not consider this lock to be a risk for this project.
    384139
    385 \subsection{Idle Sleep} \label{sec:sleep}
     140\subsection{Idle Sleep} \label{sleep}
     141As mentioned above, idle sleep is the process of putting processors to sleep while they do not have threads to execute. In this context, processors are kernel-threads and sleeping refers to asking the kernel to block a thread. This can be achieved with either thread synchronization operations like pthread\_cond\_wait or using signal operations like sigsuspend.
    386142
    387 \newterm{Idle sleep} is the process of putting processors to sleep when they have no threads to execute.
    388 In this context, processors are kernel threads and sleeping refers to asking the kernel to block a thread.
    389 This operation can be achieved with either thread synchronization operations like $pthread_cond_wait$ or using signal operations like $sigsuspend$.
    390 The goal of putting idle processors to sleep is:
    391 \begin{enumerate}
    392 \item
    393 reduce contention on the ready queue, since the otherwise idle processors generally contend trying to pop items from the queue,
    394 \item
    395 give back unneeded CPU time associated with a process to other user processors executing on the computer,
    396 \item
    397 and reduce energy consumption in cases where more idle kernel-threads translate to idle CPUs, which can cycle down.
    398 \end{enumerate}
    399 Support for idle sleep broadly involves calling the operating system to block the kernel thread and handling the race between a blocking thread and the waking thread, and handling which kernel thread should sleep or wake up.
     143Support for idle sleep broadly involves calling the operating system to block the kernel thread but also handling the race between the sleeping and the waking up, and handling which kernel thread should sleep or wake-up.
    400144
    401 When a processor decides to sleep, there is a race that occurs between it signalling that is going to sleep (so other processors can find sleeping processors) and actually blocking the kernel thread.
    402 This operation is equivalent to the classic problem of missing signals when using condition variables: the ``sleepy'' processor indicates its intention to block but has not yet gone to sleep when another processor attempts to wake it up.
    403 The waking-up operation sees the blocked process and signals it, but the blocking process is racing to sleep so the signal is missed.
    405 In cases where kernel threads are managed as processors on the current cluster, losing signals is not necessarily critical, because at least some processors on the cluster are awake and may check for more processors eventually.
    405 Individual processors always finish scheduling user threads before looking for new work, which means that the last processor to go to sleep cannot miss threads scheduled from inside the cluster (if they do, that demonstrates the ready queue is not linearizable).
    406 However, this guarantee does not hold if threads are scheduled from outside the cluster, either due to an external event like timers and I/O, or due to a user (or kernel) thread migrating from a different cluster.
    407 In this case, missed signals can lead to the cluster deadlocking\footnote{Clusters should only deadlock in cases where a \CFA programmer \emph{actually} writes \CFA code that leads to a deadlock.}.
    408 Therefore, it is important that the scheduling of threads include a mechanism where signals \emph{cannot} be missed.
    409 For performance reasons, it can be advantageous to have a secondary mechanism that allows signals to be missed in cases where it cannot lead to a deadlock.
    410 To be safe, this process must include a ``handshake'' where it is guaranteed that either~: the sleeping processor notices that a user thread is scheduled after the sleeping processor signalled its intent to block or code scheduling threads sees the intent to sleep before scheduling and be able to wake-up the processor.
    411 This matter is complicated by the fact that pthreads and Linux offer few tools to implement this solution and no guarantee of ordering of threads waking up for most of these tools.
     145When a processor decides to sleep, there is a race that occurs between it signalling that it will go to sleep (so other processors can find sleeping processors) and actually blocking the kernel thread. This is equivalent to the classic problem of missing signals when using condition variables, the ``sleepy'' processor indicates that it will sleep but has not yet gone to sleep, if another processor attempts to wake it up, the waking-up operation may claim nothing needs to be done and the signal will have been missed. In cases where threads are scheduled from processors on the current cluster, losing signals is not necessarily critical, because at least some processors on the cluster are awake. Individual processors always finish scheduling threads before looking for new work, which means that the last processor to go to sleep cannot miss threads scheduled from inside the cluster (if they do, that demonstrates the ready-queue is not linearizable). However, this guarantee does not hold if threads are scheduled from outside the cluster, either due to an external event like timers and I/O, or due to a thread migrating from a different cluster. In this case, missed signals can lead to the cluster deadlocking where it should not\footnote{Clusters ``should'' never deadlock, but for this proposal, cases where \CFA users \emph{actually} wrote \CFA code that leads to a deadlock it is considered as a deadlock that ``should'' happen. }. Therefore, it is important that the scheduling of threads include a mechanism where signals \emph{cannot} be missed. For performance reasons, it can be advantageous to have a secondary mechanism that allows signals to be missed in cases where it cannot lead to a deadlock. 
To be safe, this process must include a ``handshake'' where it is guaranteed that either~: the sleepy processor notices that a thread was scheduled after it signalled its intent to block or code scheduling threads will see the intent to sleep before scheduling and be able to wake-up the processor. This matter is complicated by the fact that pthread offers few tools to implement this solution and offers no guarantee of ordering of threads waking up for most of these tools.
    412146
    413 Another important issue is avoiding kernel threads sleeping and waking frequently because there is a significant operating-system cost.
    414 This scenario happens when a program oscillates between high and low activity, needing most and then less processors.
    415 A possible partial solution is to order the processors so that the one which most recently went to sleep is woken up.
    416 This allows other sleeping processors to reach deeper sleep state (when these are available) while keeping ``hot'' processors warmer.
    417 Note that while this generally means organizing the processors in a stack, I believe that the unique index provided in my reader-writer lock can be reused to strictly order the waking processors, causing a mostly LIFO order.
    418 While a strict LIFO stack is probably better, the processor index could prove useful for other reasons, while still offering a sufficiently LIFO ordering.
     147Another issue is trying to avoid kernel sleeping and waking frequently. A possible partial solution is to order the processors so that the one which most recently went to sleep is woken up. This allows other sleeping processors to reach deeper sleep state (when these are available) while keeping ``hot'' processors warmer. Note that while this generally means organising the processors in a stack, I believe that the unique index provided by the ReaderWriter lock can be reused to strictly order the waking order of processors, causing a LIFO like waking order. While a strict LIFO stack is probably better, using the processor index could prove useful and offer a sufficiently LIFO ordering.
    419148
    420 A final important aspect of idle sleep is when should processors make the decision to sleep and when is it appropriate for sleeping processors to be woken up.
    421 Processors that are unnecessarily unblocked lead to unnecessary contention, CPU usage, and power consumption, while too many sleeping processors can lead to sub-optimal throughput.
    422 Furthermore, transitions from sleeping to awake and vice-versa also add unnecessary latency.
    423 There is already a wealth of research on the subject\cite{schillings1996engineering, wiki:thunderherd} and I may use an existing approach for the idle-sleep heuristic in this project, \eg\cite{karstenuser}.
     149Finally, another important aspect of Idle Sleep is when processors should make the decision to sleep and when it is appropriate for sleeping processors to be woken up. Processors that are unnecessarily awake lead to unnecessary contention and power consumption, while too many sleeping processors can lead to sub-optimal throughput. Furthermore, transitions from sleeping to awake and vice-versa also add unnecessary latency. There is already a wealth of research on the subject and I do not plan to implement a novel idea for the Idle Sleep heuristic in this project.
    424150
    425151\subsection{Asynchronous I/O}
    426 
    427 The final aspect of this proposal is asynchronous I/O.
    428 Without it, user threads that execute I/O operations block the underlying kernel thread, which leads to poor throughput.
    429 It is preferable to block the user thread performing the I/O and reuse the underlying kernel-thread to run other ready user threads.
    430 This approach requires intercepting user-thread calls to I/O operations, redirecting them to an asynchronous I/O interface, and handling the multiplexing/demultiplexing between the synchronous and asynchronous API.
    431 As such, there are three components needed to implemented support for asynchronous I/O:
    432 \begin{enumerate}
    433 \item
    434 an OS abstraction layer over the asynchronous interface,
    435 \item
    436 an event-engine to (de)multiplex the operations,
    437 \item
    438 and a synchronous interface for users to use.
    439 \end{enumerate}
    440 None of these components currently exist in \CFA and I will need to build all three for this project.
     152The final aspect of this proposal is asynchronous I/O. Without it, user threads that execute I/O operations will block the underlying kernel thread. This leads to poor throughput; it would be preferable to block the user-thread and reuse the underlying kernel-thread to run other ready threads. This requires intercepting the user-threads' calls to I/O operations, redirecting them to an asynchronous I/O interface and handling the multiplexing between the synchronous and asynchronous API. As such, these are the three components needed to implement support for asynchronous I/O : an OS abstraction layer over the asynchronous interface, an event-engine to (de)multiplex the operations and a synchronous interface for users to use. None of these components currently exist in \CFA and I will need to build all three for this project.
    441153
    442154\paragraph{OS Abstraction}
    443 One fundamental part for converting blocking I/O operations into non-blocking ones is having an underlying asynchronous I/O interface to direct the I/O operations.
    444 While there exists many different APIs for asynchronous I/O, it is not part of this proposal to create a novel API.
    445 It is sufficient to make one work in the complex context of the \CFA runtime.
    446 \uC uses the $select$\cite{select} as its interface, which handles ttys, pipes and sockets, but not disk.
    447 $select$ entails significant complexity and is being replaced in UNIX operating-systems, which make it a less interesting alternative.
    448 Another popular interface is $epoll$\cite{epoll}, which is supposed to be cheaper than $select$.
    450 However, $epoll$ also does not handle the file system and anecdotal evidence suggests it has problems with Linux pipes and $TTY$s.
    450 A popular cross-platform alternative is $libuv$\cite{libuv}, which offers asynchronous sockets and asynchronous file system operations (among other features).
    451 However, as a full-featured library it includes much more than I need and could conflict with other features of \CFA unless significant effort is made to merge them together.
    452 A very recent alternative that I am investigating is $io_uring$\cite{io_uring}.
    453 It claims to address some of the issues with $epoll$ and my early investigation suggests that the claim is accurate.
    454 $io_uring$ uses a much more general approach where system calls are registered to a queue and later executed by the kernel, rather than relying on system calls to return an error instead of blocking and subsequently waiting for changes on file descriptors.
    455 I believe this approach allows for fewer problems, \eg the manpage for $open$\cite{open} states:
    456 \begin{quote}
    457         Note that [the $O_NONBLOCK$ flag] has no effect for regular files and block devices;
    458         that is, I/O operations will (briefly) block when device activity is required, regardless of whether $O_NONBLOCK$ is set.
    459         Since $O_NONBLOCK$ semantics might eventually be implemented, applications should not depend upon blocking behavior when specifying this flag for regular files and block devices.
    460 \end{quote}
    461 This makes approaches based on $epoll$/$select$ less reliable since they may not work for every file descriptor.
    462 For this reason, I plan to use $io_uring$ as the OS abstraction for the \CFA runtime, unless further work shows problems I haven't encountered yet.
    463 However, only a small subset of the features are available in Ubuntu as of April 2020\cite{wiki:ubuntu-linux}, which will limit performance comparisons.
    464 I do not believe this will affect the comparison result.
     155One fundamental part of this is converting blocking I/O operations into non-blocking ones. This relies on having an underlying asynchronous I/O interface to which to direct the I/O operations. While there exist many different APIs for asynchronous I/O, it is not part of this proposal to create a novel API, simply to use an existing one that is sufficient. uC++ uses the \texttt{select} as its interface, which handles pipes and sockets. It entails significant complexity and has performance problems which make it a less interesting alternative. Another interface which is becoming popular recently\cit is \texttt{epoll}. However, epoll also does not handle the file system and seems to have problems with Linux pipes and \texttt{TTY}s\cit. A very recent alternative that must still be investigated is \texttt{io\_uring}. It claims to address some of the issues with \texttt{epoll} but is too recent to be confident that it does. Finally, a popular cross-platform alternative is \texttt{libuv}, which offers asynchronous sockets and asynchronous file system operations (among other features). However, as a full-featured library it includes much more than what is needed and could conflict with other features of \CFA unless significant efforts are made to merge them together.
    465156
    466 \paragraph{Event Engine}
    467 Laying on top of the asynchronous interface layer is the event engine.
    468 This engine is responsible for multiplexing (batching) the synchronous I/O requests into asynchronous I/O requests and demultiplexing the results to appropriate blocked user threads.
    469 This step can be straightforward for simple cases, but becomes quite complex when there are thousands of user threads performing both reads and writes, possibly on overlapping file descriptors.
    470 Decisions that need to be made include:
    471 \begin{enumerate}
    472 \item
    473 whether to poll from a separate kernel thread or a regularly scheduled user thread,
    474 \item
    475 what should be the ordering used when results satisfy many requests,
    476 \item
    477 how to handle threads waiting for multiple operations, etc.
    478 \end{enumerate}
     157\paragraph{Event-Engine}
     158Laying on top of the asynchronous interface layer is the event-engine. This engine is responsible for multiplexing (batching) the synchronous I/O requests into an asynchronous I/O request and demultiplexing the results onto appropriate blocked threads. This can be straightforward for the simple cases, but can become quite complex. Decisions that will need to be made include : whether to poll from a separate kernel thread or a regularly scheduled user thread, what should be the ordering used when results satisfy many requests, how to handle threads waiting for multiple operations, etc.
    479159
    480160\paragraph{Interface}
    481 Finally, for these non-blocking I/O components to be available, it is necessary to expose them through a synchronous interface because that is the \CFA concurrent programming style.
    482 The interface can be novel but it is preferable to match the existing POSIX interface when possible to be compatible with existing code.
    483 Matching allows C programs written using this interface to be transparently converted to \CFA with minimal effort.
    484 Where new functionality is needed, I will create a novel interface to fill gaps and provide advanced features.
     161Finally, for these components to be available, it is necessary to expose them through a synchronous interface. This can be a novel interface but it is preferable to attempt to intercept the existing POSIX interface in order to be compatible with existing code. This will allow C programs written using this interface to be transparently converted to \CFA with minimal effort. Where this is not applicable, a novel interface will be created to fill the gaps.
    485162
    486163
     
    488165% ===============================================================================
    489166\section{Discussion}
    490 I believe that runtime system and scheduling are still open topics.
    491 Many ``state of the art'' production frameworks still use single threaded event-loops because of performance considerations, \eg \cite{nginx-design}, and, to my knowledge, no widely available system language offers modern threading facilities.
    492 I believe the proposed work offers a novel runtime and scheduling package, where existing work only offers fragments that users must assemble themselves when possible.
     167
    493168
    494169% ===============================================================================
    495170% ===============================================================================
    496171\section{Timeline}
    497 \begin{center}
    498 \begin{tabular}{ | r @{--} l | p{4in} | }
    499 \hline May 2020 & October 2020   & Creation of the performance benchmark. \\
    500 \hline November 2020 & March 2021   & Completion of the implementation. \\
    501 \hline March 2021 & April 2021  & Final Performance experiments. \\
    502 \hline May 2021 & August 2021 & Thesis writing and defense. \\
    503 \hline
    504 \end{tabular}
    505 \end{center}
     172
     173
     174\cleardoublepage
    506175
    507176% B I B L I O G R A P H Y
    508177% -----------------------------
     178\addcontentsline{toc}{chapter}{Bibliography}
     179\bibliographystyle{plain}
     180\bibliography{pl,local}
    509181\cleardoublepage
    510182\phantomsection         % allows hyperref to link to the correct page
    511 \addcontentsline{toc}{section}{\refname}
    512 \bibliographystyle{plain}
    513 \bibliography{pl,local}
    514183
    515184% G L O S S A R Y
    516185% -----------------------------
     186\addcontentsline{toc}{chapter}{Glossary}
     187\printglossary
    517188\cleardoublepage
    518189\phantomsection         % allows hyperref to link to the correct page
    519 \addcontentsline{toc}{section}{Glossary}
    520 \printglossary
    521190
    522191\end{document}
  • doc/theses/thierry_delisle_PhD/comp_II/local.bib

    r6a490b2 rb7d6a36  
    7676
    7777@article{finkel1987dib,
    78   title={DIB-a distributed implementation of backtracking},
     78  title={DIB—a distributed implementation of backtracking},
    7979  author={Finkel, Raphael and Manber, Udi},
    8080  journal={ACM Transactions on Programming Languages and Systems (TOPLAS)},
     
    221221  organization={ACM}
    222222}
    223 
    224 % ===============================================================================
    225 % Algorithms
    226 % ===============================================================================
    227 @article{michael2004hazard,
    228   title={Hazard pointers: Safe memory reclamation for lock-free objects},
    229   author={Michael, Maged M},
    230   journal={IEEE Transactions on Parallel and Distributed Systems},
    231   volume={15},
    232   number={6},
    233   pages={491--504},
    234   year={2004},
    235   publisher={IEEE}
    236 }
    237 
    238 @inproceedings{brown2015reclaiming,
    239   title={Reclaiming memory for lock-free data structures: There has to be a better way},
    240   author={Brown, Trevor Alexander},
    241   booktitle={Proceedings of the 2015 ACM Symposium on Principles of Distributed Computing},
    242   pages={261--270},
    243   year={2015}
    244 }
    245 
    246 % Trevor's relaxed FIFO list
    247 @inproceedings{alistarh2018relaxed,
    248   title={Relaxed schedulers can efficiently parallelize iterative algorithms},
    249   author={Alistarh, Dan and Brown, Trevor and Kopinsky, Justin and Nadiradze, Giorgi},
    250   booktitle={Proceedings of the 2018 ACM Symposium on Principles of Distributed Computing},
    251   pages={377--386},
    252   year={2018}
    253 }
    254 
    255 % Scalable counters which only support is !0
    256 @inproceedings{ellen2007snzi,
    257   title={SNZI: Scalable nonzero indicators},
    258   author={Ellen, Faith and Lev, Yossi and Luchangco, Victor and Moir, Mark},
    259   booktitle={Proceedings of the twenty-sixth annual ACM symposium on Principles of distributed computing},
    260   pages={13--22},
    261   year={2007}
    262 }
    263 
    264 % ===============================================================================
    265 % Linux Man Pages
    266 % ===============================================================================
    267 @manual{open,
    268   key        = "open",
    269   title      = "open(2) Linux User's Manual",
    270   year       = "2020",
    271   month      = "February",
    272 }
    273 
    274 @manual{epoll,
    275   key        = "epoll",
    276   title      = "epoll(7) Linux User's Manual",
    277   year       = "2019",
    278   month      = "March",
    279 }
    280 
    281 @manual{select,
    282   key        = "select",
    283   title      = "select(7) Linux User's Manual",
    284   year       = "2019",
    285   month      = "March",
    286 }
    287 
    288 @misc{io_uring,
    289   title   = {Efficient IO with io\_uring},
    290   author  = {Axboe, Jens},
    291   year    = "2019",
    292   month   = "March",
    293   version = {0.4},
    294   howpublished = {\url{https://kernel.dk/io_uring.pdf}}
    295 }
    296 
    297 @misc{libuv,
    298   key   = "libuv",
    299   title = {libuv},
    300   howpublished = {\url{https://github.com/libuv/libuv}}
    301 }
    302 
    303 % ===============================================================================
    304 % MISC
    305 % ===============================================================================
    306 
    307 @misc{nginx-design,
    308   key   = "nginx",
    309   title={Inside {NGINX}: How We Designed for Performance \& Scale},
    310   howpublished= {\href{https://www.nginx.com/blog/inside-nginx-how-we-designed-for-performance-scale}
    311                 {https://\-www.nginx.com/\-blog/\-inside\--nginx\--how\--we\--designed\--for\--performance\--scale}},
    312 }
    313 
    314 @article{schillings1996engineering,
    315   title={Be engineering insights: Benaphores},
    316   author={Schillings, Benoit},
    317   journal={Be Newsletters},
    318   volume={1},
    319   number={26},
    320   year={1996}
    321 }
    322 
    323 @misc{wiki:thunderherd,
    324    author = "{Wikipedia contributors}",
    325    title = "Thundering herd problem --- {W}ikipedia{,} The Free Encyclopedia",
    326    year = "2020",
    327    howpublished = {\href{https://en.wikipedia.org/wiki/Thundering_herd_problem}
    328                   {https://\-en.wikipedia.org/\-wiki/\-Thundering\_herd\_problem}},
    329    note = "[Online; accessed 14-April-2020]"
    330 }
    331 
    332 @misc{wiki:ubuntu-linux,
    333    author = "{Wikipedia contributors}",
    334    title = "Ubuntu version history : Table of versions --- {W}ikipedia{,} The Free Encyclopedia",
    335    year = "2020",
    336    howpublished = {\href{https://en.wikipedia.org/wiki/Ubuntu_version_history\#Table_of_versions}
    337                   {https://\-en.wikipedia.org/\-wiki/\-Ubuntu\_version\_history\#Table\_of\_versions}},
    338    note = "[Online; accessed 15-April-2020]"
    339 }
  • doc/user/user.tex

    r6a490b2 rb7d6a36  
    1111%% Created On       : Wed Apr  6 14:53:29 2016
    1212%% Last Modified By : Peter A. Buhr
    13 %% Last Modified On : Fri Mar  6 13:34:52 2020
    14 %% Update Count     : 3924
     13%% Last Modified On : Sat Jul 13 18:36:18 2019
     14%% Update Count     : 3876
    1515%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    1616
     
    211211Even with all its problems, C continues to be popular because it allows writing software at virtually any level in a computer system without restriction.
    212212For system programming, where direct access to hardware, storage management, and real-time issues are a requirement, C is usually the only language of choice.
    213 The TIOBE index~\cite{TIOBE} for February 2020 ranks the top six most \emph{popular} programming languages as \Index*{Java} 17.4\%, C 16.8\%, Python 9.3\%, \Index*[C++]{\CC{}} 6.2\%, \Csharp 5.9\%, Visual Basic 5.9\% = 61.5\%, where the next 50 languages are less than 2\% each, with a long tail.
    214 The top 4 rankings over the past 35 years are:
     213The TIOBE index~\cite{TIOBE} for July 2018 ranks the top five most \emph{popular} programming languages as \Index*{Java} 16\%, C 14\%, \Index*[C++]{\CC{}} 7.5\%, Python 6\%, Visual Basic 4\% = 47.5\%, where the next 50 languages are less than 4\% each, with a long tail.
     214The top 3 rankings over the past 30 years are:
    215215\begin{center}
    216216\setlength{\tabcolsep}{10pt}
    217 \begin{tabular}{@{}rcccccccc@{}}
    218                 & 2020  & 2015  & 2010  & 2005  & 2000  & 1995  & 1990  & 1985  \\ \hline
    219 Java    & 1             & 2             & 1             & 2             & 3             & -             & -             & -             \\
    220 \R{C}   & \R{2} & \R{1} & \R{2} & \R{1} & \R{1} & \R{2} & \R{1} & \R{1} \\
    221 Python  & 3             & 7             & 6             & 6             & 22    & 21    & -             & -             \\
    222 \CC             & 4             & 4             & 4             & 3             & 2             & 1             & 2             & 12    \\
     217\begin{tabular}{@{}rccccccc@{}}
     218                & 2018  & 2013  & 2008  & 2003  & 1998  & 1993  & 1988  \\ \hline
     219Java    & 1             & 2             & 1             & 1             & 16    & -             & -             \\
     220\R{C}   & \R{2} & \R{1} & \R{2} & \R{2} & \R{1} & \R{1} & \R{1} \\
     221\CC             & 3             & 4             & 3             & 3             & 2             & 2             & 5             \\
    223222\end{tabular}
    224223\end{center}
     
    513512Keyword clashes are accommodated by syntactic transformations using the \CFA backquote escape-mechanism:
    514513\begin{cfa}
    515 int Ā®``Ā®otype = 3; §\C{// make keyword an identifier}§
    516 double Ā®``Ā®forall = 3.5;
     514int Ā®`Ā®otypeĀ®`Ā® = 3; §\C{// make keyword an identifier}§
     515double Ā®`Ā®forallĀ®`Ā® = 3.5;
    517516\end{cfa}
    518517
     
    525524// include file uses the CFA keyword "with".
    526525#if ! defined( with ) §\C{// nesting ?}§
    527 #define with Ā®``Ā®with §\C{// make keyword an identifier}§
     526#define with Ā®`Ā®withĀ®`Ā® §\C{// make keyword an identifier}§
    528527#define __CFA_BFD_H__
    529528#endif
    530 Ā§{\color{red}\#\textbf{include\_next} <bfdlink.h>}§ §\C{// must have internal check for multiple expansion}§
     529
     530Ā®#include_next <bfdlink.h> §\C{// must have internal check for multiple expansion}§
     531Ā®
    531532#if defined( with ) && defined( __CFA_BFD_H__ ) §\C{// reset only if set}§
    532533#undef with
     
    575576\section{Exponentiation Operator}
    576577
    577 C, \CC, and Java (and many other programming languages) have no exponentiation operator\index{exponentiation!operator}\index{operator!exponentiation}, \ie $x^y$, and instead use a routine, like \Indexc{pow(x,y)}, to perform the exponentiation operation.
    578 \CFA extends the basic operators with the exponentiation operator Ā©?Ā®\Ā®?Ā©\index{?\\?@Ā©?Ā®\Ā®?Ā©} and Ā©?\=?Ā©\index{?\\=?@©®\Ā®=?Ā©}, as in, Ā©x Ā®\Ā® yĀ© and Ā©x Ā®\Ā®= yĀ©, which means $x^y$ and $x \leftarrow x^y$.
     578C, \CC, and Java (and many other programming languages) have no exponentiation operator\index{exponentiation!operator}\index{operator!exponentiation}, \ie $x^y$, and instead use a routine, like \Indexc{pow}, to perform the exponentiation operation.
     579\CFA extends the basic operators with the exponentiation operator Ā©?\?Ā©\index{?\\?@Ā©?\?Ā©} and Ā©?\=?Ā©\index{?\\=?@Ā©\=?Ā©}, as in, Ā©x \ yĀ© and Ā©x \= yĀ©, which means $x^y$ and $x \leftarrow x^y$.
    579580The priority of the exponentiation operator is between the cast and multiplicative operators, so that ©w * (int)x \ (int)y * z© is parenthesized as ©((w * (((int)x) \ ((int)y))) * z)©.
    580581
    581 There are exponentiation operators for integral and floating types, including the builtin \Index{complex} types.
     582As for \Index{division}, there are exponentiation operators for integral and floating types, including the builtin \Index{complex} types.
    582583Integral exponentiation\index{exponentiation!unsigned integral} is performed with repeated multiplication\footnote{The multiplication computation is $O(\log y)$.} (or shifting if the exponent is 2).
    583 Overflow for a large exponent or negative exponent returns zero.
     584Overflow from large exponents or negative exponents return zero.
    584585Floating exponentiation\index{exponentiation!floating} is performed using \Index{logarithm}s\index{exponentiation!logarithm}, so the exponent cannot be negative.
    585586\begin{cfa}
     
    5885891 1 256 -64 125 ®0® 3273344365508751233 ®0® ®0® -0.015625 18.3791736799526 0.264715-1.1922i
    589590\end{cfa}
    590 Note, Ā©5 \ 32Ā© and Ā©5L \ 64Ā© overflow, and Ā©-4 \ -3Ā© is a fraction but stored in an integer so all three computations generate an integral zero.
     591Note, Ā©5 Ā®\Ā® 32Ā© and Ā©5L Ā®\Ā® 64Ā© overflow, and Ā©-4 Ā®\Ā® -3Ā© is a fraction but stored in an integer so all three computations generate an integral zero.
    591592Parenthesis are necessary for complex constants or the expression is parsed as ©1.0f+®(®2.0fi \ 3.0f®)®+2.0fi©.
    592593The exponentiation operator is available for all the basic types, but for user-defined types, only the integral-computation version is available.
     
    597598OT ?Ā®\Ā®?( OT ep, unsigned long int y );
    598599\end{cfa}
    599 The user type Ā©TĀ© must define multiplication, one (Ā©1Ā©), and Ā©*Ā©.
     600The user type Ā©TĀ© must define multiplication, one, Ā©1Ā©, and, Ā©*Ā©.
    600601
    601602
     
    625626
    626627
    627 %\section{\texorpdfstring{\protect\lstinline@case@ Clause}{case Clause}}
    628 \subsection{\texorpdfstring{\LstKeywordStyle{case} Clause}{case Clause}}
    629 
    630 C restricts the Ā©caseĀ© clause of a Ā©switchĀ© statement to a single value.
    631 For multiple Ā©caseĀ© clauses associated with the same statement, it is necessary to have multiple Ā©caseĀ© clauses rather than multiple values.
    632 Requiring a Ā©caseĀ© clause for each value does not seem to be in the spirit of brevity normally associated with C.
    633 Therefore, the Ā©caseĀ© clause is extended with a list of values, as in:
     628\subsection{Loop Control}
     629
     630The Ā©forĀ©/Ā©whileĀ©/Ā©do-whileĀ© loop-control allows empty or simplified ranges (see Figure~\ref{f:LoopControlExamples}).
     631\begin{itemize}
     632\item
     633An empty conditional implies Ā©1Ā©.
     634\item
     635The up-to range Ā©~Ā©\index{~@Ā©~Ā©} means exclusive range [M,N).
     636\item
     637The up-to range Ā©~=Ā©\index{~=@Ā©~=Ā©} means inclusive range [M,N].
     638\item
     639The down-to range Ā©-~Ā©\index{-~@Ā©-~Ā©} means exclusive range [N,M).
     640\item
     641The down-to range Ā©-~=Ā©\index{-~=@Ā©-~=Ā©} means inclusive range [N,M].
     642\item
     643Ā©@Ā© means put nothing in this field.
     644\item
     645Ā©0Ā© is the implicit start value;
     646\item
     647Ā©1Ā© is the implicit increment value.
     648\item
     649The up-to range uses Ā©+=Ā© for increment;
     650\item
     651The down-to range uses Ā©-=Ā© for decrement.
     652\item
     653The loop index is polymorphic in the type of the start value or comparison value when start is implicitly Ā©0Ā©.
     654\end{itemize}
     655
     656\begin{figure}
    634657\begin{cquote}
    635 \begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
    636 \multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{C}} \\
    637 \begin{cfa}
    638 switch ( i ) {
    639   case Ā®1, 3, 5Ā®:
    640         ...
    641   case Ā®2, 4, 6Ā®:
    642         ...
    643 }
     658\begin{tabular}{@{}l|l@{}}
     659\multicolumn{1}{c|}{loop control} & \multicolumn{1}{c}{output} \\
     660\hline
     661\begin{cfa}
     662sout | nlOff;
     663while Ā®()Ā® { sout | "empty"; break; } sout | nl;
     664do { sout | "empty"; break; } while Ā®()Ā®; sout | nl;
     665for Ā®()Ā® { sout | "empty"; break; } sout | nl;
     666for ( Ā®0Ā® ) { sout | "A"; } sout | "zero" | nl;
     667for ( Ā®1Ā® ) { sout | "A"; } sout | nl;
     668for ( Ā®10Ā® ) { sout | "A"; } sout | nl;
     669for ( Ā®1 ~= 10 ~ 2Ā® ) { sout | "B"; } sout | nl;
     670for ( Ā®10 -~= 1 ~ 2Ā® ) { sout | "C"; } sout | nl;
     671for ( Ā®0.5 ~ 5.5Ā® ) { sout | "D"; } sout | nl;
     672for ( Ā®5.5 -~ 0.5Ā® ) { sout | "E"; } sout | nl;
     673for ( Ā®i; 10Ā® ) { sout | i; } sout | nl;
     674for ( Ā®i; 1 ~= 10 ~ 2Ā® ) { sout | i; } sout | nl;
     675for ( Ā®i; 10 -~= 1 ~ 2Ā® ) { sout | i; } sout | nl;
     676for ( Ā®i; 0.5 ~ 5.5Ā® ) { sout | i; } sout | nl;
     677for ( Ā®i; 5.5 -~ 0.5Ā® ) { sout | i; } sout | nl;
     678for ( Ā®ui; 2u ~= 10u ~ 2uĀ® ) { sout | ui; } sout | nl;
     679for ( Ā®ui; 10u -~= 2u ~ 2uĀ® ) { sout | ui; } sout | nl;
     680enum { N = 10 };
     681for ( Ā®NĀ® ) { sout | "N"; } sout | nl;
     682for ( Ā®i; NĀ® ) { sout | i; } sout | nl;
     683for ( Ā®i; N -~ 0Ā® ) { sout | i; } sout | nl;
     684const int start = 3, comp = 10, inc = 2;
     685for ( Ā®i; start ~ comp ~ inc + 1Ā® ) { sout | i; } sout | nl;
     686for ( Ā®i; 1 ~ @Ā® ) { if ( i > 10 ) break;
     687        sout | i; } sout | nl;
     688for ( Ā®i; 10 -~ @Ā® ) { if ( i < 0 ) break;
     689        sout | i; } sout | nl;
     690for ( Ā®i; 2 ~ @ ~ 2Ā® ) { if ( i > 10 ) break;
     691        sout | i; } sout | nl;
     692for ( Ā®i; 2.1 ~ @ ~ @Ā® ) { if ( i > 10.5 ) break;
     693        sout | i; i += 1.7; } sout | nl;
     694for ( Ā®i; 10 -~ @ ~ 2Ā® ) { if ( i < 0 ) break;
     695        sout | i; } sout | nl;
     696for ( Ā®i; 12.1 ~ @ ~ @Ā® ) { if ( i < 2.5 ) break;
     697        sout | i; i -= 1.7; } sout | nl;
     698for ( Ā®i; 5 : j; -5 ~ @Ā® ) { sout | i | j; } sout | nl;
     699for ( Ā®i; 5 : j; -5 -~ @Ā® ) { sout | i | j; } sout | nl;
     700for ( Ā®i; 5 : j; -5 ~ @ ~ 2Ā® ) { sout | i | j; } sout | nl;
     701for ( Ā®i; 5 : j; -5 -~ @ ~ 2Ā® ) { sout | i | j; } sout | nl;
     702for ( Ā®j; -5 ~ @ : i; 5Ā® ) { sout | i | j; } sout | nl;
     703for ( Ā®j; -5 -~ @ : i; 5Ā® ) { sout | i | j; } sout | nl;
     704for ( Ā®j; -5 ~ @ ~ 2 : i; 5Ā® ) { sout | i | j; } sout | nl;
     705for ( Ā®j; -5 -~ @ ~ 2 : i; 5Ā® ) { sout | i | j; } sout | nl;
     706for ( Ā®j; -5 -~ @ ~ 2 : i; 5 : k; 1.5 ~ @Ā® ) {
     707        sout | i | j | k; } sout | nl;
     708for ( Ā®j; -5 -~ @ ~ 2 : k; 1.5 ~ @ : i; 5Ā® ) {
     709        sout | i | j | k; } sout | nl;
     710for ( Ā®k; 1.5 ~ @ : j; -5 -~ @ ~ 2 : i; 5Ā® ) {
     711        sout | i | j | k; } sout | nl;
    644712\end{cfa}
    645713&
    646714\begin{cfa}
    647 switch ( i ) {
    648   case 1: case 3 : case 5:
    649         ...
    650   case 2: case 4 : case 6:
    651         ...
    652 }
    653 \end{cfa}
    654 &
    655 \begin{cfa}
    656 
    657 // odd values
    658 
    659 // even values
    660 
    661 
     715
     716empty
     717empty
     718empty
     719zero
     720A
     721A A A A A A A A A A
     722B B B B B
     723C C C C C
     724D D D D D
     725E E E E E
     7260 1 2 3 4 5 6 7 8 9
     7271 3 5 7 9
     72810 8 6 4 2
     7290.5 1.5 2.5 3.5 4.5
     7305.5 4.5 3.5 2.5 1.5
     7312 4 6 8 10
     73210 8 6 4 2
     733
     734N N N N N N N N N N
     7350 1 2 3 4 5 6 7 8 9
     73610 9 8 7 6 5 4 3 2 1
     737
     7383 6 9
     739
     7401 2 3 4 5 6 7 8 9 10
     741
     74210 9 8 7 6 5 4 3 2 1 0
     743
     7442 4 6 8 10
     745
     7462.1 3.8 5.5 7.2 8.9
     747
     74810 8 6 4 2 0
     749
     75012.1 10.4 8.7 7 5.3 3.6
     7510 -5 1 -4 2 -3 3 -2 4 -1
     7520 -5 1 -6 2 -7 3 -8 4 -9
     7530 -5 1 -3 2 -1 3 1 4 3
     7540 -5 1 -7 2 -9 3 -11 4 -13
     7550 -5 1 -4 2 -3 3 -2 4 -1
     7560 -5 1 -6 2 -7 3 -8 4 -9
     7570 -5 1 -3 2 -1 3 1 4 3
     7580 -5 1 -7 2 -9 3 -11 4 -13
     759
     7600 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
     761
     7620 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
     763
     7640 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
    662765\end{cfa}
    663766\end{tabular}
    664767\end{cquote}
    665 In addition, subranges are allowed to specify case values.\footnote{
    666 gcc has the same mechanism but awkward syntax, \lstinline@2 ...42@, because a space is required after a number, otherwise the period is a decimal point.}
    667 \begin{cfa}
    668 switch ( i ) {
    669   case Ā®1~5:Ā® §\C{// 1, 2, 3, 4, 5}§
    670         ...
    671   case Ā®10~15:Ā® §\C{// 10, 11, 12, 13, 14, 15}§
    672         ...
    673 }
    674 \end{cfa}
    675 Lists of subranges are also allowed.
    676 \begin{cfa}
    677 case Ā®1~5, 12~21, 35~42Ā®:
    678 \end{cfa}
     768\caption{Loop Control Examples}
     769\label{f:LoopControlExamples}
     770\end{figure}
    679771
    680772
     
    885977
    886978
    887 \subsection{Non-terminating and Labelled \texorpdfstring{\LstKeywordStyle{fallthrough}}{Non-terminating and Labelled fallthrough}}
    888 
    889 The Ā©fallthroughĀ© clause may be non-terminating within a Ā©caseĀ© clause or have a target label to common code from multiple case clauses.
    890 \begin{center}
    891 \begin{tabular}{@{}lll@{}}
    892 \begin{cfa}
    893 choose ( ... ) {
    894   case 3:
    895         if ( ... ) {
    896                 ... Ā®fallthru;Ā® // goto case 4
    897         } else {
    898                 ...
    899         }
    900         // implicit break
    901   case 4:
    902 
    903 
    904 
    905 
     979%\section{\texorpdfstring{\protect\lstinline@case@ Clause}{case Clause}}
     980\subsection{\texorpdfstring{\LstKeywordStyle{case} Statement}{case Statement}}
     981
     982C restricts the Ā©caseĀ© clause of a Ā©switchĀ© statement to a single value.
     983For multiple Ā©caseĀ© clauses associated with the same statement, it is necessary to have multiple Ā©caseĀ© clauses rather than multiple values.
     984Requiring a Ā©caseĀ© clause for each value does not seem to be in the spirit of brevity normally associated with C.
     985Therefore, the Ā©caseĀ© clause is extended with a list of values, as in:
     986\begin{cquote}
     987\begin{tabular}{@{}l@{\hspace{3em}}l@{\hspace{2em}}l@{}}
     988\multicolumn{1}{c@{\hspace{3em}}}{\textbf{\CFA}}        & \multicolumn{1}{c@{\hspace{2em}}}{\textbf{C}} \\
     989\begin{cfa}
     990switch ( i ) {
     991  case Ā®1, 3, 5Ā®:
     992        ...
     993  case Ā®2, 4, 6Ā®:
     994        ...
     995}
    906996\end{cfa}
    907997&
    908998\begin{cfa}
    909 choose ( ... ) {
    910   case 3:
    911         ... Ā®fallthrough common;Ā®
    912   case 4:
    913         ... Ā®fallthrough common;Ā®
    914 
    915   Ā®common:Ā® // below fallthrough
    916                           // at case-clause level
    917         ...     // common code for cases 3/4
    918         // implicit break
    919   case 4:
    920 
    921 
     999switch ( i ) {
     1000  case 1: case 3 : case 5:
     1001        ...
     1002  case 2: case 4 : case 6:
     1003        ...
     1004}
    9221005\end{cfa}
    9231006&
    9241007\begin{cfa}
    925 choose ( ... ) {
    926   case 3:
    927         choose ( ... ) {
    928           case 4:
    929                 for ( ... ) {
    930                         // multi-level transfer
    931                         ... Ā®fallthru common;Ā®
    932                 }
    933                 ...
    934         }
     1008
     1009// odd values
     1010
     1011// even values
     1012
     1013
     1014\end{cfa}
     1015\end{tabular}
     1016\end{cquote}
     1017In addition, subranges are allowed to specify case values.\footnote{
     1018gcc has the same mechanism but awkward syntax, \lstinline@2 ...42@, because a space is required after a number, otherwise the period is a decimal point.}
     1019\begin{cfa}
     1020switch ( i ) {
     1021  case Ā®1~5:Ā® §\C{// 1, 2, 3, 4, 5}§
    9351022        ...
    936   Ā®common:Ā® // below fallthrough
    937                           // at case-clause level
    938 \end{cfa}
    939 \end{tabular}
    940 \end{center}
    941 The target label must be below the Ā©fallthroughĀ© and may not be nested in a control structure, and
    942 the target label must be at the same or higher level as the containing Ā©caseĀ© clause and located at
    943 the same level as a Ā©caseĀ© clause; the target label may be case Ā©defaultĀ©, but only associated
    944 with the current Ā©switchĀ©/Ā©chooseĀ© statement.
    945 
    946 
    947 \subsection{Loop Control}
    948 
    949 The Ā©forĀ©/Ā©whileĀ©/Ā©do-whileĀ© loop-control allows empty or simplified ranges (see Figure~\ref{f:LoopControlExamples}).
    950 \begin{itemize}
    951 \item
    952 The loop index is polymorphic in the type of the comparison value N (when the start value is implicit) or the start value M.
    953 \item
    954 An empty conditional implies comparison value of Ā©1Ā© (true).
    955 \item
    956 A comparison N is implicit up-to exclusive range [0,N©®)®©.
    957 \item
    958 A comparison Ā©=Ā© N is implicit up-to inclusive range [0,N©®]®©.
    959 \item
    960 The up-to range M Ā©~Ā©\index{~@Ā©~Ā©} N means exclusive range [M,N©®)®©.
    961 \item
    962 The up-to range M Ā©~=Ā©\index{~=@Ā©~=Ā©} N means inclusive range [M,N©®]®©.
    963 \item
    964 The down-to range M Ā©-~Ā©\index{-~@Ā©-~Ā©} N means exclusive range [N,M©®)®©.
    965 \item
    966 The down-to range M Ā©-~=Ā©\index{-~=@Ā©-~=Ā©} N means inclusive range [N,M©®]®©.
    967 \item
    968 Ā©0Ā© is the implicit start value;
    969 \item
    970 Ā©1Ā© is the implicit increment value.
    971 \item
    972 The up-to range uses operator Ā©+=Ā© for increment;
    973 \item
    974 The down-to range uses operator Ā©-=Ā© for decrement.
    975 \item
    976 Ā©@Ā© means put nothing in this field.
    977 \item
    978 Ā©:Ā© means start another index.
    979 \end{itemize}
    980 
    981 \begin{figure}
    982 \begin{tabular}{@{}l|l@{}}
    983 \multicolumn{1}{c|}{loop control} & \multicolumn{1}{c}{output} \\
    984 \hline
    985 \begin{cfa}[xleftmargin=0pt]
    986 while Ā®()Ā® { sout | "empty"; break; }
    987 do { sout | "empty"; break; } while Ā®()Ā®;
    988 for Ā®()Ā® { sout | "empty"; break; }
    989 for ( Ā®0Ā® ) { sout | "A"; } sout | "zero";
    990 for ( Ā®1Ā® ) { sout | "A"; }
    991 for ( Ā®10Ā® ) { sout | "A"; }
    992 for ( Ā®= 10Ā® ) { sout | "A"; }
    993 for ( Ā®1 ~= 10 ~ 2Ā® ) { sout | "B"; }
    994 for ( Ā®10 -~= 1 ~ 2Ā® ) { sout | "C"; }
    995 for ( Ā®0.5 ~ 5.5Ā® ) { sout | "D"; }
    996 for ( Ā®5.5 -~ 0.5Ā® ) { sout | "E"; }
    997 for ( Ā®i; 10Ā® ) { sout | i; }
    998 for ( Ā®i; = 10Ā® ) { sout | i; }
    999 for ( Ā®i; 1 ~= 10 ~ 2Ā® ) { sout | i; }
    1000 for ( Ā®i; 10 -~= 1 ~ 2Ā® ) { sout | i; }
    1001 for ( Ā®i; 0.5 ~ 5.5Ā® ) { sout | i; }
    1002 for ( Ā®i; 5.5 -~ 0.5Ā® ) { sout | i; }
    1003 for ( Ā®ui; 2u ~= 10u ~ 2uĀ® ) { sout | ui; }
    1004 for ( Ā®ui; 10u -~= 2u ~ 2uĀ® ) { sout | ui; }
    1005 enum { N = 10 };
    1006 for ( Ā®NĀ® ) { sout | "N"; }
    1007 for ( Ā®i; NĀ® ) { sout | i; }
    1008 for ( Ā®i; N -~ 0Ā® ) { sout | i; }
    1009 const int start = 3, comp = 10, inc = 2;
    1010 for ( Ā®i; start ~ comp ~ inc + 1Ā® ) { sout | i; }
    1011 for ( i; 1 ~ Ā®@Ā® ) { if ( i > 10 ) break; sout | i; }
    1012 for ( i; 10 -~ Ā®@Ā® ) { if ( i < 0 ) break; sout | i; }
    1013 for ( i; 2 ~ Ā®@Ā® ~ 2 ) { if ( i > 10 ) break; sout | i; }
    1014 for ( i; 2.1 ~ Ā®@Ā® ~ Ā®@Ā® ) { if ( i > 10.5 ) break; sout | i; i += 1.7; }
    1015 for ( i; 10 -~ Ā®@Ā® ~ 2 ) { if ( i < 0 ) break; sout | i; }
    1016 for ( i; 12.1 ~ Ā®@Ā® ~ Ā®@Ā® ) { if ( i < 2.5 ) break; sout | i; i -= 1.7; }
    1017 for ( i; 5 Ā®:Ā® j; -5 ~ @ ) { sout | i | j; }
    1018 for ( i; 5 Ā®:Ā® j; -5 -~ @ ) { sout | i | j; }
    1019 for ( i; 5 Ā®:Ā® j; -5 ~ @ ~ 2 ) { sout | i | j; }
    1020 for ( i; 5 Ā®:Ā® j; -5 -~ @ ~ 2 ) { sout | i | j; }
    1021 for ( i; 5 Ā®:Ā® j; -5 ~ @ ) { sout | i | j; }
    1022 for ( i; 5 Ā®:Ā® j; -5 -~ @ ) { sout | i | j; }
    1023 for ( i; 5 Ā®:Ā® j; -5 ~ @ ~ 2 ) { sout | i | j; }
    1024 for ( i; 5 Ā®:Ā® j; -5 -~ @ ~ 2 ) { sout | i | j; }
    1025 for ( i; 5 Ā®:Ā® j; -5 -~ @ ~ 2 Ā®:Ā® k; 1.5 ~ @ ) { sout | i | j | k; }
    1026 for ( i; 5 Ā®:Ā® j; -5 -~ @ ~ 2 Ā®:Ā® k; 1.5 ~ @ ) { sout | i | j | k; }
    1027 for ( i; 5 Ā®:Ā® k; 1.5 ~ @ Ā®:Ā® j; -5 -~ @ ~ 2 ) { sout | i | j | k; }
    1028 \end{cfa}
    1029 &
    1030 \begin{cfa}
    1031 empty
    1032 empty
    1033 empty
    1034 zero
    1035 A
    1036 A A A A A A A A A A
    1037 A A A A A A A A A A A
    1038 B B B B B
    1039 C C C C C
    1040 D D D D D
    1041 E E E E E
    1042 0 1 2 3 4 5 6 7 8 9
    1043 0 1 2 3 4 5 6 7 8 9 10
    1044 1 3 5 7 9
    1045 10 8 6 4 2
    1046 0.5 1.5 2.5 3.5 4.5
    1047 5.5 4.5 3.5 2.5 1.5
    1048 2 4 6 8 10
    1049 10 8 6 4 2
    1050 
    1051 N N N N N N N N N N
    1052 0 1 2 3 4 5 6 7 8 9
    1053 10 9 8 7 6 5 4 3 2 1
    1054 
    1055 3 6 9
    1056 1 2 3 4 5 6 7 8 9 10
    1057 10 9 8 7 6 5 4 3 2 1 0
    1058 2 4 6 8 10
    1059 2.1 3.8 5.5 7.2 8.9
    1060 10 8 6 4 2 0
    1061 12.1 10.4 8.7 7. 5.3 3.6
    1062 0 -5 1 -4 2 -3 3 -2 4 -1
    1063 0 -5 1 -6 2 -7 3 -8 4 -9
    1064 0 -5 1 -3 2 -1 3 1 4 3
    1065 0 -5 1 -7 2 -9 3 -11 4 -13
    1066 0 -5 1 -4 2 -3 3 -2 4 -1
    1067 0 -5 1 -6 2 -7 3 -8 4 -9
    1068 0 -5 1 -3 2 -1 3 1 4 3
    1069 0 -5 1 -7 2 -9 3 -11 4 -13
    1070 0 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
    1071 0 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
    1072 0 -5 1.5 1 -7 2.5 2 -9 3.5 3 -11 4.5 4 -13 5.5
    1073 \end{cfa}
    1074 \end{tabular}
    1075 \caption{Loop Control Examples}
    1076 \label{f:LoopControlExamples}
    1077 \end{figure}
     1023  case Ā®10~15:Ā® §\C{// 10, 11, 12, 13, 14, 15}§
     1024        ...
     1025}
     1026\end{cfa}
     1027Lists of subranges are also allowed.
     1028\begin{cfa}
     1029case Ā®1~5, 12~21, 35~42Ā®:
     1030\end{cfa}
     1031
    10781032
    10791033% for ()  => for ( ;; )
     
    65936547hence, names in these include files are not mangled\index{mangling!name} (see~\VRef{s:Interoperability}).
    65946548All other C header files must be explicitly wrapped in ©extern "C"© to prevent name mangling.
    6595 This approach is different from \Index*[C++]{\CC{}} where the name-mangling issue is handled internally in C header-files through checks for preprocessor variable Ā©__cplusplusĀ©, which adds appropriate Ā©extern "C"Ā© qualifiers.
     6549For \Index*[C++]{\CC{}}, the name-mangling issue is often handled internally in many C header-files through checks for preprocessor variable Ā©__cplusplusĀ©, which adds appropriate Ā©extern "C"Ā© qualifiers.
    65966550
    65976551
     
    66076561The storage-management routines extend their C equivalents by overloading, alternate names, providing shallow type-safety, and removing the need to specify the allocation size for non-array types.
    66086562
    6609 C storage management provides the following capabilities:
     6563Storage management provides the following capabilities:
    66106564\begin{description}
    6611 \item[filled]
    6612 after allocation with a specified character or value.
     6565\item[fill]
     6566after allocation the storage is filled with a specified character.
    66136567\item[resize]
    6614 an existing allocation to decreased or increased its size.
    6615 In either case, new storage may or may not be allocated and, if there is a new allocation, as much data from the existing allocation is copied into the new allocation.
     6568an existing allocation is decreased or increased in size.
     6569In either case, new storage may or may not be allocated and, if there is a new allocation, as much data from the existing allocation is copied.
    66166570For an increase in storage size, new storage after the copied data may be filled.
    6617 \item[align]
    6618 an allocation on a specified memory boundary, \eg, an address multiple of 64 or 128 for cache-line purposes.
     6571\item[alignment]
     6572an allocation starts on a specified memory boundary, \eg, an address multiple of 64 or 128 for cache-line purposes.
    66196573\item[array]
    66206574the allocation size is scaled to the specified number of array elements.
    66216575An array may be filled, resized, or aligned.
    66226576\end{description}
    6623 \VRef[Table]{t:AllocationVersusCapabilities} shows allocation routines supporting different combinations of storage-management capabilities.
    6624 \begin{table}
    6625 \centering
    6626 \begin{minipage}{0.75\textwidth}
    6627 \begin{tabular}{@{}r|l|l|l|l|l@{}}
     6577The table shows allocation routines supporting different combinations of storage-management capabilities:
     6578\begin{center}
     6579\begin{tabular}{@{}r|r|l|l|l|l@{}}
    66286580\multicolumn{1}{c}{}&           & \multicolumn{1}{c|}{fill}     & resize        & alignment     & array \\
    66296581\hline
    66306582C               & Ā©mallocĀ©                      & no                    & no            & no            & no    \\
    66316583                & Ā©callocĀ©                      & yes (0 only)  & no            & no            & yes   \\
    6632                 & Ā©reallocĀ©                     & copy                  & yes           & no            & no    \\
     6584                & Ā©reallocĀ©                     & no/copy               & yes           & no            & no    \\
    66336585                & Ā©memalignĀ©            & no                    & no            & yes           & no    \\
    6634                 & Ā©aligned_allocĀ©\footnote{Same as Ā©memalignĀ© but size is an integral multiple of alignment, which is universally ignored.}
    6635                                                         & no                    & no            & yes           & no    \\
    66366586                & Ā©posix_memalignĀ©      & no                    & no            & yes           & no    \\
    6637                 & Ā©vallocĀ©                      & no                    & no            & yes (page size)& no   \\
    6638                 & Ā©pvallocĀ©\footnote{Same as Ā©vallocĀ© but rounds size to multiple of page size.}
    6639                                                         & no                    & no            & yes (page size)& no   \\
    66406587\hline
    6641 \CFA    & Ā©cmemalignĀ©           & yes (0 only)  & no            & yes           & yes   \\
    6642                 & Ā©reallocĀ©                     & copy                  & yes           & yes           & no    \\
    6643                 & Ā©allocĀ©                       & no                    & yes           & no            & yes   \\
    6644                 & Ā©alloc_setĀ©           & yes                   & yes           & no            & yes   \\
    6645                 & Ā©alloc_alignĀ©         & no                    & yes           & yes           & yes   \\
    6646                 & Ā©alloc_align_setĀ©     & yes                   & yes           & yes           & yes   \\
     6588C11             & Ā©aligned_allocĀ©       & no                    & no            & yes           & no    \\
     6589\hline
     6590\CFA    & Ā©allocĀ©                       & no/copy/yes   & no/yes        & no            & yes   \\
     6591                & Ā©align_allocĀ©         & no/yes                & no            & yes           & yes   \\
    66476592\end{tabular}
    6648 \end{minipage}
    6649 \caption{Allocation Routines versus Storage-Management Capabilities}
    6650 \label{t:AllocationVersusCapabilities}
    6651 \end{table}
    6652 
    6653 \CFA memory management extends the type safety of all allocations by using the type of the left-hand-side type to determine the allocation size and return a matching type for the new storage.
    6654 Type-safe allocation is provided for all C allocation routines and new \CFA allocation routines, \eg in
    6655 \begin{cfa}
    6656 int * ip = (int *)malloc( sizeof(int) );                §\C{// C}§
    6657 int * ip = malloc();                                                    §\C{// \CFA type-safe version of C malloc}§
    6658 int * ip = alloc();                                                             Ā§\C{// \CFA type-safe uniform alloc}§
    6659 \end{cfa}
    6660 the latter two allocations determine the allocation size from the type of ©ip© (©int©) and cast the pointer to the allocated storage to ©int *©.
    6661 
    6662 \CFA memory management extends allocation safety by implicitly honouring all alignment requirements, \eg in
    6663 \begin{cfa}
    6664 struct S { int i; } __attribute__(( aligned( 128 ) )); // cache-line alignment
    6665 S * sp = malloc();                                                              §\C{// honour type alignment}§
    6666 \end{cfa}
    6667 the storage allocation is implicitly aligned to 128 rather than the default 16.
    6668 The alignment check is performed at compile time so there is no runtime cost.
    6669 
    6670 \CFA memory management extends the resize capability with the notion of \newterm{sticky properties}.
    6671 Hence, initial allocation capabilities are remembered and maintained when resize requires copying.
    6672 For example, an initial alignment and fill capability are preserved during a resize copy so the copy has the same alignment and extended storage is filled.
    6673 Without sticky properties it is dangerous to use Ā©reallocĀ©, resulting in an idiom of manually performing the reallocation to maintain correctness.
    6674 
    6675 \CFA memory management extends allocation to support constructors for initialization of allocated storage, \eg in
    6676 \begin{cfa}
    6677 struct S { int i; };                                                    §\C{// cache-line alignment}§
    6678 void ?{}( S & s, int i ) { s.i = i; }
    6679 // assume ?|? operator for printing an S
    6680 
    6681 S & sp = *Ā®newĀ®( 3 );                                                   Ā§\C{// call constructor after allocation}§
    6682 sout | sp.i;
    6683 Ā®deleteĀ®( &sp );
    6684 
    6685 S * spa = Ā®anewĀ®( 10, 5 );                                              §\C{// allocate array and initialize each array element}§
    6686 for ( i; 10 ) sout | spa[i] | nonl;
    6687 sout | nl;
    6688 Ā®adeleteĀ®( 10, spa );
    6689 \end{cfa}
    6690 Allocation routines Ā©newĀ©/Ā©anewĀ© allocate a variable/array and initialize storage using the allocated type's constructor.
    6691 Note, the matching deallocation routines Ā©deleteĀ©/Ā©adeleteĀ©.
     6593\end{center}
     6594It is impossible to resize with alignment because the underlying Ā©reallocĀ© allocates storage if more space is needed, and it does not honour alignment from the original allocation.
    66926595
    66936596\leavevmode
    66946597\begin{cfa}[aboveskip=0pt,belowskip=0pt]
     6598// C unsafe allocation
    66956599extern "C" {
    6696         // C unsafe allocation
    6697         void * malloc( size_t size );§\indexc{malloc}§
    6698         void * calloc( size_t dim, size_t size );§\indexc{calloc}§
    6699         void * realloc( void * ptr, size_t size );§\indexc{realloc}§
    6700         void * memalign( size_t align, size_t size );§\indexc{memalign}§
    6701         void * aligned_alloc( size_t align, size_t size );§\indexc{aligned_alloc}§
    6702         int posix_memalign( void ** ptr, size_t align, size_t size );§\indexc{posix_memalign}§
    6703         void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize );§\indexc{cmemalign}§ // CFA
    6704 
    6705         // C unsafe initialization/copy
    6706         void * memset( void * dest, int c, size_t size );§\indexc{memset}§
    6707         void * memcpy( void * dest, const void * src, size_t size );§\indexc{memcpy}§
    6708 }
    6709 
    6710 void * realloc( void * oaddr, size_t nalign, size_t size ); // CFA heap
     6600void * malloc( size_t size );§\indexc{memset}§
     6601void * calloc( size_t dim, size_t size );§\indexc{calloc}§
     6602void * realloc( void * ptr, size_t size );§\indexc{realloc}§
     6603void * memalign( size_t align, size_t size );§\indexc{memalign}§
     6604int posix_memalign( void ** ptr, size_t align, size_t size );§\indexc{posix_memalign}§
     6605
     6606// C unsafe initialization/copy
     6607void * memset( void * dest, int c, size_t size );
     6608void * memcpy( void * dest, const void * src, size_t size );
     6609}
    67116610
    67126611forall( dtype T | sized(T) ) {
    6713         // §\CFA§ safe equivalents, i.e., implicit size specification
     6612// §\CFA§ safe equivalents, i.e., implicit size specification
    67146613        T * malloc( void );
    67156614        T * calloc( size_t dim );
    67166615        T * realloc( T * ptr, size_t size );
    67176616        T * memalign( size_t align );
    6718         T * cmemalign( size_t align, size_t dim  );
    67196617        T * aligned_alloc( size_t align );
    67206618        int posix_memalign( T ** ptr, size_t align );
    67216619
    6722         // §\CFA§ safe general allocation, fill, resize, alignment, array
     6620// §\CFA§ safe general allocation, fill, resize, array
    67236621        T * alloc( void );§\indexc{alloc}§
     6622        T * alloc( char fill );
    67246623        T * alloc( size_t dim );
     6624        T * alloc( size_t dim, char fill );
    67256625        T * alloc( T ptr[], size_t dim );
    6726         T * alloc_set( char fill );§\indexc{alloc_set}§
    6727         T * alloc_set( T fill );
    6728         T * alloc_set( size_t dim, char fill );
    6729         T * alloc_set( size_t dim, T fill );
    6730         T * alloc_set( size_t dim, const T fill[] );
    6731         T * alloc_set( T ptr[], size_t dim, char fill );
    6732 
    6733         T * alloc_align( size_t align );
    6734         T * alloc_align( size_t align, size_t dim );
    6735         T * alloc_align( T ptr[], size_t align ); // aligned realloc array
    6736         T * alloc_align( T ptr[], size_t align, size_t dim ); // aligned realloc array
    6737         T * alloc_align_set( size_t align, char fill );
    6738         T * alloc_align_set( size_t align, T fill );
    6739         T * alloc_align_set( size_t align, size_t dim, char fill );
    6740         T * alloc_align_set( size_t align, size_t dim, T fill );
    6741         T * alloc_align_set( size_t align, size_t dim, const T fill[] );
    6742         T * alloc_align_set( T ptr[], size_t align, size_t dim, char fill );
    6743 
    6744         // §\CFA§ safe initialization/copy, i.e., implicit size specification
    6745         T * memset( T * dest, char fill );§\indexc{memset}§
     6626        T * alloc( T ptr[], size_t dim, char fill );
     6627
     6628// §\CFA§ safe general allocation, align, fill, array
     6629        T * align_alloc( size_t align );
     6630        T * align_alloc( size_t align, char fill );
     6631        T * align_alloc( size_t align, size_t dim );
     6632        T * align_alloc( size_t align, size_t dim, char fill );
     6633
     6634// §\CFA§ safe initialization/copy, i.e., implicit size specification
     6635        T * memset( T * dest, char c );§\indexc{memset}§
    67466636        T * memcpy( T * dest, const T * src );§\indexc{memcpy}§
    67476637
    6748         // §\CFA§ safe initialization/copy, i.e., implicit size specification, array types
    6749         T * amemset( T dest[], char fill, size_t dim );
     6638// §\CFA§ safe initialization/copy array
     6639        T * amemset( T dest[], char c, size_t dim );
    67506640        T * amemcpy( T dest[], const T src[], size_t dim );
    67516641}
    67526642
    6753 // §\CFA§ allocation/deallocation and constructor/destructor, non-array types
    6754 forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * new( Params p );§\indexc{new}§
    6755 forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void delete( T * ptr );§\indexc{delete}§
    6756 forall( dtype T, ttype Params | sized(T) | { void ^?{}( T & ); void delete( Params ); } )
     6643// §\CFA§ allocation/deallocation and constructor/destructor
     6644forall( dtype T | sized(T), ttype Params | { void ?{}( T *, Params ); } ) T * new( Params p );§\indexc{new}§
     6645forall( dtype T | { void ^?{}( T * ); } ) void delete( T * ptr );§\indexc{delete}§
     6646forall( dtype T, ttype Params | { void ^?{}( T * ); void delete( Params ); } )
    67576647  void delete( T * ptr, Params rest );
    67586648
    6759 // §\CFA§ allocation/deallocation and constructor/destructor, array types
    6760 forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * anew( size_t dim, Params p );§\indexc{anew}§
    6761 forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void adelete( size_t dim, T arr[] );§\indexc{adelete}§
    6762 forall( dtype T | sized(T) | { void ^?{}( T & ); }, ttype Params | { void adelete( Params ); } )
     6649// §\CFA§ allocation/deallocation and constructor/destructor, array
     6650forall( dtype T | sized(T), ttype Params | { void ?{}( T *, Params ); } ) T * anew( size_t dim, Params p );§\indexc{anew}§
     6651forall( dtype T | sized(T) | { void ^?{}( T * ); } ) void adelete( size_t dim, T arr[] );§\indexc{adelete}§
     6652forall( dtype T | sized(T) | { void ^?{}( T * ); }, ttype Params | { void adelete( Params ); } )
    67636653  void adelete( size_t dim, T arr[], Params rest );
    67646654\end{cfa}
  • driver/cfa.cc

    r6a490b2 rb7d6a36  
    385385        } // if
    386386
    387         string preludedir;
    388387        switch(path) {
    389         case Installed   : preludedir = libdir; break;
    390         case BuildTree   : preludedir = libdir + "/prelude"; break;
    391         case Distributed : preludedir = dir(argv[0]); break;
     388        case Installed   : Putenv( argv, "--prelude-dir=" + libdir ); break;
     389        case BuildTree   : Putenv( argv, "--prelude-dir=" + libdir + "/prelude" ); break;
     390        case Distributed : Putenv( argv, "--prelude-dir=" + dir(argv[0]) ); break;
    392391        }
    393 
    394         Putenv( argv, "--prelude-dir=" + preludedir );
    395         args[nargs++] = "-include";
    396         args[nargs++] = (*new string(preludedir + "/defines.hfa")).c_str();
    397392
    398393        for ( int i = 0; i < nlibs; i += 1 ) {                          // copy non-user libraries after all user libraries
  • libcfa/Makefile.in

    r6a490b2 rb7d6a36  
    106106 configure.lineno config.status.lineno
    107107mkinstalldirs = $(install_sh) -d
    108 CONFIG_HEADER = $(top_builddir)/prelude/defines.hfa
    109108CONFIG_CLEAN_FILES =
    110109CONFIG_CLEAN_VPATH_FILES =
  • libcfa/configure

    r6a490b2 rb7d6a36  
    790790enable_distcc
    791791with_cfa_name
     792enable_shared
    792793enable_static
    793 enable_shared
    794794with_pic
    795795enable_fast_install
     
    14521452  --disable-silent-rules  verbose build output (undo: "make V=0")
    14531453  --enable-distcc     whether or not to enable distributed compilation
    1454   --enable-static[=PKGS]  build static libraries [default=no]
    14551454  --enable-shared[=PKGS]  build shared libraries [default=yes]
     1455  --enable-static[=PKGS]  build static libraries [default=yes]
    14561456  --enable-fast-install[=PKGS]
    14571457                          optimize for fast installation [default=yes]
     
    19601960
    19611961} # ac_fn_cxx_try_link
    1962 
    1963 # ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
    1964 # -------------------------------------------------------
    1965 # Tests whether HEADER exists, giving a warning if it cannot be compiled using
    1966 # the include files in INCLUDES and setting the cache variable VAR
    1967 # accordingly.
    1968 ac_fn_c_check_header_mongrel ()
    1969 {
    1970   as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
    1971   if eval \${$3+:} false; then :
    1972   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
    1973 $as_echo_n "checking for $2... " >&6; }
    1974 if eval \${$3+:} false; then :
    1975   $as_echo_n "(cached) " >&6
    1976 fi
    1977 eval ac_res=\$$3
    1978                { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
    1979 $as_echo "$ac_res" >&6; }
    1980 else
    1981   # Is the header compilable?
    1982 { $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
    1983 $as_echo_n "checking $2 usability... " >&6; }
    1984 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    1985 /* end confdefs.h.  */
    1986 $4
    1987 #include <$2>
    1988 _ACEOF
    1989 if ac_fn_c_try_compile "$LINENO"; then :
    1990   ac_header_compiler=yes
    1991 else
    1992   ac_header_compiler=no
    1993 fi
    1994 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    1995 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
    1996 $as_echo "$ac_header_compiler" >&6; }
    1997 
    1998 # Is the header present?
    1999 { $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
    2000 $as_echo_n "checking $2 presence... " >&6; }
    2001 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    2002 /* end confdefs.h.  */
    2003 #include <$2>
    2004 _ACEOF
    2005 if ac_fn_c_try_cpp "$LINENO"; then :
    2006   ac_header_preproc=yes
    2007 else
    2008   ac_header_preproc=no
    2009 fi
    2010 rm -f conftest.err conftest.i conftest.$ac_ext
    2011 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
    2012 $as_echo "$ac_header_preproc" >&6; }
    2013 
    2014 # So?  What about this header?
    2015 case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
    2016   yes:no: )
    2017     { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
    2018 $as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
    2019     { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
    2020 $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
    2021     ;;
    2022   no:yes:* )
    2023     { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
    2024 $as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
    2025     { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     check for missing prerequisite headers?" >&5
    2026 $as_echo "$as_me: WARNING: $2:     check for missing prerequisite headers?" >&2;}
    2027     { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
    2028 $as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
    2029     { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&5
    2030 $as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
    2031     { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
    2032 $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
    2033 ( $as_echo "## --------------------------------------- ##
    2034 ## Report this to cforall@plg.uwaterloo.ca ##
    2035 ## --------------------------------------- ##"
    2036      ) | sed "s/^/$as_me: WARNING:     /" >&2
    2037     ;;
    2038 esac
    2039   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
    2040 $as_echo_n "checking for $2... " >&6; }
    2041 if eval \${$3+:} false; then :
    2042   $as_echo_n "(cached) " >&6
    2043 else
    2044   eval "$3=\$ac_header_compiler"
    2045 fi
    2046 eval ac_res=\$$3
    2047                { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
    2048 $as_echo "$ac_res" >&6; }
    2049 fi
    2050   eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
    2051 
    2052 } # ac_fn_c_check_header_mongrel
    20531962cat >config.log <<_ACEOF
    20541963This file contains any messages produced by compilers while
     
    80307939
    80317940# Set options
    8032 # Check whether --enable-static was given.
    8033 if test "${enable_static+set}" = set; then :
    8034   enableval=$enable_static; p=${PACKAGE-default}
    8035     case $enableval in
    8036     yes) enable_static=yes ;;
    8037     no) enable_static=no ;;
    8038     *)
    8039      enable_static=no
    8040       # Look at the argument we got.  We use all the common list separators.
    8041       lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
    8042       for pkg in $enableval; do
    8043         IFS=$lt_save_ifs
    8044         if test "X$pkg" = "X$p"; then
    8045           enable_static=yes
    8046         fi
    8047       done
    8048       IFS=$lt_save_ifs
    8049       ;;
    8050     esac
    8051 else
    8052   enable_static=no
    8053 fi
    8054 
    8055 
    8056 
    8057 
    8058 
    8059 
    8060 
    80617941
    80627942
     
    80917971fi
    80927972
     7973
     7974
     7975
     7976
     7977
     7978
     7979
     7980
     7981  # Check whether --enable-static was given.
     7982if test "${enable_static+set}" = set; then :
     7983  enableval=$enable_static; p=${PACKAGE-default}
     7984    case $enableval in
     7985    yes) enable_static=yes ;;
     7986    no) enable_static=no ;;
     7987    *)
     7988     enable_static=no
     7989      # Look at the argument we got.  We use all the common list separators.
     7990      lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR,
     7991      for pkg in $enableval; do
     7992        IFS=$lt_save_ifs
     7993        if test "X$pkg" = "X$p"; then
     7994          enable_static=yes
     7995        fi
     7996      done
     7997      IFS=$lt_save_ifs
     7998      ;;
     7999    esac
     8000else
     8001  enable_static=yes
     8002fi
    80938003
    80948004
     
    1694916859
    1695016860
    16951 for ac_header in linux/io_uring.h
    16952 do :
    16953   ac_fn_c_check_header_mongrel "$LINENO" "linux/io_uring.h" "ac_cv_header_linux_io_uring_h" "$ac_includes_default"
    16954 if test "x$ac_cv_header_linux_io_uring_h" = xyes; then :
    16955   cat >>confdefs.h <<_ACEOF
    16956 #define HAVE_LINUX_IO_URING_H 1
    16957 _ACEOF
    16958 
    16959 fi
    16960 
    16961 done
    16962 
    16963 for ac_func in preadv2 pwritev2
    16964 do :
    16965   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
    16966 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
    16967 if eval test \"x\$"$as_ac_var"\" = x"yes"; then :
    16968   cat >>confdefs.h <<_ACEOF
    16969 #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
    16970 _ACEOF
    16971 
    16972 fi
    16973 done
    16974 
    16975 
    1697616861ac_config_files="$ac_config_files Makefile src/Makefile prelude/Makefile"
    16977 
    16978 
    16979 ac_config_headers="$ac_config_headers prelude/defines.hfa"
    1698016862
    1698116863
     
    1707016952test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
    1707116953
    17072 DEFS=-DHAVE_CONFIG_H
     16954# Transform confdefs.h into DEFS.
     16955# Protect against shell expansion while executing Makefile rules.
     16956# Protect against Makefile macro expansion.
     16957#
     16958# If the first sed substitution is executed (which looks for macros that
     16959# take arguments), then branch to the quote section.  Otherwise,
     16960# look for a macro that doesn't take arguments.
     16961ac_script='
     16962:mline
     16963/\\$/{
     16964 N
     16965 s,\\\n,,
     16966 b mline
     16967}
     16968t clear
     16969:clear
     16970s/^[     ]*#[    ]*define[       ][      ]*\([^  (][^    (]*([^)]*)\)[   ]*\(.*\)/-D\1=\2/g
     16971t quote
     16972s/^[     ]*#[    ]*define[       ][      ]*\([^  ][^     ]*\)[   ]*\(.*\)/-D\1=\2/g
     16973t quote
     16974b any
     16975:quote
     16976s/[      `~#$^&*(){}\\|;'\''"<>?]/\\&/g
     16977s/\[/\\&/g
     16978s/\]/\\&/g
     16979s/\$/$$/g
     16980H
     16981:any
     16982${
     16983        g
     16984        s/^\n//
     16985        s/\n/ /g
     16986        p
     16987}
     16988'
     16989DEFS=`sed -n "$ac_script" confdefs.h`
     16990
    1707316991
    1707416992ac_libobjs=
     
    1754817466esac
    1754917467
    17550 case $ac_config_headers in *"
    17551 "*) set x $ac_config_headers; shift; ac_config_headers=$*;;
    17552 esac
    1755317468
    1755417469
     
    1755617471# Files that config.status was made for.
    1755717472config_files="$ac_config_files"
    17558 config_headers="$ac_config_headers"
    1755917473config_commands="$ac_config_commands"
    1756017474
     
    1757817492      --file=FILE[:TEMPLATE]
    1757917493                   instantiate the configuration file FILE
    17580       --header=FILE[:TEMPLATE]
    17581                    instantiate the configuration header FILE
    1758217494
    1758317495Configuration files:
    1758417496$config_files
    17585 
    17586 Configuration headers:
    17587 $config_headers
    1758817497
    1758917498Configuration commands:
     
    1765317562    as_fn_append CONFIG_FILES " '$ac_optarg'"
    1765417563    ac_need_defaults=false;;
    17655   --header | --heade | --head | --hea )
    17656     $ac_shift
    17657     case $ac_optarg in
    17658     *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
    17659     esac
    17660     as_fn_append CONFIG_HEADERS " '$ac_optarg'"
    17661     ac_need_defaults=false;;
    17662   --he | --h)
    17663     # Conflict between --help and --header
    17664     as_fn_error $? "ambiguous option: \`$1'
    17665 Try \`$0 --help' for more information.";;
    17666   --help | --hel | -h )
     17564  --he | --h |  --help | --hel | -h )
    1766717565    $as_echo "$ac_cs_usage"; exit ;;
    1766817566  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
     
    1772717625macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`'
    1772817626macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`'
     17627enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`'
    1772917628enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`'
    17730 enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`'
    1773117629pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`'
    1773217630enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`'
     
    1811118009    "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;;
    1811218010    "prelude/Makefile") CONFIG_FILES="$CONFIG_FILES prelude/Makefile" ;;
    18113     "prelude/defines.hfa") CONFIG_HEADERS="$CONFIG_HEADERS prelude/defines.hfa" ;;
    1811418011
    1811518012  *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
     
    1812418021if $ac_need_defaults; then
    1812518022  test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
    18126   test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
    1812718023  test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
    1812818024fi
     
    1831318209fi # test -n "$CONFIG_FILES"
    1831418210
    18315 # Set up the scripts for CONFIG_HEADERS section.
    18316 # No need to generate them if there are no CONFIG_HEADERS.
    18317 # This happens for instance with `./config.status Makefile'.
    18318 if test -n "$CONFIG_HEADERS"; then
    18319 cat >"$ac_tmp/defines.awk" <<\_ACAWK ||
    18320 BEGIN {
    18321 _ACEOF
    18322 
    18323 # Transform confdefs.h into an awk script `defines.awk', embedded as
    18324 # here-document in config.status, that substitutes the proper values into
    18325 # config.h.in to produce config.h.
    18326 
    18327 # Create a delimiter string that does not exist in confdefs.h, to ease
    18328 # handling of long lines.
    18329 ac_delim='%!_!# '
    18330 for ac_last_try in false false :; do
    18331   ac_tt=`sed -n "/$ac_delim/p" confdefs.h`
    18332   if test -z "$ac_tt"; then
    18333     break
    18334   elif $ac_last_try; then
    18335     as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
    18336   else
    18337     ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
    18338   fi
    18339 done
    18340 
    18341 # For the awk script, D is an array of macro values keyed by name,
    18342 # likewise P contains macro parameters if any.  Preserve backslash
    18343 # newline sequences.
    18344 
    18345 ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
    18346 sed -n '
    18347 s/.\{148\}/&'"$ac_delim"'/g
    18348 t rset
    18349 :rset
    18350 s/^[     ]*#[    ]*define[       ][      ]*/ /
    18351 t def
    18352 d
    18353 :def
    18354 s/\\$//
    18355 t bsnl
    18356 s/["\\]/\\&/g
    18357 s/^ \('"$ac_word_re"'\)\(([^()]*)\)[     ]*\(.*\)/P["\1"]="\2"\
    18358 D["\1"]=" \3"/p
    18359 s/^ \('"$ac_word_re"'\)[         ]*\(.*\)/D["\1"]=" \2"/p
    18360 d
    18361 :bsnl
    18362 s/["\\]/\\&/g
    18363 s/^ \('"$ac_word_re"'\)\(([^()]*)\)[     ]*\(.*\)/P["\1"]="\2"\
    18364 D["\1"]=" \3\\\\\\n"\\/p
    18365 t cont
    18366 s/^ \('"$ac_word_re"'\)[         ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p
    18367 t cont
    18368 d
    18369 :cont
    18370 n
    18371 s/.\{148\}/&'"$ac_delim"'/g
    18372 t clear
    18373 :clear
    18374 s/\\$//
    18375 t bsnlc
    18376 s/["\\]/\\&/g; s/^/"/; s/$/"/p
    18377 d
    18378 :bsnlc
    18379 s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p
    18380 b cont
    18381 ' <confdefs.h | sed '
    18382 s/'"$ac_delim"'/"\\\
    18383 "/g' >>$CONFIG_STATUS || ac_write_fail=1
    18384 
    18385 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
    18386   for (key in D) D_is_set[key] = 1
    18387   FS = ""
    18388 }
    18389 /^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ {
    18390   line = \$ 0
    18391   split(line, arg, " ")
    18392   if (arg[1] == "#") {
    18393     defundef = arg[2]
    18394     mac1 = arg[3]
    18395   } else {
    18396     defundef = substr(arg[1], 2)
    18397     mac1 = arg[2]
    18398   }
    18399   split(mac1, mac2, "(") #)
    18400   macro = mac2[1]
    18401   prefix = substr(line, 1, index(line, defundef) - 1)
    18402   if (D_is_set[macro]) {
    18403     # Preserve the white space surrounding the "#".
    18404     print prefix "define", macro P[macro] D[macro]
    18405     next
    18406   } else {
    18407     # Replace #undef with comments.  This is necessary, for example,
    18408     # in the case of _POSIX_SOURCE, which is predefined and required
    18409     # on some systems where configure will not decide to define it.
    18410     if (defundef == "undef") {
    18411       print "/*", prefix defundef, macro, "*/"
    18412       next
    18413     }
    18414   }
    18415 }
    18416 { print }
    18417 _ACAWK
    18418 _ACEOF
    18419 cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
    18420   as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
    18421 fi # test -n "$CONFIG_HEADERS"
    18422 
    18423 
    18424 eval set X "  :F $CONFIG_FILES  :H $CONFIG_HEADERS    :C $CONFIG_COMMANDS"
     18211
     18212eval set X "  :F $CONFIG_FILES      :C $CONFIG_COMMANDS"
    1842518213shift
    1842618214for ac_tag
     
    1864118429  || as_fn_error $? "could not create $ac_file" "$LINENO" 5
    1864218430 ;;
    18643   :H)
    18644   #
    18645   # CONFIG_HEADER
    18646   #
    18647   if test x"$ac_file" != x-; then
    18648     {
    18649       $as_echo "/* $configure_input  */" \
    18650       && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs"
    18651     } >"$ac_tmp/config.h" \
    18652       || as_fn_error $? "could not create $ac_file" "$LINENO" 5
    18653     if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then
    18654       { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
    18655 $as_echo "$as_me: $ac_file is unchanged" >&6;}
    18656     else
    18657       rm -f "$ac_file"
    18658       mv "$ac_tmp/config.h" "$ac_file" \
    18659         || as_fn_error $? "could not create $ac_file" "$LINENO" 5
    18660     fi
    18661   else
    18662     $as_echo "/* $configure_input  */" \
    18663       && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \
    18664       || as_fn_error $? "could not create -" "$LINENO" 5
    18665   fi
    18666 # Compute "$ac_file"'s index in $config_headers.
    18667 _am_arg="$ac_file"
    18668 _am_stamp_count=1
    18669 for _am_header in $config_headers :; do
    18670   case $_am_header in
    18671     $_am_arg | $_am_arg:* )
    18672       break ;;
    18673     * )
    18674       _am_stamp_count=`expr $_am_stamp_count + 1` ;;
    18675   esac
    18676 done
    18677 echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" ||
    18678 $as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
    18679          X"$_am_arg" : 'X\(//\)[^/]' \| \
    18680          X"$_am_arg" : 'X\(//\)$' \| \
    18681          X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null ||
    18682 $as_echo X"$_am_arg" |
    18683     sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
    18684             s//\1/
    18685             q
    18686           }
    18687           /^X\(\/\/\)[^/].*/{
    18688             s//\1/
    18689             q
    18690           }
    18691           /^X\(\/\/\)$/{
    18692             s//\1/
    18693             q
    18694           }
    18695           /^X\(\/\).*/{
    18696             s//\1/
    18697             q
    18698           }
    18699           s/.*/./; q'`/stamp-h$_am_stamp_count
    18700  ;;
     18431
    1870118432
    1870218433  :C)  { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5
     
    1885618587macro_revision=$macro_revision
    1885718588
     18589# Whether or not to build shared libraries.
     18590build_libtool_libs=$enable_shared
     18591
    1885818592# Whether or not to build static libraries.
    1885918593build_old_libs=$enable_static
    18860 
    18861 # Whether or not to build shared libraries.
    18862 build_libtool_libs=$enable_shared
    1886318594
    1886418595# What type of objects to build.
  • libcfa/configure.ac

    r6a490b2 rb7d6a36  
    109109
    110110# Checks for programs.
    111 LT_INIT([disable-static])
     111LT_INIT
    112112
    113113AC_PROG_CXX
     
    118118AC_PROG_MAKE_SET
    119119
    120 AC_CHECK_HEADERS([linux/io_uring.h])
    121 AC_CHECK_FUNCS([preadv2 pwritev2])
    122 
    123120AC_CONFIG_FILES([
    124121        Makefile
     
    127124        ])
    128125
    129 AC_CONFIG_HEADERS(prelude/defines.hfa)
    130 
    131126AC_OUTPUT()
    132127
  • libcfa/prelude/Makefile.am

    r6a490b2 rb7d6a36  
    2121# put into lib for now
    2222cfalibdir = ${CFA_LIBDIR}
    23 cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c defines.hfa
     23cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c
    2424
    2525CC = @LOCAL_CFACC@
  • libcfa/prelude/Makefile.in

    r6a490b2 rb7d6a36  
    1 # Makefile.in generated by automake 1.15 from Makefile.am.
     1# Makefile.in generated by automake 1.16.1 from Makefile.am.
    22# @configure_input@
    33
    4 # Copyright (C) 1994-2014 Free Software Foundation, Inc.
     4# Copyright (C) 1994-2018 Free Software Foundation, Inc.
    55
    66# This Makefile.in is free software; the Free Software Foundation
     
    104104DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
    105105mkinstalldirs = $(install_sh) -d
    106 CONFIG_HEADER = defines.hfa
    107106CONFIG_CLEAN_FILES =
    108107CONFIG_CLEAN_VPATH_FILES =
     
    155154am__installdirs = "$(DESTDIR)$(cfalibdir)"
    156155DATA = $(cfalib_DATA)
    157 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \
    158         $(LISP)defines.hfa.in
    159 # Read a list of newline-separated strings from the standard input,
    160 # and print each of them once, without duplicates.  Input order is
    161 # *not* preserved.
    162 am__uniquify_input = $(AWK) '\
    163   BEGIN { nonempty = 0; } \
    164   { items[$$0] = 1; nonempty = 1; } \
    165   END { if (nonempty) { for (i in items) print i; }; } \
    166 '
    167 # Make sure the list of sources is unique.  This is necessary because,
    168 # e.g., the same source file might be shared among _SOURCES variables
    169 # for different programs/libraries.
    170 am__define_uniq_tagged_files = \
    171   list='$(am__tagged_files)'; \
    172   unique=`for i in $$list; do \
    173     if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
    174   done | $(am__uniquify_input)`
    175 ETAGS = etags
    176 CTAGS = ctags
    177 am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/defines.hfa.in
     156am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
     157am__DIST_COMMON = $(srcdir)/Makefile.in
    178158DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
    179159ACLOCAL = @ACLOCAL@
     
    326306# put into lib for now
    327307cfalibdir = ${CFA_LIBDIR}
    328 cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c defines.hfa
     308cfalib_DATA = gcc-builtins.cf builtins.cf extras.cf prelude.cfa bootloader.c
    329309AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC @ARCH_FLAGS@ @CONFIG_CFLAGS@
    330310AM_CFAFLAGS = @CONFIG_CFAFLAGS@
    331311MOSTLYCLEANFILES = bootloader.c builtins.cf extras.cf gcc-builtins.c gcc-builtins.cf prelude.cfa
    332312MAINTAINERCLEANFILES = ${addprefix ${libdir}/,${cfalib_DATA}} ${addprefix ${libdir}/,${lib_LIBRARIES}}
    333 all: defines.hfa
    334         $(MAKE) $(AM_MAKEFLAGS) all-am
     313all: all-am
    335314
    336315.SUFFIXES:
     
    352331            cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
    353332          *) \
    354             echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
    355             cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
     333            echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
     334            cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
    356335        esac;
    357336
     
    364343        cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
    365344$(am__aclocal_m4_deps):
    366 
    367 defines.hfa: stamp-h1
    368         @test -f $@ || rm -f stamp-h1
    369         @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1
    370 
    371 stamp-h1: $(srcdir)/defines.hfa.in $(top_builddir)/config.status
    372         @rm -f stamp-h1
    373         cd $(top_builddir) && $(SHELL) ./config.status prelude/defines.hfa
    374 $(srcdir)/defines.hfa.in:  $(am__configure_deps)
    375         ($(am__cd) $(top_srcdir) && $(AUTOHEADER))
    376         rm -f stamp-h1
    377         touch $@
    378 
    379 distclean-hdr:
    380         -rm -f defines.hfa stamp-h1
    381345
    382346mostlyclean-libtool:
     
    406370        files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
    407371        dir='$(DESTDIR)$(cfalibdir)'; $(am__uninstall_files_from_dir)
    408 
    409 ID: $(am__tagged_files)
    410         $(am__define_uniq_tagged_files); mkid -fID $$unique
    411 tags: tags-am
    412 TAGS: tags
    413 
    414 tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
    415         set x; \
    416         here=`pwd`; \
    417         $(am__define_uniq_tagged_files); \
    418         shift; \
    419         if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
    420           test -n "$$unique" || unique=$$empty_fix; \
    421           if test $$# -gt 0; then \
    422             $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
    423               "$$@" $$unique; \
    424           else \
    425             $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
    426               $$unique; \
    427           fi; \
    428         fi
    429 ctags: ctags-am
    430 
    431 CTAGS: ctags
    432 ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
    433         $(am__define_uniq_tagged_files); \
    434         test -z "$(CTAGS_ARGS)$$unique" \
    435           || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
    436              $$unique
    437 
    438 GTAGS:
    439         here=`$(am__cd) $(top_builddir) && pwd` \
    440           && $(am__cd) $(top_srcdir) \
    441           && gtags -i $(GTAGS_ARGS) "$$here"
    442 cscopelist: cscopelist-am
    443 
    444 cscopelist-am: $(am__tagged_files)
    445         list='$(am__tagged_files)'; \
    446         case "$(srcdir)" in \
    447           [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
    448           *) sdir=$(subdir)/$(srcdir) ;; \
    449         esac; \
    450         for i in $$list; do \
    451           if test -f "$$i"; then \
    452             echo "$(subdir)/$$i"; \
    453           else \
    454             echo "$$sdir/$$i"; \
    455           fi; \
    456         done >> $(top_builddir)/cscope.files
    457 
    458 distclean-tags:
    459         -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
    460 
    461 distdir: $(DISTFILES)
     372tags TAGS:
     373
     374ctags CTAGS:
     375
     376cscope cscopelist:
     377
     378
     379distdir: $(BUILT_SOURCES)
     380        $(MAKE) $(AM_MAKEFLAGS) distdir-am
     381
     382distdir-am: $(DISTFILES)
    462383        @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
    463384        topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
     
    491412check-am: all-am
    492413check: check-am
    493 all-am: Makefile $(DATA) defines.hfa
     414all-am: Makefile $(DATA)
    494415installdirs:
    495416        for dir in "$(DESTDIR)$(cfalibdir)"; do \
     
    534455distclean: distclean-am
    535456        -rm -f Makefile
    536 distclean-am: clean-am distclean-generic distclean-hdr distclean-tags
     457distclean-am: clean-am distclean-generic
    537458
    538459dvi: dvi-am
     
    595516uninstall-am: uninstall-cfalibDATA
    596517
    597 .MAKE: all install-am install-strip
    598 
    599 .PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
    600         clean-libtool cscopelist-am ctags ctags-am distclean \
    601         distclean-generic distclean-hdr distclean-libtool \
    602         distclean-tags distdir dvi dvi-am html html-am info info-am \
     518.MAKE: install-am install-strip
     519
     520.PHONY: all all-am check check-am clean clean-generic clean-libtool \
     521        cscopelist-am ctags-am distclean distclean-generic \
     522        distclean-libtool distdir dvi dvi-am html html-am info info-am \
    603523        install install-am install-cfalibDATA install-data \
    604524        install-data-am install-dvi install-dvi-am install-exec \
     
    609529        maintainer-clean-generic maintainer-clean-local mostlyclean \
    610530        mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
    611         tags tags-am uninstall uninstall-am uninstall-cfalibDATA
     531        tags-am uninstall uninstall-am uninstall-cfalibDATA
    612532
    613533.PRECIOUS: Makefile
  • libcfa/prelude/builtins.c

    r6a490b2 rb7d6a36  
    4848void exit( int status, const char fmt[], ... ) __attribute__ (( format(printf, 2, 3), __nothrow__, __leaf__, __noreturn__ ));
    4949void abort( const char fmt[], ... ) __attribute__ (( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
    50 
    51 forall(dtype T)
    52 static inline T & identity(T & i) {
    53         return i;
    54 }
    55 
    56 // generator support
    57 struct $generator {
    58         inline int;
    59 };
    60 
    61 static inline void  ?{}($generator & this) { ((int&)this) = 0; }
    62 static inline void ^?{}($generator &) {}
    63 
    64 trait is_generator(dtype T) {
    65       void main(T & this);
    66       $generator * get_generator(T & this);
    67 };
    68 
    69 forall(dtype T | is_generator(T))
    70 static inline T & resume(T & gen) {
    71         main(gen);
    72         return gen;
    73 }
    7450
    7551// implicit increment, decrement if += defined, and implicit not if != defined
  • libcfa/src/Makefile.am

    r6a490b2 rb7d6a36  
    1111## Created On       : Sun May 31 08:54:01 2015
    1212## Last Modified By : Peter A. Buhr
    13 ## Last Modified On : Mon Mar 16 18:07:59 2020
    14 ## Update Count     : 242
     13## Last Modified On : Mon Jul 15 22:43:27 2019
     14## Update Count     : 241
    1515###############################################################################
    1616
     
    3333# The built sources must not depend on the installed headers
    3434AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@
    35 AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
     35AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
    3636AM_CCASFLAGS = -g -Wall -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@
    3737CFACC = @CFACC@
     
    3939#----------------------------------------------------------------------------------------------------------------
    4040if BUILDLIB
    41 headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa
     41headers_nosrc = math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa
    4242headers = fstream.hfa iostream.hfa iterator.hfa limits.hfa rational.hfa time.hfa stdlib.hfa common.hfa \
    4343          containers/maybe.hfa containers/pair.hfa containers/result.hfa containers/vector.hfa
     
    4848thread_headers_nosrc = concurrency/invoke.h
    4949thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    50 thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/io.cfa concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
     50thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
    5151else
    5252headers =
  • libcfa/src/Makefile.in

    r6a490b2 rb7d6a36  
    105105        $(am__nobase_cfa_include_HEADERS_DIST) $(am__DIST_COMMON)
    106106mkinstalldirs = $(install_sh) -d
    107 CONFIG_HEADER = $(top_builddir)/prelude/defines.hfa
    108107CONFIG_CLEAN_FILES =
    109108CONFIG_CLEAN_VPATH_FILES =
     
    165164am__libcfathread_la_SOURCES_DIST =  \
    166165        concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa \
    167         concurrency/invoke.c concurrency/io.cfa \
    168         concurrency/preemption.cfa concurrency/ready_queue.cfa \
    169         concurrency/coroutine.cfa concurrency/thread.cfa \
    170         concurrency/kernel.cfa concurrency/monitor.cfa \
    171         concurrency/mutex.cfa
     166        concurrency/invoke.c concurrency/preemption.cfa \
     167        concurrency/ready_queue.cfa concurrency/coroutine.cfa \
     168        concurrency/thread.cfa concurrency/kernel.cfa \
     169        concurrency/monitor.cfa concurrency/mutex.cfa
    172170@BUILDLIB_TRUE@am__objects_3 = concurrency/coroutine.lo \
    173171@BUILDLIB_TRUE@ concurrency/thread.lo concurrency/kernel.lo \
     
    176174@BUILDLIB_TRUE@ concurrency/CtxSwitch-@ARCHITECTURE@.lo \
    177175@BUILDLIB_TRUE@ concurrency/alarm.lo concurrency/invoke.lo \
    178 @BUILDLIB_TRUE@ concurrency/io.lo concurrency/preemption.lo \
     176@BUILDLIB_TRUE@ concurrency/preemption.lo \
    179177@BUILDLIB_TRUE@ concurrency/ready_queue.lo $(am__objects_3)
    180178am_libcfathread_la_OBJECTS = $(am__objects_4)
     
    196194am__v_at_0 = @
    197195am__v_at_1 =
    198 DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/prelude
     196DEFAULT_INCLUDES = -I.@am__isrc@
    199197depcomp = $(SHELL) $(top_srcdir)/automake/depcomp
    200198am__depfiles_maybe = depfiles
     
    240238        limits.hfa rational.hfa time.hfa stdlib.hfa common.hfa \
    241239        containers/maybe.hfa containers/pair.hfa containers/result.hfa \
    242         containers/vector.hfa bitmanip.hfa math.hfa gmp.hfa time_t.hfa \
     240        containers/vector.hfa math.hfa gmp.hfa time_t.hfa \
    243241        bits/align.hfa bits/containers.hfa bits/defs.hfa \
    244         bits/debug.hfa bits/locks.hfa containers/list.hfa \
    245         concurrency/coroutine.hfa concurrency/thread.hfa \
    246         concurrency/kernel.hfa concurrency/monitor.hfa \
    247         concurrency/mutex.hfa concurrency/invoke.h
     242        bits/debug.hfa bits/locks.hfa concurrency/coroutine.hfa \
     243        concurrency/thread.hfa concurrency/kernel.hfa \
     244        concurrency/monitor.hfa concurrency/mutex.hfa \
     245        concurrency/invoke.h
    248246HEADERS = $(nobase_cfa_include_HEADERS)
    249247am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
     
    435433am__v_GOC_0 = @echo "  GOC     " $@;
    436434am__v_GOC_1 =
    437 AM_V_PY = $(am__v_PY_@AM_V@)
    438 am__v_PY_ = $(am__v_PY_@AM_DEFAULT_V@)
    439 am__v_PY_0 = @echo "  PYTHON  " $@;
    440 am__v_PY_1 =
    441435AM_V_RUST = $(am__v_RUST_@AM_V@)
    442436am__v_RUST_ = $(am__v_RUST_@AM_DEFAULT_V@)
    443 am__v_RUST_0 = @echo "  RUST    " $@;
     437am__v_RUST_0 = @echo "  RUST     " $@;
    444438am__v_RUST_1 =
    445439AM_V_NODEJS = $(am__v_NODEJS_@AM_V@)
    446440am__v_NODEJS_ = $(am__v_NODEJS_@AM_DEFAULT_V@)
    447 am__v_NODEJS_0 = @echo "  NODEJS  " $@;
     441am__v_NODEJS_0 = @echo "  NODEJS     " $@;
    448442am__v_NODEJS_1 =
    449443AM_V_JAVAC = $(am__v_JAVAC_@AM_V@)
     
    459453# The built sources must not depend on the installed headers
    460454AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@
    461 AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
     455AM_CFLAGS = -g -Wall -Wno-unused-function -fPIC -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
    462456AM_CCASFLAGS = -g -Wall -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@
    463457@BUILDLIB_FALSE@headers_nosrc =
    464458
    465459#----------------------------------------------------------------------------------------------------------------
    466 @BUILDLIB_TRUE@headers_nosrc = bitmanip.hfa math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa containers/list.hfa
     460@BUILDLIB_TRUE@headers_nosrc = math.hfa gmp.hfa time_t.hfa bits/align.hfa bits/containers.hfa bits/defs.hfa bits/debug.hfa bits/locks.hfa
    467461@BUILDLIB_FALSE@headers =
    468462@BUILDLIB_TRUE@headers = fstream.hfa iostream.hfa iterator.hfa limits.hfa rational.hfa time.hfa stdlib.hfa common.hfa \
     
    477471@BUILDLIB_FALSE@thread_headers =
    478472@BUILDLIB_TRUE@thread_headers = concurrency/coroutine.hfa concurrency/thread.hfa concurrency/kernel.hfa concurrency/monitor.hfa concurrency/mutex.hfa
    479 @BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/io.cfa concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
     473@BUILDLIB_TRUE@thread_libsrc = concurrency/CtxSwitch-@ARCHITECTURE@.S concurrency/alarm.cfa concurrency/invoke.c concurrency/preemption.cfa concurrency/ready_queue.cfa ${thread_headers:.hfa=.cfa}
    480474
    481475#----------------------------------------------------------------------------------------------------------------
     
    611605        concurrency/$(DEPDIR)/$(am__dirstamp)
    612606concurrency/invoke.lo: concurrency/$(am__dirstamp) \
    613         concurrency/$(DEPDIR)/$(am__dirstamp)
    614 concurrency/io.lo: concurrency/$(am__dirstamp) \
    615607        concurrency/$(DEPDIR)/$(am__dirstamp)
    616608concurrency/preemption.lo: concurrency/$(am__dirstamp) \
  • libcfa/src/bits/containers.hfa

    r6a490b2 rb7d6a36  
    146146        static inline forall( dtype T | is_node(T) ) {
    147147                void ?{}( __queue(T) & this ) with( this ) {
    148                         head{ 1p };
     148                        head{ 0p };
    149149                        tail{ &head };
    150                         verify(*tail == 1p);
    151150                }
    152151
    153152                void append( __queue(T) & this, T * val ) with( this ) {
    154153                        verify(tail != 0p);
    155                         verify(*tail == 1p);
    156154                        *tail = val;
    157155                        tail = &get_next( *val );
    158                         *tail = 1p;
    159156                }
    160157
    161158                T * pop_head( __queue(T) & this ) {
    162                         verify(*this.tail == 1p);
    163159                        T * head = this.head;
    164                         if( head != 1p ) {
     160                        if( head ) {
    165161                                this.head = get_next( *head );
    166                                 if( get_next( *head ) == 1p ) {
     162                                if( !get_next( *head ) ) {
    167163                                        this.tail = &this.head;
    168164                                }
    169165                                get_next( *head ) = 0p;
    170                                 verify(*this.tail == 1p);
    171                                 verify( get_next(*head) == 0p );
    172                                 return head;
    173                         }
    174                         verify(*this.tail == 1p);
    175                         return 0p;
     166                        }
     167                        return head;
    176168                }
    177169
     
    188180                        get_next( *val ) = 0p;
    189181
    190                         verify( (head == 1p) == (&head == tail) );
    191                         verify( *tail == 1p );
     182                        verify( (head == 0p) == (&head == tail) );
     183                        verify( *tail == 0p );
    192184                        return val;
    193185                }
     
    274266                        return this.head != 0;
    275267                }
    276 
    277                 void move_to_front( __dllist(T) & src, __dllist(T) & dst, T & node ) {
    278                         remove    (src, node);
    279                         push_front(dst, node);
    280                 }
    281268        }
    282269        #undef next
  • libcfa/src/bits/debug.hfa

    r6a490b2 rb7d6a36  
    99// Author           : Thierry Delisle
    1010// Created On       : Mon Nov 28 12:27:26 2016
    11 // Last Modified By : Andrew Beach
    12 // Last Modified On : Mon Apr 27 10:15:00 2020
    13 // Update Count     : 10
     11// Last Modified By : Peter A. Buhr
     12// Last Modified On : Tue Feb  4 12:29:21 2020
     13// Update Count     : 9
    1414//
    1515
     
    2323        #define __cfaabi_dbg_ctx_param const char caller[]
    2424        #define __cfaabi_dbg_ctx_param2 , const char caller[]
    25         #define __cfaabi_dbg_ctx_fwd caller
    26         #define __cfaabi_dbg_ctx_fwd2 , caller
    2725#else
    2826        #define __cfaabi_dbg_debug_do(...)
     
    3230        #define __cfaabi_dbg_ctx_param
    3331        #define __cfaabi_dbg_ctx_param2
    34         #define __cfaabi_dbg_ctx_fwd
    35         #define __cfaabi_dbg_ctx_fwd2
    3632#endif
    3733
     
    4036#endif
    4137        #include <stdarg.h>
     38        #include <stdio.h>
     39        #include <unistd.h>
    4240
    4341        extern void __cfaabi_bits_write( int fd, const char buffer[], int len );
     
    4846        extern void __cfaabi_bits_print_vararg( int fd, const char fmt[], va_list arg );
    4947        extern void __cfaabi_bits_print_buffer( int fd, char buffer[], int buffer_size, const char fmt[], ... ) __attribute__(( format(printf, 4, 5) ));
    50 
    51 #if defined(__CFA_DEBUG_PRINT__) \
    52                 || defined(__CFA_DEBUG_PRINT_IO__) || defined(__CFA_DEBUG_PRINT_IO_CORE__) \
    53                 || defined(__CFA_DEBUG_PRINT_MONITOR__) || defined(__CFA_DEBUG_PRINT_PREEMPTION__) \
    54                 || defined(__CFA_DEBUG_PRINT_RUNTIME_CORE__) || defined(__CFA_DEBUG_PRINT_EXCEPTION__)
    55         #include <stdio.h>
    56         #include <unistd.h>
    57 #endif
    5848#ifdef __cforall
    5949}
    6050#endif
    6151
    62 // Deprecated: Use the versions with the new module names.
     52// #define __CFA_DEBUG_PRINT__
     53
    6354#ifdef __CFA_DEBUG_PRINT__
    6455        #define __cfaabi_dbg_write( buffer, len )         __cfaabi_bits_write( STDERR_FILENO, buffer, len )
    6556        #define __cfaabi_dbg_acquire()                    __cfaabi_bits_acquire()
    6657        #define __cfaabi_dbg_release()                    __cfaabi_bits_release()
    67         #define __cfaabi_dbg_print_safe(...)              __cfaabi_bits_print_safe   ( STDERR_FILENO, __VA_ARGS__ )
    68         #define __cfaabi_dbg_print_nolock(...)            __cfaabi_bits_print_nolock ( STDERR_FILENO, __VA_ARGS__ )
    69         #define __cfaabi_dbg_print_buffer(...)            __cfaabi_bits_print_buffer ( STDERR_FILENO, __VA_ARGS__ )
     58        #define __cfaabi_dbg_print_safe(...)              __cfaabi_bits_print_safe  ( STDERR_FILENO, __VA_ARGS__ )
     59        #define __cfaabi_dbg_print_nolock(...)            __cfaabi_bits_print_nolock( STDERR_FILENO, __VA_ARGS__ )
     60        #define __cfaabi_dbg_print_buffer(...)            __cfaabi_bits_print_buffer( STDERR_FILENO, __VA_ARGS__ )
    7061        #define __cfaabi_dbg_print_buffer_decl(...)       char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( STDERR_FILENO, __dbg_text, __dbg_len );
    71         #define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_dbg_write( STDERR_FILENO, __dbg_text, __dbg_len );
     62        #define __cfaabi_dbg_print_buffer_local(...)      __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( STDERR_FILENO, __dbg_text, __dbg_len );
    7263#else
    7364        #define __cfaabi_dbg_write(...)               ((void)0)
     
    8172#endif
    8273
    83 // Debug print functions and statements:
    84 // Most are wrappers around the bits printing function but are not always used.
    85 // If they are used depends if the group (first argument) is active or not. The group must be one
    86 // defined belowe. The other arguments depend on the wrapped function.
    87 #define __cfadbg_write(group, buffer, len) \
    88         __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_write(STDERR_FILENO, buffer, len))
    89 #define __cfadbg_acquire(group) \
    90         __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_acquire())
    91 #define __cfadbg_release(group) \
    92         __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_release())
    93 #define __cfadbg_print_safe(group, ...) \
    94         __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_safe(STDERR_FILENO, __VA_ARGS__))
    95 #define __cfadbg_print_nolock(group, ...) \
    96         __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_nolock(STDERR_FILENO, __VA_ARGS__))
    97 #define __cfadbg_print_buffer(group, ...) \
    98         __CFADBG_PRINT_GROUP_##group(__cfaabi_bits_print_buffer(STDERR_FILENO, __VA_ARGS__))
    99 #define __cfadbg_print_buffer_decl(group, ...) \
    100         __CFADBG_PRINT_GROUP_##group(char __dbg_text[256]; int __dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write( __dbg_text, __dbg_len ))
    101 #define __cfadbg_print_buffer_local(group, ...) \
    102         __CFADBG_PRINT_GROUP_##group(__dbg_len = snprintf( __dbg_text, 256, __VA_ARGS__ ); __cfaabi_bits_write(STDERR_FILENO, __dbg_text, __dbg_len))
    103 
    104 // The debug print groups:
    105 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_IO__)
    106 #       define __CFADBG_PRINT_GROUP_io(...) __VA_ARGS__
    107 #else
    108 #       define __CFADBG_PRINT_GROUP_io(...) ((void)0)
    109 #endif
    110 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_IO__) || defined(__CFA_DEBUG_PRINT_IO_CORE__)
    111 #       define __CFADBG_PRINT_GROUP_io_core(...) __VA_ARGS__
    112 #else
    113 #       define __CFADBG_PRINT_GROUP_io_core(...) ((void)0)
    114 #endif
    115 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_MONITOR__)
    116 #       define __CFADBG_PRINT_GROUP_monitor(...) __VA_ARGS__
    117 #else
    118 #       define __CFADBG_PRINT_GROUP_monitor(...) ((void)0)
    119 #endif
    120 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_PREEMPTION__)
    121 #       define __CFADBG_PRINT_GROUP_preemption(...) __VA_ARGS__
    122 #else
    123 #       define __CFADBG_PRINT_GROUP_preemption(...) ((void)0)
    124 #endif
    125 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_RUNTIME_CORE__)
    126 #       define __CFADBG_PRINT_GROUP_runtime_core(...) __VA_ARGS__
    127 #else
    128 #       define __CFADBG_PRINT_GROUP_runtime_core(...) ((void)0)
    129 #endif
    130 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_READY_QUEUE__)
    131 #       define __CFADBG_PRINT_GROUP_ready_queue(...) __VA_ARGS__
    132 #else
    133 #       define __CFADBG_PRINT_GROUP_ready_queue(...) ((void)0)
    134 #endif
    135 #if defined(__CFA_DEBUG_PRINT__) || defined(__CFA_DEBUG_PRINT_EXCEPTION__)
    136 #       define __CFADBG_PRINT_GROUP_exception(...) __VA_ARGS__
    137 #else
    138 #       define __CFADBG_PRINT_GROUP_exception(...) ((void)0)
    139 #endif
    140 
    14174// Local Variables: //
    14275// mode: c //
  • libcfa/src/bits/locks.hfa

    r6a490b2 rb7d6a36  
    5454
    5555                #ifdef __CFA_DEBUG__
    56                         void __cfaabi_dbg_record_lock(__spinlock_t & this, const char prev_name[]);
     56                        void __cfaabi_dbg_record(__spinlock_t & this, const char prev_name[]);
    5757                #else
    58                         #define __cfaabi_dbg_record_lock(x, y)
     58                        #define __cfaabi_dbg_record(x, y)
    5959                #endif
    6060        }
     61
     62        extern void yield( unsigned int );
    6163
    6264        static inline void ?{}( __spinlock_t & this ) {
     
    6668        // Lock the spinlock, return false if already acquired
    6769        static inline bool try_lock  ( __spinlock_t & this __cfaabi_dbg_ctx_param2 ) {
    68                 disable_interrupts();
    6970                bool result = (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0);
    7071                if( result ) {
    71                         __cfaabi_dbg_record_lock( this, caller );
    72                 } else {
    73                         enable_interrupts_noPoll();
     72                        disable_interrupts();
     73                        __cfaabi_dbg_record( this, caller );
    7474                }
    7575                return result;
     
    8383                #endif
    8484
    85                 disable_interrupts();
    8685                for ( unsigned int i = 1;; i += 1 ) {
    8786                        if ( (this.lock == 0) && (__atomic_test_and_set( &this.lock, __ATOMIC_ACQUIRE ) == 0) ) break;
     
    9998                        #endif
    10099                }
    101                 __cfaabi_dbg_record_lock( this, caller );
     100                disable_interrupts();
     101                __cfaabi_dbg_record( this, caller );
    102102        }
    103103
    104104        static inline void unlock( __spinlock_t & this ) {
     105                enable_interrupts_noPoll();
    105106                __atomic_clear( &this.lock, __ATOMIC_RELEASE );
    106                 enable_interrupts_noPoll();
    107107        }
    108108
     
    112112        #endif
    113113
    114         extern "C" {
    115                 char * strerror(int);
    116         }
    117         #define CHECKED(x) { int err = x; if( err != 0 ) abort("KERNEL ERROR: Operation \"" #x "\" return error %d - %s\n", err, strerror(err)); }
    118 
    119114        struct __bin_sem_t {
     115                bool                    signaled;
    120116                pthread_mutex_t         lock;
    121117                pthread_cond_t          cond;
    122                 int                     val;
    123118        };
    124119
    125120        static inline void ?{}(__bin_sem_t & this) with( this ) {
    126                 // Create the mutex with error checking
    127                 pthread_mutexattr_t mattr;
    128                 pthread_mutexattr_init( &mattr );
    129                 pthread_mutexattr_settype( &mattr, PTHREAD_MUTEX_ERRORCHECK_NP);
    130                 pthread_mutex_init(&lock, &mattr);
    131 
    132                 pthread_cond_init (&cond, 0p);
    133                 val = 0;
     121                signaled = false;
     122                pthread_mutex_init(&lock, NULL);
     123                pthread_cond_init (&cond, NULL);
    134124        }
    135125
    136126        static inline void ^?{}(__bin_sem_t & this) with( this ) {
    137                 CHECKED( pthread_mutex_destroy(&lock) );
    138                 CHECKED( pthread_cond_destroy (&cond) );
     127                pthread_mutex_destroy(&lock);
     128                pthread_cond_destroy (&cond);
    139129        }
    140130
    141131        static inline void wait(__bin_sem_t & this) with( this ) {
    142132                verify(__cfaabi_dbg_in_kernel());
    143                 CHECKED( pthread_mutex_lock(&lock) );
    144                         while(val < 1) {
     133                pthread_mutex_lock(&lock);
     134                        if(!signaled) {   // this must be a loop, not if!
    145135                                pthread_cond_wait(&cond, &lock);
    146136                        }
    147                         val -= 1;
    148                 CHECKED( pthread_mutex_unlock(&lock) );
     137                        signaled = false;
     138                pthread_mutex_unlock(&lock);
    149139        }
    150140
    151         static inline bool post(__bin_sem_t & this) with( this ) {
    152                 bool needs_signal = false;
     141        static inline void post(__bin_sem_t & this) with( this ) {
     142                verify(__cfaabi_dbg_in_kernel());
    153143
    154                 CHECKED( pthread_mutex_lock(&lock) );
    155                         if(val < 1) {
    156                                 val += 1;
    157                                 pthread_cond_signal(&cond);
    158                                 needs_signal = true;
    159                         }
    160                 CHECKED( pthread_mutex_unlock(&lock) );
     144                pthread_mutex_lock(&lock);
     145                        bool needs_signal = !signaled;
     146                        signaled = true;
     147                pthread_mutex_unlock(&lock);
    161148
    162                 return needs_signal;
     149                if (needs_signal)
     150                        pthread_cond_signal(&cond);
    163151        }
    164 
    165         #undef CHECKED
    166152#endif
  • libcfa/src/bits/signal.hfa

    r6a490b2 rb7d6a36  
    5454                        sig, handler, flags, errno, strerror( errno )
    5555                );
    56                 _Exit( EXIT_FAILURE );
     56                _exit( EXIT_FAILURE );
    5757        } // if
    5858}
  • libcfa/src/concurrency/CtxSwitch-arm.S

    r6a490b2 rb7d6a36  
    1313        .text
    1414        .align  2
    15         .global __cfactx_switch
    16         .type   __cfactx_switch, %function
     15        .global CtxSwitch
     16        .type   CtxSwitch, %function
    1717
    18 __cfactx_switch:
     18CtxSwitch:
    1919        @ save callee-saved registers: r4-r8, r10, r11, r13(sp) (plus r9 depending on platform specification)
    2020        @ I've seen reference to 31 registers on 64-bit, if this is the case, more need to be saved
     
    5252        mov r15, r14
    5353        #endif // R9_SPECIAL
    54 
     54       
    5555        .text
    5656        .align  2
    57         .global __cfactx_invoke_stub
    58         .type   __cfactx_invoke_stub, %function
     57        .global CtxInvokeStub
     58        .type   CtxInvokeStub, %function
    5959
    60 __cfactx_invoke_stub:
     60CtxInvokeStub:
    6161        ldmfd r13!, {r0-r1}
    6262        mov r15, r1
  • libcfa/src/concurrency/CtxSwitch-i386.S

    r6a490b2 rb7d6a36  
    4343        .text
    4444        .align 2
    45         .globl __cfactx_switch
    46         .type  __cfactx_switch, @function
    47 __cfactx_switch:
     45        .globl CtxSwitch
     46        .type  CtxSwitch, @function
     47CtxSwitch:
    4848
    4949        // Copy the "from" context argument from the stack to register eax
     
    8383
    8484        ret
    85         .size  __cfactx_switch, .-__cfactx_switch
     85        .size  CtxSwitch, .-CtxSwitch
    8686
    8787// Local Variables: //
  • libcfa/src/concurrency/CtxSwitch-x86_64.S

    r6a490b2 rb7d6a36  
    4444        .text
    4545        .align 2
    46         .globl __cfactx_switch
    47         .type  __cfactx_switch, @function
    48 __cfactx_switch:
     46        .globl CtxSwitch
     47        .type  CtxSwitch, @function
     48CtxSwitch:
    4949
    5050        // Save volatile registers on the stack.
     
    7777
    7878        ret
    79         .size  __cfactx_switch, .-__cfactx_switch
     79        .size  CtxSwitch, .-CtxSwitch
    8080
    8181//-----------------------------------------------------------------------------
     
    8383        .text
    8484        .align 2
    85         .globl __cfactx_invoke_stub
    86         .type    __cfactx_invoke_stub, @function
    87 __cfactx_invoke_stub:
     85        .globl CtxInvokeStub
     86        .type    CtxInvokeStub, @function
     87CtxInvokeStub:
    8888        movq %rbx, %rdi
    8989        movq %r12, %rsi
    9090        jmp *%r13
    91         .size  __cfactx_invoke_stub, .-__cfactx_invoke_stub
     91        .size  CtxInvokeStub, .-CtxInvokeStub
    9292
    9393// Local Variables: //
  • libcfa/src/concurrency/alarm.cfa

    r6a490b2 rb7d6a36  
    4747//=============================================================================================
    4848
    49 void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period ) with( this ) {
     49void ?{}( alarm_node_t & this, thread_desc * thrd, Time alarm, Duration period ) with( this ) {
    5050        this.thrd = thrd;
    5151        this.alarm = alarm;
    5252        this.period = period;
     53        next = 0;
    5354        set = false;
    5455        kernel_alarm = false;
     
    5960        this.alarm = alarm;
    6061        this.period = period;
     62        next = 0;
    6163        set = false;
    6264        kernel_alarm = true;
     
    6971}
    7072
    71 void insert( alarm_list_t * this, alarm_node_t * n ) {
    72         alarm_node_t * it = & (*this)`first;
    73         while( it && (n->alarm > it->alarm) ) {
    74                 it = & (*it)`next;
    75         }
    76         if ( it ) {
    77                 insert_before( *it, *n );
    78         } else {
    79                 insert_last(*this, *n);
     73#if !defined(NDEBUG) && (defined(__CFA_DEBUG__) || defined(__CFA_VERIFY__))
     74bool validate( alarm_list_t * this ) {
     75        alarm_node_t ** it = &this->head;
     76        while( (*it) ) {
     77                it = &(*it)->next;
    8078        }
    8179
    82         verify( validate( *this ) );
     80        return it == this->tail;
     81}
     82#endif
     83
     84static inline void insert_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t p ) {
     85        verify( !n->next );
     86        if( p == this->tail ) {
     87                this->tail = &n->next;
     88        }
     89        else {
     90                n->next = *p;
     91        }
     92        *p = n;
     93
     94        verify( validate( this ) );
     95}
     96
     97void insert( alarm_list_t * this, alarm_node_t * n ) {
     98        alarm_node_t ** it = &this->head;
     99        while( (*it) && (n->alarm > (*it)->alarm) ) {
     100                it = &(*it)->next;
     101        }
     102
     103        insert_at( this, n, it );
     104
     105        verify( validate( this ) );
    83106}
    84107
    85108alarm_node_t * pop( alarm_list_t * this ) {
    86         verify( validate( *this ) );
    87         alarm_node_t * head = & (*this)`first;
     109        alarm_node_t * head = this->head;
    88110        if( head ) {
    89                 remove(*head);
     111                this->head = head->next;
     112                if( !head->next ) {
     113                        this->tail = &this->head;
     114                }
     115                head->next = 0p;
    90116        }
    91         verify( validate( *this ) );
     117        verify( validate( this ) );
    92118        return head;
    93119}
    94120
     121static inline void remove_at( alarm_list_t * this, alarm_node_t * n, __alarm_it_t it ) {
     122        verify( it );
     123        verify( (*it) == n );
     124
     125        (*it) = n->next;
     126        if( !n-> next ) {
     127                this->tail = it;
     128        }
     129        n->next = 0p;
     130
     131        verify( validate( this ) );
     132}
     133
     134static inline void remove( alarm_list_t * this, alarm_node_t * n ) {
     135        alarm_node_t ** it = &this->head;
     136        while( (*it) && (*it) != n ) {
     137                it = &(*it)->next;
     138        }
     139
     140        verify( validate( this ) );
     141
     142        if( *it ) { remove_at( this, n, it ); }
     143
     144        verify( validate( this ) );
     145}
     146
    95147void register_self( alarm_node_t * this ) {
    96         alarm_list_t & alarms = event_kernel->alarms;
     148        alarm_list_t * alarms = &event_kernel->alarms;
    97149
    98150        disable_interrupts();
     
    100152        {
    101153                verify( validate( alarms ) );
    102                 bool first = ! & alarms`first;
     154                bool first = !alarms->head;
    103155
    104                 insert( &alarms, this );
     156                insert( alarms, this );
    105157                if( first ) {
    106                         __kernel_set_timer( alarms`first.alarm - __kernel_get_time() );
     158                        __kernel_set_timer( alarms->head->alarm - __kernel_get_time() );
    107159                }
    108160        }
     
    116168        lock( event_kernel->lock __cfaabi_dbg_ctx2 );
    117169        {
    118                 verify( validate( event_kernel->alarms ) );
    119                 remove( *this );
     170                verify( validate( &event_kernel->alarms ) );
     171                remove( &event_kernel->alarms, this );
    120172        }
    121173        unlock( event_kernel->lock );
     
    124176}
    125177
    126 //=============================================================================================
    127 // Utilities
    128 //=============================================================================================
    129 
    130 void sleep( Duration duration ) {
    131         alarm_node_t node = { active_thread(), __kernel_get_time() + duration, 0`s };
    132 
    133         register_self( &node );
    134         park( __cfaabi_dbg_ctx );
    135 
    136         /* paranoid */ verify( !node.set );
    137         /* paranoid */ verify( & node`next == 0p );
    138         /* paranoid */ verify( & node`prev == 0p );
    139 }
    140 
    141178// Local Variables: //
    142179// mode: c //
  • libcfa/src/concurrency/alarm.hfa

    r6a490b2 rb7d6a36  
    2323#include "time.hfa"
    2424
    25 #include <containers/list.hfa>
    26 
    27 struct $thread;
     25struct thread_desc;
    2826struct processor;
    2927
     
    4240        Time alarm;                             // time when alarm goes off
    4341        Duration period;                        // if > 0 => period of alarm
    44 
    45         DLISTED_MGD_IMPL_IN(alarm_node_t)
     42        alarm_node_t * next;            // intrusive link list field
    4643
    4744        union {
    48                 $thread * thrd; // thrd who created event
     45                thread_desc * thrd;     // thrd who created event
    4946                processor * proc;               // proc who created event
    5047        };
     
    5350        bool kernel_alarm       :1;             // true if this is not a user defined alarm
    5451};
    55 DLISTED_MGD_IMPL_OUT(alarm_node_t)
    5652
    57 void ?{}( alarm_node_t & this, $thread * thrd, Time alarm, Duration period );
     53typedef alarm_node_t ** __alarm_it_t;
     54
     55void ?{}( alarm_node_t & this, thread_desc * thrd, Time alarm, Duration period );
    5856void ?{}( alarm_node_t & this, processor   * proc, Time alarm, Duration period );
    5957void ^?{}( alarm_node_t & this );
    6058
    61 typedef dlist(alarm_node_t, alarm_node_t) alarm_list_t;
     59struct alarm_list_t {
     60        alarm_node_t * head;
     61        __alarm_it_t tail;
     62};
     63
     64static inline void ?{}( alarm_list_t & this ) with( this ) {
     65        head = 0;
     66        tail = &head;
     67}
    6268
    6369void insert( alarm_list_t * this, alarm_node_t * n );
  • libcfa/src/concurrency/coroutine.cfa

    r6a490b2 rb7d6a36  
    3737
    3838extern "C" {
    39         void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct $coroutine *) __attribute__ ((__noreturn__));
     39        void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc *) __attribute__ ((__noreturn__));
    4040        static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) __attribute__ ((__noreturn__));
    4141        static void _CtxCoroutine_UnwindCleanup(_Unwind_Reason_Code, struct _Unwind_Exception *) {
     
    8989}
    9090
    91 void ?{}( $coroutine & this, const char name[], void * storage, size_t storageSize ) with( this ) {
     91void ?{}( coroutine_desc & this, const char name[], void * storage, size_t storageSize ) with( this ) {
    9292        (this.context){0p, 0p};
    9393        (this.stack){storage, storageSize};
     
    9999}
    100100
    101 void ^?{}($coroutine& this) {
     101void ^?{}(coroutine_desc& this) {
    102102        if(this.state != Halted && this.state != Start && this.state != Primed) {
    103                 $coroutine * src = TL_GET( this_thread )->curr_cor;
    104                 $coroutine * dst = &this;
     103                coroutine_desc * src = TL_GET( this_thread )->curr_cor;
     104                coroutine_desc * dst = &this;
    105105
    106106                struct _Unwind_Exception storage;
     
    115115                }
    116116
    117                 $ctx_switch( src, dst );
     117                CoroutineCtxSwitch( src, dst );
    118118        }
    119119}
     
    123123forall(dtype T | is_coroutine(T))
    124124void prime(T& cor) {
    125         $coroutine* this = get_coroutine(cor);
     125        coroutine_desc* this = get_coroutine(cor);
    126126        assert(this->state == Start);
    127127
     
    187187// is not inline (We can't inline Cforall in C)
    188188extern "C" {
    189         void __cfactx_cor_leave( struct $coroutine * src ) {
    190                 $coroutine * starter = src->cancellation != 0 ? src->last : src->starter;
     189        void __leave_coroutine( struct coroutine_desc * src ) {
     190                coroutine_desc * starter = src->cancellation != 0 ? src->last : src->starter;
    191191
    192192                src->state = Halted;
     
    201201                        src->name, src, starter->name, starter );
    202202
    203                 $ctx_switch( src, starter );
    204         }
    205 
    206         struct $coroutine * __cfactx_cor_finish(void) {
    207                 struct $coroutine * cor = kernelTLS.this_thread->curr_cor;
     203                CoroutineCtxSwitch( src, starter );
     204        }
     205
     206        struct coroutine_desc * __finish_coroutine(void) {
     207                struct coroutine_desc * cor = kernelTLS.this_thread->curr_cor;
    208208
    209209                if(cor->state == Primed) {
    210                         __cfactx_suspend();
     210                        suspend();
    211211                }
    212212
  • libcfa/src/concurrency/coroutine.hfa

    r6a490b2 rb7d6a36  
    2525trait is_coroutine(dtype T) {
    2626      void main(T & this);
    27       $coroutine * get_coroutine(T & this);
     27      coroutine_desc * get_coroutine(T & this);
    2828};
    2929
    30 #define DECL_COROUTINE(X) static inline $coroutine* get_coroutine(X& this) { return &this.__cor; } void main(X& this)
     30#define DECL_COROUTINE(X) static inline coroutine_desc* get_coroutine(X& this) { return &this.__cor; } void main(X& this)
    3131
    3232//-----------------------------------------------------------------------------
     
    3535// void ^?{}( coStack_t & this );
    3636
    37 void  ?{}( $coroutine & this, const char name[], void * storage, size_t storageSize );
    38 void ^?{}( $coroutine & this );
     37void ?{}( coroutine_desc & this, const char name[], void * storage, size_t storageSize );
     38void ^?{}( coroutine_desc & this );
    3939
    40 static inline void ?{}( $coroutine & this)                                       { this{ "Anonymous Coroutine", 0p, 0 }; }
    41 static inline void ?{}( $coroutine & this, size_t stackSize)                     { this{ "Anonymous Coroutine", 0p, stackSize }; }
    42 static inline void ?{}( $coroutine & this, void * storage, size_t storageSize )  { this{ "Anonymous Coroutine", storage, storageSize }; }
    43 static inline void ?{}( $coroutine & this, const char name[])                    { this{ name, 0p, 0 }; }
    44 static inline void ?{}( $coroutine & this, const char name[], size_t stackSize ) { this{ name, 0p, stackSize }; }
     40static inline void ?{}( coroutine_desc & this)                                       { this{ "Anonymous Coroutine", 0p, 0 }; }
     41static inline void ?{}( coroutine_desc & this, size_t stackSize)                     { this{ "Anonymous Coroutine", 0p, stackSize }; }
     42static inline void ?{}( coroutine_desc & this, void * storage, size_t storageSize )  { this{ "Anonymous Coroutine", storage, storageSize }; }
     43static inline void ?{}( coroutine_desc & this, const char name[])                    { this{ name, 0p, 0 }; }
     44static inline void ?{}( coroutine_desc & this, const char name[], size_t stackSize ) { this{ name, 0p, stackSize }; }
    4545
    4646//-----------------------------------------------------------------------------
    4747// Public coroutine API
     48static inline void suspend(void);
     49
     50forall(dtype T | is_coroutine(T))
     51static inline T & resume(T & cor);
     52
    4853forall(dtype T | is_coroutine(T))
    4954void prime(T & cor);
    5055
    51 static inline struct $coroutine * active_coroutine() { return TL_GET( this_thread )->curr_cor; }
     56static inline struct coroutine_desc * active_coroutine() { return TL_GET( this_thread )->curr_cor; }
    5257
    5358//-----------------------------------------------------------------------------
     
    5661// Start coroutine routines
    5762extern "C" {
    58         void __cfactx_invoke_coroutine(void (*main)(void *), void * this);
     63        void CtxInvokeCoroutine(void (*main)(void *), void * this);
    5964
    6065        forall(dtype T)
    61         void __cfactx_start(void (*main)(T &), struct $coroutine * cor, T & this, void (*invoke)(void (*main)(void *), void *));
     66        void CtxStart(void (*main)(T &), struct coroutine_desc * cor, T & this, void (*invoke)(void (*main)(void *), void *));
    6267
    63         extern void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct $coroutine *) __attribute__ ((__noreturn__));
     68        extern void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc *) __attribute__ ((__noreturn__));
    6469
    65         extern void __cfactx_switch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("__cfactx_switch");
     70        extern void CtxSwitch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("CtxSwitch");
    6671}
    6772
    6873// Private wrappers for context switch and stack creation
    6974// Wrapper for co
    70 static inline void $ctx_switch( $coroutine * src, $coroutine * dst ) __attribute__((nonnull (1, 2))) {
     75static inline void CoroutineCtxSwitch(coroutine_desc* src, coroutine_desc* dst) {
    7176        // set state of current coroutine to inactive
    72         src->state = src->state == Halted ? Halted : Blocked;
     77        src->state = src->state == Halted ? Halted : Inactive;
    7378
    7479        // set new coroutine that task is executing
     
    7782        // context switch to specified coroutine
    7883        verify( dst->context.SP );
    79         __cfactx_switch( &src->context, &dst->context );
    80         // when __cfactx_switch returns we are back in the src coroutine
     84        CtxSwitch( &src->context, &dst->context );
     85        // when CtxSwitch returns we are back in the src coroutine
    8186
    8287        // set state of new coroutine to active
     
    8489
    8590        if( unlikely(src->cancellation != 0p) ) {
    86                 __cfactx_coroutine_unwind(src->cancellation, src);
     91                _CtxCoroutine_Unwind(src->cancellation, src);
    8792        }
    8893}
     
    9196
    9297// Suspend implementation inlined for performance
    93 extern "C" {
    94         static inline void __cfactx_suspend(void) {
    95                 // optimization : read TLS once and reuse it
    96                 // Safety note: this is preemption safe since if
    97                 // preemption occurs after this line, the pointer
    98                 // will also migrate which means this value will
    99                 // stay in syn with the TLS
    100                 $coroutine * src = TL_GET( this_thread )->curr_cor;
     98static inline void suspend(void) {
     99        // optimization : read TLS once and reuse it
     100        // Safety note: this is preemption safe since if
     101        // preemption occurs after this line, the pointer
     102        // will also migrate which means this value will
     103        // stay in syn with the TLS
     104        coroutine_desc * src = TL_GET( this_thread )->curr_cor;
    101105
    102                 assertf( src->last != 0,
    103                         "Attempt to suspend coroutine \"%.256s\" (%p) that has never been resumed.\n"
    104                         "Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
    105                         src->name, src );
    106                 assertf( src->last->state != Halted,
    107                         "Attempt by coroutine \"%.256s\" (%p) to suspend back to terminated coroutine \"%.256s\" (%p).\n"
    108                         "Possible cause is terminated coroutine's main routine has already returned.",
    109                         src->name, src, src->last->name, src->last );
     106        assertf( src->last != 0,
     107                "Attempt to suspend coroutine \"%.256s\" (%p) that has never been resumed.\n"
     108                "Possible cause is a suspend executed in a member called by a coroutine user rather than by the coroutine main.",
     109                src->name, src );
     110        assertf( src->last->state != Halted,
     111                "Attempt by coroutine \"%.256s\" (%p) to suspend back to terminated coroutine \"%.256s\" (%p).\n"
     112                "Possible cause is terminated coroutine's main routine has already returned.",
     113                src->name, src, src->last->name, src->last );
    110114
    111                 $ctx_switch( src, src->last );
    112         }
     115        CoroutineCtxSwitch( src, src->last );
    113116}
    114117
     
    121124        // will also migrate which means this value will
    122125        // stay in syn with the TLS
    123         $coroutine * src = TL_GET( this_thread )->curr_cor;
    124         $coroutine * dst = get_coroutine(cor);
     126        coroutine_desc * src = TL_GET( this_thread )->curr_cor;
     127        coroutine_desc * dst = get_coroutine(cor);
    125128
    126129        if( unlikely(dst->context.SP == 0p) ) {
    127130                TL_GET( this_thread )->curr_cor = dst;
    128131                __stack_prepare(&dst->stack, 65000);
    129                 __cfactx_start(main, dst, cor, __cfactx_invoke_coroutine);
     132                CtxStart(main, dst, cor, CtxInvokeCoroutine);
    130133                TL_GET( this_thread )->curr_cor = src;
    131134        }
     
    144147
    145148        // always done for performance testing
    146         $ctx_switch( src, dst );
     149        CoroutineCtxSwitch( src, dst );
    147150
    148151        return cor;
    149152}
    150153
    151 static inline void resume( $coroutine * dst ) __attribute__((nonnull (1))) {
     154static inline void resume(coroutine_desc * dst) {
    152155        // optimization : read TLS once and reuse it
    153156        // Safety note: this is preemption safe since if
     
    155158        // will also migrate which means this value will
    156159        // stay in syn with the TLS
    157         $coroutine * src = TL_GET( this_thread )->curr_cor;
     160        coroutine_desc * src = TL_GET( this_thread )->curr_cor;
    158161
    159162        // not resuming self ?
     
    169172
    170173        // always done for performance testing
    171         $ctx_switch( src, dst );
     174        CoroutineCtxSwitch( src, dst );
    172175}
    173176
  • libcfa/src/concurrency/invoke.c

    r6a490b2 rb7d6a36  
    2929// Called from the kernel when starting a coroutine or task so must switch back to user mode.
    3030
    31 extern struct $coroutine * __cfactx_cor_finish(void);
    32 extern void __cfactx_cor_leave ( struct $coroutine * );
    33 extern void __cfactx_thrd_leave();
    34 
     31extern void __leave_coroutine ( struct coroutine_desc * );
     32extern struct coroutine_desc * __finish_coroutine(void);
     33extern void __leave_thread_monitor();
    3534extern void disable_interrupts() OPTIONAL_THREAD;
    3635extern void enable_interrupts( __cfaabi_dbg_ctx_param );
    3736
    38 void __cfactx_invoke_coroutine(
     37void CtxInvokeCoroutine(
    3938        void (*main)(void *),
    4039        void *this
    4140) {
    4241        // Finish setting up the coroutine by setting its state
    43         struct $coroutine * cor = __cfactx_cor_finish();
     42        struct coroutine_desc * cor = __finish_coroutine();
    4443
    4544        // Call the main of the coroutine
     
    4746
    4847        //Final suspend, should never return
    49         __cfactx_cor_leave( cor );
     48        __leave_coroutine( cor );
    5049        __cabi_abort( "Resumed dead coroutine" );
    5150}
    5251
    53 static _Unwind_Reason_Code __cfactx_coroutine_unwindstop(
     52static _Unwind_Reason_Code _CtxCoroutine_UnwindStop(
    5453        __attribute((__unused__)) int version,
    5554        _Unwind_Action actions,
     
    6261                // We finished unwinding the coroutine,
    6362                // leave it
    64                 __cfactx_cor_leave( param );
     63                __leave_coroutine( param );
    6564                __cabi_abort( "Resumed dead coroutine" );
    6665        }
     
    7069}
    7170
    72 void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct $coroutine * cor) __attribute__ ((__noreturn__));
    73 void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct $coroutine * cor) {
    74         _Unwind_Reason_Code ret = _Unwind_ForcedUnwind( storage, __cfactx_coroutine_unwindstop, cor );
     71void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc * cor) __attribute__ ((__noreturn__));
     72void _CtxCoroutine_Unwind(struct _Unwind_Exception * storage, struct coroutine_desc * cor) {
     73        _Unwind_Reason_Code ret = _Unwind_ForcedUnwind( storage, _CtxCoroutine_UnwindStop, cor );
    7574        printf("UNWIND ERROR %d after force unwind\n", ret);
    7675        abort();
    7776}
    7877
    79 void __cfactx_invoke_thread(
     78void CtxInvokeThread(
    8079        void (*main)(void *),
    8180        void *this
     
    9493        // The order of these 4 operations is very important
    9594        //Final suspend, should never return
    96         __cfactx_thrd_leave();
     95        __leave_thread_monitor();
    9796        __cabi_abort( "Resumed dead thread" );
    9897}
    9998
    100 void __cfactx_start(
     99void CtxStart(
    101100        void (*main)(void *),
    102         struct $coroutine * cor,
     101        struct coroutine_desc * cor,
    103102        void *this,
    104103        void (*invoke)(void *)
     
    140139
    141140        fs->dummyReturn = NULL;
    142         fs->rturn = __cfactx_invoke_stub;
     141        fs->rturn = CtxInvokeStub;
    143142        fs->fixedRegisters[0] = main;
    144143        fs->fixedRegisters[1] = this;
     
    158157        struct FakeStack *fs = (struct FakeStack *)cor->context.SP;
    159158
    160         fs->intRegs[8] = __cfactx_invoke_stub;
     159        fs->intRegs[8] = CtxInvokeStub;
    161160        fs->arg[0] = this;
    162161        fs->arg[1] = invoke;
  • libcfa/src/concurrency/invoke.h

    r6a490b2 rb7d6a36  
    4747        extern "Cforall" {
    4848                extern __attribute__((aligned(128))) thread_local struct KernelThreadData {
    49                         struct $thread    * volatile this_thread;
     49                        struct thread_desc    * volatile this_thread;
    5050                        struct processor      * volatile this_processor;
    5151
     
    9292        };
    9393
    94         enum coroutine_state { Halted, Start, Primed, Blocked, Ready, Active, Rerun };
    95         enum __Preemption_Reason { __NO_PREEMPTION, __ALARM_PREEMPTION, __POLL_PREEMPTION, __MANUAL_PREEMPTION };
    96 
    97         struct $coroutine {
    98                 // context that is switch during a __cfactx_switch
     94        enum coroutine_state { Halted, Start, Inactive, Active, Primed };
     95
     96        struct coroutine_desc {
     97                // context that is switch during a CtxSwitch
    9998                struct __stack_context_t context;
    10099
     
    109108
    110109                // first coroutine to resume this one
    111                 struct $coroutine * starter;
     110                struct coroutine_desc * starter;
    112111
    113112                // last coroutine to resume this one
    114                 struct $coroutine * last;
     113                struct coroutine_desc * last;
    115114
    116115                // If non-null stack must be unwound with this exception
     
    118117
    119118        };
    120 
    121         static inline struct __stack_t * __get_stack( struct $coroutine * cor ) { return (struct __stack_t*)(((uintptr_t)cor->stack.storage) & ((uintptr_t)-2)); }
    122119
    123120        // struct which calls the monitor is accepting
     
    130127        };
    131128
    132         struct $monitor {
     129        struct monitor_desc {
    133130                // spinlock to protect internal data
    134131                struct __spinlock_t lock;
    135132
    136133                // current owner of the monitor
    137                 struct $thread * owner;
     134                struct thread_desc * owner;
    138135
    139136                // queue of threads that are blocked waiting for the monitor
    140                 __queue_t(struct $thread) entry_queue;
     137                __queue_t(struct thread_desc) entry_queue;
    141138
    142139                // stack of conditions to run next once we exit the monitor
     
    155152        struct __monitor_group_t {
    156153                // currently held monitors
    157                 __cfa_anonymous_object( __small_array_t($monitor*) );
     154                __cfa_anonymous_object( __small_array_t(monitor_desc*) );
    158155
    159156                // last function that acquired monitors
     
    164161        // instrusive link field for threads
    165162        struct __thread_desc_link {
    166                 struct $thread * next;
    167                 struct $thread * prev;
     163                struct thread_desc * next;
     164                struct thread_desc * prev;
    168165                unsigned long long ts;
    169166        };
    170167
    171         struct $thread {
     168        struct thread_desc {
    172169                // Core threading fields
    173                 // context that is switch during a __cfactx_switch
     170                // context that is switch during a CtxSwitch
    174171                struct __stack_context_t context;
    175172
    176173                // current execution status for coroutine
    177                 volatile int state;
    178                 enum __Preemption_Reason preempted;
     174                enum coroutine_state state;
    179175
    180176                //SKULLDUGGERY errno is not save in the thread data structure because returnToKernel appears to be the only function to require saving and restoring it
    181177
    182178                // coroutine body used to store context
    183                 struct $coroutine  self_cor;
     179                struct coroutine_desc  self_cor;
    184180
    185181                // current active context
    186                 struct $coroutine * curr_cor;
     182                struct coroutine_desc * curr_cor;
    187183
    188184                // monitor body used for mutual exclusion
    189                 struct $monitor    self_mon;
     185                struct monitor_desc    self_mon;
    190186
    191187                // pointer to monitor with sufficient lifetime for current monitors
    192                 struct $monitor *  self_mon_p;
     188                struct monitor_desc *  self_mon_p;
    193189
    194190                // pointer to the cluster on which the thread is running
     
    203199
    204200                struct {
    205                         struct $thread * next;
    206                         struct $thread * prev;
     201                        struct thread_desc * next;
     202                        struct thread_desc * prev;
    207203                } node;
    208 
    209                 #ifdef __CFA_DEBUG__
    210                         // previous function to park/unpark the thread
    211                         const char * park_caller;
    212                         enum coroutine_state park_result;
    213                         bool park_stale;
    214                         const char * unpark_caller;
    215                         enum coroutine_state unpark_result;
    216                         bool unpark_stale;
    217                 #endif
    218         };
    219 
    220         #ifdef __CFA_DEBUG__
    221                 void __cfaabi_dbg_record_thrd($thread & this, bool park, const char prev_name[]);
    222         #else
    223                 #define __cfaabi_dbg_record_thrd(x, y, z)
    224         #endif
     204        };
    225205
    226206        #ifdef __cforall
    227207        extern "Cforall" {
    228 
    229                 static inline $thread *& get_next( $thread & this ) __attribute__((const)) {
     208                static inline thread_desc *& get_next( thread_desc & this ) {
    230209                        return this.link.next;
    231210                }
    232211
    233                 static inline [$thread *&, $thread *& ] __get( $thread & this ) __attribute__((const)) {
     212                static inline [thread_desc *&, thread_desc *& ] __get( thread_desc & this ) {
    234213                        return this.node.[next, prev];
    235214                }
     
    241220                }
    242221
    243                 static inline void ?{}(__monitor_group_t & this, struct $monitor ** data, __lock_size_t size, fptr_t func) {
     222                static inline void ?{}(__monitor_group_t & this, struct monitor_desc ** data, __lock_size_t size, fptr_t func) {
    244223                        (this.data){data};
    245224                        (this.size){size};
     
    247226                }
    248227
    249                 static inline bool ?==?( const __monitor_group_t & lhs, const __monitor_group_t & rhs ) __attribute__((const)) {
     228                static inline bool ?==?( const __monitor_group_t & lhs, const __monitor_group_t & rhs ) {
    250229                        if( (lhs.data != 0) != (rhs.data != 0) ) return false;
    251230                        if( lhs.size != rhs.size ) return false;
     
    281260
    282261        // assembler routines that performs the context switch
    283         extern void __cfactx_invoke_stub( void );
    284         extern void __cfactx_switch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("__cfactx_switch");
     262        extern void CtxInvokeStub( void );
     263        extern void CtxSwitch( struct __stack_context_t * from, struct __stack_context_t * to ) asm ("CtxSwitch");
    285264        // void CtxStore ( void * this ) asm ("CtxStore");
    286265        // void CtxRet   ( void * dst  ) asm ("CtxRet");
  • libcfa/src/concurrency/kernel.cfa

    r6a490b2 rb7d6a36  
    1515
    1616#define __cforall_thread__
    17 // #define __CFA_DEBUG_PRINT_RUNTIME_CORE__
    1817
    1918//C Includes
     
    4140#include "invoke.h"
    4241
    43 
    4442//-----------------------------------------------------------------------------
    4543// Some assembly required
     
    112110//-----------------------------------------------------------------------------
    113111//Start and stop routine for the kernel, declared first to make sure they run first
    114 static void __kernel_startup (void) __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
    115 static void __kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
    116 
    117 //-----------------------------------------------------------------------------
    118 // Kernel Scheduling logic
    119 static $thread * __next_thread(cluster * this);
    120 static void __run_thread(processor * this, $thread * dst);
    121 static $thread * __halt(processor * this);
    122 static bool __wake_one(cluster * cltr, bool was_empty);
    123 static bool __wake_proc(processor *);
     112static void kernel_startup(void)  __attribute__(( constructor( STARTUP_PRIORITY_KERNEL ) ));
     113static void kernel_shutdown(void) __attribute__(( destructor ( STARTUP_PRIORITY_KERNEL ) ));
    124114
    125115//-----------------------------------------------------------------------------
     
    127117KERNEL_STORAGE(cluster,         mainCluster);
    128118KERNEL_STORAGE(processor,       mainProcessor);
    129 KERNEL_STORAGE($thread, mainThread);
     119KERNEL_STORAGE(thread_desc,     mainThread);
    130120KERNEL_STORAGE(__stack_t,       mainThreadCtx);
    131121
    132122cluster     * mainCluster;
    133123processor   * mainProcessor;
    134 $thread * mainThread;
     124thread_desc * mainThread;
    135125
    136126extern "C" {
     
    174164// Main thread construction
    175165
    176 void ?{}( $coroutine & this, current_stack_info_t * info) with( this ) {
     166void ?{}( coroutine_desc & this, current_stack_info_t * info) with( this ) {
    177167        stack.storage = info->storage;
    178168        with(*stack.storage) {
     
    189179}
    190180
    191 void ?{}( $thread & this, current_stack_info_t * info) with( this ) {
     181void ?{}( thread_desc & this, current_stack_info_t * info) with( this ) {
    192182        state = Start;
    193183        self_cor{ info };
     
    219209}
    220210
    221 static void * __invoke_processor(void * arg);
    222 
     211static void start(processor * this);
    223212void ?{}(processor & this, const char name[], cluster & cltr) with( this ) {
    224213        this.name = name;
     
    226215        id = -1u;
    227216        terminated{ 0 };
    228         destroyer = 0p;
    229217        do_terminate = false;
    230218        preemption_alarm = 0p;
     
    232220        runner.proc = &this;
    233221
    234         idle{};
    235 
    236         __cfadbg_print_safe(runtime_core, "Kernel : Starting core %p\n", &this);
    237 
    238         this.stack = __create_pthread( &this.kernel_thread, __invoke_processor, (void *)&this );
    239 
    240         __cfadbg_print_safe(runtime_core, "Kernel : core %p created\n", &this);
     222        idleLock{};
     223
     224        start( &this );
    241225}
    242226
    243227void ^?{}(processor & this) with( this ){
    244228        if( ! __atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) ) {
    245                 __cfadbg_print_safe(runtime_core, "Kernel : core %p signaling termination\n", &this);
     229                __cfaabi_dbg_print_safe("Kernel : core %p signaling termination\n", &this);
    246230
    247231                __atomic_store_n(&do_terminate, true, __ATOMIC_RELAXED);
    248                 __wake_proc( &this );
     232                wake( &this );
    249233
    250234                P( terminated );
     
    252236        }
    253237
    254         int err = pthread_join( kernel_thread, 0p );
    255         if( err != 0 ) abort("KERNEL ERROR: joining processor %p caused error %s\n", &this, strerror(err));
    256 
     238        pthread_join( kernel_thread, 0p );
    257239        free( this.stack );
    258240}
    259241
    260 void ?{}(cluster & this, const char name[], Duration preemption_rate, int io_flags) with( this ) {
     242void ?{}(cluster & this, const char name[], Duration preemption_rate) with( this ) {
    261243        this.name = name;
    262244        this.preemption_rate = preemption_rate;
     
    264246        ready_lock{};
    265247
    266         #if !defined(__CFA_NO_STATISTICS__)
    267                 print_stats = false;
    268         #endif
    269 
    270         procs{ __get };
    271248        idles{ __get };
    272249        threads{ __get };
    273250
    274         __kernel_io_startup( this, io_flags, &this == mainCluster );
    275 
    276251        doregister(this);
    277252}
    278253
    279254void ^?{}(cluster & this) {
    280         __kernel_io_shutdown( this, &this == mainCluster );
    281 
    282255        unregister(this);
    283256}
     
    286259// Kernel Scheduling logic
    287260//=============================================================================================
     261static void runThread(processor * this, thread_desc * dst);
     262static void finishRunning(processor * this);
     263static void halt(processor * this);
     264
    288265//Main of the processor contexts
    289266void main(processorCtx_t & runner) {
     
    295272        verify(this);
    296273
    297         __cfadbg_print_safe(runtime_core, "Kernel : core %p starting\n", this);
     274        __cfaabi_dbg_print_safe("Kernel : core %p starting\n", this);
    298275
    299276        // register the processor unless it's the main thread which is handled in the boot sequence
     
    308285                preemption_scope scope = { this };
    309286
    310                 __cfadbg_print_safe(runtime_core, "Kernel : core %p started\n", this);
    311 
    312                 $thread * readyThread = 0p;
     287                __cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
     288
     289                thread_desc * readyThread = 0p;
    313290                for( unsigned int spin_count = 0; ! __atomic_load_n(&this->do_terminate, __ATOMIC_SEQ_CST); spin_count++ ) {
    314                         // Try to get the next thread
    315                         readyThread = __next_thread( this->cltr );
    316 
    317                         // If no ready thread
    318                         if( readyThread == 0p ) {
    319                                 // Block until a thread is ready
    320                                 readyThread = __halt(this);
    321                         }
    322 
    323                         // Check if we actually found a thread
    324                         if( readyThread ) {
    325                                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    326                                 /* paranoid */ verifyf( readyThread->state == Ready || readyThread->preempted != __NO_PREEMPTION, "state : %d, preempted %d\n", readyThread->state, readyThread->preempted);
    327                                 /* paranoid */ verifyf( readyThread->next == 0p, "Expected null got %p", readyThread->next );
    328 
    329                                 // We found a thread run it
    330                                 __run_thread(this, readyThread);
    331 
    332                                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     291                        readyThread = nextThread( this->cltr );
     292
     293                        if(readyThread) {
     294                                verify( ! kernelTLS.preemption_state.enabled );
     295
     296                                runThread(this, readyThread);
     297
     298                                verify( ! kernelTLS.preemption_state.enabled );
     299
     300                                //Some actions need to be taken from the kernel
     301                                finishRunning(this);
     302
     303                                spin_count = 0;
     304                        } else {
     305                                // spin(this, &spin_count);
     306                                halt(this);
    333307                        }
    334308                }
    335309
    336                 __cfadbg_print_safe(runtime_core, "Kernel : core %p stopping\n", this);
     310                __cfaabi_dbg_print_safe("Kernel : core %p stopping\n", this);
    337311        }
    338312
    339313        V( this->terminated );
     314
    340315
    341316        // unregister the processor unless it's the main thread which is handled in the boot sequence
     
    344319                unregister(this->cltr, this);
    345320        }
    346         else {
    347                 // HACK : the coroutine context switch expects this_thread to be set
    348                 // and it make sense for it to be set in all other cases except here
    349                 // fake it
    350                 kernelTLS.this_thread = mainThread;
    351         }
    352 
    353         __cfadbg_print_safe(runtime_core, "Kernel : core %p terminated\n", this);
     321
     322        __cfaabi_dbg_print_safe("Kernel : core %p terminated\n", this);
    354323
    355324        stats_tls_tally(this->cltr);
     
    362331// runThread runs a thread by context switching
    363332// from the processor coroutine to the target thread
    364 static void __run_thread(processor * this, $thread * thrd_dst) {
    365         $coroutine * proc_cor = get_coroutine(this->runner);
     333static void runThread(processor * this, thread_desc * thrd_dst) {
     334        coroutine_desc * proc_cor = get_coroutine(this->runner);
     335
     336        // Reset the terminating actions here
     337        this->finish.action_code = No_Action;
    366338
    367339        // Update global state
    368340        kernelTLS.this_thread = thrd_dst;
    369341
    370         // set state of processor coroutine to inactive
    371         verify(proc_cor->state == Active);
    372         proc_cor->state = Blocked;
    373 
    374         // Actually run the thread
    375         RUNNING:  while(true) {
    376                 if(unlikely(thrd_dst->preempted)) {
    377                         thrd_dst->preempted = __NO_PREEMPTION;
    378                         verify(thrd_dst->state == Active  || thrd_dst->state == Rerun);
    379                 } else {
    380                         verify(thrd_dst->state == Blocked || thrd_dst->state == Ready); // Ready means scheduled normally, blocked means rerun
    381                         thrd_dst->state = Active;
     342        // set state of processor coroutine to inactive and the thread to active
     343        proc_cor->state = proc_cor->state == Halted ? Halted : Inactive;
     344        thrd_dst->state = Active;
     345
     346        // set context switch to the thread that the processor is executing
     347        verify( thrd_dst->context.SP );
     348        CtxSwitch( &proc_cor->context, &thrd_dst->context );
     349        // when CtxSwitch returns we are back in the processor coroutine
     350
     351        // set state of processor coroutine to active and the thread to inactive
     352        thrd_dst->state = thrd_dst->state == Halted ? Halted : Inactive;
     353        proc_cor->state = Active;
     354}
     355
     356// KERNEL_ONLY
     357static void returnToKernel() {
     358        coroutine_desc * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
     359        thread_desc * thrd_src = kernelTLS.this_thread;
     360
     361        // set state of current coroutine to inactive
     362        thrd_src->state = thrd_src->state == Halted ? Halted : Inactive;
     363        proc_cor->state = Active;
     364        int local_errno = *__volatile_errno();
     365        #if defined( __i386 ) || defined( __x86_64 )
     366                __x87_store;
     367        #endif
     368
     369        // set new coroutine that the processor is executing
     370        // and context switch to it
     371        verify( proc_cor->context.SP );
     372        CtxSwitch( &thrd_src->context, &proc_cor->context );
     373
     374        // set state of new coroutine to active
     375        proc_cor->state = proc_cor->state == Halted ? Halted : Inactive;
     376        thrd_src->state = Active;
     377
     378        #if defined( __i386 ) || defined( __x86_64 )
     379                __x87_load;
     380        #endif
     381        *__volatile_errno() = local_errno;
     382}
     383
     384// KERNEL_ONLY
     385// Once a thread has finished running, some of
     386// its final actions must be executed from the kernel
     387static void finishRunning(processor * this) with( this->finish ) {
     388        verify( ! kernelTLS.preemption_state.enabled );
     389        choose( action_code ) {
     390        case No_Action:
     391                break;
     392        case Release:
     393                unlock( *lock );
     394        case Schedule:
     395                ScheduleThread( thrd );
     396        case Release_Schedule:
     397                unlock( *lock );
     398                ScheduleThread( thrd );
     399        case Release_Multi:
     400                for(int i = 0; i < lock_count; i++) {
     401                        unlock( *locks[i] );
    382402                }
    383 
    384                 __cfaabi_dbg_debug_do(
    385                         thrd_dst->park_stale   = true;
    386                         thrd_dst->unpark_stale = true;
    387                 )
    388 
    389                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    390                 /* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
    391                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst ); // add escape condition if we are setting up the processor
    392                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit) || thrd_dst->curr_cor == proc_cor, "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst ); // add escape condition if we are setting up the processor
    393 
    394                 // set context switch to the thread that the processor is executing
    395                 verify( thrd_dst->context.SP );
    396                 __cfactx_switch( &proc_cor->context, &thrd_dst->context );
    397                 // when __cfactx_switch returns we are back in the processor coroutine
    398 
    399                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) > ((uintptr_t)__get_stack(thrd_dst->curr_cor)->limit), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too large.\n", thrd_dst );
    400                 /* paranoid */ verifyf( ((uintptr_t)thrd_dst->context.SP) < ((uintptr_t)__get_stack(thrd_dst->curr_cor)->base ), "ERROR : Destination $thread %p has been corrupted.\n StackPointer too small.\n", thrd_dst );
    401                 /* paranoid */ verify( kernelTLS.this_thread == thrd_dst );
    402                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    403 
    404 
    405                 // We just finished running a thread, there are a few things that could have happened.
    406                 // 1 - Regular case : the thread has blocked and now one has scheduled it yet.
    407                 // 2 - Racy case    : the thread has blocked but someone has already tried to schedule it.
    408                 // 4 - Preempted
    409                 // In case 1, we may have won a race so we can't write to the state again.
    410                 // In case 2, we lost the race so we now own the thread.
    411 
    412                 if(unlikely(thrd_dst->preempted != __NO_PREEMPTION)) {
    413                         // The thread was preempted, reschedule it and reset the flag
    414                         __schedule_thread( thrd_dst );
    415                         break RUNNING;
     403        case Release_Multi_Schedule:
     404                for(int i = 0; i < lock_count; i++) {
     405                        unlock( *locks[i] );
    416406                }
    417 
    418                 // set state of processor coroutine to active and the thread to inactive
    419                 static_assert(sizeof(thrd_dst->state) == sizeof(int));
    420                 enum coroutine_state old_state = __atomic_exchange_n(&thrd_dst->state, Blocked, __ATOMIC_SEQ_CST);
    421                 __cfaabi_dbg_debug_do( thrd_dst->park_result = old_state; )
    422                 switch(old_state) {
    423                         case Halted:
    424                                 // The thread has halted, it should never be scheduled/run again, leave it back to Halted and move on
    425                                 thrd_dst->state = Halted;
    426 
    427                                 // We may need to wake someone up here since
    428                                 unpark( this->destroyer __cfaabi_dbg_ctx2 );
    429                                 this->destroyer = 0p;
    430                                 break RUNNING;
    431                         case Active:
    432                                 // This is case 1, the regular case, nothing more is needed
    433                                 break RUNNING;
    434                         case Rerun:
    435                                 // This is case 2, the racy case, someone tried to run this thread before it finished blocking
    436                                 // In this case, just run it again.
    437                                 continue RUNNING;
    438                         default:
    439                                 // This makes no sense, something is wrong abort
    440                                 abort("Finished running a thread that was Blocked/Start/Primed %d\n", old_state);
     407                for(int i = 0; i < thrd_count; i++) {
     408                        ScheduleThread( thrds[i] );
    441409                }
    442         }
    443 
    444         // Just before returning to the processor, set the processor coroutine to active
    445         proc_cor->state = Active;
    446         kernelTLS.this_thread = 0p;
    447 }
    448 
    449 // KERNEL_ONLY
    450 void returnToKernel() {
    451         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    452         $coroutine * proc_cor = get_coroutine(kernelTLS.this_processor->runner);
    453         $thread * thrd_src = kernelTLS.this_thread;
    454 
    455         // Run the thread on this processor
    456         {
    457                 int local_errno = *__volatile_errno();
    458                 #if defined( __i386 ) || defined( __x86_64 )
    459                         __x87_store;
    460                 #endif
    461                 verify( proc_cor->context.SP );
    462                 __cfactx_switch( &thrd_src->context, &proc_cor->context );
    463                 #if defined( __i386 ) || defined( __x86_64 )
    464                         __x87_load;
    465                 #endif
    466                 *__volatile_errno() = local_errno;
    467         }
    468 
    469         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    470         /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) < ((uintptr_t)__get_stack(thrd_src->curr_cor)->base ), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too small.\n", thrd_src );
    471         /* paranoid */ verifyf( ((uintptr_t)thrd_src->context.SP) > ((uintptr_t)__get_stack(thrd_src->curr_cor)->limit), "ERROR : Returning $thread %p has been corrupted.\n StackPointer too large.\n", thrd_src );
     410        case Callback:
     411                callback();
     412        default:
     413                abort("KERNEL ERROR: Unexpected action to run after thread");
     414        }
    472415}
    473416
     
    476419// This is the entry point for processors (kernel threads)
    477420// It effectively constructs a coroutine by stealing the pthread stack
    478 static void * __invoke_processor(void * arg) {
     421static void * CtxInvokeProcessor(void * arg) {
    479422        processor * proc = (processor *) arg;
    480423        kernelTLS.this_processor = proc;
     
    495438
    496439        //We now have a proper context from which to schedule threads
    497         __cfadbg_print_safe(runtime_core, "Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);
     440        __cfaabi_dbg_print_safe("Kernel : core %p created (%p, %p)\n", proc, &proc->runner, &ctx);
    498441
    499442        // SKULLDUGGERY: Since the coroutine doesn't have its own stack, we can't
     
    506449
    507450        // Main routine of the core returned, the core is now fully terminated
    508         __cfadbg_print_safe(runtime_core, "Kernel : core %p main ended (%p)\n", proc, &proc->runner);
     451        __cfaabi_dbg_print_safe("Kernel : core %p main ended (%p)\n", proc, &proc->runner);
    509452
    510453        return 0p;
     
    517460} // Abort
    518461
    519 void * __create_pthread( pthread_t * pthread, void * (*start)(void *), void * arg ) {
     462void * create_pthread( pthread_t * pthread, void * (*start)(void *), void * arg ) {
    520463        pthread_attr_t attr;
    521464
     
    545488}
    546489
     490static void start(processor * this) {
     491        __cfaabi_dbg_print_safe("Kernel : Starting core %p\n", this);
     492
     493        this->stack = create_pthread( &this->kernel_thread, CtxInvokeProcessor, (void *)this );
     494
     495        __cfaabi_dbg_print_safe("Kernel : core %p started\n", this);
     496}
     497
    547498// KERNEL_ONLY
    548 static void __kernel_first_resume( processor * this ) {
    549         $thread * src = mainThread;
    550         $coroutine * dst = get_coroutine(this->runner);
     499void kernel_first_resume( processor * this ) {
     500        thread_desc * src = mainThread;
     501        coroutine_desc * dst = get_coroutine(this->runner);
    551502
    552503        verify( ! kernelTLS.preemption_state.enabled );
     
    554505        kernelTLS.this_thread->curr_cor = dst;
    555506        __stack_prepare( &dst->stack, 65000 );
    556         __cfactx_start(main, dst, this->runner, __cfactx_invoke_coroutine);
     507        CtxStart(main, dst, this->runner, CtxInvokeCoroutine);
    557508
    558509        verify( ! kernelTLS.preemption_state.enabled );
     
    561512        dst->starter = dst->starter ? dst->starter : &src->self_cor;
    562513
    563         // make sure the current state is still correct
    564         /* paranoid */ verify(src->state == Ready);
     514        // set state of current coroutine to inactive
     515        src->state = src->state == Halted ? Halted : Inactive;
    565516
    566517        // context switch to specified coroutine
    567518        verify( dst->context.SP );
    568         __cfactx_switch( &src->context, &dst->context );
    569         // when __cfactx_switch returns we are back in the src coroutine
     519        CtxSwitch( &src->context, &dst->context );
     520        // when CtxSwitch returns we are back in the src coroutine
    570521
    571522        mainThread->curr_cor = &mainThread->self_cor;
    572523
    573         // make sure the current state has been update
    574         /* paranoid */ verify(src->state == Active);
     524        // set state of new coroutine to active
     525        src->state = Active;
    575526
    576527        verify( ! kernelTLS.preemption_state.enabled );
     
    578529
    579530// KERNEL_ONLY
    580 static void __kernel_last_resume( processor * this ) {
    581         $coroutine * src = &mainThread->self_cor;
    582         $coroutine * dst = get_coroutine(this->runner);
     531void kernel_last_resume( processor * this ) {
     532        coroutine_desc * src = &mainThread->self_cor;
     533        coroutine_desc * dst = get_coroutine(this->runner);
    583534
    584535        verify( ! kernelTLS.preemption_state.enabled );
     
    586537        verify( dst->context.SP );
    587538
    588         // SKULLDUGGERY in debug the processors check that the
    589         // stack is still within the limit of the stack limits after running a thread.
    590         // that check doesn't make sense if we context switch to the processor using the
    591         // coroutine semantics. Since this is a special case, use the current context
    592         // info to populate these fields.
    593         __cfaabi_dbg_debug_do(
    594                 __stack_context_t ctx;
    595                 CtxGet( ctx );
    596                 mainThread->context.SP = ctx.SP;
    597                 mainThread->context.FP = ctx.FP;
    598         )
    599 
    600539        // context switch to the processor
    601         __cfactx_switch( &src->context, &dst->context );
     540        CtxSwitch( &src->context, &dst->context );
    602541}
    603542
    604543//-----------------------------------------------------------------------------
    605544// Scheduler routines
     545
    606546// KERNEL ONLY
    607 void __schedule_thread( $thread * thrd ) {
    608         /* paranoid */ verify( thrd );
    609         /* paranoid */ verify( thrd->state != Halted );
    610         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    611         /* paranoid */ #if defined( __CFA_WITH_VERIFY__ )
    612         /* paranoid */ if( thrd->state == Blocked || thrd->state == Start ) assertf( thrd->preempted == __NO_PREEMPTION,
    613                           "Error inactive thread marked as preempted, state %d, preemption %d\n", thrd->state, thrd->preempted );
    614         /* paranoid */ if( thrd->preempted != __NO_PREEMPTION ) assertf(thrd->state == Active || thrd->state == Rerun,
    615                           "Error preempted thread marked as not currently running, state %d, preemption %d\n", thrd->state, thrd->preempted );
    616         /* paranoid */ #endif
    617         /* paranoid */ verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
    618 
    619         if (thrd->preempted == __NO_PREEMPTION) thrd->state = Ready;
     547void ScheduleThread( thread_desc * thrd ) {
     548        verify( thrd );
     549        verify( thrd->state != Halted );
     550
     551        verify( ! kernelTLS.preemption_state.enabled );
     552
     553        verifyf( thrd->link.next == 0p, "Expected null got %p", thrd->link.next );
     554
    620555
    621556        ready_schedule_lock(thrd->curr_cluster, kernelTLS.this_processor);
     
    623558        ready_schedule_unlock(thrd->curr_cluster, kernelTLS.this_processor);
    624559
    625         __wake_one(thrd->curr_cluster, was_empty);
    626 
    627         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     560        with( *thrd->curr_cluster ) {
     561                // if(was_empty) {
     562                //      lock      (proc_list_lock __cfaabi_dbg_ctx2);
     563                //      if(idles) {
     564                //              wake_fast(idles.head);
     565                //      }
     566                //      unlock    (proc_list_lock);
     567                // }
     568                // else if( struct processor * idle = idles.head ) {
     569                //      wake_fast(idle);
     570                // }
     571        }
     572
     573        verify( ! kernelTLS.preemption_state.enabled );
    628574}
    629575
    630576// KERNEL ONLY
    631 static $thread * __next_thread(cluster * this) with( *this ) {
    632         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     577thread_desc * nextThread(cluster * this) with( *this ) {
     578        verify( ! kernelTLS.preemption_state.enabled );
    633579
    634580        ready_schedule_lock(this, kernelTLS.this_processor);
    635                 $thread * head = pop( this );
     581                thread_desc * head = pop( this );
    636582        ready_schedule_unlock(this, kernelTLS.this_processor);
    637583
    638         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     584        verify( ! kernelTLS.preemption_state.enabled );
    639585        return head;
    640586}
    641587
    642 // KERNEL ONLY unpark with out disabling interrupts
    643 void __unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ) {
    644         static_assert(sizeof(thrd->state) == sizeof(int));
    645 
    646         // record activity
    647         __cfaabi_dbg_record_thrd( *thrd, false, caller );
    648 
    649         enum coroutine_state old_state = __atomic_exchange_n(&thrd->state, Rerun, __ATOMIC_SEQ_CST);
    650         __cfaabi_dbg_debug_do( thrd->unpark_result = old_state; )
    651         switch(old_state) {
    652                 case Active:
    653                         // Wake won the race, the thread will reschedule/rerun itself
    654                         break;
    655                 case Blocked:
    656                         /* paranoid */ verify( ! thrd->preempted != __NO_PREEMPTION );
    657 
    658                         // Wake lost the race,
    659                         thrd->state = Blocked;
    660                         __schedule_thread( thrd );
    661                         break;
    662                 case Rerun:
    663                         abort("More than one thread attempted to schedule thread %p\n", thrd);
    664                         break;
    665                 case Halted:
    666                 case Start:
    667                 case Primed:
    668                 default:
    669                         // This makes no sense, something is wrong abort
    670                         abort();
    671         }
    672 }
    673 
    674 void unpark( $thread * thrd __cfaabi_dbg_ctx_param2 ) {
    675         if( !thrd ) return;
    676 
     588void BlockInternal() {
    677589        disable_interrupts();
    678         __unpark( thrd __cfaabi_dbg_ctx_fwd2 );
     590        verify( ! kernelTLS.preemption_state.enabled );
     591        returnToKernel();
     592        verify( ! kernelTLS.preemption_state.enabled );
    679593        enable_interrupts( __cfaabi_dbg_ctx );
    680594}
    681595
    682 void park( __cfaabi_dbg_ctx_param ) {
    683         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
     596void BlockInternal( __spinlock_t * lock ) {
    684597        disable_interrupts();
    685         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    686         /* paranoid */ verify( kernelTLS.this_thread->preempted == __NO_PREEMPTION );
    687 
    688         // record activity
    689         __cfaabi_dbg_record_thrd( *kernelTLS.this_thread, true, caller );
    690 
     598        with( *kernelTLS.this_processor ) {
     599                finish.action_code = Release;
     600                finish.lock        = lock;
     601        }
     602
     603        verify( ! kernelTLS.preemption_state.enabled );
    691604        returnToKernel();
    692 
    693         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     605        verify( ! kernelTLS.preemption_state.enabled );
     606
    694607        enable_interrupts( __cfaabi_dbg_ctx );
    695         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
    696 
     608}
     609
     610void BlockInternal( thread_desc * thrd ) {
     611        disable_interrupts();
     612        with( * kernelTLS.this_processor ) {
     613                finish.action_code = Schedule;
     614                finish.thrd        = thrd;
     615        }
     616
     617        verify( ! kernelTLS.preemption_state.enabled );
     618        returnToKernel();
     619        verify( ! kernelTLS.preemption_state.enabled );
     620
     621        enable_interrupts( __cfaabi_dbg_ctx );
     622}
     623
     624void BlockInternal( __spinlock_t * lock, thread_desc * thrd ) {
     625        assert(thrd);
     626        disable_interrupts();
     627        with( * kernelTLS.this_processor ) {
     628                finish.action_code = Release_Schedule;
     629                finish.lock        = lock;
     630                finish.thrd        = thrd;
     631        }
     632
     633        verify( ! kernelTLS.preemption_state.enabled );
     634        returnToKernel();
     635        verify( ! kernelTLS.preemption_state.enabled );
     636
     637        enable_interrupts( __cfaabi_dbg_ctx );
     638}
     639
     640void BlockInternal(__spinlock_t * locks [], unsigned short count) {
     641        disable_interrupts();
     642        with( * kernelTLS.this_processor ) {
     643                finish.action_code = Release_Multi;
     644                finish.locks       = locks;
     645                finish.lock_count  = count;
     646        }
     647
     648        verify( ! kernelTLS.preemption_state.enabled );
     649        returnToKernel();
     650        verify( ! kernelTLS.preemption_state.enabled );
     651
     652        enable_interrupts( __cfaabi_dbg_ctx );
     653}
     654
     655void BlockInternal(__spinlock_t * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
     656        disable_interrupts();
     657        with( *kernelTLS.this_processor ) {
     658                finish.action_code = Release_Multi_Schedule;
     659                finish.locks       = locks;
     660                finish.lock_count  = lock_count;
     661                finish.thrds       = thrds;
     662                finish.thrd_count  = thrd_count;
     663        }
     664
     665        verify( ! kernelTLS.preemption_state.enabled );
     666        returnToKernel();
     667        verify( ! kernelTLS.preemption_state.enabled );
     668
     669        enable_interrupts( __cfaabi_dbg_ctx );
     670}
     671
     672void BlockInternal(__finish_callback_fptr_t callback) {
     673        disable_interrupts();
     674        with( *kernelTLS.this_processor ) {
     675                finish.action_code = Callback;
     676                finish.callback    = callback;
     677        }
     678
     679        verify( ! kernelTLS.preemption_state.enabled );
     680        returnToKernel();
     681        verify( ! kernelTLS.preemption_state.enabled );
     682
     683        enable_interrupts( __cfaabi_dbg_ctx );
    697684}
    698685
    699686// KERNEL ONLY
    700 void __leave_thread() {
    701         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
     687void LeaveThread(__spinlock_t * lock, thread_desc * thrd) {
     688        verify( ! kernelTLS.preemption_state.enabled );
     689        with( * kernelTLS.this_processor ) {
     690                finish.action_code = thrd ? Release_Schedule : Release;
     691                finish.lock        = lock;
     692                finish.thrd        = thrd;
     693        }
     694
    702695        returnToKernel();
    703         abort();
    704 }
    705 
    706 // KERNEL ONLY
    707 bool force_yield( __Preemption_Reason reason ) {
    708         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
    709         disable_interrupts();
    710         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    711 
    712         $thread * thrd = kernelTLS.this_thread;
    713         /* paranoid */ verify(thrd->state == Active || thrd->state == Rerun);
    714 
    715         // SKULLDUGGERY: It is possible that we are preempting this thread just before
    716         // it was going to park itself. If that is the case and it is already using the
    717         // intrusive fields then we can't use them to preempt the thread
    718         // If that is the case, abandon the preemption.
    719         bool preempted = false;
    720         if(thrd->next == 0p) {
    721                 preempted = true;
    722                 thrd->preempted = reason;
    723                 returnToKernel();
    724         }
    725 
    726         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    727         enable_interrupts_noPoll();
    728         /* paranoid */ verify( kernelTLS.preemption_state.enabled );
    729 
    730         return preempted;
    731696}
    732697
     
    736701//-----------------------------------------------------------------------------
    737702// Kernel boot procedures
    738 static void __kernel_startup(void) {
    739         verify( ! kernelTLS.preemption_state.enabled );
    740         __cfadbg_print_safe(runtime_core, "Kernel : Starting\n");
     703static void kernel_startup(void) {
     704        verify( ! kernelTLS.preemption_state.enabled );
     705        __cfaabi_dbg_print_safe("Kernel : Starting\n");
    741706
    742707        __page_size = sysconf( _SC_PAGESIZE );
     
    749714        (*mainCluster){"Main Cluster"};
    750715
    751         __cfadbg_print_safe(runtime_core, "Kernel : Main cluster ready\n");
     716        __cfaabi_dbg_print_safe("Kernel : Main cluster ready\n");
    752717
    753718        // Start by initializing the main thread
    754719        // SKULLDUGGERY: the mainThread steals the process main thread
    755720        // which will then be scheduled by the mainProcessor normally
    756         mainThread = ($thread *)&storage_mainThread;
     721        mainThread = (thread_desc *)&storage_mainThread;
    757722        current_stack_info_t info;
    758723        info.storage = (__stack_t*)&storage_mainThreadCtx;
    759724        (*mainThread){ &info };
    760725
    761         __cfadbg_print_safe(runtime_core, "Kernel : Main thread ready\n");
     726        __cfaabi_dbg_print_safe("Kernel : Main thread ready\n");
    762727
    763728
     
    781746
    782747                runner{ &this };
    783                 __cfadbg_print_safe(runtime_core, "Kernel : constructed main processor context %p\n", &runner);
     748                __cfaabi_dbg_print_safe("Kernel : constructed main processor context %p\n", &runner);
    784749        }
    785750
     
    800765        // Add the main thread to the ready queue
    801766        // once resume is called on mainProcessor->runner the mainThread needs to be scheduled like any normal thread
    802         __schedule_thread(mainThread);
     767        ScheduleThread(mainThread);
    803768
    804769        // SKULLDUGGERY: Force a context switch to the main processor to set the main thread's context to the current UNIX
    805         // context. Hence, the main thread does not begin through __cfactx_invoke_thread, like all other threads. The trick here is that
     770        // context. Hence, the main thread does not begin through CtxInvokeThread, like all other threads. The trick here is that
    806771        // mainThread is on the ready queue when this call is made.
    807         __kernel_first_resume( kernelTLS.this_processor );
     772        kernel_first_resume( kernelTLS.this_processor );
     773
    808774
    809775
    810776        // THE SYSTEM IS NOW COMPLETELY RUNNING
    811 
    812 
    813         // Now that the system is up, finish creating systems that need threading
    814         __kernel_io_finish_start( *mainCluster );
    815 
    816 
    817         __cfadbg_print_safe(runtime_core, "Kernel : Started\n--------------------------------------------------\n\n");
     777        __cfaabi_dbg_print_safe("Kernel : Started\n--------------------------------------------------\n\n");
    818778
    819779        verify( ! kernelTLS.preemption_state.enabled );
     
    822782}
    823783
    824 static void __kernel_shutdown(void) {
    825         //Before we start shutting things down, wait for systems that need threading to shutdown
    826         __kernel_io_prepare_stop( *mainCluster );
    827 
    828         /* paranoid */ verify( TL_GET( preemption_state.enabled ) );
     784static void kernel_shutdown(void) {
     785        __cfaabi_dbg_print_safe("\n--------------------------------------------------\nKernel : Shutting down\n");
     786
     787        verify( TL_GET( preemption_state.enabled ) );
    829788        disable_interrupts();
    830         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    831 
    832         __cfadbg_print_safe(runtime_core, "\n--------------------------------------------------\nKernel : Shutting down\n");
     789        verify( ! kernelTLS.preemption_state.enabled );
    833790
    834791        // SKULLDUGGERY: Notify the mainProcessor it needs to terminates.
     
    836793        // which is currently here
    837794        __atomic_store_n(&mainProcessor->do_terminate, true, __ATOMIC_RELEASE);
    838         __kernel_last_resume( kernelTLS.this_processor );
     795        kernel_last_resume( kernelTLS.this_processor );
    839796        mainThread->self_cor.state = Halted;
    840797
     
    848805        // Destroy the main processor and its context in reverse order of construction
    849806        // These were manually constructed so we need manually destroy them
    850         void ^?{}(processor & this) with( this ){
    851                 /* paranoid */ verify( this.do_terminate == true );
     807        void ^?{}(processor & this) with( this ) {
     808                //don't join the main thread here, that wouldn't make any sense
    852809                __cfaabi_dbg_print_safe("Kernel : destroyed main processor context %p\n", &runner);
    853810        }
     
    856813
    857814        // Final step, destroy the main thread since it is no longer needed
    858 
    859         // Since we provided a stack to this taxk it will not destroy anything
    860         /* paranoid */ verify(mainThread->self_cor.stack.storage == (__stack_t*)(((uintptr_t)&storage_mainThreadCtx)| 0x1));
     815        // Since we provided a stack to this task it will not destroy anything
    861816        ^(*mainThread){};
    862817
     
    866821        ^(__cfa_dbg_global_clusters.lock){};
    867822
    868         __cfadbg_print_safe(runtime_core, "Kernel : Shutdown complete\n");
     823        __cfaabi_dbg_print_safe("Kernel : Shutdown complete\n");
    869824}
    870825
    871826//=============================================================================================
    872 // Kernel Idle Sleep
     827// Kernel Quiescing
    873828//=============================================================================================
    874 static $thread * __halt(processor * this) with( *this ) {
    875         if( do_terminate ) return 0p;
    876 
    877         // First, lock the cluster idle
    878         lock( cltr->idle_lock __cfaabi_dbg_ctx2 );
    879 
    880         // Check if we can find a thread
    881         if( $thread * found = __next_thread( cltr ) ) {
    882                 unlock( cltr->idle_lock );
    883                 return found;
    884         }
    885 
    886         // Move this processor from the active list to the idle list
    887         move_to_front(cltr->procs, cltr->idles, *this);
    888 
    889         // Unlock the idle lock so we don't go to sleep with a lock
    890         unlock    (cltr->idle_lock);
    891 
    892         // We are ready to sleep
    893         __cfadbg_print_safe(runtime_core, "Kernel : Processor %p ready to sleep\n", this);
    894         wait( idle );
    895 
    896         // We have woken up
    897         __cfadbg_print_safe(runtime_core, "Kernel : Processor %p woke up and ready to run\n", this);
    898 
    899         // Get ourself off the idle list
    900         with( *cltr ) {
    901                 lock  (idle_lock __cfaabi_dbg_ctx2);
    902                 move_to_front(idles, procs, *this);
    903                 unlock(idle_lock);
    904         }
    905 
    906         // Don't check the ready queue again, we may not be in a position to run a thread
    907         return 0p;
    908 }
    909 
    910 // Wake a thread from the front if there are any
    911 static bool __wake_one(cluster * this, __attribute__((unused)) bool force) {
    912         // if we don't want to force check if we know it's false
    913         // if( !this->idles.head && !force ) return false;
    914 
    915         // First, lock the cluster idle
    916         lock( this->idle_lock __cfaabi_dbg_ctx2 );
    917 
    918         // Check if there is someone to wake up
    919         if( !this->idles.head ) {
    920                 // Nope unlock and return false
    921                 unlock( this->idle_lock );
    922                 return false;
    923         }
    924 
    925         // Wake them up
    926         __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this->idles.head);
    927         /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    928         post( this->idles.head->idle );
    929 
    930         // Unlock and return true
    931         unlock( this->idle_lock );
    932         return true;
    933 }
    934 
    935 // Unconditionnaly wake a thread
    936 static bool __wake_proc(processor * this) {
    937         __cfadbg_print_safe(runtime_core, "Kernel : waking Processor %p\n", this);
    938 
    939         disable_interrupts();
    940                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    941                 bool ret = post( this->idle );
    942         enable_interrupts( __cfaabi_dbg_ctx );
    943 
    944         return ret;
     829static void halt(processor * this) with( *this ) {
     830        // // verify( ! __atomic_load_n(&do_terminate, __ATOMIC_SEQ_CST) );
     831
     832        // with( *cltr ) {
     833        //      lock      (proc_list_lock __cfaabi_dbg_ctx2);
     834        //      push_front(idles, *this);
     835        //      unlock    (proc_list_lock);
     836        // }
     837
     838        // __cfaabi_dbg_print_safe("Kernel : Processor %p ready to sleep\n", this);
     839
     840        // wait( idleLock );
     841
     842        // __cfaabi_dbg_print_safe("Kernel : Processor %p woke up and ready to run\n", this);
     843
     844        // with( *cltr ) {
     845        //      lock      (proc_list_lock __cfaabi_dbg_ctx2);
     846        //      remove    (idles, *this);
     847        //      unlock    (proc_list_lock);
     848        // }
    945849}
    946850
     
    976880
    977881void kernel_abort_msg( void * kernel_data, char * abort_text, int abort_text_size ) {
    978         $thread * thrd = kernel_data;
     882        thread_desc * thrd = kernel_data;
    979883
    980884        if(thrd) {
     
    1024928void ^?{}(semaphore & this) {}
    1025929
    1026 bool P(semaphore & this) with( this ){
     930void P(semaphore & this) with( this ){
    1027931        lock( lock __cfaabi_dbg_ctx2 );
    1028932        count -= 1;
     
    1032936
    1033937                // atomically release spin lock and block
    1034                 unlock( lock );
    1035                 park( __cfaabi_dbg_ctx );
    1036                 return true;
     938                BlockInternal( &lock );
    1037939        }
    1038940        else {
    1039941            unlock( lock );
    1040             return false;
    1041         }
    1042 }
    1043 
    1044 bool V(semaphore & this) with( this ) {
    1045         $thread * thrd = 0p;
     942        }
     943}
     944
     945void V(semaphore & this) with( this ) {
     946        thread_desc * thrd = 0p;
    1046947        lock( lock __cfaabi_dbg_ctx2 );
    1047948        count += 1;
     
    1054955
    1055956        // make new owner
    1056         unpark( thrd __cfaabi_dbg_ctx2 );
    1057 
    1058         return thrd != 0p;
    1059 }
    1060 
    1061 bool V(semaphore & this, unsigned diff) with( this ) {
    1062         $thread * thrd = 0p;
    1063         lock( lock __cfaabi_dbg_ctx2 );
    1064         int release = max(-count, (int)diff);
    1065         count += diff;
    1066         for(release) {
    1067                 unpark( pop_head( waiting ) __cfaabi_dbg_ctx2 );
    1068         }
    1069 
    1070         unlock( lock );
    1071 
    1072         return thrd != 0p;
     957        WakeThread( thrd );
    1073958}
    1074959
     
    1087972}
    1088973
    1089 void doregister( cluster * cltr, $thread & thrd ) {
     974void doregister( cluster * cltr, thread_desc & thrd ) {
    1090975        lock      (cltr->thread_list_lock __cfaabi_dbg_ctx2);
    1091976        cltr->nthreads += 1;
     
    1094979}
    1095980
    1096 void unregister( cluster * cltr, $thread & thrd ) {
     981void unregister( cluster * cltr, thread_desc & thrd ) {
    1097982        lock  (cltr->thread_list_lock __cfaabi_dbg_ctx2);
    1098983        remove(cltr->threads, thrd );
     
    1105990__cfaabi_dbg_debug_do(
    1106991        extern "C" {
    1107                 void __cfaabi_dbg_record_lock(__spinlock_t & this, const char prev_name[]) {
     992                void __cfaabi_dbg_record(__spinlock_t & this, const char prev_name[]) {
    1108993                        this.prev_name = prev_name;
    1109994                        this.prev_thrd = kernelTLS.this_thread;
    1110995                }
    1111 
    1112                 void __cfaabi_dbg_record_thrd($thread & this, bool park, const char prev_name[]) {
    1113                         if(park) {
    1114                                 this.park_caller   = prev_name;
    1115                                 this.park_stale    = false;
    1116                         }
    1117                         else {
    1118                                 this.unpark_caller = prev_name;
    1119                                 this.unpark_stale  = false;
    1120                         }
    1121                 }
    1122996        }
    1123997)
     
    1125999//-----------------------------------------------------------------------------
    11261000// Debug
    1127 bool threading_enabled(void) __attribute__((const)) {
     1001bool threading_enabled(void) {
    11281002        return true;
    11291003}
  • libcfa/src/concurrency/kernel.hfa

    r6a490b2 rb7d6a36  
    1717
    1818#include <stdbool.h>
    19 #include <stdint.h>
    2019
    2120#include "invoke.h"
     
    3332        __spinlock_t lock;
    3433        int count;
    35         __queue_t($thread) waiting;
     34        __queue_t(thread_desc) waiting;
    3635};
    3736
    3837void  ?{}(semaphore & this, int count = 1);
    3938void ^?{}(semaphore & this);
    40 bool   P (semaphore & this);
    41 bool   V (semaphore & this);
    42 bool   V (semaphore & this, unsigned count);
     39void   P (semaphore & this);
     40void   V (semaphore & this);
    4341
    4442
     
    4644// Processor
    4745extern struct cluster * mainCluster;
     46
     47enum FinishOpCode { No_Action, Release, Schedule, Release_Schedule, Release_Multi, Release_Multi_Schedule, Callback };
     48
     49typedef void (*__finish_callback_fptr_t)(void);
     50
     51//TODO use union, many of these fields are mutually exclusive (i.e. MULTI vs NOMULTI)
     52struct FinishAction {
     53        FinishOpCode action_code;
     54        /*
     55        // Union of possible actions
     56        union {
     57                // Option 1 : locks and threads
     58                struct {
     59                        // 1 thread or N thread
     60                        union {
     61                                thread_desc * thrd;
     62                                struct {
     63                                        thread_desc ** thrds;
     64                                        unsigned short thrd_count;
     65                                };
     66                        };
     67                        // 1 lock or N lock
     68                        union {
     69                                __spinlock_t * lock;
     70                                struct {
     71                                        __spinlock_t ** locks;
     72                                        unsigned short lock_count;
     73                                };
     74                        };
     75                };
     76                // Option 2 : action pointer
     77                __finish_callback_fptr_t callback;
     78        };
     79        /*/
     80        thread_desc * thrd;
     81        thread_desc ** thrds;
     82        unsigned short thrd_count;
     83        __spinlock_t * lock;
     84        __spinlock_t ** locks;
     85        unsigned short lock_count;
     86        __finish_callback_fptr_t callback;
     87        //*/
     88};
     89static inline void ?{}(FinishAction & this) {
     90        this.action_code = No_Action;
     91        this.thrd = 0p;
     92        this.lock = 0p;
     93}
     94static inline void ^?{}(FinishAction &) {}
    4895
    4996// Processor
     
    70117        // RunThread data
    71118        // Action to do after a thread is ran
    72         $thread * destroyer;
     119        struct FinishAction finish;
    73120
    74121        // Preemption data
     
    79126        bool pending_preemption;
    80127
    81         // Idle lock (kernel semaphore)
    82         __bin_sem_t idle;
     128        // Idle lock
     129        __bin_sem_t idleLock;
    83130
    84131        // Termination
     
    86133        volatile bool do_terminate;
    87134
    88         // Termination synchronisation (user semaphore)
     135        // Termination synchronisation
    89136        semaphore terminated;
    90137
     
    111158static inline void  ?{}(processor & this, const char name[]) { this{name, *mainCluster }; }
    112159
    113 static inline [processor *&, processor *& ] __get( processor & this ) __attribute__((const)) { return this.node.[next, prev]; }
    114 
    115 //-----------------------------------------------------------------------------
    116 // I/O
    117 struct __io_data;
    118 
    119 #define CFA_CLUSTER_IO_POLLER_USER_THREAD 1 << 0
    120 // #define CFA_CLUSTER_IO_POLLER_KERNEL_SIDE 1 << 1
     160static inline [processor *&, processor *& ] __get( processor & this ) {
     161        return this.node.[next, prev];
     162}
    121163
    122164
     
    291333        // List of threads
    292334        __spinlock_t thread_list_lock;
    293         __dllist_t(struct $thread) threads;
     335        __dllist_t(struct thread_desc) threads;
    294336        unsigned int nthreads;
    295337
     
    299341                cluster * prev;
    300342        } node;
    301 
    302         struct __io_data * io;
    303 
    304         #if !defined(__CFA_NO_STATISTICS__)
    305                 bool print_stats;
    306         #endif
    307343};
    308344extern Duration default_preemption();
    309345
    310 void ?{} (cluster & this, const char name[], Duration preemption_rate, int flags);
     346void ?{} (cluster & this, const char name[], Duration preemption_rate);
    311347void ^?{}(cluster & this);
    312348
    313 static inline void ?{} (cluster & this)                                      { this{"Anonymous Cluster", default_preemption(), 0}; }
    314 static inline void ?{} (cluster & this, Duration preemption_rate)            { this{"Anonymous Cluster", preemption_rate, 0}; }
    315 static inline void ?{} (cluster & this, const char name[])                   { this{name, default_preemption(), 0}; }
    316 static inline void ?{} (cluster & this, int flags)                           { this{"Anonymous Cluster", default_preemption(), flags}; }
    317 static inline void ?{} (cluster & this, Duration preemption_rate, int flags) { this{"Anonymous Cluster", preemption_rate, flags}; }
    318 static inline void ?{} (cluster & this, const char name[], int flags)        { this{name, default_preemption(), flags}; }
    319 
    320 static inline [cluster *&, cluster *& ] __get( cluster & this ) __attribute__((const)) { return this.node.[next, prev]; }
     349static inline void ?{} (cluster & this)                           { this{"Anonymous Cluster", default_preemption()}; }
     350static inline void ?{} (cluster & this, Duration preemption_rate) { this{"Anonymous Cluster", preemption_rate}; }
     351static inline void ?{} (cluster & this, const char name[])        { this{name, default_preemption()}; }
     352
     353static inline [cluster *&, cluster *& ] __get( cluster & this ) {
     354        return this.node.[next, prev];
     355}
    321356
    322357static inline struct processor * active_processor() { return TL_GET( this_processor ); } // UNSAFE
    323358static inline struct cluster   * active_cluster  () { return TL_GET( this_processor )->cltr; }
    324 
    325 #if !defined(__CFA_NO_STATISTICS__)
    326         static inline void print_stats_at_exit( cluster & this ) {
    327                 this.print_stats = true;
    328         }
    329 #endif
    330359
    331360// Local Variables: //
  • libcfa/src/concurrency/kernel_private.hfa

    r6a490b2 rb7d6a36  
    3131}
    3232
    33 void __schedule_thread( $thread * ) __attribute__((nonnull (1)));
     33void ScheduleThread( thread_desc * );
     34static inline void WakeThread( thread_desc * thrd ) {
     35        if( !thrd ) return;
     36
     37        verify(thrd->state == Inactive);
     38
     39        disable_interrupts();
     40        ScheduleThread( thrd );
     41        enable_interrupts( __cfaabi_dbg_ctx );
     42}
     43thread_desc * nextThread(cluster * this);
    3444
    3545//Block current thread and release/wake-up the following resources
    36 void __leave_thread() __attribute__((noreturn));
     46void BlockInternal(void);
     47void BlockInternal(__spinlock_t * lock);
     48void BlockInternal(thread_desc * thrd);
     49void BlockInternal(__spinlock_t * lock, thread_desc * thrd);
     50void BlockInternal(__spinlock_t * locks [], unsigned short count);
     51void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
     52void BlockInternal(__finish_callback_fptr_t callback);
     53void LeaveThread(__spinlock_t * lock, thread_desc * thrd);
    3754
    3855//-----------------------------------------------------------------------------
     
    4057void main(processorCtx_t *);
    4158
    42 void * __create_pthread( pthread_t *, void * (*)(void *), void * );
    43 
    44 
     59void * create_pthread( pthread_t *, void * (*)(void *), void * );
     60
     61static inline void wake_fast(processor * this) {
     62        __cfaabi_dbg_print_safe("Kernel : Waking up processor %p\n", this);
     63        post( this->idleLock );
     64}
     65
     66static inline void wake(processor * this) {
     67        disable_interrupts();
     68        wake_fast(this);
     69        enable_interrupts( __cfaabi_dbg_ctx );
     70}
    4571
    4672struct event_kernel_t {
     
    5985extern volatile thread_local __cfa_kernel_preemption_state_t preemption_state __attribute__ ((tls_model ( "initial-exec" )));
    6086
    61 extern cluster * mainCluster;
    62 
    6387//-----------------------------------------------------------------------------
    6488// Threads
    6589extern "C" {
    66       void __cfactx_invoke_thread(void (*main)(void *), void * this);
    67 }
     90      void CtxInvokeThread(void (*main)(void *), void * this);
     91}
     92
     93extern void ThreadCtxSwitch(coroutine_desc * src, coroutine_desc * dst);
    6894
    6995__cfaabi_dbg_debug_do(
    70         extern void __cfaabi_dbg_thread_register  ( $thread * thrd );
    71         extern void __cfaabi_dbg_thread_unregister( $thread * thrd );
     96        extern void __cfaabi_dbg_thread_register  ( thread_desc * thrd );
     97        extern void __cfaabi_dbg_thread_unregister( thread_desc * thrd );
    7298)
    73 
    74 // KERNEL ONLY unpark with out disabling interrupts
    75 void __unpark( $thread * thrd __cfaabi_dbg_ctx_param2 );
    76 
    77 //-----------------------------------------------------------------------------
    78 // I/O
    79 void __kernel_io_startup     ( cluster &, int, bool );
    80 void __kernel_io_finish_start( cluster & );
    81 void __kernel_io_prepare_stop( cluster & );
    82 void __kernel_io_shutdown    ( cluster &, bool );
    8399
    84100//-----------------------------------------------------------------------------
     
    86102#define KERNEL_STORAGE(T,X) __attribute((aligned(__alignof__(T)))) static char storage_##X[sizeof(T)]
    87103
    88 static inline uint32_t __tls_rand() {
     104static inline uint32_t tls_rand() {
    89105        kernelTLS.rand_seed ^= kernelTLS.rand_seed << 6;
    90106        kernelTLS.rand_seed ^= kernelTLS.rand_seed >> 21;
     
    97113void unregister( struct cluster & cltr );
    98114
    99 void doregister( struct cluster * cltr, struct $thread & thrd );
    100 void unregister( struct cluster * cltr, struct $thread & thrd );
     115void doregister( struct cluster * cltr, struct thread_desc & thrd );
     116void unregister( struct cluster * cltr, struct thread_desc & thrd );
    101117
    102118//=======================================================================
  • libcfa/src/concurrency/monitor.cfa

    r6a490b2 rb7d6a36  
    55// file "LICENCE" distributed with Cforall.
    66//
    7 // $monitor.c --
     7// monitor_desc.c --
    88//
    99// Author           : Thierry Delisle
     
    2727//-----------------------------------------------------------------------------
    2828// Forward declarations
    29 static inline void __set_owner ( $monitor * this, $thread * owner );
    30 static inline void __set_owner ( $monitor * storage [], __lock_size_t count, $thread * owner );
    31 static inline void set_mask  ( $monitor * storage [], __lock_size_t count, const __waitfor_mask_t & mask );
    32 static inline void reset_mask( $monitor * this );
    33 
    34 static inline $thread * next_thread( $monitor * this );
    35 static inline bool is_accepted( $monitor * this, const __monitor_group_t & monitors );
     29static inline void set_owner ( monitor_desc * this, thread_desc * owner );
     30static inline void set_owner ( monitor_desc * storage [], __lock_size_t count, thread_desc * owner );
     31static inline void set_mask  ( monitor_desc * storage [], __lock_size_t count, const __waitfor_mask_t & mask );
     32static inline void reset_mask( monitor_desc * this );
     33
     34static inline thread_desc * next_thread( monitor_desc * this );
     35static inline bool is_accepted( monitor_desc * this, const __monitor_group_t & monitors );
    3636
    3737static inline void lock_all  ( __spinlock_t * locks [], __lock_size_t count );
    38 static inline void lock_all  ( $monitor * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count );
     38static inline void lock_all  ( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count );
    3939static inline void unlock_all( __spinlock_t * locks [], __lock_size_t count );
    40 static inline void unlock_all( $monitor * locks [], __lock_size_t count );
    41 
    42 static inline void save   ( $monitor * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*out*/ recursions [], __waitfor_mask_t /*out*/ masks [] );
    43 static inline void restore( $monitor * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
    44 
    45 static inline void init     ( __lock_size_t count, $monitor * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
    46 static inline void init_push( __lock_size_t count, $monitor * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
    47 
    48 static inline $thread *        check_condition   ( __condition_criterion_t * );
     40static inline void unlock_all( monitor_desc * locks [], __lock_size_t count );
     41
     42static inline void save   ( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*out*/ recursions [], __waitfor_mask_t /*out*/ masks [] );
     43static inline void restore( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
     44
     45static inline void init     ( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
     46static inline void init_push( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
     47
     48static inline thread_desc *        check_condition   ( __condition_criterion_t * );
    4949static inline void                 brand_condition   ( condition & );
    50 static inline [$thread *, int] search_entry_queue( const __waitfor_mask_t &, $monitor * monitors [], __lock_size_t count );
     50static inline [thread_desc *, int] search_entry_queue( const __waitfor_mask_t &, monitor_desc * monitors [], __lock_size_t count );
    5151
    5252forall(dtype T | sized( T ))
    5353static inline __lock_size_t insert_unique( T * array [], __lock_size_t & size, T * val );
    5454static inline __lock_size_t count_max    ( const __waitfor_mask_t & mask );
    55 static inline __lock_size_t aggregate    ( $monitor * storage [], const __waitfor_mask_t & mask );
     55static inline __lock_size_t aggregate    ( monitor_desc * storage [], const __waitfor_mask_t & mask );
    5656
    5757//-----------------------------------------------------------------------------
     
    6868
    6969#define monitor_ctx( mons, cnt )                                /* Define that create the necessary struct for internal/external scheduling operations */ \
    70         $monitor ** monitors = mons;                          /* Save the targeted monitors                                                          */ \
     70        monitor_desc ** monitors = mons;                          /* Save the targeted monitors                                                          */ \
    7171        __lock_size_t count = cnt;                                /* Save the count to a local variable                                                  */ \
    7272        unsigned int recursions[ count ];                         /* Save the current recursion levels to restore them later                             */ \
     
    8080//-----------------------------------------------------------------------------
    8181// Enter/Leave routines
    82 // Enter single monitor
    83 static void __enter( $monitor * this, const __monitor_group_t & group ) {
    84         // Lock the monitor spinlock
    85         lock( this->lock __cfaabi_dbg_ctx2 );
    86         // Interrupts disable inside critical section
    87         $thread * thrd = kernelTLS.this_thread;
    88 
    89         __cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
    90 
    91         if( !this->owner ) {
    92                 // No one has the monitor, just take it
    93                 __set_owner( this, thrd );
    94 
    95                 __cfaabi_dbg_print_safe( "Kernel :  mon is free \n" );
    96         }
    97         else if( this->owner == thrd) {
    98                 // We already have the monitor, just note how many times we took it
    99                 this->recursion += 1;
    100 
    101                 __cfaabi_dbg_print_safe( "Kernel :  mon already owned \n" );
    102         }
    103         else if( is_accepted( this, group) ) {
    104                 // Some one was waiting for us, enter
    105                 __set_owner( this, thrd );
    106 
    107                 // Reset mask
    108                 reset_mask( this );
    109 
    110                 __cfaabi_dbg_print_safe( "Kernel :  mon accepts \n" );
    111         }
    112         else {
    113                 __cfaabi_dbg_print_safe( "Kernel :  blocking \n" );
    114 
    115                 // Some one else has the monitor, wait in line for it
    116                 /* paranoid */ verify( thrd->next == 0p );
    117                 append( this->entry_queue, thrd );
    118                 /* paranoid */ verify( thrd->next == 1p );
    119 
    120                 unlock( this->lock );
    121                 park( __cfaabi_dbg_ctx );
     82
     83
     84extern "C" {
     85        // Enter single monitor
     86        static void __enter_monitor_desc( monitor_desc * this, const __monitor_group_t & group ) {
     87                // Lock the monitor spinlock
     88                lock( this->lock __cfaabi_dbg_ctx2 );
     89                // Interrupts disable inside critical section
     90                thread_desc * thrd = kernelTLS.this_thread;
     91
     92                __cfaabi_dbg_print_safe( "Kernel : %10p Entering mon %p (%p)\n", thrd, this, this->owner);
     93
     94                if( !this->owner ) {
     95                        // No one has the monitor, just take it
     96                        set_owner( this, thrd );
     97
     98                        __cfaabi_dbg_print_safe( "Kernel :  mon is free \n" );
     99                }
     100                else if( this->owner == thrd) {
     101                        // We already have the monitor, just note how many times we took it
     102                        this->recursion += 1;
     103
     104                        __cfaabi_dbg_print_safe( "Kernel :  mon already owned \n" );
     105                }
     106                else if( is_accepted( this, group) ) {
     107                        // Some one was waiting for us, enter
     108                        set_owner( this, thrd );
     109
     110                        // Reset mask
     111                        reset_mask( this );
     112
     113                        __cfaabi_dbg_print_safe( "Kernel :  mon accepts \n" );
     114                }
     115                else {
     116                        __cfaabi_dbg_print_safe( "Kernel :  blocking \n" );
     117
     118                        // Some one else has the monitor, wait in line for it
     119                        append( this->entry_queue, thrd );
     120
     121                        BlockInternal( &this->lock );
     122
     123                        __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
     124
     125                        // BlockInternal will unlock spinlock, no need to unlock ourselves
     126                        return;
     127                }
    122128
    123129                __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
    124130
    125                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
    126                 return;
    127         }
    128 
    129         __cfaabi_dbg_print_safe( "Kernel : %10p Entered  mon %p\n", thrd, this);
    130 
    131         /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
    132         /* paranoid */ verify( this->lock.lock );
    133 
    134         // Release the lock and leave
    135         unlock( this->lock );
    136         return;
    137 }
    138 
    139 static void __dtor_enter( $monitor * this, fptr_t func ) {
    140         // Lock the monitor spinlock
    141         lock( this->lock __cfaabi_dbg_ctx2 );
    142         // Interrupts disable inside critical section
    143         $thread * thrd = kernelTLS.this_thread;
    144 
    145         __cfaabi_dbg_print_safe( "Kernel : %10p Entering dtor for mon %p (%p)\n", thrd, this, this->owner);
    146 
    147 
    148         if( !this->owner ) {
    149                 __cfaabi_dbg_print_safe( "Kernel : Destroying free mon %p\n", this);
    150 
    151                 // No one has the monitor, just take it
    152                 __set_owner( this, thrd );
    153 
    154                 verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
    155 
     131                // Release the lock and leave
    156132                unlock( this->lock );
    157133                return;
    158134        }
    159         else if( this->owner == thrd) {
    160                 // We already have the monitor... but where about to destroy it so the nesting will fail
    161                 // Abort!
    162                 abort( "Attempt to destroy monitor %p by thread \"%.256s\" (%p) in nested mutex.", this, thrd->self_cor.name, thrd );
    163         }
    164 
    165         __lock_size_t count = 1;
    166         $monitor ** monitors = &this;
    167         __monitor_group_t group = { &this, 1, func };
    168         if( is_accepted( this, group) ) {
    169                 __cfaabi_dbg_print_safe( "Kernel :  mon accepts dtor, block and signal it \n" );
    170 
    171                 // Wake the thread that is waiting for this
    172                 __condition_criterion_t * urgent = pop( this->signal_stack );
    173                 /* paranoid */ verify( urgent );
    174 
    175                 // Reset mask
    176                 reset_mask( this );
    177 
    178                 // Create the node specific to this wait operation
    179                 wait_ctx_primed( thrd, 0 )
    180 
    181                 // Some one else has the monitor, wait for him to finish and then run
     135
     136        static void __enter_monitor_dtor( monitor_desc * this, fptr_t func ) {
     137                // Lock the monitor spinlock
     138                lock( this->lock __cfaabi_dbg_ctx2 );
     139                // Interrupts disable inside critical section
     140                thread_desc * thrd = kernelTLS.this_thread;
     141
     142                __cfaabi_dbg_print_safe( "Kernel : %10p Entering dtor for mon %p (%p)\n", thrd, this, this->owner);
     143
     144
     145                if( !this->owner ) {
     146                        __cfaabi_dbg_print_safe( "Kernel : Destroying free mon %p\n", this);
     147
     148                        // No one has the monitor, just take it
     149                        set_owner( this, thrd );
     150
     151                        unlock( this->lock );
     152                        return;
     153                }
     154                else if( this->owner == thrd) {
     155                        // We already have the monitor... but where about to destroy it so the nesting will fail
     156                        // Abort!
     157                        abort( "Attempt to destroy monitor %p by thread \"%.256s\" (%p) in nested mutex.", this, thrd->self_cor.name, thrd );
     158                }
     159
     160                __lock_size_t count = 1;
     161                monitor_desc ** monitors = &this;
     162                __monitor_group_t group = { &this, 1, func };
     163                if( is_accepted( this, group) ) {
     164                        __cfaabi_dbg_print_safe( "Kernel :  mon accepts dtor, block and signal it \n" );
     165
     166                        // Wake the thread that is waiting for this
     167                        __condition_criterion_t * urgent = pop( this->signal_stack );
     168                        verify( urgent );
     169
     170                        // Reset mask
     171                        reset_mask( this );
     172
     173                        // Create the node specific to this wait operation
     174                        wait_ctx_primed( thrd, 0 )
     175
     176                        // Some one else has the monitor, wait for him to finish and then run
     177                        BlockInternal( &this->lock, urgent->owner->waiting_thread );
     178
     179                        // Some one was waiting for us, enter
     180                        set_owner( this, thrd );
     181                }
     182                else {
     183                        __cfaabi_dbg_print_safe( "Kernel :  blocking \n" );
     184
     185                        wait_ctx( thrd, 0 )
     186                        this->dtor_node = &waiter;
     187
     188                        // Some one else has the monitor, wait in line for it
     189                        append( this->entry_queue, thrd );
     190                        BlockInternal( &this->lock );
     191
     192                        // BlockInternal will unlock spinlock, no need to unlock ourselves
     193                        return;
     194                }
     195
     196                __cfaabi_dbg_print_safe( "Kernel : Destroying %p\n", this);
     197
     198        }
     199
     200        // Leave single monitor
     201        void __leave_monitor_desc( monitor_desc * this ) {
     202                // Lock the monitor spinlock
     203                lock( this->lock __cfaabi_dbg_ctx2 );
     204
     205                __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", kernelTLS.this_thread, this, this->owner);
     206
     207                verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
     208
     209                // Leaving a recursion level, decrement the counter
     210                this->recursion -= 1;
     211
     212                // If we haven't left the last level of recursion
     213                // it means we don't need to do anything
     214                if( this->recursion != 0) {
     215                        __cfaabi_dbg_print_safe( "Kernel :  recursion still %d\n", this->recursion);
     216                        unlock( this->lock );
     217                        return;
     218                }
     219
     220                // Get the next thread, will be null on low contention monitor
     221                thread_desc * new_owner = next_thread( this );
     222
     223                // We can now let other threads in safely
    182224                unlock( this->lock );
    183225
    184                 // Release the next thread
    185                 /* paranoid */ verifyf( urgent->owner->waiting_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
    186                 unpark( urgent->owner->waiting_thread __cfaabi_dbg_ctx2 );
    187 
    188                 // Park current thread waiting
    189                 park( __cfaabi_dbg_ctx );
    190 
    191                 // Some one was waiting for us, enter
    192                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
    193         }
    194         else {
    195                 __cfaabi_dbg_print_safe( "Kernel :  blocking \n" );
    196 
    197                 wait_ctx( thrd, 0 )
    198                 this->dtor_node = &waiter;
    199 
    200                 // Some one else has the monitor, wait in line for it
    201                 /* paranoid */ verify( thrd->next == 0p );
    202                 append( this->entry_queue, thrd );
    203                 /* paranoid */ verify( thrd->next == 1p );
    204                 unlock( this->lock );
    205 
    206                 // Park current thread waiting
    207                 park( __cfaabi_dbg_ctx );
    208 
    209                 /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
    210                 return;
    211         }
    212 
    213         __cfaabi_dbg_print_safe( "Kernel : Destroying %p\n", this);
    214 
    215 }
    216 
    217 // Leave single monitor
    218 void __leave( $monitor * this ) {
    219         // Lock the monitor spinlock
    220         lock( this->lock __cfaabi_dbg_ctx2 );
    221 
    222         __cfaabi_dbg_print_safe( "Kernel : %10p Leaving mon %p (%p)\n", kernelTLS.this_thread, this, this->owner);
    223 
    224         /* paranoid */ verifyf( kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
    225 
    226         // Leaving a recursion level, decrement the counter
    227         this->recursion -= 1;
    228 
    229         // If we haven't left the last level of recursion
    230         // it means we don't need to do anything
    231         if( this->recursion != 0) {
    232                 __cfaabi_dbg_print_safe( "Kernel :  recursion still %d\n", this->recursion);
    233                 unlock( this->lock );
    234                 return;
    235         }
    236 
    237         // Get the next thread, will be null on low contention monitor
    238         $thread * new_owner = next_thread( this );
    239 
    240         // Check the new owner is consistent with who we wake-up
    241         // new_owner might be null even if someone owns the monitor when the owner is still waiting for another monitor
    242         /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
    243 
    244         // We can now let other threads in safely
    245         unlock( this->lock );
    246 
    247         //We need to wake-up the thread
    248         /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
    249         unpark( new_owner __cfaabi_dbg_ctx2 );
    250 }
    251 
    252 // Leave single monitor for the last time
    253 void __dtor_leave( $monitor * this ) {
    254         __cfaabi_dbg_debug_do(
    255                 if( TL_GET( this_thread ) != this->owner ) {
    256                         abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
    257                 }
    258                 if( this->recursion != 1 ) {
    259                         abort( "Destroyed monitor %p has %d outstanding nested calls.\n", this, this->recursion - 1);
    260                 }
    261         )
    262 }
    263 
    264 extern "C" {
     226                //We need to wake-up the thread
     227                WakeThread( new_owner );
     228        }
     229
     230        // Leave single monitor for the last time
     231        void __leave_dtor_monitor_desc( monitor_desc * this ) {
     232                __cfaabi_dbg_debug_do(
     233                        if( TL_GET( this_thread ) != this->owner ) {
     234                                abort( "Destroyed monitor %p has inconsistent owner, expected %p got %p.\n", this, TL_GET( this_thread ), this->owner);
     235                        }
     236                        if( this->recursion != 1 ) {
     237                                abort( "Destroyed monitor %p has %d outstanding nested calls.\n", this, this->recursion - 1);
     238                        }
     239                )
     240        }
     241
    265242        // Leave the thread monitor
    266243        // last routine called by a thread.
    267244        // Should never return
    268         void __cfactx_thrd_leave() {
    269                 $thread * thrd = TL_GET( this_thread );
    270                 $monitor * this = &thrd->self_mon;
     245        void __leave_thread_monitor() {
     246                thread_desc * thrd = TL_GET( this_thread );
     247                monitor_desc * this = &thrd->self_mon;
    271248
    272249                // Lock the monitor now
     
    275252                disable_interrupts();
    276253
    277                 thrd->state = Halted;
    278 
    279                 /* paranoid */ verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
     254                thrd->self_cor.state = Halted;
     255
     256                verifyf( thrd == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", thrd, this->owner, this->recursion, this );
    280257
    281258                // Leaving a recursion level, decrement the counter
     
    287264
    288265                // Fetch the next thread, can be null
    289                 $thread * new_owner = next_thread( this );
    290 
    291                 // Release the monitor lock
    292                 unlock( this->lock );
    293 
    294                 // Unpark the next owner if needed
    295                 /* paranoid */ verifyf( !new_owner || new_owner == this->owner, "Expected owner to be %p, got %p (m: %p)", new_owner, this->owner, this );
    296                 /* paranoid */ verify( ! kernelTLS.preemption_state.enabled );
    297                 /* paranoid */ verify( ! kernelTLS.this_processor->destroyer );
    298                 /* paranoid */ verify( thrd->state == Halted );
    299 
    300                 kernelTLS.this_processor->destroyer = new_owner;
    301 
    302                 // Leave the thread
    303                 __leave_thread();
     266                thread_desc * new_owner = next_thread( this );
     267
     268                // Leave the thread, this will unlock the spinlock
     269                // Use leave thread instead of BlockInternal which is
     270                // specialized for this case and supports null new_owner
     271                LeaveThread( &this->lock, new_owner );
    304272
    305273                // Control flow should never reach here!
     
    311279static inline void enter( __monitor_group_t monitors ) {
    312280        for( __lock_size_t i = 0; i < monitors.size; i++) {
    313                 __enter( monitors[i], monitors );
     281                __enter_monitor_desc( monitors[i], monitors );
    314282        }
    315283}
     
    317285// Leave multiple monitor
    318286// relies on the monitor array being sorted
    319 static inline void leave($monitor * monitors [], __lock_size_t count) {
     287static inline void leave(monitor_desc * monitors [], __lock_size_t count) {
    320288        for( __lock_size_t i = count - 1; i >= 0; i--) {
    321                 __leave( monitors[i] );
     289                __leave_monitor_desc( monitors[i] );
    322290        }
    323291}
     
    325293// Ctor for monitor guard
    326294// Sorts monitors before entering
    327 void ?{}( monitor_guard_t & this, $monitor * m [], __lock_size_t count, fptr_t func ) {
    328         $thread * thrd = TL_GET( this_thread );
     295void ?{}( monitor_guard_t & this, monitor_desc * m [], __lock_size_t count, fptr_t func ) {
     296        thread_desc * thrd = TL_GET( this_thread );
    329297
    330298        // Store current array
     
    366334// Ctor for monitor guard
    367335// Sorts monitors before entering
    368 void ?{}( monitor_dtor_guard_t & this, $monitor * m [], fptr_t func ) {
     336void ?{}( monitor_dtor_guard_t & this, monitor_desc * m [], fptr_t func ) {
    369337        // optimization
    370         $thread * thrd = TL_GET( this_thread );
     338        thread_desc * thrd = TL_GET( this_thread );
    371339
    372340        // Store current array
     
    379347        (thrd->monitors){m, 1, func};
    380348
    381         __dtor_enter( this.m, func );
     349        __enter_monitor_dtor( this.m, func );
    382350}
    383351
     
    385353void ^?{}( monitor_dtor_guard_t & this ) {
    386354        // Leave the monitors in order
    387         __dtor_leave( this.m );
     355        __leave_dtor_monitor_desc( this.m );
    388356
    389357        // Restore thread context
     
    393361//-----------------------------------------------------------------------------
    394362// Internal scheduling types
    395 void ?{}(__condition_node_t & this, $thread * waiting_thread, __lock_size_t count, uintptr_t user_info ) {
     363void ?{}(__condition_node_t & this, thread_desc * waiting_thread, __lock_size_t count, uintptr_t user_info ) {
    396364        this.waiting_thread = waiting_thread;
    397365        this.count = count;
     
    407375}
    408376
    409 void ?{}(__condition_criterion_t & this, $monitor * target, __condition_node_t & owner ) {
     377void ?{}(__condition_criterion_t & this, monitor_desc * target, __condition_node_t & owner ) {
    410378        this.ready  = false;
    411379        this.target = target;
     
    432400        // Append the current wait operation to the ones already queued on the condition
    433401        // We don't need locks for that since conditions must always be waited on inside monitor mutual exclusion
    434         /* paranoid */ verify( waiter.next == 0p );
    435402        append( this.blocked, &waiter );
    436         /* paranoid */ verify( waiter.next == 1p );
    437403
    438404        // Lock all monitors (aggregates the locks as well)
     
    441407        // Find the next thread(s) to run
    442408        __lock_size_t thread_count = 0;
    443         $thread * threads[ count ];
     409        thread_desc * threads[ count ];
    444410        __builtin_memset( threads, 0, sizeof( threads ) );
    445411
     
    449415        // Remove any duplicate threads
    450416        for( __lock_size_t i = 0; i < count; i++) {
    451                 $thread * new_owner = next_thread( monitors[i] );
     417                thread_desc * new_owner = next_thread( monitors[i] );
    452418                insert_unique( threads, thread_count, new_owner );
    453419        }
    454420
    455         // Unlock the locks, we don't need them anymore
    456         for(int i = 0; i < count; i++) {
    457                 unlock( *locks[i] );
    458         }
    459 
    460         // Wake the threads
    461         for(int i = 0; i < thread_count; i++) {
    462                 unpark( threads[i] __cfaabi_dbg_ctx2 );
    463         }
    464 
    465421        // Everything is ready to go to sleep
    466         park( __cfaabi_dbg_ctx );
     422        BlockInternal( locks, count, threads, thread_count );
    467423
    468424        // We are back, restore the owners and recursions
     
    479435        //Some more checking in debug
    480436        __cfaabi_dbg_debug_do(
    481                 $thread * this_thrd = TL_GET( this_thread );
     437                thread_desc * this_thrd = TL_GET( this_thread );
    482438                if ( this.monitor_count != this_thrd->monitors.size ) {
    483439                        abort( "Signal on condition %p made with different number of monitor(s), expected %zi got %zi", &this, this.monitor_count, this_thrd->monitors.size );
     
    533489
    534490        //Find the thread to run
    535         $thread * signallee = pop_head( this.blocked )->waiting_thread;
    536         __set_owner( monitors, count, signallee );
     491        thread_desc * signallee = pop_head( this.blocked )->waiting_thread;
     492        set_owner( monitors, count, signallee );
    537493
    538494        __cfaabi_dbg_print_buffer_decl( "Kernel : signal_block condition %p (s: %p)\n", &this, signallee );
    539495
    540         // unlock all the monitors
    541         unlock_all( locks, count );
    542 
    543         // unpark the thread we signalled
    544         unpark( signallee __cfaabi_dbg_ctx2 );
    545 
    546496        //Everything is ready to go to sleep
    547         park( __cfaabi_dbg_ctx );
     497        BlockInternal( locks, count, &signallee, 1 );
    548498
    549499
     
    586536        // Create one!
    587537        __lock_size_t max = count_max( mask );
    588         $monitor * mon_storage[max];
     538        monitor_desc * mon_storage[max];
    589539        __builtin_memset( mon_storage, 0, sizeof( mon_storage ) );
    590540        __lock_size_t actual_count = aggregate( mon_storage, mask );
     
    604554        {
    605555                // Check if the entry queue
    606                 $thread * next; int index;
     556                thread_desc * next; int index;
    607557                [next, index] = search_entry_queue( mask, monitors, count );
    608558
     
    614564                                verifyf( accepted.size == 1,  "ERROR: Accepted dtor has more than 1 mutex parameter." );
    615565
    616                                 $monitor * mon2dtor = accepted[0];
     566                                monitor_desc * mon2dtor = accepted[0];
    617567                                verifyf( mon2dtor->dtor_node, "ERROR: Accepted monitor has no dtor_node." );
    618568
     
    640590
    641591                                // Set the owners to be the next thread
    642                                 __set_owner( monitors, count, next );
    643 
    644                                 // unlock all the monitors
    645                                 unlock_all( locks, count );
    646 
    647                                 // unpark the thread we signalled
    648                                 unpark( next __cfaabi_dbg_ctx2 );
    649 
    650                                 //Everything is ready to go to sleep
    651                                 park( __cfaabi_dbg_ctx );
     592                                set_owner( monitors, count, next );
     593
     594                                // Everything is ready to go to sleep
     595                                BlockInternal( locks, count, &next, 1 );
    652596
    653597                                // We are back, restore the owners and recursions
     
    687631        }
    688632
    689         // unlock all the monitors
    690         unlock_all( locks, count );
    691 
    692633        //Everything is ready to go to sleep
    693         park( __cfaabi_dbg_ctx );
     634        BlockInternal( locks, count );
    694635
    695636
     
    708649// Utilities
    709650
    710 static inline void __set_owner( $monitor * this, $thread * owner ) {
    711         /* paranoid */ verify( this->lock.lock );
     651static inline void set_owner( monitor_desc * this, thread_desc * owner ) {
     652        // __cfaabi_dbg_print_safe( "Kernal :   Setting owner of %p to %p ( was %p)\n", this, owner, this->owner );
    712653
    713654        //Pass the monitor appropriately
     
    718659}
    719660
    720 static inline void __set_owner( $monitor * monitors [], __lock_size_t count, $thread * owner ) {
    721         /* paranoid */ verify ( monitors[0]->lock.lock );
    722         /* paranoid */ verifyf( monitors[0]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[0]->owner, monitors[0]->recursion, monitors[0] );
    723         monitors[0]->owner        = owner;
    724         monitors[0]->recursion    = 1;
     661static inline void set_owner( monitor_desc * monitors [], __lock_size_t count, thread_desc * owner ) {
     662        monitors[0]->owner     = owner;
     663        monitors[0]->recursion = 1;
    725664        for( __lock_size_t i = 1; i < count; i++ ) {
    726                 /* paranoid */ verify ( monitors[i]->lock.lock );
    727                 /* paranoid */ verifyf( monitors[i]->owner == kernelTLS.this_thread, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, monitors[i]->owner, monitors[i]->recursion, monitors[i] );
    728                 monitors[i]->owner        = owner;
    729                 monitors[i]->recursion    = 0;
    730         }
    731 }
    732 
    733 static inline void set_mask( $monitor * storage [], __lock_size_t count, const __waitfor_mask_t & mask ) {
     665                monitors[i]->owner     = owner;
     666                monitors[i]->recursion = 0;
     667        }
     668}
     669
     670static inline void set_mask( monitor_desc * storage [], __lock_size_t count, const __waitfor_mask_t & mask ) {
    734671        for( __lock_size_t i = 0; i < count; i++) {
    735672                storage[i]->mask = mask;
     
    737674}
    738675
    739 static inline void reset_mask( $monitor * this ) {
     676static inline void reset_mask( monitor_desc * this ) {
    740677        this->mask.accepted = 0p;
    741678        this->mask.data = 0p;
     
    743680}
    744681
    745 static inline $thread * next_thread( $monitor * this ) {
     682static inline thread_desc * next_thread( monitor_desc * this ) {
    746683        //Check the signaller stack
    747684        __cfaabi_dbg_print_safe( "Kernel :  mon %p AS-stack top %p\n", this, this->signal_stack.top);
     
    751688                //regardless of if we are ready to baton pass,
    752689                //we need to set the monitor as in use
    753                 /* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
    754                 __set_owner( this,  urgent->owner->waiting_thread );
     690                set_owner( this,  urgent->owner->waiting_thread );
    755691
    756692                return check_condition( urgent );
     
    759695        // No signaller thread
    760696        // Get the next thread in the entry_queue
    761         $thread * new_owner = pop_head( this->entry_queue );
    762         /* paranoid */ verifyf( !this->owner || kernelTLS.this_thread == this->owner, "Expected owner to be %p, got %p (r: %i, m: %p)", kernelTLS.this_thread, this->owner, this->recursion, this );
    763         /* paranoid */ verify( !new_owner || new_owner->next == 0p );
    764         __set_owner( this, new_owner );
     697        thread_desc * new_owner = pop_head( this->entry_queue );
     698        set_owner( this, new_owner );
    765699
    766700        return new_owner;
    767701}
    768702
    769 static inline bool is_accepted( $monitor * this, const __monitor_group_t & group ) {
     703static inline bool is_accepted( monitor_desc * this, const __monitor_group_t & group ) {
    770704        __acceptable_t * it = this->mask.data; // Optim
    771705        __lock_size_t count = this->mask.size;
     
    789723}
    790724
    791 static inline void init( __lock_size_t count, $monitor * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
     725static inline void init( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
    792726        for( __lock_size_t i = 0; i < count; i++) {
    793727                (criteria[i]){ monitors[i], waiter };
     
    797731}
    798732
    799 static inline void init_push( __lock_size_t count, $monitor * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
     733static inline void init_push( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
    800734        for( __lock_size_t i = 0; i < count; i++) {
    801735                (criteria[i]){ monitors[i], waiter };
     
    813747}
    814748
    815 static inline void lock_all( $monitor * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count ) {
     749static inline void lock_all( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count ) {
    816750        for( __lock_size_t i = 0; i < count; i++ ) {
    817751                __spinlock_t * l = &source[i]->lock;
     
    827761}
    828762
    829 static inline void unlock_all( $monitor * locks [], __lock_size_t count ) {
     763static inline void unlock_all( monitor_desc * locks [], __lock_size_t count ) {
    830764        for( __lock_size_t i = 0; i < count; i++ ) {
    831765                unlock( locks[i]->lock );
     
    834768
    835769static inline void save(
    836         $monitor * ctx [],
     770        monitor_desc * ctx [],
    837771        __lock_size_t count,
    838772        __attribute((unused)) __spinlock_t * locks [],
     
    847781
    848782static inline void restore(
    849         $monitor * ctx [],
     783        monitor_desc * ctx [],
    850784        __lock_size_t count,
    851785        __spinlock_t * locks [],
     
    865799// 2 - Checks if all the monitors are ready to run
    866800//     if so return the thread to run
    867 static inline $thread * check_condition( __condition_criterion_t * target ) {
     801static inline thread_desc * check_condition( __condition_criterion_t * target ) {
    868802        __condition_node_t * node = target->owner;
    869803        unsigned short count = node->count;
     
    888822
    889823static inline void brand_condition( condition & this ) {
    890         $thread * thrd = TL_GET( this_thread );
     824        thread_desc * thrd = TL_GET( this_thread );
    891825        if( !this.monitors ) {
    892826                // __cfaabi_dbg_print_safe( "Branding\n" );
     
    894828                this.monitor_count = thrd->monitors.size;
    895829
    896                 this.monitors = ($monitor **)malloc( this.monitor_count * sizeof( *this.monitors ) );
     830                this.monitors = (monitor_desc **)malloc( this.monitor_count * sizeof( *this.monitors ) );
    897831                for( int i = 0; i < this.monitor_count; i++ ) {
    898832                        this.monitors[i] = thrd->monitors[i];
     
    901835}
    902836
    903 static inline [$thread *, int] search_entry_queue( const __waitfor_mask_t & mask, $monitor * monitors [], __lock_size_t count ) {
    904 
    905         __queue_t($thread) & entry_queue = monitors[0]->entry_queue;
     837static inline [thread_desc *, int] search_entry_queue( const __waitfor_mask_t & mask, monitor_desc * monitors [], __lock_size_t count ) {
     838
     839        __queue_t(thread_desc) & entry_queue = monitors[0]->entry_queue;
    906840
    907841        // For each thread in the entry-queue
    908         for(    $thread ** thrd_it = &entry_queue.head;
     842        for(    thread_desc ** thrd_it = &entry_queue.head;
    909843                *thrd_it;
    910844                thrd_it = &(*thrd_it)->link.next
     
    950884}
    951885
    952 static inline __lock_size_t aggregate( $monitor * storage [], const __waitfor_mask_t & mask ) {
     886static inline __lock_size_t aggregate( monitor_desc * storage [], const __waitfor_mask_t & mask ) {
    953887        __lock_size_t size = 0;
    954888        for( __lock_size_t i = 0; i < mask.size; i++ ) {
  • libcfa/src/concurrency/monitor.hfa

    r6a490b2 rb7d6a36  
    2323
    2424trait is_monitor(dtype T) {
    25         $monitor * get_monitor( T & );
     25        monitor_desc * get_monitor( T & );
    2626        void ^?{}( T & mutex );
    2727};
    2828
    29 static inline void ?{}($monitor & this) with( this ) {
     29static inline void ?{}(monitor_desc & this) with( this ) {
    3030        lock{};
    3131        entry_queue{};
     
    3939}
    4040
    41 static inline void ^?{}($monitor & ) {}
     41static inline void ^?{}(monitor_desc & ) {}
    4242
    4343struct monitor_guard_t {
    44         $monitor **     m;
     44        monitor_desc **         m;
    4545        __lock_size_t           count;
    4646        __monitor_group_t prev;
    4747};
    4848
    49 void ?{}( monitor_guard_t & this, $monitor ** m, __lock_size_t count, void (*func)() );
     49void ?{}( monitor_guard_t & this, monitor_desc ** m, __lock_size_t count, void (*func)() );
    5050void ^?{}( monitor_guard_t & this );
    5151
    5252struct monitor_dtor_guard_t {
    53         $monitor *    m;
     53        monitor_desc *    m;
    5454        __monitor_group_t prev;
    5555};
    5656
    57 void ?{}( monitor_dtor_guard_t & this, $monitor ** m, void (*func)() );
     57void ?{}( monitor_dtor_guard_t & this, monitor_desc ** m, void (*func)() );
    5858void ^?{}( monitor_dtor_guard_t & this );
    5959
     
    7272
    7373        // The monitor this criterion concerns
    74         $monitor * target;
     74        monitor_desc * target;
    7575
    7676        // The parent node to which this criterion belongs
     
    8787struct __condition_node_t {
    8888        // Thread that needs to be woken when all criteria are met
    89         $thread * waiting_thread;
     89        thread_desc * waiting_thread;
    9090
    9191        // Array of criteria (Criterions are contiguous in memory)
     
    106106}
    107107
    108 void ?{}(__condition_node_t & this, $thread * waiting_thread, __lock_size_t count, uintptr_t user_info );
     108void ?{}(__condition_node_t & this, thread_desc * waiting_thread, __lock_size_t count, uintptr_t user_info );
    109109void ?{}(__condition_criterion_t & this );
    110 void ?{}(__condition_criterion_t & this, $monitor * target, __condition_node_t * owner );
     110void ?{}(__condition_criterion_t & this, monitor_desc * target, __condition_node_t * owner );
    111111
    112112struct condition {
     
    115115
    116116        // Array of monitor pointers (Monitors are NOT contiguous in memory)
    117         $monitor ** monitors;
     117        monitor_desc ** monitors;
    118118
    119119        // Number of monitors in the array
     
    133133              bool signal      ( condition & this );
    134134              bool signal_block( condition & this );
    135 static inline bool is_empty    ( condition & this ) { return this.blocked.head == 1p; }
     135static inline bool is_empty    ( condition & this ) { return !this.blocked.head; }
    136136         uintptr_t front       ( condition & this );
    137137
  • libcfa/src/concurrency/mutex.cfa

    r6a490b2 rb7d6a36  
    4040        if( is_locked ) {
    4141                append( blocked_threads, kernelTLS.this_thread );
    42                 unlock( lock );
    43                 park( __cfaabi_dbg_ctx );
     42                BlockInternal( &lock );
    4443        }
    4544        else {
     
    6362        lock( this.lock __cfaabi_dbg_ctx2 );
    6463        this.is_locked = (this.blocked_threads != 0);
    65         unpark(
    66                 pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
     64        WakeThread(
     65                pop_head( this.blocked_threads )
    6766        );
    6867        unlock( this.lock );
     
    9594        else {
    9695                append( blocked_threads, kernelTLS.this_thread );
    97                 unlock( lock );
    98                 park( __cfaabi_dbg_ctx );
     96                BlockInternal( &lock );
    9997        }
    10098}
     
    120118        recursion_count--;
    121119        if( recursion_count == 0 ) {
    122                 $thread * thrd = pop_head( blocked_threads );
     120                thread_desc * thrd = pop_head( blocked_threads );
    123121                owner = thrd;
    124122                recursion_count = (thrd ? 1 : 0);
    125                 unpark( thrd __cfaabi_dbg_ctx2 );
     123                WakeThread( thrd );
    126124        }
    127125        unlock( lock );
     
    140138void notify_one(condition_variable & this) with(this) {
    141139        lock( lock __cfaabi_dbg_ctx2 );
    142         unpark(
    143                 pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
     140        WakeThread(
     141                pop_head( this.blocked_threads )
    144142        );
    145143        unlock( lock );
     
    149147        lock( lock __cfaabi_dbg_ctx2 );
    150148        while(this.blocked_threads) {
    151                 unpark(
    152                         pop_head( this.blocked_threads ) __cfaabi_dbg_ctx2
     149                WakeThread(
     150                        pop_head( this.blocked_threads )
    153151                );
    154152        }
     
    159157        lock( this.lock __cfaabi_dbg_ctx2 );
    160158        append( this.blocked_threads, kernelTLS.this_thread );
    161         unlock( this.lock );
    162         park( __cfaabi_dbg_ctx );
     159        BlockInternal( &this.lock );
    163160}
    164161
     
    167164        lock( this.lock __cfaabi_dbg_ctx2 );
    168165        append( this.blocked_threads, kernelTLS.this_thread );
    169         unlock(l);
    170         unlock(this.lock);
    171         park( __cfaabi_dbg_ctx );
     166        void __unlock(void) {
     167                unlock(l);
     168                unlock(this.lock);
     169        }
     170        BlockInternal( __unlock );
    172171        lock(l);
    173172}
  • libcfa/src/concurrency/mutex.hfa

    r6a490b2 rb7d6a36  
    3636
    3737        // List of blocked threads
    38         __queue_t(struct $thread) blocked_threads;
     38        __queue_t(struct thread_desc) blocked_threads;
    3939
    4040        // Locked flag
     
    5555
    5656        // List of blocked threads
    57         __queue_t(struct $thread) blocked_threads;
     57        __queue_t(struct thread_desc) blocked_threads;
    5858
    5959        // Current thread owning the lock
    60         struct $thread * owner;
     60        struct thread_desc * owner;
    6161
    6262        // Number of recursion level
     
    8383
    8484        // List of blocked threads
    85         __queue_t(struct $thread) blocked_threads;
     85        __queue_t(struct thread_desc) blocked_threads;
    8686};
    8787
  • libcfa/src/concurrency/preemption.cfa

    r6a490b2 rb7d6a36  
    3939// FwdDeclarations : timeout handlers
    4040static void preempt( processor   * this );
    41 static void timeout( $thread * this );
     41static void timeout( thread_desc * this );
    4242
    4343// FwdDeclarations : Signal handlers
    4444static void sigHandler_ctxSwitch( __CFA_SIGPARMS__ );
    45 static void sigHandler_alarm    ( __CFA_SIGPARMS__ );
    4645static void sigHandler_segv     ( __CFA_SIGPARMS__ );
    4746static void sigHandler_ill      ( __CFA_SIGPARMS__ );
     
    8483// Get next expired node
    8584static inline alarm_node_t * get_expired( alarm_list_t * alarms, Time currtime ) {
    86         if( ! & (*alarms)`first ) return 0p;                                            // If no alarms return null
    87         if( (*alarms)`first.alarm >= currtime ) return 0p;      // If alarms head not expired return null
     85        if( !alarms->head ) return 0p;                                          // If no alarms return null
     86        if( alarms->head->alarm >= currtime ) return 0p;        // If alarms head not expired return null
    8887        return pop(alarms);                                                                     // Otherwise just pop head
    8988}
     
    9897        while( node = get_expired( alarms, currtime ) ) {
    9998                // __cfaabi_dbg_print_buffer_decl( " KERNEL: preemption tick.\n" );
    100                 Duration period = node->period;
    101                 if( period == 0) {
    102                         node->set = false;                  // Node is one-shot, just mark it as not pending
    103                 }
    10499
    105100                // Check if this is a kernel
     
    112107
    113108                // Check if this is a periodic alarm
     109                Duration period = node->period;
    114110                if( period > 0 ) {
    115111                        // __cfaabi_dbg_print_buffer_local( " KERNEL: alarm period is %lu.\n", period.tv );
     
    117113                        insert( alarms, node );             // Reinsert the node for the next time it triggers
    118114                }
     115                else {
     116                        node->set = false;                  // Node is one-shot, just mark it as not pending
     117                }
    119118        }
    120119
    121120        // If there are still alarms pending, reset the timer
    122         if( & (*alarms)`first ) {
    123                 __cfadbg_print_buffer_decl(preemption, " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
    124                 Duration delta = (*alarms)`first.alarm - currtime;
    125                 Duration capped = max(delta, 50`us);
     121        if( alarms->head ) {
     122                // __cfaabi_dbg_print_buffer_decl( " KERNEL: @%ju(%ju) resetting alarm to %ju.\n", currtime.tv, __kernel_get_time().tv, (alarms->head->alarm - currtime).tv);
     123                Duration delta = alarms->head->alarm - currtime;
     124                Duration caped = max(delta, 50`us);
    126125                // itimerval tim  = { caped };
    127126                // __cfaabi_dbg_print_buffer_local( "    Values are %lu, %lu, %lu %lu.\n", delta.tv, caped.tv, tim.it_value.tv_sec, tim.it_value.tv_usec);
    128127
    129                 __kernel_set_timer( capped );
     128                __kernel_set_timer( caped );
    130129        }
    131130}
     
    185184
    186185        // Enable interrupts by decrementing the counter
    187         // If counter reaches 0, execute any pending __cfactx_switch
     186        // If counter reaches 0, execute any pending CtxSwitch
    188187        void enable_interrupts( __cfaabi_dbg_ctx_param ) {
    189188                processor   * proc = kernelTLS.this_processor; // Cache the processor now since interrupts can start happening after the atomic store
     189                thread_desc * thrd = kernelTLS.this_thread;       // Cache the thread now since interrupts can start happening after the atomic store
    190190
    191191                with( kernelTLS.preemption_state ){
     
    209209                                if( proc->pending_preemption ) {
    210210                                        proc->pending_preemption = false;
    211                                         force_yield( __POLL_PREEMPTION );
     211                                        BlockInternal( thrd );
    212212                                }
    213213                        }
     
    219219
    220220        // Disable interrupts by incrementint the counter
    221         // Don't execute any pending __cfactx_switch even if counter reaches 0
     221        // Don't execute any pending CtxSwitch even if counter reaches 0
    222222        void enable_interrupts_noPoll() {
    223223                unsigned short prev = kernelTLS.preemption_state.disable_count;
     
    257257
    258258        if ( pthread_sigmask( SIG_BLOCK, &mask, 0p ) == -1 ) {
    259                 abort( "internal error, pthread_sigmask" );
     259            abort( "internal error, pthread_sigmask" );
    260260        }
    261261}
     
    268268
    269269// reserved for future use
    270 static void timeout( $thread * this ) {
    271         __unpark( this __cfaabi_dbg_ctx2 );
     270static void timeout( thread_desc * this ) {
     271        //TODO : implement waking threads
    272272}
    273273
    274274// KERNEL ONLY
    275 // Check if a __cfactx_switch signal handler shoud defer
     275// Check if a CtxSwitch signal handler shoud defer
    276276// If true  : preemption is safe
    277277// If false : preemption is unsafe and marked as pending
     
    303303
    304304        // Setup proper signal handlers
    305         __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // __cfactx_switch handler
    306         __cfaabi_sigaction( SIGALRM, sigHandler_alarm    , SA_SIGINFO | SA_RESTART ); // debug handler
     305        __cfaabi_sigaction( SIGUSR1, sigHandler_ctxSwitch, SA_SIGINFO | SA_RESTART ); // CtxSwitch handler
    307306
    308307        signal_block( SIGALRM );
    309308
    310         alarm_stack = __create_pthread( &alarm_thread, alarm_loop, 0p );
     309        alarm_stack = create_pthread( &alarm_thread, alarm_loop, 0p );
    311310}
    312311
     
    395394        // Preemption can occur here
    396395
    397         force_yield( __ALARM_PREEMPTION ); // Do the actual __cfactx_switch
    398 }
    399 
    400 static void sigHandler_alarm( __CFA_SIGPARMS__ ) {
    401         abort("SIGALRM should never reach the signal handler");
     396        BlockInternal( kernelTLS.this_thread ); // Do the actual CtxSwitch
    402397}
    403398
  • libcfa/src/concurrency/thread.cfa

    r6a490b2 rb7d6a36  
    2323#include "invoke.h"
    2424
     25extern "C" {
     26        #include <fenv.h>
     27        #include <stddef.h>
     28}
     29
     30//extern volatile thread_local processor * this_processor;
     31
    2532//-----------------------------------------------------------------------------
    2633// Thread ctors and dtors
    27 void ?{}($thread & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {
     34void ?{}(thread_desc & this, const char * const name, cluster & cl, void * storage, size_t storageSize ) with( this ) {
    2835        context{ 0p, 0p };
    2936        self_cor{ name, storage, storageSize };
    3037        state = Start;
    31         preempted = __NO_PREEMPTION;
    3238        curr_cor = &self_cor;
    3339        self_mon.owner = &this;
     
    4551}
    4652
    47 void ^?{}($thread& this) with( this ) {
     53void ^?{}(thread_desc& this) with( this ) {
    4854        unregister(curr_cluster, this);
    4955        ^self_cor{};
    5056}
    5157
    52 //-----------------------------------------------------------------------------
    53 // Starting and stopping threads
    54 forall( dtype T | is_thread(T) )
    55 void __thrd_start( T & this, void (*main_p)(T &) ) {
    56         $thread * this_thrd = get_thread(this);
    57 
    58         disable_interrupts();
    59         __cfactx_start(main_p, get_coroutine(this), this, __cfactx_invoke_thread);
    60 
    61         this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];
    62         verify( this_thrd->context.SP );
    63 
    64         __schedule_thread(this_thrd);
    65         enable_interrupts( __cfaabi_dbg_ctx );
    66 }
    67 
    68 //-----------------------------------------------------------------------------
    69 // Support for threads that don't ues the thread keyword
    7058forall( dtype T | sized(T) | is_thread(T) | { void ?{}(T&); } )
    7159void ?{}( scoped(T)& this ) with( this ) {
     
    8573}
    8674
     75//-----------------------------------------------------------------------------
     76// Starting and stopping threads
     77forall( dtype T | is_thread(T) )
     78void __thrd_start( T & this, void (*main_p)(T &) ) {
     79        thread_desc * this_thrd = get_thread(this);
     80
     81        disable_interrupts();
     82        CtxStart(main_p, get_coroutine(this), this, CtxInvokeThread);
     83
     84        this_thrd->context.[SP, FP] = this_thrd->self_cor.context.[SP, FP];
     85        verify( this_thrd->context.SP );
     86
     87        ScheduleThread(this_thrd);
     88        enable_interrupts( __cfaabi_dbg_ctx );
     89}
     90
     91void yield( void ) {
     92        // Safety note : This could cause some false positives due to preemption
     93      verify( TL_GET( preemption_state.enabled ) );
     94        BlockInternal( TL_GET( this_thread ) );
     95        // Safety note : This could cause some false positives due to preemption
     96      verify( TL_GET( preemption_state.enabled ) );
     97}
     98
     99void yield( unsigned times ) {
     100        for( unsigned i = 0; i < times; i++ ) {
     101                yield();
     102        }
     103}
     104
    87105// Local Variables: //
    88106// mode: c //
  • libcfa/src/concurrency/thread.hfa

    r6a490b2 rb7d6a36  
    2828      void ^?{}(T& mutex this);
    2929      void main(T& this);
    30       $thread* get_thread(T& this);
     30      thread_desc* get_thread(T& this);
    3131};
    3232
    33 // define that satisfies the trait without using the thread keyword
    34 #define DECL_THREAD(X) $thread* get_thread(X& this) __attribute__((const)) { return &this.__thrd; } void main(X& this)
    35 
    36 // Inline getters for threads/coroutines/monitors
    37 forall( dtype T | is_thread(T) )
    38 static inline $coroutine* get_coroutine(T & this) __attribute__((const)) { return &get_thread(this)->self_cor; }
     33#define DECL_THREAD(X) thread_desc* get_thread(X& this) { return &this.__thrd; } void main(X& this)
    3934
    4035forall( dtype T | is_thread(T) )
    41 static inline $monitor  * get_monitor  (T & this) __attribute__((const)) { return &get_thread(this)->self_mon; }
     36static inline coroutine_desc* get_coroutine(T & this) {
     37        return &get_thread(this)->self_cor;
     38}
    4239
    43 static inline $coroutine* get_coroutine($thread * this) __attribute__((const)) { return &this->self_cor; }
    44 static inline $monitor  * get_monitor  ($thread * this) __attribute__((const)) { return &this->self_mon; }
     40forall( dtype T | is_thread(T) )
     41static inline monitor_desc* get_monitor(T & this) {
     42        return &get_thread(this)->self_mon;
     43}
    4544
    46 //-----------------------------------------------------------------------------
    47 // forward declarations needed for threads
     45static inline coroutine_desc* get_coroutine(thread_desc * this) {
     46        return &this->self_cor;
     47}
     48
     49static inline monitor_desc* get_monitor(thread_desc * this) {
     50        return &this->self_mon;
     51}
     52
    4853extern struct cluster * mainCluster;
    4954
     
    5358//-----------------------------------------------------------------------------
    5459// Ctors and dtors
    55 void ?{}($thread & this, const char * const name, struct cluster & cl, void * storage, size_t storageSize );
    56 void ^?{}($thread & this);
     60void ?{}(thread_desc & this, const char * const name, struct cluster & cl, void * storage, size_t storageSize );
     61void ^?{}(thread_desc & this);
    5762
    58 static inline void ?{}($thread & this)                                                                  { this{ "Anonymous Thread", *mainCluster, 0p, 65000 }; }
    59 static inline void ?{}($thread & this, size_t stackSize )                                               { this{ "Anonymous Thread", *mainCluster, 0p, stackSize }; }
    60 static inline void ?{}($thread & this, void * storage, size_t storageSize )                             { this{ "Anonymous Thread", *mainCluster, storage, storageSize }; }
    61 static inline void ?{}($thread & this, struct cluster & cl )                                            { this{ "Anonymous Thread", cl, 0p, 65000 }; }
    62 static inline void ?{}($thread & this, struct cluster & cl, size_t stackSize )                          { this{ "Anonymous Thread", cl, 0p, stackSize }; }
    63 static inline void ?{}($thread & this, struct cluster & cl, void * storage, size_t storageSize )        { this{ "Anonymous Thread", cl, storage, storageSize }; }
    64 static inline void ?{}($thread & this, const char * const name)                                         { this{ name, *mainCluster, 0p, 65000 }; }
    65 static inline void ?{}($thread & this, const char * const name, struct cluster & cl )                   { this{ name, cl, 0p, 65000 }; }
    66 static inline void ?{}($thread & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, 0p, stackSize }; }
     63static inline void ?{}(thread_desc & this)                                                                  { this{ "Anonymous Thread", *mainCluster, 0p, 65000 }; }
     64static inline void ?{}(thread_desc & this, size_t stackSize )                                               { this{ "Anonymous Thread", *mainCluster, 0p, stackSize }; }
     65static inline void ?{}(thread_desc & this, void * storage, size_t storageSize )                             { this{ "Anonymous Thread", *mainCluster, storage, storageSize }; }
     66static inline void ?{}(thread_desc & this, struct cluster & cl )                                            { this{ "Anonymous Thread", cl, 0p, 65000 }; }
     67static inline void ?{}(thread_desc & this, struct cluster & cl, size_t stackSize )                          { this{ "Anonymous Thread", cl, 0p, stackSize }; }
     68static inline void ?{}(thread_desc & this, struct cluster & cl, void * storage, size_t storageSize )        { this{ "Anonymous Thread", cl, storage, storageSize }; }
     69static inline void ?{}(thread_desc & this, const char * const name)                                         { this{ name, *mainCluster, 0p, 65000 }; }
     70static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl )                   { this{ name, cl, 0p, 65000 }; }
     71static inline void ?{}(thread_desc & this, const char * const name, struct cluster & cl, size_t stackSize ) { this{ name, cl, 0p, stackSize }; }
    6772
    6873//-----------------------------------------------------------------------------
     
    8388void ^?{}( scoped(T)& this );
    8489
    85 //-----------------------------------------------------------------------------
    86 // Thread getters
    87 static inline struct $thread * active_thread () { return TL_GET( this_thread ); }
     90void yield();
     91void yield( unsigned times );
    8892
    89 //-----------------------------------------------------------------------------
    90 // Scheduler API
    91 
    92 //----------
    93 // Park thread: block until corresponding call to unpark, won't block if unpark is already called
    94 void park( __cfaabi_dbg_ctx_param );
    95 
    96 //----------
    97 // Unpark a thread, if the thread is already blocked, schedule it
    98 //                  if the thread is not yet block, signal that it should rerun immediately
    99 void unpark( $thread * this __cfaabi_dbg_ctx_param2 );
    100 
    101 forall( dtype T | is_thread(T) )
    102 static inline void unpark( T & this __cfaabi_dbg_ctx_param2 ) { if(!&this) return; unpark( get_thread( this ) __cfaabi_dbg_ctx_fwd2 );}
    103 
    104 //----------
    105 // Yield: force thread to block and be rescheduled
    106 bool force_yield( enum __Preemption_Reason );
    107 
    108 static inline void yield() {
    109         force_yield(__MANUAL_PREEMPTION);
    110 }
    111 
    112 // Yield: yield N times
    113 static inline void yield( unsigned times ) {
    114         for( times ) {
    115                 yield();
    116         }
    117 }
    118 
    119 //----------
    120 // sleep: force thread to block and be rescheduled after Duration duration
    121 void sleep( Duration duration );
     93static inline struct thread_desc * active_thread () { return TL_GET( this_thread ); }
    12294
    12395// Local Variables: //
  • libcfa/src/exception.c

    r6a490b2 rb7d6a36  
    99// Author           : Andrew Beach
    1010// Created On       : Mon Jun 26 15:13:00 2017
    11 // Last Modified By : Andrew Beach
    12 // Last Modified On : Tue Apr 14 12:01:00 2020
    13 // Update Count     : 18
     11// Last Modified By : Peter A. Buhr
     12// Last Modified On : Thu Feb 22 18:17:34 2018
     13// Update Count     : 11
    1414//
    1515
    16 // Normally we would get this from the CFA prelude.
    1716#include <stddef.h> // for size_t
    1817
    1918#include "exception.h"
    2019
    21 // Implementation of the secret header is hardware dependent.
    22 #if !( defined( __x86_64 ) || defined( __i386 ) )
    23 #error Exception Handling: No known architecture detected.
    24 #endif
     20// Implementation of the secret header.
    2521
    2622#include <stdlib.h>
     
    2824#include <unwind.h>
    2925#include <bits/debug.hfa>
    30 #include "stdhdr/assert.h"
    3126
    3227// FIX ME: temporary hack to keep ARM build working
    3328#ifndef _URC_FATAL_PHASE1_ERROR
    34 #define _URC_FATAL_PHASE1_ERROR 3
     29#define _URC_FATAL_PHASE1_ERROR 2
    3530#endif // ! _URC_FATAL_PHASE1_ERROR
    3631#ifndef _URC_FATAL_PHASE2_ERROR
     
    4035#include "lsda.h"
    4136
    42 /* The exception class for our exceptions. Because of the vendor component
    43  * its value would not be standard.
    44  * Vendor: UWPL
    45  * Language: CFA\0
    46  */
    47 const _Unwind_Exception_Class __cfaehm_exception_class = 0x4c50575500414643;
    4837
    4938// Base exception vtable is abstract, you should not have base exceptions.
    50 struct __cfaehm_base_exception_t_vtable
    51                 ___cfaehm_base_exception_t_vtable_instance = {
     39struct __cfaabi_ehm__base_exception_t_vtable
     40                ___cfaabi_ehm__base_exception_t_vtable_instance = {
    5241        .parent = NULL,
    5342        .size = 0,
     
    6049// Temperary global exception context. Does not work with concurency.
    6150struct exception_context_t {
    62         struct __cfaehm_try_resume_node * top_resume;
    63 
    64         exception_t * current_exception;
    65         int current_handler_index;
    66 } static shared_stack = {NULL, NULL, 0};
     51    struct __cfaabi_ehm__try_resume_node * top_resume;
     52    struct __cfaabi_ehm__try_resume_node * current_resume;
     53
     54    exception_t * current_exception;
     55    int current_handler_index;
     56} shared_stack = {NULL, NULL, 0, 0};
    6757
    6858// Get the current exception context.
     
    7262        return &shared_stack;
    7363}
     64//#define SAVE_EXCEPTION_CONTEXT(to_name)
     65//struct exception_context_t * to_name = this_exception_context();
     66//exception * this_exception() {
     67//    return this_exception_context()->current_exception;
     68//}
     69
     70
     71// This macro should be the only thing that needs to change across machines.
     72// Used in the personality function, way down in termination.
     73// struct _Unwind_Context * -> _Unwind_Reason_Code(*)(exception_t *)
     74#define MATCHER_FROM_CONTEXT(ptr_to_context) \
     75        (*(_Unwind_Reason_Code(**)(exception_t *))(_Unwind_GetCFA(ptr_to_context) + 8))
    7476
    7577
    7678// RESUMPTION ================================================================
    7779
    78 static void reset_top_resume(struct __cfaehm_try_resume_node ** store) {
    79         this_exception_context()->top_resume = *store;
    80 }
    81 
    82 void __cfaehm_throw_resume(exception_t * except) {
    83         struct exception_context_t * context = this_exception_context();
    84 
    85         __cfadbg_print_safe(exception, "Throwing resumption exception\n");
    86 
    87         __attribute__((cleanup(reset_top_resume)))
    88         struct __cfaehm_try_resume_node * original_head = context->top_resume;
    89         struct __cfaehm_try_resume_node * current = context->top_resume;
     80void __cfaabi_ehm__throw_resume(exception_t * except) {
     81
     82        __cfaabi_dbg_print_safe("Throwing resumption exception\n");
     83
     84        struct __cfaabi_ehm__try_resume_node * original_head = shared_stack.current_resume;
     85        struct __cfaabi_ehm__try_resume_node * current =
     86                (original_head) ? original_head->next : shared_stack.top_resume;
    9087
    9188        for ( ; current ; current = current->next) {
    92                 context->top_resume = current->next;
     89                shared_stack.current_resume = current;
    9390                if (current->handler(except)) {
     91                        shared_stack.current_resume = original_head;
    9492                        return;
    9593                }
    9694        }
    9795
    98         __cfadbg_print_safe(exception, "Unhandled exception\n");
     96        __cfaabi_dbg_print_safe("Unhandled exception\n");
     97        shared_stack.current_resume = original_head;
    9998
    10099        // Fall back to termination:
    101         __cfaehm_throw_terminate(except);
     100        __cfaabi_ehm__throw_terminate(except);
    102101        // TODO: Default handler for resumption.
    103102}
     
    107106// be added after the node is built but before it is made the top node.
    108107
    109 void __cfaehm_try_resume_setup(struct __cfaehm_try_resume_node * node,
     108void __cfaabi_ehm__try_resume_setup(struct __cfaabi_ehm__try_resume_node * node,
    110109                        _Bool (*handler)(exception_t * except)) {
    111         struct exception_context_t * context = this_exception_context();
    112         node->next = context->top_resume;
     110        node->next = shared_stack.top_resume;
    113111        node->handler = handler;
    114         context->top_resume = node;
    115 }
    116 
    117 void __cfaehm_try_resume_cleanup(struct __cfaehm_try_resume_node * node) {
    118         struct exception_context_t * context = this_exception_context();
    119         context->top_resume = node->next;
     112        shared_stack.top_resume = node;
     113}
     114
     115void __cfaabi_ehm__try_resume_cleanup(struct __cfaabi_ehm__try_resume_node * node) {
     116        shared_stack.top_resume = node->next;
    120117}
    121118
     
    126123// May have to move to cfa for constructors and destructors (references).
    127124
    128 // How to clean up an exception in various situations.
    129 static void __cfaehm_exception_cleanup(
    130                 _Unwind_Reason_Code reason,
    131                 struct _Unwind_Exception * exception) {
    132         switch (reason) {
    133         case _URC_FOREIGN_EXCEPTION_CAUGHT:
    134                 // This one we could clean-up to allow cross-language exceptions.
    135         case _URC_FATAL_PHASE1_ERROR:
    136         case _URC_FATAL_PHASE2_ERROR:
    137         default:
    138                 abort();
    139         }
    140 }
    141 
    142 // We need a piece of storage to raise the exception, for now its a single
    143 // piece.
    144 static struct _Unwind_Exception this_exception_storage;
    145 
    146 struct __cfaehm_node {
    147         struct __cfaehm_node * next;
     125struct __cfaabi_ehm__node {
     126        struct __cfaabi_ehm__node * next;
    148127};
    149128
    150129#define NODE_TO_EXCEPT(node) ((exception_t *)(1 + (node)))
    151 #define EXCEPT_TO_NODE(except) ((struct __cfaehm_node *)(except) - 1)
     130#define EXCEPT_TO_NODE(except) ((struct __cfaabi_ehm__node *)(except) - 1)
    152131
    153132// Creates a copy of the indicated exception and sets current_exception to it.
    154 static void __cfaehm_allocate_exception( exception_t * except ) {
     133static void __cfaabi_ehm__allocate_exception( exception_t * except ) {
    155134        struct exception_context_t * context = this_exception_context();
    156135
    157136        // Allocate memory for the exception.
    158         struct __cfaehm_node * store = malloc(
    159                 sizeof( struct __cfaehm_node ) + except->virtual_table->size );
     137        struct __cfaabi_ehm__node * store = malloc(
     138                sizeof( struct __cfaabi_ehm__node ) + except->virtual_table->size );
    160139
    161140        if ( ! store ) {
     
    170149        // Copy the exception to storage.
    171150        except->virtual_table->copy( context->current_exception, except );
    172 
    173         // Set up the exception storage.
    174         this_exception_storage.exception_class = __cfaehm_exception_class;
    175         this_exception_storage.exception_cleanup = __cfaehm_exception_cleanup;
    176151}
    177152
    178153// Delete the provided exception, unsetting current_exception if relivant.
    179 static void __cfaehm_delete_exception( exception_t * except ) {
     154static void __cfaabi_ehm__delete_exception( exception_t * except ) {
    180155        struct exception_context_t * context = this_exception_context();
    181156
    182         __cfadbg_print_safe(exception, "Deleting Exception\n");
     157        __cfaabi_dbg_print_safe("Deleting Exception\n");
    183158
    184159        // Remove the exception from the list.
    185         struct __cfaehm_node * to_free = EXCEPT_TO_NODE(except);
    186         struct __cfaehm_node * node;
     160        struct __cfaabi_ehm__node * to_free = EXCEPT_TO_NODE(except);
     161        struct __cfaabi_ehm__node * node;
    187162
    188163        if ( context->current_exception == except ) {
     
    192167                node = EXCEPT_TO_NODE(context->current_exception);
    193168                // It may always be in the first or second position.
    194                 while ( to_free != node->next ) {
     169                while( to_free != node->next ) {
    195170                        node = node->next;
    196171                }
     
    204179
    205180// If this isn't a rethrow (*except==0), delete the provided exception.
    206 void __cfaehm_cleanup_terminate( void * except ) {
    207         if ( *(void**)except ) __cfaehm_delete_exception( *(exception_t **)except );
    208 }
     181void __cfaabi_ehm__cleanup_terminate( void * except ) {
     182        if ( *(void**)except ) __cfaabi_ehm__delete_exception( *(exception_t **)except );
     183}
     184
     185
     186// We need a piece of storage to raise the exception
     187struct _Unwind_Exception this_exception_storage;
    209188
    210189// Function needed by force unwind
     
    213192                int version,
    214193                _Unwind_Action actions,
    215                 _Unwind_Exception_Class exception_class,
     194                _Unwind_Exception_Class exceptionClass,
    216195                struct _Unwind_Exception * unwind_exception,
    217                 struct _Unwind_Context * unwind_context,
    218                 void * stop_param) {
    219         // Verify actions follow the rules we expect.
    220         verify((actions & _UA_CLEANUP_PHASE) && (actions & _UA_FORCE_UNWIND));
    221         verify(!(actions & (_UA_SEARCH_PHASE | _UA_HANDER_FRAME)));
    222 
    223         if ( actions & _UA_END_OF_STACK ) {
    224                 exit(1);
    225         } else {
    226                 return _URC_NO_REASON;
    227         }
     196                struct _Unwind_Context * context,
     197                void * some_param) {
     198        if( actions & _UA_END_OF_STACK  ) exit(1);
     199        if( actions & _UA_CLEANUP_PHASE ) return _URC_NO_REASON;
     200
     201        return _URC_FATAL_PHASE2_ERROR;
    228202}
    229203
    230204// The exception that is being thrown must already be stored.
    231 static __attribute__((noreturn)) void __cfaehm_begin_unwind(void) {
     205__attribute__((noreturn)) void __cfaabi_ehm__begin_unwind(void) {
    232206        if ( ! this_exception_context()->current_exception ) {
    233207                printf("UNWIND ERROR missing exception in begin unwind\n");
    234208                abort();
    235209        }
     210
    236211
    237212        // Call stdlibc to raise the exception
     
    245220        // the whole stack.
    246221
    247         if ( ret == _URC_END_OF_STACK ) {
     222        if( ret == _URC_END_OF_STACK ) {
    248223                // No proper handler was found. This can be handled in many ways, C++ calls std::terminate.
    249224                // Here we force unwind the stack, basically raising a cancellation.
     
    260235}
    261236
    262 void __cfaehm_throw_terminate( exception_t * val ) {
    263         __cfadbg_print_safe(exception, "Throwing termination exception\n");
    264 
    265         __cfaehm_allocate_exception( val );
    266         __cfaehm_begin_unwind();
    267 }
    268 
    269 void __cfaehm_rethrow_terminate(void) {
    270         __cfadbg_print_safe(exception, "Rethrowing termination exception\n");
    271 
    272         __cfaehm_begin_unwind();
    273 }
     237void __cfaabi_ehm__throw_terminate( exception_t * val ) {
     238        __cfaabi_dbg_print_safe("Throwing termination exception\n");
     239
     240        __cfaabi_ehm__allocate_exception( val );
     241        __cfaabi_ehm__begin_unwind();
     242}
     243
     244void __cfaabi_ehm__rethrow_terminate(void) {
     245        __cfaabi_dbg_print_safe("Rethrowing termination exception\n");
     246
     247        __cfaabi_ehm__begin_unwind();
     248}
     249
     250#pragma GCC push_options
     251#pragma GCC optimize("O0")
    274252
    275253// This is our personality routine. For every stack frame annotated with
    276254// ".cfi_personality 0x3,__gcfa_personality_v0" this function will be called twice when unwinding.
    277255//  Once in the search phase and once in the cleanup phase.
    278 _Unwind_Reason_Code __gcfa_personality_v0(
    279                 int version,
    280                 _Unwind_Action actions,
    281                 unsigned long long exception_class,
    282                 struct _Unwind_Exception * unwind_exception,
    283                 struct _Unwind_Context * unwind_context)
     256_Unwind_Reason_Code __gcfa_personality_v0 (
     257                int version, _Unwind_Action actions, unsigned long long exceptionClass,
     258                struct _Unwind_Exception* unwind_exception,
     259                struct _Unwind_Context* context)
    284260{
    285261
    286         //__cfadbg_print_safe(exception, "CFA: 0x%lx\n", _Unwind_GetCFA(context));
    287         __cfadbg_print_safe(exception, "Personality function (%d, %x, %llu, %p, %p):",
    288                         version, actions, exception_class, unwind_exception, unwind_context);
    289 
    290         // Verify that actions follow the rules we expect.
    291         // This function should never be called at the end of the stack.
    292         verify(!(actions & _UA_END_OF_STACK));
    293         // Either only the search phase flag is set or...
     262        //__cfaabi_dbg_print_safe("CFA: 0x%lx\n", _Unwind_GetCFA(context));
     263        __cfaabi_dbg_print_safe("Personality function (%d, %x, %llu, %p, %p):",
     264                        version, actions, exceptionClass, unwind_exception, context);
     265
     266        // If we've reached the end of the stack then there is nothing much we can do...
     267        if( actions & _UA_END_OF_STACK ) return _URC_END_OF_STACK;
     268
    294269        if (actions & _UA_SEARCH_PHASE) {
    295                 verify(actions == _UA_SEARCH_PHASE);
    296                 __cfadbg_print_safe(exception, " lookup phase");
    297         // ... we are in clean-up phase.
    298         } else {
    299                 verify(actions & _UA_CLEANUP_PHASE);
    300                 __cfadbg_print_safe(exception, " cleanup phase");
    301                 // We shouldn't be the handler frame during forced unwind.
    302                 if (actions & _UA_HANDLER_FRAME) {
    303                         verify(!(actions & _UA_FORCE_UNWIND));
    304                         __cfadbg_print_safe(exception, " (handler frame)");
    305                 } else if (actions & _UA_FORCE_UNWIND) {
    306                         __cfadbg_print_safe(exception, " (force unwind)");
    307                 }
     270                __cfaabi_dbg_print_safe(" lookup phase");
     271        }
     272        else if (actions & _UA_CLEANUP_PHASE) {
     273                __cfaabi_dbg_print_safe(" cleanup phase");
     274        }
     275        // Just in case, probably can't actually happen
     276        else {
     277                printf(" error\n");
     278                return _URC_FATAL_PHASE1_ERROR;
    308279        }
    309280
    310281        // Get a pointer to the language specific data from which we will read what we need
    311         const unsigned char * lsd = _Unwind_GetLanguageSpecificData( unwind_context );
    312 
    313         if ( !lsd ) {   //Nothing to do, keep unwinding
     282        const unsigned char * lsd = (const unsigned char*) _Unwind_GetLanguageSpecificData( context );
     283
     284        if( !lsd ) {    //Nothing to do, keep unwinding
    314285                printf(" no LSD");
    315286                goto UNWIND;
     
    318289        // Get the instuction pointer and a reading pointer into the exception table
    319290        lsda_header_info lsd_info;
    320         const unsigned char * cur_ptr = parse_lsda_header(unwind_context, lsd, &lsd_info);
    321         _Unwind_Ptr instruction_ptr = _Unwind_GetIP(unwind_context);
    322 
    323         struct exception_context_t * context = this_exception_context();
     291        const unsigned char * cur_ptr = parse_lsda_header(context, lsd, &lsd_info);
     292        _Unwind_Ptr instruction_ptr = _Unwind_GetIP( context );
    324293
    325294        // Linearly search the table for stuff to do
    326         while ( cur_ptr < lsd_info.action_table ) {
     295        while( cur_ptr < lsd_info.action_table ) {
    327296                _Unwind_Ptr callsite_start;
    328297                _Unwind_Ptr callsite_len;
     
    337306
    338307                // Have we reach the correct frame info yet?
    339                 if ( lsd_info.Start + callsite_start + callsite_len < instruction_ptr ) {
     308                if( lsd_info.Start + callsite_start + callsite_len < instruction_ptr ) {
    340309#ifdef __CFA_DEBUG_PRINT__
    341310                        void * ls = (void*)lsd_info.Start;
     
    345314                        void * ep = (void*)lsd_info.Start + callsite_start + callsite_len;
    346315                        void * ip = (void*)instruction_ptr;
    347                         __cfadbg_print_safe(exception, "\nfound %p - %p (%p, %p, %p), looking for %p\n",
     316                        __cfaabi_dbg_print_safe("\nfound %p - %p (%p, %p, %p), looking for %p\n",
    348317                                        bp, ep, ls, cs, cl, ip);
    349318#endif // __CFA_DEBUG_PRINT__
     
    352321
    353322                // Have we gone too far?
    354                 if ( lsd_info.Start + callsite_start > instruction_ptr ) {
     323                if( lsd_info.Start + callsite_start > instruction_ptr ) {
    355324                        printf(" gone too far");
    356325                        break;
    357326                }
    358327
    359                 // Check for what we must do:
    360                 if ( 0 == callsite_landing_pad ) {
    361                         // Nothing to do, move along
    362                         __cfadbg_print_safe(exception, " no landing pad");
    363                 } else if (actions & _UA_SEARCH_PHASE) {
    364                         // In search phase, these means we found a potential handler we must check.
    365 
    366                         // We have arbitrarily decided that 0 means nothing to do and 1 means there is
    367                         // a potential handler. This doesn't seem to conflict the gcc default behavior.
    368                         if (callsite_action != 0) {
    369                                 // Now we want to run some code to see if the handler matches
    370                                 // This is the tricky part where we want to the power to run arbitrary code
    371                                 // However, generating a new exception table entry and try routine every time
    372                                 // is way more expansive than we might like
    373                                 // The information we have is :
    374                                 //  - The GR (Series of registers)
    375                                 //    GR1=GP Global Pointer of frame ref by context
    376                                 //  - The instruction pointer
    377                                 //  - The instruction pointer info (???)
    378                                 //  - The CFA (Canonical Frame Address)
    379                                 //  - The BSP (Probably the base stack pointer)
    380 
    381                                 // The current apprach uses one exception table entry per try block
    382                                 _uleb128_t imatcher;
    383                                 // Get the relative offset to the {...}?
    384                                 cur_ptr = read_uleb128(cur_ptr, &imatcher);
    385 
    386 #                               if defined( __x86_64 )
    387                                 _Unwind_Word match_pos = _Unwind_GetCFA(unwind_context) + 8;
    388 #                               elif defined( __i386 )
    389                                 _Unwind_Word match_pos = _Unwind_GetCFA(unwind_context) + 24;
    390 #                               endif
    391                                 int (*matcher)(exception_t *) = *(int(**)(exception_t *))match_pos;
    392 
    393                                 int index = matcher(context->current_exception);
    394                                 _Unwind_Reason_Code ret = (0 == index)
    395                                         ? _URC_CONTINUE_UNWIND : _URC_HANDLER_FOUND;
    396                                 context->current_handler_index = index;
    397 
    398                                 // Based on the return value, check if we matched the exception
    399                                 if (ret == _URC_HANDLER_FOUND) {
    400                                         __cfadbg_print_safe(exception, " handler found\n");
    401                                 } else {
    402                                         __cfadbg_print_safe(exception, " no handler\n");
     328                // Something to do?
     329                if( callsite_landing_pad ) {
     330                        // Which phase are we in
     331                        if (actions & _UA_SEARCH_PHASE) {
     332                                // In search phase, these means we found a potential handler we must check.
     333
     334                                // We have arbitrarily decided that 0 means nothing to do and 1 means there is
     335                                // a potential handler. This doesn't seem to conflict the gcc default behavior.
     336                                if (callsite_action != 0) {
     337                                        // Now we want to run some code to see if the handler matches
     338                                        // This is the tricky part where we want to the power to run arbitrary code
     339                                        // However, generating a new exception table entry and try routine every time
     340                                        // is way more expansive than we might like
     341                                        // The information we have is :
     342                                        //  - The GR (Series of registers)
     343                                        //    GR1=GP Global Pointer of frame ref by context
     344                                        //  - The instruction pointer
     345                                        //  - The instruction pointer info (???)
     346                                        //  - The CFA (Canonical Frame Address)
     347                                        //  - The BSP (Probably the base stack pointer)
     348
     349
     350                                        // The current apprach uses one exception table entry per try block
     351                                        _uleb128_t imatcher;
     352                                        // Get the relative offset to the {...}?
     353                                        cur_ptr = read_uleb128(cur_ptr, &imatcher);
     354
     355                                        _Unwind_Reason_Code (*matcher)(exception_t *) =
     356                                                MATCHER_FROM_CONTEXT(context);
     357                                        int index = matcher(shared_stack.current_exception);
     358                                        _Unwind_Reason_Code ret = (0 == index)
     359                                                ? _URC_CONTINUE_UNWIND : _URC_HANDLER_FOUND;
     360                                        shared_stack.current_handler_index = index;
     361
     362                                        // Based on the return value, check if we matched the exception
     363                                        if( ret == _URC_HANDLER_FOUND) {
     364                                                __cfaabi_dbg_print_safe(" handler found\n");
     365                                        } else {
     366                                                __cfaabi_dbg_print_safe(" no handler\n");
     367                                        }
     368                                        return ret;
    403369                                }
    404                                 return ret;
     370
     371                                // This is only a cleanup handler, ignore it
     372                                __cfaabi_dbg_print_safe(" no action");
    405373                        }
    406 
    407                         // This is only a cleanup handler, ignore it
    408                         __cfadbg_print_safe(exception, " no action");
    409                 } else {
    410                         // In clean-up phase, no destructors here but this could be the handler.
    411 
    412                         if ( (callsite_action != 0) && !(actions & _UA_HANDLER_FRAME) ){
    413                                 // If this is a potential exception handler
    414                                 // but not the one that matched the exception in the seach phase,
    415                                 // just ignore it
    416                                 goto UNWIND;
     374                        else if (actions & _UA_CLEANUP_PHASE) {
     375
     376                                if( (callsite_action != 0) && !(actions & _UA_HANDLER_FRAME) ){
     377                                        // If this is a potential exception handler
     378                                        // but not the one that matched the exception in the seach phase,
     379                                        // just ignore it
     380                                        goto UNWIND;
     381                                }
     382
     383                                // We need to run some clean-up or a handler
     384                                // These statment do the right thing but I don't know any specifics at all
     385                                _Unwind_SetGR( context, __builtin_eh_return_data_regno(0), (_Unwind_Ptr) unwind_exception );
     386                                _Unwind_SetGR( context, __builtin_eh_return_data_regno(1), 0 );
     387
     388                                // I assume this sets the instruction pointer to the adress of the landing pad
     389                                // It doesn't actually set it, it only state the value that needs to be set once we return _URC_INSTALL_CONTEXT
     390                                _Unwind_SetIP( context, ((lsd_info.LPStart) + (callsite_landing_pad)) );
     391
     392                                __cfaabi_dbg_print_safe(" action\n");
     393
     394                                // Return have some action to run
     395                                return _URC_INSTALL_CONTEXT;
    417396                        }
    418 
    419                         // We need to run some clean-up or a handler
    420                         // These statment do the right thing but I don't know any specifics at all
    421                         _Unwind_SetGR( unwind_context, __builtin_eh_return_data_regno(0),
    422                                 (_Unwind_Ptr)unwind_exception );
    423                         _Unwind_SetGR( unwind_context, __builtin_eh_return_data_regno(1), 0 );
    424 
    425                         // I assume this sets the instruction pointer to the adress of the landing pad
    426                         // It doesn't actually set it, it only state the value that needs to be set once we
    427                         // return _URC_INSTALL_CONTEXT
    428                         _Unwind_SetIP( unwind_context, ((lsd_info.LPStart) + (callsite_landing_pad)) );
    429 
    430                         __cfadbg_print_safe(exception, " action\n");
    431 
    432                         // Return have some action to run
    433                         return _URC_INSTALL_CONTEXT;
    434397                }
     398
     399                // Nothing to do, move along
     400                __cfaabi_dbg_print_safe(" no landing pad");
    435401        }
    436402        // No handling found
    437         __cfadbg_print_safe(exception, " table end reached");
     403        __cfaabi_dbg_print_safe(" table end reached\n");
    438404
    439405        UNWIND:
    440         __cfadbg_print_safe(exception, " unwind\n");
     406        __cfaabi_dbg_print_safe(" unwind\n");
    441407
    442408        // Keep unwinding the stack
    443409        return _URC_CONTINUE_UNWIND;
    444410}
    445 
    446 #pragma GCC push_options
    447 #pragma GCC optimize(0)
    448411
    449412// Try statements are hoisted out see comments for details. While this could probably be unique
    450413// and simply linked from libcfa but there is one problem left, see the exception table for details
    451414__attribute__((noinline))
    452 void __cfaehm_try_terminate(void (*try_block)(),
     415void __cfaabi_ehm__try_terminate(void (*try_block)(),
    453416                void (*catch_block)(int index, exception_t * except),
    454417                __attribute__((unused)) int (*match_block)(exception_t * except)) {
     
    456419        //! printf("%p %p %p %p\n", &try_block, &catch_block, &match_block, &xy);
    457420
     421        // Setup statments: These 2 statments won't actually result in any code, they only setup global tables.
     422        // However, they clobber gcc cancellation support from gcc.  We can replace the personality routine but
     423        // replacing the exception table gcc generates is not really doable, it generates labels based on how the
     424        // assembly works.
     425
    458426        // Setup the personality routine and exception table.
    459         // Unforturnately these clobber gcc cancellation support which means we can't get access to
    460         // the attribute cleanup tables at the same time. We would have to inspect the assembly to
    461         // create a new set ourselves.
    462427#ifdef __PIC__
    463428        asm volatile (".cfi_personality 0x9b,CFA.ref.__gcfa_personality_v0");
     
    484449        // Label which defines the end of the area for which the handler is setup.
    485450        asm volatile (".TRYEND:");
    486         // Label which defines the start of the exception landing pad. Basically what is called when
    487         // the exception is caught. Note, if multiple handlers are given, the multiplexing should be
    488         // done by the generated code, not the exception runtime.
     451        // Label which defines the start of the exception landing pad.  Basically what is called when the exception is
     452        // caught.  Note, if multiple handlers are given, the multiplexing should be done by the generated code, not the
     453        // exception runtime.
    489454        asm volatile (".CATCH:");
    490455
    491456        // Exception handler
    492         // Note: Saving the exception context on the stack breaks termination exceptions.
    493         catch_block( this_exception_context()->current_handler_index,
    494                      this_exception_context()->current_exception );
     457        catch_block( shared_stack.current_handler_index,
     458                     shared_stack.current_exception );
    495459}
    496460
     
    500464
    501465#ifdef __PIC__
     466#if defined( __i386 ) || defined( __x86_64 )
    502467asm (
    503468        // HEADER
     
    516481        // handler landing pad offset and 1 (action code, gcc seems to use 0).
    517482        ".LLSDACSBCFA2:\n"
    518         "       .uleb128 .TRYSTART-__cfaehm_try_terminate\n"
     483        "       .uleb128 .TRYSTART-__cfaabi_ehm__try_terminate\n"
    519484        "       .uleb128 .TRYEND-.TRYSTART\n"
    520         "       .uleb128 .CATCH-__cfaehm_try_terminate\n"
     485        "       .uleb128 .CATCH-__cfaabi_ehm__try_terminate\n"
    521486        "       .uleb128 1\n"
    522487        ".LLSDACSECFA2:\n"
    523488        // TABLE FOOTER
    524489        "       .text\n"
    525         "       .size   __cfaehm_try_terminate, .-__cfaehm_try_terminate\n"
     490        "       .size   __cfaabi_ehm__try_terminate, .-__cfaabi_ehm__try_terminate\n"
    526491);
    527492
     
    542507        "       .quad __gcfa_personality_v0\n"
    543508#else // then __i386
    544         "       .long __gcfa_personality_v0\n"
     509        "   .long __gcfa_personality_v0\n"
    545510#endif
    546511);
     512#else
     513#error Exception Handling: unknown architecture for position independent code.
     514#endif // __i386 || __x86_64
    547515#else // __PIC__
     516#if defined( __i386 ) || defined( __x86_64 )
    548517asm (
    549518        // HEADER
     
    560529        ".LLSDACSBCFA2:\n"
    561530        //      Handled area start (relative to start of function)
    562         "       .uleb128 .TRYSTART-__cfaehm_try_terminate\n"
     531        "       .uleb128 .TRYSTART-__cfaabi_ehm__try_terminate\n"
    563532        //      Handled area length
    564533        "       .uleb128 .TRYEND-.TRYSTART\n"
    565534        //      Handler landing pad address (relative to start of function)
    566         "       .uleb128 .CATCH-__cfaehm_try_terminate\n"
     535        "       .uleb128 .CATCH-__cfaabi_ehm__try_terminate\n"
    567536        //      Action code, gcc seems to always use 0.
    568537        "       .uleb128 1\n"
     
    570539        ".LLSDACSECFA2:\n"
    571540        "       .text\n"
    572         "       .size   __cfaehm_try_terminate, .-__cfaehm_try_terminate\n"
     541        "       .size   __cfaabi_ehm__try_terminate, .-__cfaabi_ehm__try_terminate\n"
    573542        "       .ident  \"GCC: (Ubuntu 6.2.0-3ubuntu11~16.04) 6.2.0 20160901\"\n"
    574543        "       .section        .note.GNU-stack,\"x\",@progbits\n"
    575544);
     545#else
     546#error Exception Handling: unknown architecture for position dependent code.
     547#endif // __i386 || __x86_64
    576548#endif // __PIC__
    577549
  • libcfa/src/exception.h

    r6a490b2 rb7d6a36  
    99// Author           : Andrew Beach
    1010// Created On       : Mon Jun 26 15:11:00 2017
    11 // Last Modified By : Andrew Beach
    12 // Last Modified On : Fri Mar 27 10:16:00 2020
    13 // Update Count     : 9
     11// Last Modified By : Peter A. Buhr
     12// Last Modified On : Thu Feb 22 18:11:15 2018
     13// Update Count     : 8
    1414//
    1515
     
    2121#endif
    2222
    23 struct __cfaehm_base_exception_t;
    24 typedef struct __cfaehm_base_exception_t exception_t;
    25 struct __cfaehm_base_exception_t_vtable {
    26         const struct __cfaehm_base_exception_t_vtable * parent;
     23struct __cfaabi_ehm__base_exception_t;
     24typedef struct __cfaabi_ehm__base_exception_t exception_t;
     25struct __cfaabi_ehm__base_exception_t_vtable {
     26        const struct __cfaabi_ehm__base_exception_t_vtable * parent;
    2727        size_t size;
    28         void (*copy)(struct __cfaehm_base_exception_t *this,
    29                      struct __cfaehm_base_exception_t * other);
    30         void (*free)(struct __cfaehm_base_exception_t *this);
    31         const char * (*msg)(struct __cfaehm_base_exception_t *this);
     28        void (*copy)(struct __cfaabi_ehm__base_exception_t *this,
     29                     struct __cfaabi_ehm__base_exception_t * other);
     30        void (*free)(struct __cfaabi_ehm__base_exception_t *this);
     31        const char * (*msg)(struct __cfaabi_ehm__base_exception_t *this);
    3232};
    33 struct __cfaehm_base_exception_t {
    34         struct __cfaehm_base_exception_t_vtable const * virtual_table;
     33struct __cfaabi_ehm__base_exception_t {
     34        struct __cfaabi_ehm__base_exception_t_vtable const * virtual_table;
    3535};
    36 extern struct __cfaehm_base_exception_t_vtable
    37         ___cfaehm_base_exception_t_vtable_instance;
     36extern struct __cfaabi_ehm__base_exception_t_vtable
     37        ___cfaabi_ehm__base_exception_t_vtable_instance;
    3838
    3939
    4040// Used in throw statement translation.
    41 void __cfaehm_throw_terminate(exception_t * except) __attribute__((noreturn));
    42 void __cfaehm_rethrow_terminate() __attribute__((noreturn));
    43 void __cfaehm_throw_resume(exception_t * except);
     41void __cfaabi_ehm__throw_terminate(exception_t * except) __attribute__((noreturn));
     42void __cfaabi_ehm__rethrow_terminate() __attribute__((noreturn));
     43void __cfaabi_ehm__throw_resume(exception_t * except);
    4444
    4545// Function catches termination exceptions.
    46 void __cfaehm_try_terminate(
     46void __cfaabi_ehm__try_terminate(
    4747    void (*try_block)(),
    4848    void (*catch_block)(int index, exception_t * except),
     
    5050
    5151// Clean-up the exception in catch blocks.
    52 void __cfaehm_cleanup_terminate(void * except);
     52void __cfaabi_ehm__cleanup_terminate(void * except);
    5353
    5454// Data structure creates a list of resume handlers.
    55 struct __cfaehm_try_resume_node {
    56     struct __cfaehm_try_resume_node * next;
     55struct __cfaabi_ehm__try_resume_node {
     56    struct __cfaabi_ehm__try_resume_node * next;
    5757    _Bool (*handler)(exception_t * except);
    5858};
    5959
    6060// These act as constructor and destructor for the resume node.
    61 void __cfaehm_try_resume_setup(
    62     struct __cfaehm_try_resume_node * node,
     61void __cfaabi_ehm__try_resume_setup(
     62    struct __cfaabi_ehm__try_resume_node * node,
    6363    _Bool (*handler)(exception_t * except));
    64 void __cfaehm_try_resume_cleanup(
    65     struct __cfaehm_try_resume_node * node);
     64void __cfaabi_ehm__try_resume_cleanup(
     65    struct __cfaabi_ehm__try_resume_node * node);
    6666
    6767// Check for a standard way to call fake deconstructors.
    68 struct __cfaehm_cleanup_hook {};
     68struct __cfaabi_ehm__cleanup_hook {};
    6969
    7070#ifdef __cforall
  • libcfa/src/heap.cfa

    r6a490b2 rb7d6a36  
    1010// Created On       : Tue Dec 19 21:58:35 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Wed May  6 17:29:26 2020
    13 // Update Count     : 727
     12// Last Modified On : Tue Feb  4 10:04:51 2020
     13// Update Count     : 648
    1414//
    1515
     
    1919#include <errno.h>                                                                              // errno
    2020#include <string.h>                                                                             // memset, memcpy
    21 #include <limits.h>                                                                             // ULONG_MAX
    2221extern "C" {
    2322#include <sys/mman.h>                                                                   // mmap, munmap
    2423} // extern "C"
    2524
     25// #comment TD : Many of these should be merged into math I believe
    2626#include "bits/align.hfa"                                                               // libPow2
    2727#include "bits/defs.hfa"                                                                // likely, unlikely
     
    3030//#include "stdlib.hfa"                                                                 // bsearchl
    3131#include "malloc.h"
    32 #include "bitmanip.hfa"                                                                 // ceiling
    3332
    3433#define MIN(x, y) (y > x ? x : y)
     
    8281};
    8382
     83size_t default_mmap_start() __attribute__(( weak )) {
     84        return __CFA_DEFAULT_MMAP_START__;
     85} // default_mmap_start
     86
    8487size_t default_heap_expansion() __attribute__(( weak )) {
    8588        return __CFA_DEFAULT_HEAP_EXPANSION__;
    8689} // default_heap_expansion
    87 
    88 size_t default_mmap_start() __attribute__(( weak )) {
    89         return __CFA_DEFAULT_MMAP_START__;
    90 } // default_mmap_start
    9190
    9291
     
    151150                                                        union {
    152151//                                                              FreeHeader * home;              // allocated block points back to home locations (must overlay alignment)
    153                                                                 // 2nd low-order bit => zero filled
    154152                                                                void * home;                    // allocated block points back to home locations (must overlay alignment)
    155153                                                                size_t blockSize;               // size for munmap (must overlay alignment)
     
    171169                                struct FakeHeader {
    172170                                        #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    173                                         // 1st low-order bit => fake header & alignment
    174                                         uint32_t alignment;
     171                                        uint32_t alignment;                                     // low-order bits of home/blockSize used for tricks
    175172                                        #endif // __ORDER_LITTLE_ENDIAN__
    176173
     
    182179                                } fake; // FakeHeader
    183180                        } kind; // Kind
    184                         size_t size;                                                            // allocation size in bytes
    185181                } header; // Header
    186182                char pad[libAlign() - sizeof( Header )];
     
    266262static unsigned long long int free_storage;
    267263static unsigned int free_calls;
    268 static unsigned long long int aalloc_storage;
    269 static unsigned int aalloc_calls;
    270264static unsigned long long int calloc_storage;
    271265static unsigned int calloc_calls;
    272266static unsigned long long int memalign_storage;
    273267static unsigned int memalign_calls;
    274 static unsigned long long int amemalign_storage;
    275 static unsigned int amemalign_calls;
    276268static unsigned long long int cmemalign_storage;
    277269static unsigned int cmemalign_calls;
    278 static unsigned long long int resize_storage;
    279 static unsigned int resize_calls;
    280270static unsigned long long int realloc_storage;
    281271static unsigned int realloc_calls;
     
    285275// Use "write" because streams may be shutdown when calls are made.
    286276static void printStats() {
    287         char helpText[1024];
     277        char helpText[512];
    288278        __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),
    289279                                                                        "\nHeap statistics:\n"
    290280                                                                        "  malloc: calls %u / storage %llu\n"
    291                                                                         "  aalloc: calls %u / storage %llu\n"
    292281                                                                        "  calloc: calls %u / storage %llu\n"
    293282                                                                        "  memalign: calls %u / storage %llu\n"
    294                                                                         "  amemalign: calls %u / storage %llu\n"
    295283                                                                        "  cmemalign: calls %u / storage %llu\n"
    296                                                                         "  resize: calls %u / storage %llu\n"
    297284                                                                        "  realloc: calls %u / storage %llu\n"
    298285                                                                        "  free: calls %u / storage %llu\n"
     
    301288                                                                        "  sbrk: calls %u / storage %llu\n",
    302289                                                                        malloc_calls, malloc_storage,
    303                                                                         aalloc_calls, calloc_storage,
    304290                                                                        calloc_calls, calloc_storage,
    305291                                                                        memalign_calls, memalign_storage,
    306                                                                         amemalign_calls, amemalign_storage,
    307292                                                                        cmemalign_calls, cmemalign_storage,
    308                                                                         resize_calls, resize_storage,
    309293                                                                        realloc_calls, realloc_storage,
    310294                                                                        free_calls, free_storage,
     
    316300
    317301static int printStatsXML( FILE * stream ) {                             // see malloc_info
    318         char helpText[1024];
     302        char helpText[512];
    319303        int len = snprintf( helpText, sizeof(helpText),
    320304                                                "<malloc version=\"1\">\n"
     
    323307                                                "</sizes>\n"
    324308                                                "<total type=\"malloc\" count=\"%u\" size=\"%llu\"/>\n"
    325                                                 "<total type=\"aalloc\" count=\"%u\" size=\"%llu\"/>\n"
    326309                                                "<total type=\"calloc\" count=\"%u\" size=\"%llu\"/>\n"
    327310                                                "<total type=\"memalign\" count=\"%u\" size=\"%llu\"/>\n"
    328                                                 "<total type=\"amemalign\" count=\"%u\" size=\"%llu\"/>\n"
    329311                                                "<total type=\"cmemalign\" count=\"%u\" size=\"%llu\"/>\n"
    330                                                 "<total type=\"resize\" count=\"%u\" size=\"%llu\"/>\n"
    331312                                                "<total type=\"realloc\" count=\"%u\" size=\"%llu\"/>\n"
    332313                                                "<total type=\"free\" count=\"%u\" size=\"%llu\"/>\n"
     
    336317                                                "</malloc>",
    337318                                                malloc_calls, malloc_storage,
    338                                                 aalloc_calls, aalloc_storage,
    339319                                                calloc_calls, calloc_storage,
    340320                                                memalign_calls, memalign_storage,
    341                                                 amemalign_calls, amemalign_storage,
    342321                                                cmemalign_calls, cmemalign_storage,
    343                                                 resize_calls, resize_storage,
    344322                                                realloc_calls, realloc_storage,
    345323                                                free_calls, free_storage,
     
    359337//                 ((char *)(sbrk( 0 )) - (char *)(heapManager.heapBegin)) );
    360338// } // noMemory
     339
     340
     341static inline void checkAlign( size_t alignment ) {
     342        if ( alignment < libAlign() || ! libPow2( alignment ) ) {
     343                abort( "Alignment %zu for memory allocation is less than %d and/or not a power of 2.", alignment, libAlign() );
     344        } // if
     345} // checkAlign
     346
     347
     348static inline bool setHeapExpand( size_t value ) {
     349  if ( heapExpand < pageSize ) return true;
     350        heapExpand = value;
     351        return false;
     352} // setHeapExpand
    361353
    362354
     
    377369
    378370static inline bool setMmapStart( size_t value ) {               // true => mmapped, false => sbrk
    379   if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return false;
     371  if ( value < pageSize || bucketSizes[NoBucketSizes - 1] < value ) return true;
    380372        mmapStart = value;                                                                      // set global
    381373
     
    384376        assert( maxBucketsUsed < NoBucketSizes );                       // subscript failure ?
    385377        assert( mmapStart <= bucketSizes[maxBucketsUsed] ); // search failure ?
    386         return true;
     378        return false;
    387379} // setMmapStart
    388 
    389 
    390 // <-------+----------------------------------------------------> bsize (bucket size)
    391 // |header |addr
    392 //==================================================================================
    393 //                   align/offset |
    394 // <-----------------<------------+-----------------------------> bsize (bucket size)
    395 //                   |fake-header | addr
    396 #define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) ))
    397 #define realHeader( header ) ((HeapManager.Storage.Header *)((char *)header - header->kind.fake.offset))
    398 
    399 // <-------<<--------------------- dsize ---------------------->> bsize (bucket size)
    400 // |header |addr
    401 //==================================================================================
    402 //                   align/offset |
    403 // <------------------------------<<---------- dsize --------->>> bsize (bucket size)
    404 //                   |fake-header |addr
    405 #define dataStorage( bsize, addr, header ) (bsize - ( (char *)addr - (char *)header ))
    406 
    407 
    408 static inline void checkAlign( size_t alignment ) {
    409         if ( alignment < libAlign() || ! libPow2( alignment ) ) {
    410                 abort( "Alignment %zu for memory allocation is less than %d and/or not a power of 2.", alignment, libAlign() );
    411         } // if
    412 } // checkAlign
    413380
    414381
     
    424391static inline void fakeHeader( HeapManager.Storage.Header *& header, size_t & alignment ) {
    425392        if ( unlikely( (header->kind.fake.alignment & 1) == 1 ) ) { // fake header ?
     393                size_t offset = header->kind.fake.offset;
    426394                alignment = header->kind.fake.alignment & -2;   // remove flag from value
    427395                #ifdef __CFA_DEBUG__
    428396                checkAlign( alignment );                                                // check alignment
    429397                #endif // __CFA_DEBUG__
    430                 header = realHeader( header );                                  // backup from fake to real header
     398                header = (HeapManager.Storage.Header *)((char *)header - offset);
    431399        } // if
    432400} // fakeHeader
     401
     402
     403// <-------+----------------------------------------------------> bsize (bucket size)
     404// |header |addr
     405//==================================================================================
     406//                                | alignment
     407// <-----------------<------------+-----------------------------> bsize (bucket size)
     408//                   |fake-header | addr
     409#define headerAddr( addr ) ((HeapManager.Storage.Header *)( (char *)addr - sizeof(HeapManager.Storage) ))
     410
     411// <-------<<--------------------- dsize ---------------------->> bsize (bucket size)
     412// |header |addr
     413//==================================================================================
     414//                                | alignment
     415// <------------------------------<<---------- dsize --------->>> bsize (bucket size)
     416//                   |fake-header |addr
     417#define dataStorage( bsize, addr, header ) (bsize - ( (char *)addr - (char *)header ))
    433418
    434419
     
    443428
    444429        #ifdef __CFA_DEBUG__
    445         checkHeader( addr < heapBegin, name, addr );            // bad low address ?
     430        checkHeader( addr < heapBegin || header < (HeapManager.Storage.Header *)heapBegin, name, addr ); // bad low address ?
    446431        #endif // __CFA_DEBUG__
    447432
     
    502487        // along with the block and is a multiple of the alignment size.
    503488
    504   if ( unlikely( size > ULONG_MAX - sizeof(HeapManager.Storage) ) ) return 0p;
     489  if ( unlikely( size > ~0ul - sizeof(HeapManager.Storage) ) ) return 0p;
    505490        size_t tsize = size + sizeof(HeapManager.Storage);
    506491        if ( likely( tsize < mmapStart ) ) {                            // small size => sbrk
     
    554539                block->header.kind.real.home = freeElem;                // pointer back to free list of apropriate size
    555540        } else {                                                                                        // large size => mmap
    556   if ( unlikely( size > ULONG_MAX - pageSize ) ) return 0p;
     541  if ( unlikely( size > ~0ul - pageSize ) ) return 0p;
    557542                tsize = libCeiling( tsize, pageSize );                  // must be multiple of page size
    558543                #ifdef __STATISTICS__
     
    572557        } // if
    573558
    574         block->header.size = size;                                                      // store allocation size
    575559        void * addr = &(block->data);                                           // adjust off header to user bytes
    576560
     
    696680        #endif // FASTLOOKUP
    697681
    698         if ( ! setMmapStart( default_mmap_start() ) ) {
     682        if ( setMmapStart( default_mmap_start() ) ) {
    699683                abort( "HeapManager : internal error, mmap start initialization failure." );
    700684        } // if
     
    702686
    703687        char * end = (char *)sbrk( 0 );
    704         heapBegin = heapEnd = sbrk( (char *)libCeiling( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment
     688        sbrk( (char *)libCeiling( (long unsigned int)end, libAlign() ) - end ); // move start of heap to multiple of alignment
     689        heapBegin = heapEnd = sbrk( 0 );                                        // get new start point
    705690} // HeapManager
    706691
     
    728713        //assert( heapManager.heapBegin != 0 );
    729714        //heapManager{};
    730         if ( heapManager.heapBegin == 0p ) heapManager{};       // sanity check
     715        if ( heapManager.heapBegin == 0p ) heapManager{};
    731716} // memory_startup
    732717
     
    740725        //assert( heapManager.heapBegin != 0 );
    741726        if ( unlikely( heapManager.heapBegin == 0p ) ) heapManager{}; // called before memory_startup ?
    742 #if __SIZEOF_POINTER__ == 8
    743         verify( size < ((typeof(size_t))1 << 48) );
    744 #endif // __SIZEOF_POINTER__ == 8
    745727        void * addr = doMalloc( size );
    746728        if ( unlikely( addr == 0p ) ) errno = ENOMEM;           // POSIX
     
    749731
    750732
    751 static inline void * callocNoStats( size_t dim, size_t elemSize ) {
    752         size_t size = dim * elemSize;
     733static inline void * callocNoStats( size_t noOfElems, size_t elemSize ) {
     734        size_t size = noOfElems * elemSize;
    753735        char * addr = (char *)mallocNoStats( size );
    754736  if ( unlikely( addr == 0p ) ) return 0p;
     
    808790
    809791
    810 static inline void * cmemalignNoStats( size_t alignment, size_t dim, size_t elemSize ) {
    811         size_t size = dim * elemSize;
     792static inline void * cmemalignNoStats( size_t alignment, size_t noOfElems, size_t elemSize ) {
     793        size_t size = noOfElems * elemSize;
    812794        char * addr = (char *)memalignNoStats( alignment, size );
    813795  if ( unlikely( addr == 0p ) ) return 0p;
     
    821803        #endif // __CFA_DEBUG__
    822804                memset( addr, '\0', dataStorage( bsize, addr, header ) ); // set to zeros
    823 
    824         header->kind.real.blockSize |= 2;                                       // mark as zero filled
     805        header->kind.real.blockSize |= 2;                               // mark as zero filled
     806
    825807        return addr;
    826808} // cmemalignNoStats
     
    837819
    838820extern "C" {
    839         // Allocates size bytes and returns a pointer to the allocated memory.  The contents are undefined. If size is 0,
    840         // then malloc() returns a unique pointer value that can later be successfully passed to free().
     821        // The malloc() function allocates size bytes and returns a pointer to the allocated memory. The memory is not
     822        // initialized. If size is 0, then malloc() returns either 0p, or a unique pointer value that can later be
     823        // successfully passed to free().
    841824        void * malloc( size_t size ) {
    842825                #ifdef __STATISTICS__
     
    848831        } // malloc
    849832
    850 
    851         // Same as malloc() except size bytes is an array of dim elements each of elemSize bytes.
    852         void * aalloc( size_t dim, size_t elemSize ) {
    853                 #ifdef __STATISTICS__
    854                 __atomic_add_fetch( &aalloc_calls, 1, __ATOMIC_SEQ_CST );
    855                 __atomic_add_fetch( &aalloc_storage, dim * elemSize, __ATOMIC_SEQ_CST );
    856                 #endif // __STATISTICS__
    857 
    858                 return mallocNoStats( dim * elemSize );
    859         } // aalloc
    860 
    861 
    862         // Same as aalloc() with memory set to zero.
    863         void * calloc( size_t dim, size_t elemSize ) {
     833        // The calloc() function allocates memory for an array of nmemb elements of size bytes each and returns a pointer to
     834        // the allocated memory. The memory is set to zero. If nmemb or size is 0, then calloc() returns either 0p, or a
     835        // unique pointer value that can later be successfully passed to free().
     836        void * calloc( size_t noOfElems, size_t elemSize ) {
    864837                #ifdef __STATISTICS__
    865838                __atomic_add_fetch( &calloc_calls, 1, __ATOMIC_SEQ_CST );
    866                 __atomic_add_fetch( &calloc_storage, dim * elemSize, __ATOMIC_SEQ_CST );
    867                 #endif // __STATISTICS__
    868 
    869                 return callocNoStats( dim, elemSize );
     839                __atomic_add_fetch( &calloc_storage, noOfElems * elemSize, __ATOMIC_SEQ_CST );
     840                #endif // __STATISTICS__
     841
     842                return callocNoStats( noOfElems, elemSize );
    870843        } // calloc
    871844
    872         // Change the size of the memory block pointed to by oaddr to size bytes. The contents are undefined.  If oaddr is
    873         // 0p, then the call is equivalent to malloc(size), for all values of size; if size is equal to zero, and oaddr is
    874         // not 0p, then the call is equivalent to free(oaddr). Unless oaddr is 0p, it must have been returned by an earlier
    875         // call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done.
    876         void * resize( void * oaddr, size_t size ) {
    877                 #ifdef __STATISTICS__
    878                 __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST );
    879                 __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
    880                 #endif // __STATISTICS__
    881 
    882                 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
    883           if ( unlikely( size == 0 ) ) { free( oaddr ); return mallocNoStats( size ); } // special cases
    884           if ( unlikely( oaddr == 0p ) ) return mallocNoStats( size );
    885 
    886                 HeapManager.Storage.Header * header;
    887                 HeapManager.FreeHeader * freeElem;
    888                 size_t bsize, oalign = 0;
    889                 headers( "resize", oaddr, header, freeElem, bsize, oalign );
    890 
    891                 size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
    892                 // same size, DO NOT preserve STICKY PROPERTIES.
    893           if ( oalign == 0 && size <= odsize && odsize <= size * 2 ) { // allow 50% wasted storage for smaller size
    894                         header->kind.real.blockSize &= -2;                      // no alignment and turn off 0 fill
    895                         return oaddr;
    896                 } // if
    897        
    898                 // change size, DO NOT preserve STICKY PROPERTIES.
    899                 free( oaddr );
    900                 void * naddr = mallocNoStats( size );                   // create new area
    901                 return naddr;
    902         } // resize
    903 
    904 
    905         // Same as resize() but the contents are unchanged in the range from the start of the region up to the minimum of
    906         // the old and new sizes.
     845        // The realloc() function changes the size of the memory block pointed to by ptr to size bytes. The contents will be
     846        // unchanged in the range from the start of the region up to the minimum of the old and new sizes. If the new size
     847        // is larger than the old size, the added memory will not be initialized.  If ptr is 0p, then the call is
     848        // equivalent to malloc(size), for all values of size; if size is equal to zero, and ptr is not 0p, then the call
     849        // is equivalent to free(ptr). Unless ptr is 0p, it must have been returned by an earlier call to malloc(),
     850        // calloc() or realloc(). If the area pointed to was moved, a free(ptr) is done.
    907851        void * realloc( void * oaddr, size_t size ) {
    908852                #ifdef __STATISTICS__
    909853                __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
    910                 __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
    911854                #endif // __STATISTICS__
    912855
     
    924867                        // Do not know size of original allocation => cannot do 0 fill for any additional space because do not know
    925868                        // where to start filling, i.e., do not overwrite existing values in space.
     869                        //
     870                        // This case does not result in a new profiler entry because the previous one still exists and it must match with
     871                        // the free for this memory.  Hence, this realloc does not appear in the profiler output.
    926872                        return oaddr;
    927873                } // if
     874
     875                #ifdef __STATISTICS__
     876                __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
     877                #endif // __STATISTICS__
    928878
    929879                // change size and copy old content to new storage
     
    953903        } // realloc
    954904
    955         // Same as malloc() except the memory address is a multiple of alignment, which must be a power of two. (obsolete)
     905        // The obsolete function memalign() allocates size bytes and returns a pointer to the allocated memory. The memory
     906        // address will be a multiple of alignment, which must be a power of two.
    956907        void * memalign( size_t alignment, size_t size ) {
    957908                #ifdef __STATISTICS__
     
    964915
    965916
    966         // Same as aalloc() with memory alignment.
    967         void * amemalign( size_t alignment, size_t dim, size_t elemSize ) {
     917        // The cmemalign() function is the same as calloc() with memory alignment.
     918        void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ) {
    968919                #ifdef __STATISTICS__
    969920                __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
    970                 __atomic_add_fetch( &cmemalign_storage, dim * elemSize, __ATOMIC_SEQ_CST );
    971                 #endif // __STATISTICS__
    972 
    973                 return memalignNoStats( alignment, dim * elemSize );
    974         } // amemalign
    975 
    976 
    977         // Same as calloc() with memory alignment.
    978         void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) {
    979                 #ifdef __STATISTICS__
    980                 __atomic_add_fetch( &cmemalign_calls, 1, __ATOMIC_SEQ_CST );
    981                 __atomic_add_fetch( &cmemalign_storage, dim * elemSize, __ATOMIC_SEQ_CST );
    982                 #endif // __STATISTICS__
    983 
    984                 return cmemalignNoStats( alignment, dim, elemSize );
     921                __atomic_add_fetch( &cmemalign_storage, noOfElems * elemSize, __ATOMIC_SEQ_CST );
     922                #endif // __STATISTICS__
     923
     924                return cmemalignNoStats( alignment, noOfElems, elemSize );
    985925        } // cmemalign
    986926
    987         // Same as memalign(), but ISO/IEC 2011 C11 Section 7.22.2 states: the value of size shall be an integral multiple
    988     // of alignment. This requirement is universally ignored.
     927        // The function aligned_alloc() is the same as memalign(), except for the added restriction that size should be a
     928        // multiple of alignment.
    989929        void * aligned_alloc( size_t alignment, size_t size ) {
    990930                return memalign( alignment, size );
     
    992932
    993933
    994         // Allocates size bytes and places the address of the allocated memory in *memptr. The address of the allocated
    995         // memory shall be a multiple of alignment, which must be a power of two and a multiple of sizeof(void *). If size
    996         // is 0, then posix_memalign() returns either 0p, or a unique pointer value that can later be successfully passed to
    997         // free(3).
     934        // The function posix_memalign() allocates size bytes and places the address of the allocated memory in *memptr. The
     935        // address of the allocated memory will be a multiple of alignment, which must be a power of two and a multiple of
     936        // sizeof(void *). If size is 0, then posix_memalign() returns either 0p, or a unique pointer value that can later
     937        // be successfully passed to free(3).
    998938        int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
    999939          if ( alignment < sizeof(void *) || ! libPow2( alignment ) ) return EINVAL; // check alignment
     
    1003943        } // posix_memalign
    1004944
    1005         // Allocates size bytes and returns a pointer to the allocated memory. The memory address shall be a multiple of the
    1006         // page size.  It is equivalent to memalign(sysconf(_SC_PAGESIZE),size).
     945        // The obsolete function valloc() allocates size bytes and returns a pointer to the allocated memory. The memory
     946        // address will be a multiple of the page size.  It is equivalent to memalign(sysconf(_SC_PAGESIZE),size).
    1007947        void * valloc( size_t size ) {
    1008948                return memalign( pageSize, size );
     
    1010950
    1011951
    1012         // Same as valloc but rounds size to multiple of page size.
    1013         void * pvalloc( size_t size ) {
    1014                 return memalign( pageSize, libCeiling( size, pageSize ) );
    1015         } // pvalloc
    1016 
    1017 
    1018         // Frees the memory space pointed to by ptr, which must have been returned by a previous call to malloc(), calloc()
    1019         // or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behaviour occurs. If ptr is
    1020         // 0p, no operation is performed.
     952        // The free() function frees the memory space pointed to by ptr, which must have been returned by a previous call to
     953        // malloc(), calloc() or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behavior
     954        // occurs. If ptr is 0p, no operation is performed.
    1021955        void free( void * addr ) {
    1022956                #ifdef __STATISTICS__
     
    1039973
    1040974
    1041         // Returns the alignment of an allocation.
     975        // The malloc_alignment() function returns the alignment of the allocation.
    1042976        size_t malloc_alignment( void * addr ) {
    1043977          if ( unlikely( addr == 0p ) ) return libAlign();      // minimum alignment
     
    1046980                        return header->kind.fake.alignment & -2;        // remove flag from value
    1047981                } else {
    1048                         return libAlign();                                                      // minimum alignment
     982                        return libAlign ();                                                     // minimum alignment
    1049983                } // if
    1050984        } // malloc_alignment
    1051985
    1052         // Set the alignment for an the allocation and return previous alignment or 0 if no alignment.
    1053         size_t $malloc_alignment_set( void * addr, size_t alignment ) {
    1054           if ( unlikely( addr == 0p ) ) return libAlign();      // minimum alignment
    1055                 size_t ret;
    1056                 HeapManager.Storage.Header * header = headerAddr( addr );
    1057                 if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
    1058                         ret = header->kind.fake.alignment & -2;         // remove flag from old value
    1059                         header->kind.fake.alignment = alignment | 1; // add flag to new value
    1060                 } else {
    1061                         ret = 0;                                                                        // => no alignment to change
    1062                 } // if
    1063                 return ret;
    1064         } // $malloc_alignment_set
    1065 
    1066 
    1067         // Returns true if the allocation is zero filled, e.g., allocated by calloc().
     986
     987        // The malloc_zero_fill() function returns true if the allocation is zero filled, i.e., initially allocated by calloc().
    1068988        bool malloc_zero_fill( void * addr ) {
    1069989          if ( unlikely( addr == 0p ) ) return false;           // null allocation is not zero fill
    1070990                HeapManager.Storage.Header * header = headerAddr( addr );
    1071991                if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
    1072                         header = realHeader( header );                          // backup from fake to real header
     992                        header = (HeapManager.Storage.Header *)((char *)header - header->kind.fake.offset);
    1073993                } // if
    1074                 return (header->kind.real.blockSize & 2) != 0;  // zero filled ?
     994                return (header->kind.real.blockSize & 2) != 0;  // zero filled (calloc/cmemalign) ?
    1075995        } // malloc_zero_fill
    1076996
    1077         // Set allocation is zero filled and return previous zero filled.
    1078         bool $malloc_zero_fill_set( void * addr ) {
    1079           if ( unlikely( addr == 0p ) ) return false;           // null allocation is not zero fill
    1080                 HeapManager.Storage.Header * header = headerAddr( addr );
    1081                 if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
    1082                         header = realHeader( header );                          // backup from fake to real header
    1083                 } // if
    1084                 bool ret = (header->kind.real.blockSize & 2) != 0; // zero filled ?
    1085                 header->kind.real.blockSize |= 2;                               // mark as zero filled
    1086                 return ret;
    1087         } // $malloc_zero_fill_set
    1088 
    1089 
    1090         // Returns original total allocation size (not bucket size) => array size is dimension * sizeif(T).
    1091         size_t malloc_size( void * addr ) {
    1092           if ( unlikely( addr == 0p ) ) return false;           // null allocation is not zero fill
    1093                 HeapManager.Storage.Header * header = headerAddr( addr );
    1094                 if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
    1095                         header = realHeader( header );                          // backup from fake to real header
    1096                 } // if
    1097                 return header->size;
    1098         } // malloc_size
    1099 
    1100         // Set allocation size and return previous size.
    1101         size_t $malloc_size_set( void * addr, size_t size ) {
    1102           if ( unlikely( addr == 0p ) ) return false;           // null allocation is not zero fill
    1103                 HeapManager.Storage.Header * header = headerAddr( addr );
    1104                 if ( (header->kind.fake.alignment & 1) == 1 ) { // fake header ?
    1105                         header = realHeader( header );                          // backup from fake to real header
    1106                 } // if
    1107                 size_t ret = header->size;
    1108                 header->size = size;
    1109                 return ret;
    1110         } // $malloc_size_set
    1111 
    1112 
    1113         // Returns the number of usable bytes in the block pointed to by ptr, a pointer to a block of memory allocated by
    1114         // malloc or a related function.
     997
     998        // The malloc_usable_size() function returns the number of usable bytes in the block pointed to by ptr, a pointer to
     999        // a block of memory allocated by malloc(3) or a related function.
    11151000        size_t malloc_usable_size( void * addr ) {
    11161001          if ( unlikely( addr == 0p ) ) return 0;                       // null allocation has 0 size
     
    11241009
    11251010
    1126         // Prints (on default standard error) statistics about memory allocated by malloc and related functions.
     1011        // The malloc_stats() function prints (on default standard error) statistics about memory allocated by malloc(3) and
     1012        // related functions.
    11271013        void malloc_stats( void ) {
    11281014                #ifdef __STATISTICS__
     
    11321018        } // malloc_stats
    11331019
    1134         // Changes the file descripter where malloc_stats() writes statistics.
     1020        // The malloc_stats_fd() function changes the file descripter where malloc_stats() writes the statistics.
    11351021        int malloc_stats_fd( int fd __attribute__(( unused )) ) {
    11361022                #ifdef __STATISTICS__
     
    11441030
    11451031
    1146         // Adjusts parameters that control the behaviour of the memory-allocation functions (see malloc). The param argument
    1147         // specifies the parameter to be modified, and value specifies the new value for that parameter.
     1032        // The mallopt() function adjusts parameters that control the behavior of the memory-allocation functions (see
     1033        // malloc(3)). The param argument specifies the parameter to be modified, and value specifies the new value for that
     1034        // parameter.
    11481035        int mallopt( int option, int value ) {
    11491036                choose( option ) {
    11501037                  case M_TOP_PAD:
    1151                         heapExpand = ceiling( value, pageSize ); return 1;
     1038                        if ( setHeapExpand( value ) ) return 1;
    11521039                  case M_MMAP_THRESHOLD:
    11531040                        if ( setMmapStart( value ) ) return 1;
    1154                         break;
    11551041                } // switch
    11561042                return 0;                                                                               // error, unsupported
    11571043        } // mallopt
    11581044
    1159         // Attempt to release free memory at the top of the heap (by calling sbrk with a suitable argument).
     1045        // The malloc_trim() function attempts to release free memory at the top of the heap (by calling sbrk(2) with a
     1046        // suitable argument).
    11601047        int malloc_trim( size_t ) {
    11611048                return 0;                                                                               // => impossible to release memory
     
    11631050
    11641051
    1165         // Exports an XML string that describes the current state of the memory-allocation implementation in the caller.
    1166         // The string is printed on the file stream stream.  The exported string includes information about all arenas (see
    1167         // malloc).
     1052        // The malloc_info() function exports an XML string that describes the current state of the memory-allocation
     1053        // implementation in the caller.  The string is printed on the file stream stream.  The exported string includes
     1054        // information about all arenas (see malloc(3)).
    11681055        int malloc_info( int options, FILE * stream ) {
    11691056                if ( options != 0 ) { errno = EINVAL; return -1; }
     
    11721059
    11731060
    1174         // Records the current state of all malloc internal bookkeeping variables (but not the actual contents of the heap
    1175         // or the state of malloc_hook functions pointers).  The state is recorded in a system-dependent opaque data
    1176         // structure dynamically allocated via malloc, and a pointer to that data structure is returned as the function
    1177         // result.  (The caller must free this memory.)
     1061        // The malloc_get_state() function records the current state of all malloc(3) internal bookkeeping variables (but
     1062        // not the actual contents of the heap or the state of malloc_hook(3) functions pointers).  The state is recorded in
     1063        // a system-dependent opaque data structure dynamically allocated via malloc(3), and a pointer to that data
     1064        // structure is returned as the function result.  (It is the caller's responsibility to free(3) this memory.)
    11781065        void * malloc_get_state( void ) {
    11791066                return 0p;                                                                              // unsupported
     
    11811068
    11821069
    1183         // Restores the state of all malloc internal bookkeeping variables to the values recorded in the opaque data
    1184         // structure pointed to by state.
     1070        // The malloc_set_state() function restores the state of all malloc(3) internal bookkeeping variables to the values
     1071        // recorded in the opaque data structure pointed to by state.
    11851072        int malloc_set_state( void * ptr ) {
    11861073                return 0;                                                                               // unsupported
     
    11901077
    11911078// Must have CFA linkage to overload with C linkage realloc.
    1192 void * resize( void * oaddr, size_t nalign, size_t size ) {
     1079void * realloc( void * oaddr, size_t nalign, size_t size ) {
    11931080        #ifdef __STATISTICS__
    1194         __atomic_add_fetch( &resize_calls, 1, __ATOMIC_SEQ_CST );
    1195         __atomic_add_fetch( &resize_storage, size, __ATOMIC_SEQ_CST );
     1081        __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
    11961082        #endif // __STATISTICS__
    11971083
    11981084        // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
    1199   if ( unlikely( size == 0 ) ) { free( oaddr ); return memalignNoStats( nalign, size ); } // special cases
    1200   if ( unlikely( oaddr == 0p ) ) return memalignNoStats( nalign, size );
    1201 
     1085  if ( unlikely( size == 0 ) ) { free( oaddr ); return mallocNoStats( size ); } // special cases
     1086  if ( unlikely( oaddr == 0p ) ) return mallocNoStats( size );
    12021087
    12031088        if ( unlikely( nalign == 0 ) ) nalign = libAlign();     // reset alignment to minimum
     
    12101095        HeapManager.FreeHeader * freeElem;
    12111096        size_t bsize, oalign = 0;
    1212         headers( "resize", oaddr, header, freeElem, bsize, oalign );
     1097        headers( "realloc", oaddr, header, freeElem, bsize, oalign );
    12131098        size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
    12141099
    1215         if ( oalign <= nalign && (uintptr_t)oaddr % nalign == 0 ) { // <= alignment and new alignment happens to match
    1216                 if ( oalign >= libAlign() ) {                                   // fake header ?
    1217                         headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
    1218                 } // if
    1219                 if ( size <= odsize && odsize <= size * 2 ) {   // allow 50% wasted storage for smaller size
    1220                         header->kind.real.blockSize &= -2;                      // turn off 0 fill
    1221                         return oaddr;
    1222                 } // if
    1223         } // if
    1224 
    1225         // change size
     1100  if ( oalign != 0 && (uintptr_t)oaddr % nalign == 0 ) { // has alignment and just happens to work out
     1101                headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
     1102                return realloc( oaddr, size );
     1103        } // if
     1104
     1105        #ifdef __STATISTICS__
     1106        __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
     1107        #endif // __STATISTICS__
     1108
     1109        // change size and copy old content to new storage
    12261110
    12271111        void * naddr;
     
    12321116        } // if
    12331117
    1234         free( oaddr );
    1235         return naddr;
    1236 } // resize
    1237 
    1238 
    1239 void * realloc( void * oaddr, size_t nalign, size_t size ) {
    1240         if ( unlikely( nalign == 0 ) ) nalign = libAlign();     // reset alignment to minimum
    1241         #ifdef __CFA_DEBUG__
    1242         else
    1243                 checkAlign( nalign );                                                   // check alignment
    1244         #endif // __CFA_DEBUG__
    1245 
    1246         HeapManager.Storage.Header * header;
    1247         HeapManager.FreeHeader * freeElem;
    1248         size_t bsize, oalign = 0;
    1249         headers( "realloc", oaddr, header, freeElem, bsize, oalign );
    1250         size_t odsize = dataStorage( bsize, oaddr, header ); // data storage available in bucket
    1251 
    1252         if ( oalign <= nalign && (uintptr_t)oaddr % nalign == 0 ) { // <= alignment and new alignment happens to match
    1253                 if ( oalign >= libAlign() ) {                                   // fake header ?
    1254                         headerAddr( oaddr )->kind.fake.alignment = nalign | 1; // update alignment (could be the same)
    1255                 } // if
    1256                 return realloc( oaddr, size );
    1257         } // if
    1258 
    1259         // change size and copy old content to new storage
    1260 
    1261         #ifdef __STATISTICS__
    1262         __atomic_add_fetch( &realloc_calls, 1, __ATOMIC_SEQ_CST );
    1263         __atomic_add_fetch( &realloc_storage, size, __ATOMIC_SEQ_CST );
    1264         #endif // __STATISTICS__
    1265 
    1266         // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
    1267   if ( unlikely( size == 0 ) ) { free( oaddr ); return memalignNoStats( nalign, size ); } // special cases
    1268   if ( unlikely( oaddr == 0p ) ) return memalignNoStats( nalign, size );
    1269 
    1270         void * naddr;
    1271         if ( unlikely( header->kind.real.blockSize & 2 ) ) { // previous request zero fill
    1272                 naddr = cmemalignNoStats( nalign, 1, size );    // create new aligned area
    1273         } else {
    1274                 naddr = memalignNoStats( nalign, size );                // create new aligned area
    1275         } // if
    1276 
    12771118        headers( "realloc", naddr, header, freeElem, bsize, oalign );
    1278         size_t ndsize = dataStorage( bsize, naddr, header ); // data storage available in bucket
     1119        size_t ndsize = dataStorage( bsize, naddr, header ); // data storage avilable in bucket
    12791120        // To preserve prior fill, the entire bucket must be copied versus the size.
    12801121        memcpy( naddr, oaddr, MIN( odsize, ndsize ) );          // copy bytes
  • libcfa/src/interpose.cfa

    r6a490b2 rb7d6a36  
    1010// Created On       : Wed Mar 29 16:10:31 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Fri Mar 13 17:35:37 2020
    13 // Update Count     : 178
     12// Last Modified On : Mon Feb 17 10:18:53 2020
     13// Update Count     : 166
    1414//
    1515
    1616#include <stdarg.h>                                                                             // va_start, va_end
    17 #include <stdio.h>
    1817#include <string.h>                                                                             // strlen
    1918#include <unistd.h>                                                                             // _exit, getpid
     
    144143void abort( const char fmt[], ... ) __attribute__(( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
    145144void abort( bool signalAbort, const char fmt[], ... ) __attribute__(( format(printf, 2, 3), __nothrow__, __leaf__, __noreturn__ ));
    146 void __abort( bool signalAbort, const char fmt[], va_list args ) __attribute__(( __nothrow__, __leaf__, __noreturn__ ));
    147145
    148146extern "C" {
     
    154152                va_list argp;
    155153                va_start( argp, fmt );
    156                 __abort( false, fmt, argp );
     154                abort( false, fmt, argp );
    157155                va_end( argp );
    158156        }
     
    220218}
    221219
    222 // Cannot forward va_list.
    223 void __abort( bool signalAbort, const char fmt[], va_list args ) {
     220void abort( bool signalAbort, const char fmt[], ... ) {
    224221        void * kernel_data = kernel_abort();                            // must be done here to lock down kernel
    225222        int len;
     
    231228
    232229        assert( fmt );
     230        va_list args;
     231        va_start( args, fmt );
     232
    233233        len = vsnprintf( abort_text, abort_text_size, fmt, args );
     234        va_end( args );
    234235        __cfaabi_bits_write( STDERR_FILENO, abort_text, len );
    235236
    236237        if ( fmt[strlen( fmt ) - 1] != '\n' ) {                         // add optional newline if missing at the end of the format text
    237                 __cfaabi_bits_write( STDERR_FILENO, "\n", 1 );
     238                __cfaabi_dbg_write( "\n", 1 );
    238239        } // if
    239240        kernel_abort_msg( kernel_data, abort_text, abort_text_size );
     
    247248        va_list args;
    248249        va_start( args, fmt );
    249         __abort( false, fmt, args );
    250     // CONTROL NEVER REACHES HERE!
     250        abort( false, fmt, args );
    251251        va_end( args );
    252 }
    253 
    254 void abort( bool signalAbort, const char fmt[], ... ) {
    255     va_list args;
    256     va_start( args, fmt );
    257     __abort( signalAbort, fmt, args );
    258     // CONTROL NEVER REACHES HERE!
    259     va_end( args );
    260252}
    261253
  • libcfa/src/iostream.cfa

    r6a490b2 rb7d6a36  
    1010// Created On       : Wed May 27 17:56:53 2015
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Sat May  2 18:30:25 2020
    13 // Update Count     : 1017
     12// Last Modified On : Thu Feb 20 15:53:23 2020
     13// Update Count     : 829
    1414//
    1515
     
    2929#include <complex.h>                                                                    // creal, cimag
    3030} // extern "C"
    31 
    32 #include <bitmanip.hfa>                                                                 // fms
    3331
    3432
     
    461459\
    462460                if ( f.base == 'b' || f.base == 'B' ) {                 /* bespoke binary format */ \
    463                         int bits = high1( f.val );                                      /* position of most significant bit */ \
    464                         if ( bits == 0 ) bits = 1;                                      /* 0 value => force one bit to print */ \
    465                         int spaces; \
     461                        int bits;                                                                                                       \
     462                        if ( f.val == (T){0} ) bits = 1;                        /* force at least one bit to print */ \
     463                        else bits = sizeof(long long int) * 8 - __builtin_clzll( f.val ); /* position of most significant bit */ \
     464                        bits = bits > sizeof(f.val) * 8 ? sizeof(f.val) * 8 : bits; \
     465                        int spaces = f.wd - bits;                                       /* can be negative */ \
     466                        if ( ! f.flags.nobsdp ) { spaces -= 2; }        /* base prefix takes space */ \
     467                        /* printf( "%d %d\n", bits, spaces ); */ \
    466468                        if ( ! f.flags.left ) {                                         /* right justified ? */ \
    467469                                /* Note, base prefix then zero padding or spacing then prefix. */ \
    468                                 if ( f.flags.pc ) { \
    469                                         spaces = f.wd - f.pc; \
    470                                         if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \
     470                                if ( f.flags.pad0 || f.flags.pc ) { \
     471                                        if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
     472                                        if ( f.flags.pc ) spaces = f.pc - bits; \
     473                                        if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \
     474                                } else { \
    471475                                        if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \
    472476                                        if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
    473                                         spaces = f.pc - bits; \
    474                                         if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \
    475                                 } else { \
    476                                         spaces = f.wd - bits; \
    477                                         if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \
    478                                         if ( f.flags.pad0 ) { \
    479                                                 if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
    480                                                 if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \
    481                                         } else { \
    482                                                 if ( spaces > 0 ) fmt( os, "%*s", spaces, " " ); /* space pad */ \
    483                                                 if ( ! f.flags.nobsdp ) { fmt( os, "0%c", f.base ); } \
    484                                         } /* if */ \
    485477                                } /* if */ \
    486                         } else { \
    487                                 if ( ! f.flags.nobsdp ) fmt( os, "0%c", f.base ); \
    488                                 if ( f.flags.pc ) { \
    489                                         spaces = f.pc - bits; \
    490                                         if ( spaces > 0 ) fmt( os, "%0*d", spaces, 0 ); /* zero pad */ \
    491                                         spaces = f.wd - f.pc; \
    492                                 } else { /* pad0 flag ignored with left flag */ \
    493                                         spaces = f.wd - bits; \
    494                                 } /* if */ \
    495                                 if ( ! f.flags.nobsdp ) { spaces -= 2; } /* base prefix takes space */ \
     478                        } else if ( ! f.flags.nobsdp ) { \
     479                                fmt( os, "0%c", f.base ); \
    496480                        } /* if */ \
    497                         int shift = floor( bits - 1, 4 ); \
     481                        int shift = (bits - 1) / 4 * 4; /* floor( bits - 1, 4 ) */ \
    498482                        typeof( f.val ) temp = f.val; \
    499483                        fmt( os, "%s", shortbin[(temp >> shift) & 0xf] ); \
     
    550534#define IntegralFMTImpl128( T, SIGNED, CODE, IFMTNP, IFMTP ) \
    551535forall( dtype ostype | ostream( ostype ) ) \
    552 static void base10_128( ostype & os, _Ostream_Manip(T) f ) { \
    553         if ( f.val > UINT64_MAX ) { \
    554                 unsigned long long int lsig = f.val % P10_UINT64; \
    555                 f.val /= P10_UINT64; /* msig */ \
    556                 base10_128( os, f ); /* recursion */ \
    557                 _Ostream_Manip(unsigned long long int) fmt @= { lsig, 0, 19, 'u', { .all : 0 } }; \
    558                 fmt.flags.nobsdp = true; \
    559                 /* printf( "fmt1 %c %lld %d\n", fmt.base, fmt.val, fmt.all ); */ \
     536static void base10_128( ostype & os, _Ostream_Manip(T) fmt ) { \
     537        if ( fmt.val > UINT64_MAX ) { \
     538                fmt.val /= P10_UINT64; \
     539                base10_128( os, fmt ); /* recursive */ \
     540                _Ostream_Manip(unsigned long long int) fmt2 @= { (uint64_t)(fmt.val % P10_UINT64), 0, 19, 'u', { .all : 0 } }; \
     541                fmt2.flags.nobsdp = true; \
     542                printf( "fmt2 %c %lld %d\n", fmt2.base, fmt2.val, fmt2.all );   \
    560543                sepOff( os ); \
    561                 (ostype &)(os | fmt); \
     544                (ostype &)(os | fmt2); \
    562545        } else { \
    563                 /* printf( "fmt2 %c %lld %d\n", f.base, (unsigned long long int)f.val, f.all ); */ \
    564                 _Ostream_Manip(SIGNED long long int) fmt @= { (SIGNED long long int)f.val, f.wd, f.pc, f.base, { .all : f.all } }; \
     546                printf( "fmt %c %lld %d\n", fmt.base, fmt.val, fmt.all ); \
    565547                (ostype &)(os | fmt); \
    566548        } /* if */ \
    567 } /* base10_128 */ \
     549} /* base10_128 */                                                \
    568550forall( dtype ostype | ostream( ostype ) ) { \
    569551        ostype & ?|?( ostype & os, _Ostream_Manip(T) f ) { \
    570552                if ( $sepPrt( os ) ) fmt( os, "%s", $sepGetCur( os ) ); \
    571553\
    572                 if ( f.base == 'b' | f.base == 'B' | f.base == 'o' | f.base == 'x' | f.base == 'X' ) { \
     554                if ( f.base == 'b' | f.base == 'o' | f.base == 'x' | f.base == 'X' ) { \
    573555                        unsigned long long int msig = (unsigned long long int)(f.val >> 64); \
    574556                        unsigned long long int lsig = (unsigned long long int)(f.val); \
     
    580562                        } else { \
    581563                                fmt2.flags.pad0 = fmt2.flags.nobsdp = true;     \
    582                                 if ( f.base == 'b' | f.base == 'B' ) { \
    583                                         if ( fmt.flags.pc && fmt.pc > 64 ) fmt.pc -= 64; else { fmt.flags.pc = false; fmt.pc = 0; } \
    584                                         if ( fmt.flags.left ) { \
    585                                                 fmt.flags.left = false; \
    586                                                 fmt.wd = 0; \
    587                                                 /* printf( "L %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
    588                                                 fmt2.flags.left = true; \
    589                                                 int msigd = high1( msig ); \
    590                                                 fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
    591                                                 if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; /* compensate for 0b base specifier */ \
    592                                                 if ( (int)fmt2.wd < 64 ) fmt2.wd = 64; /* cast deals with negative value */ \
    593                                                 fmt2.flags.pc = true; fmt2.pc = 64; \
    594                                         } else { \
    595                                                 if ( fmt.wd > 64 ) fmt.wd -= 64; \
    596                                                 else fmt.wd = 1; \
    597                                                 /* printf( "R %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
    598                                                 fmt2.wd = 64; \
    599                                         } /* if */ \
    600                                         /* printf( "C %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
     564                                if ( f.base == 'b' ) { \
     565                                        if ( f.wd > 64 ) fmt.wd = f.wd - 64; \
     566                                        fmt2.wd = 64; \
    601567                                        (ostype &)(os | fmt | "" | fmt2); \
    602568                                } else if ( f.base == 'o' ) { \
    603                                         if ( fmt.flags.pc && fmt.pc > 22 ) fmt.pc -= 22; else { fmt.flags.pc = false; fmt.pc = 0; } \
    604569                                        fmt.val = (unsigned long long int)fmt.val >> 2; \
    605                                         fmt2.val = ((msig & 0x3) << 1) + ((lsig & 0x8000000000000000U) != 0); \
    606                                         if ( fmt.flags.left ) { \
    607                                                 fmt.flags.left = false; \
    608                                                 fmt.wd = 0; \
    609                                                 /* printf( "L %llo %llo %llo %d %d '%c' %x %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all, fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
    610                                                 (ostype &)(os | fmt | "" | fmt2); \
    611                                                 sepOff( os ); \
    612                                                 fmt2.flags.left = true; \
    613                                                 int msigd = ceiling( high1( fmt.val ), 3 ); \
    614                                                 fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
    615                                                 if ( ! fmt.flags.nobsdp ) fmt2.wd -= 1; /* compensate for 0 base specifier */ \
    616                                                 if ( (int)fmt2.wd < 21 ) fmt2.wd = 21; /* cast deals with negative value */ \
    617                                                 fmt2.flags.pc = true; fmt2.pc = 21; \
     570                                        if ( f.wd > 21 ) fmt.wd = f.wd - 21; \
     571                                        fmt2.wd = 1; \
     572                                        fmt2.val = ((msig & 0x3) << 1) + 1; \
     573                                        (ostype &)(os | fmt | "" | fmt2); \
     574                                        sepOff( os ); \
     575                                        fmt2.wd = 21; \
     576                                        fmt2.val = lsig & 0x7fffffffffffffff; \
     577                                        (ostype &)(os | fmt2); \
     578                                } else { \
     579                                        if ( f.flags.left ) { \
     580                                                if ( f.wd > 16 ) fmt2.wd = f.wd - 16;   \
     581                                                fmt.wd = 16;                                                    \
    618582                                        } else { \
    619                                                 if ( fmt.wd > 22 ) fmt.wd -= 22; \
    620                                                 else fmt.wd = 1; \
    621                                                 /* printf( "R %llo %llo %llo %d %d '%c' %x %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all, fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
    622                                                 (ostype &)(os | fmt | "" | fmt2); \
    623                                                 sepOff( os ); \
    624                                                 fmt2.wd = 21; \
     583                                                if ( f.wd > 16 ) fmt.wd = f.wd - 16;    \
     584                                                fmt2.wd = 16;                                                   \
    625585                                        } /* if */ \
    626                                         fmt2.val = lsig & 0x7fffffffffffffffU; \
    627                                         /* printf( "\nC %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
    628                                         (ostype &)(os | fmt2); \
    629                                 } else { /* f.base == 'x'  | f.base == 'X' */ \
    630                                         if ( fmt.flags.pc && fmt.pc > 16 ) fmt.pc -= 16; else { fmt.flags.pc = false; fmt.pc = 0; } \
    631                                         if ( fmt.flags.left ) { \
    632                                                 fmt.flags.left = false; \
    633                                                 fmt.wd = 0; \
    634                                                 /* printf( "L %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
    635                                                 fmt2.flags.left = true; \
    636                                                 int msigd = high1( msig ); \
    637                                                 fmt2.wd = f.wd - (fmt.pc > msigd ? fmt.pc : msigd); \
    638                                                 if ( ! fmt.flags.nobsdp ) fmt2.wd -= 2; /* compensate for 0x base specifier */ \
    639                                                 if ( (int)fmt2.wd < 16 ) fmt2.wd = 16; /* cast deals with negative value */ \
    640                                                 fmt2.flags.pc = true; fmt2.pc = 16; \
    641                                         } else { \
    642                                                 if ( fmt.wd > 16 ) fmt.wd -= 16; \
    643                                                 else fmt.wd = 1; \
    644                                                 /* printf( "R %llo %llo %llo %d %d '%c' %x\n", msig, lsig, fmt.val, fmt.wd, fmt.pc, fmt.base, fmt.all ); */ \
    645                                                 fmt2.wd = 16; \
    646                                         } /* if */ \
    647                                         /* printf( "C %llo %d %d '%c' %x\n", fmt2.val, fmt2.wd, fmt2.pc, fmt2.base, fmt2.all ); */ \
    648586                                        (ostype &)(os | fmt | "" | fmt2); \
    649587                                } /* if */ \
    650588                        } /* if */ \
    651589                } else { \
    652                         if ( CODE == 'd' ) { \
    653                                 if ( f.val < 0 )  { fmt( os, "-" ); sepOff( os ); f.val = -f.val; f.flags.sign = false; } \
    654                         } /* if */ \
    655590                        base10_128( os, f ); \
    656591                } /* if */ \
  • libcfa/src/startup.cfa

    r6a490b2 rb7d6a36  
    1414//
    1515
    16 #include <time.h>                // tzset
    17 #include <locale.h>        // setlocale
     16#include <time.h>                                                                               // tzset
    1817#include "startup.hfa"
    1918
     
    2221    void __cfaabi_appready_startup( void ) {
    2322                tzset();                                                                                // initialize time global variables
    24                 setlocale(LC_NUMERIC, "");
    2523                #ifdef __CFA_DEBUG__
    2624                extern void heapAppStart();
     
    4341struct __spinlock_t;
    4442extern "C" {
    45         void __cfaabi_dbg_record_lock(struct __spinlock_t & this, const char prev_name[]) __attribute__(( weak )) {}
     43        void __cfaabi_dbg_record(struct __spinlock_t & this, const char prev_name[]) __attribute__(( weak )) {}
    4644}
    4745
  • libcfa/src/stdhdr/malloc.h

    r6a490b2 rb7d6a36  
    1010// Created On       : Thu Jul 20 15:58:16 2017
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Apr 16 22:44:06 2020
    13 // Update Count     : 13
     12// Last Modified On : Sat Aug 11 09:06:31 2018
     13// Update Count     : 10
    1414//
    1515
     
    3131
    3232extern "C" {
    33 void * aalloc( size_t noOfElems, size_t elemSize );
    34 void * amemalign( size_t alignment, size_t noOfElems, size_t elemSize );
    35 void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize );
    3633size_t malloc_alignment( void * );
    3734bool malloc_zero_fill( void * );
    38 size_t malloc_size( void * );
    3935int malloc_stats_fd( int fd );
     36void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize );
    4037} // extern "C"
    4138
  • libcfa/src/stdlib.cfa

    r6a490b2 rb7d6a36  
    1010// Created On       : Thu Jan 28 17:10:29 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Apr 16 22:43:33 2020
    13 // Update Count     : 498
     12// Last Modified On : Tue Feb  4 08:27:08 2020
     13// Update Count     : 486
    1414//
    1515
     
    2020#define _XOPEN_SOURCE 600                                                               // posix_memalign, *rand48
    2121#include <string.h>                                                                             // memcpy, memset
     22#include <malloc.h>                                                                             // malloc_usable_size
    2223//#include <math.h>                                                                             // fabsf, fabs, fabsl
    2324#include <complex.h>                                                                    // _Complex_I
     
    3738        } // alloc_set
    3839
    39         T * alloc_set( T ptr[], size_t dim, T fill ) {          // realloc array with fill
    40                 size_t olen = malloc_usable_size( ptr );                // current allocation
    41                 void * nptr = (void *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
    42                 size_t nlen = malloc_usable_size( nptr );               // new allocation
    43                 if ( nlen > olen ) {                                                    // larger ?
    44                         for ( i; malloc_size( ptr ) / sizeof(T) ~ dim ) {
    45                                 memcpy( &ptr[i], &fill, sizeof(T) );    // initialize with fill value
    46                         } // for
    47                 } // if
    48                 return (T *)nptr;
    49         } // alloc_align_set
    50 
    5140        T * alloc_align_set( T ptr[], size_t align, char fill ) { // aligned realloc with fill
    5241                size_t olen = malloc_usable_size( ptr );                // current allocation
     
    5948                return (T *)nptr;
    6049        } // alloc_align_set
    61 
    62         T * alloc_align_set( T ptr[], size_t align, size_t dim, T fill ) { // aligned realloc with fill
    63                 size_t olen = malloc_usable_size( ptr );                // current allocation
    64                 void * nptr = (void *)realloc( (void *)ptr, align, sizeof(T) ); // CFA realloc
    65                 // char * nptr = alloc_align( ptr, align );
    66                 size_t nlen = malloc_usable_size( nptr );               // new allocation
    67                 if ( nlen > olen ) {                                                    // larger ?
    68                         for ( i; dim ) { memcpy( &ptr[i], &fill, sizeof(T) ); } // initialize with fill value
    69                 } // if
    70                 return (T *)nptr;
    71         } // alloc_align_set
    7250} // distribution
    7351
  • libcfa/src/stdlib.hfa

    r6a490b2 rb7d6a36  
    1010// Created On       : Thu Jan 28 17:12:35 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Thu Apr 16 22:44:05 2020
    13 // Update Count     : 432
     12// Last Modified On : Tue Feb  4 08:27:01 2020
     13// Update Count     : 401
    1414//
    1515
     
    2121#include <stdlib.h>                                                                             // *alloc, strto*, ato*
    2222
    23 // Reduce includes by explicitly defining these routines.
    2423extern "C" {
    2524        void * memalign( size_t align, size_t size );           // malloc.h
    26         size_t malloc_usable_size( void * ptr );                        // malloc.h
    27         size_t malloc_size( void * addr );                                      // CFA heap
    28         void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ); // CFA heap
    2925        void * memset( void * dest, int fill, size_t size ); // string.h
    3026        void * memcpy( void * dest, const void * src, size_t size ); // string.h
    31         void * resize( void * oaddr, size_t size );                     // CFA heap
     27    void * cmemalign( size_t alignment, size_t noOfElems, size_t elemSize ); // CFA heap
    3228} // extern "C"
    3329
    34 void * resize( void * oaddr, size_t nalign, size_t size ); // CFA heap
    3530void * realloc( void * oaddr, size_t nalign, size_t size ); // CFA heap
    3631
     
    4540
    4641static inline forall( dtype T | sized(T) ) {
    47         // Cforall safe equivalents, i.e., implicit size specification
     42        // C dynamic allocation
    4843
    4944        T * malloc( void ) {
     
    7671                return posix_memalign( (void **)ptr, align, sizeof(T) ); // C posix_memalign
    7772        } // posix_memalign
    78 } // distribution
    79 
    80 static inline forall( dtype T | sized(T) ) {
    81         // Cforall safe general allocation, fill, resize, array
     73
     74        // Cforall dynamic allocation
    8275
    8376        T * alloc( void ) {
     
    9083        } // alloc
    9184
    92         forall( dtype S | sized(S) )
    93         T * alloc( S ptr[], size_t dim = 1 ) {                          // singleton/array resize
    94                 size_t len = malloc_usable_size( ptr );                 // current bucket size
    95                 if ( sizeof(T) * dim > len ) {                                  // not enough space ?
    96                         T * temp = alloc( dim );                                        // new storage
    97                         free( ptr );                                                            // free old storage
    98                         return temp;
    99                 } else {
    100                         return (T *)ptr;
    101                 } // if
    102         } // alloc
    103 
    104         T * alloc( T ptr[], size_t dim, bool copy = true ) {
    105                 if ( copy ) {                                                                   // realloc
    106                         return (T *)(void *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
    107                 } else {
    108                         struct __Unknown {};
    109                         return alloc( (__Unknown *)ptr, dim );          // reuse, cheat making T/S different types
    110                 } // if
     85        T * alloc( T ptr[], size_t dim ) {                                      // realloc
     86                return (T *)(void *)realloc( (void *)ptr, dim * sizeof(T) ); // C realloc
    11187        } // alloc
    11288
     
    136112forall( dtype T | sized(T) ) {
    137113        T * alloc_set( T ptr[], size_t dim, char fill );        // realloc array with fill
    138         T * alloc_set( T ptr[], size_t dim, T fill );           // realloc array with fill
    139114} // distribution
    140115
     
    150125        T * alloc_align( T ptr[], size_t align ) {                      // aligned realloc array
    151126                return (T *)(void *)realloc( (void *)ptr, align, sizeof(T) ); // CFA realloc
    152         } // alloc_align
    153 
    154         forall( dtype S | sized(S) )
    155         T * alloc_align( S ptr[], size_t align ) {                      // aligned reuse array
    156                 return (T *)(void *)resize( (void *)ptr, align, sizeof(T) ); // CFA realloc
    157127        } // alloc_align
    158128
     
    185155
    186156forall( dtype T | sized(T) ) {
    187         T * alloc_align_set( T ptr[], size_t align, char fill ); // aligned realloc with fill
    188         T * alloc_align_set( T ptr[], size_t align, T fill ); // aligned realloc with fill
    189157        T * alloc_align_set( T ptr[], size_t align, size_t dim, char fill ); // aligned realloc array with fill
    190         T * alloc_align_set( T ptr[], size_t align, size_t dim, T fill ); // aligned realloc array with fill
    191 } // distribution
    192 
    193 static inline forall( dtype T | sized(T) ) {
    194         // Cforall safe initialization/copy, i.e., implicit size specification, non-array types
     158} // distribution
     159
     160static inline forall( dtype T | sized(T) ) {
     161        // data, non-array types
    195162        T * memset( T * dest, char fill ) {
    196163                return (T *)memset( dest, fill, sizeof(T) );
     
    203170
    204171static inline forall( dtype T | sized(T) ) {
    205         // Cforall safe initialization/copy, i.e., implicit size specification, array types
     172        // data, array types
    206173        T * amemset( T dest[], char fill, size_t dim ) {
    207174                return (T *)(void *)memset( dest, fill, dim * sizeof(T) ); // C memset
     
    213180} // distribution
    214181
    215 // Cforall allocation/deallocation and constructor/destructor, non-array types
     182// allocation/deallocation and constructor/destructor, non-array types
    216183forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * new( Params p );
    217184forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void delete( T * ptr );
    218185forall( dtype T, ttype Params | sized(T) | { void ^?{}( T & ); void delete( Params ); } ) void delete( T * ptr, Params rest );
    219186
    220 // Cforall allocation/deallocation and constructor/destructor, array types
     187// allocation/deallocation and constructor/destructor, array types
    221188forall( dtype T | sized(T), ttype Params | { void ?{}( T &, Params ); } ) T * anew( size_t dim, Params p );
    222189forall( dtype T | sized(T) | { void ^?{}( T & ); } ) void adelete( size_t dim, T arr[] );
  • src/AST/Convert.cpp

    r6a490b2 rb7d6a36  
    493493        }
    494494
    495         const ast::Stmt * visit(const ast::SuspendStmt * node ) override final {
    496                 if ( inCache( node ) ) return nullptr;
    497                 auto stmt = new SuspendStmt();
    498                 stmt->then   = get<CompoundStmt>().accept1( node->then   );
    499                 switch(node->type) {
    500                         case ast::SuspendStmt::None     : stmt->type = SuspendStmt::None     ; break;
    501                         case ast::SuspendStmt::Coroutine: stmt->type = SuspendStmt::Coroutine; break;
    502                         case ast::SuspendStmt::Generator: stmt->type = SuspendStmt::Generator; break;
    503                 }
    504                 return stmtPostamble( stmt, node );
    505         }
    506 
    507495        const ast::Stmt * visit( const ast::WaitForStmt * node ) override final {
    508496                if ( inCache( node ) ) return nullptr;
     
    18711859        }
    18721860
    1873         virtual void visit( const SuspendStmt * old ) override final {
    1874                 if ( inCache( old ) ) return;
    1875                 ast::SuspendStmt::Type type;
    1876                 switch (old->type) {
    1877                         case SuspendStmt::Coroutine: type = ast::SuspendStmt::Coroutine; break;
    1878                         case SuspendStmt::Generator: type = ast::SuspendStmt::Generator; break;
    1879                         case SuspendStmt::None     : type = ast::SuspendStmt::None     ; break;
    1880                         default: abort();
    1881                 }
    1882                 this->node = new ast::SuspendStmt(
    1883                         old->location,
    1884                         GET_ACCEPT_1(then  , CompoundStmt),
    1885                         type,
    1886                         GET_LABELS_V(old->labels)
    1887                 );
    1888                 cache.emplace( old, this->node );
    1889         }
    1890 
    18911861        virtual void visit( const WaitForStmt * old ) override final {
    18921862                if ( inCache( old ) ) return;
  • src/AST/Decl.hpp

    r6a490b2 rb7d6a36  
    259259
    260260        bool is_coroutine() { return kind == Coroutine; }
    261         bool is_generator() { return kind == Generator; }
    262         bool is_monitor  () { return kind == Monitor  ; }
    263         bool is_thread   () { return kind == Thread   ; }
     261        bool is_monitor() { return kind == Monitor; }
     262        bool is_thread() { return kind == Thread; }
    264263
    265264        const Decl * accept( Visitor & v ) const override { return v.visit( this ); }
  • src/AST/Fwd.hpp

    r6a490b2 rb7d6a36  
    5353class CatchStmt;
    5454class FinallyStmt;
    55 class SuspendStmt;
    5655class WaitForStmt;
    5756class WithStmt;
  • src/AST/Pass.hpp

    r6a490b2 rb7d6a36  
    111111        const ast::Stmt *             visit( const ast::CatchStmt            * ) override final;
    112112        const ast::Stmt *             visit( const ast::FinallyStmt          * ) override final;
    113         const ast::Stmt *             visit( const ast::SuspendStmt          * ) override final;
    114113        const ast::Stmt *             visit( const ast::WaitForStmt          * ) override final;
    115114        const ast::Decl *             visit( const ast::WithStmt             * ) override final;
  • src/AST/Pass.impl.hpp

    r6a490b2 rb7d6a36  
    823823
    824824//--------------------------------------------------------------------------
    825 // FinallyStmt
    826 template< typename pass_t >
    827 const ast::Stmt * ast::Pass< pass_t >::visit( const ast::SuspendStmt * node ) {
    828         VISIT_START( node );
    829 
    830         VISIT(
    831                 maybe_accept( node, &SuspendStmt::then   );
    832         )
    833 
    834         VISIT_END( Stmt, node );
    835 }
    836 
    837 //--------------------------------------------------------------------------
    838825// WaitForStmt
    839826template< typename pass_t >
  • src/AST/Print.cpp

    r6a490b2 rb7d6a36  
    674674                safe_print( node->body );
    675675                --indent;
    676 
    677                 return node;
    678         }
    679 
    680         virtual const ast::Stmt * visit( const ast::SuspendStmt * node ) override final {
    681                 os << "Suspend Statement";
    682                 switch (node->type) {
    683                         case ast::SuspendStmt::None     : os << " with implicit target"; break;
    684                         case ast::SuspendStmt::Generator: os << " for generator"; break;
    685                         case ast::SuspendStmt::Coroutine: os << " for coroutine"; break;
    686                 }
    687                 os << endl;
    688 
    689                 ++indent;
    690                 if(node->then) {
    691                         os << indent << " with post statement :" << endl;
    692                         safe_print( node->then );
    693                 }
    694                 ++indent;
    695676
    696677                return node;
  • src/AST/Stmt.hpp

    r6a490b2 rb7d6a36  
    342342};
    343343
    344 /// Suspend statement
    345 class SuspendStmt final : public Stmt {
    346 public:
    347         ptr<CompoundStmt> then;
    348         enum Type { None, Coroutine, Generator } type = None;
    349 
    350         SuspendStmt( const CodeLocation & loc, const CompoundStmt * then, Type type, std::vector<Label> && labels = {} )
    351         : Stmt(loc, std::move(labels)), then(then), type(type) {}
    352 
    353         const Stmt * accept( Visitor & v ) const override { return v.visit( this ); }
    354 private:
    355         SuspendStmt * clone() const override { return new SuspendStmt{ *this }; }
    356         MUTATE_FRIEND
    357 };
    358 
    359344/// Wait for concurrency statement `when (...) waitfor (... , ...) ... timeout(...) ... else ...`
    360345class WaitForStmt final : public Stmt {
  • src/AST/Visitor.hpp

    r6a490b2 rb7d6a36  
    4747    virtual const ast::Stmt *             visit( const ast::CatchStmt            * ) = 0;
    4848    virtual const ast::Stmt *             visit( const ast::FinallyStmt          * ) = 0;
    49     virtual const ast::Stmt *             visit( const ast::SuspendStmt          * ) = 0;
    5049    virtual const ast::Stmt *             visit( const ast::WaitForStmt          * ) = 0;
    5150    virtual const ast::Decl *             visit( const ast::WithStmt             * ) = 0;
  • src/Common/PassVisitor.h

    r6a490b2 rb7d6a36  
    110110        virtual void visit( FinallyStmt * finallyStmt ) override final;
    111111        virtual void visit( const FinallyStmt * finallyStmt ) override final;
    112         virtual void visit( SuspendStmt * suspendStmt ) override final;
    113         virtual void visit( const SuspendStmt * suspendStmt ) override final;
    114112        virtual void visit( WaitForStmt * waitforStmt ) override final;
    115113        virtual void visit( const WaitForStmt * waitforStmt ) override final;
     
    278276        virtual Statement * mutate( CatchStmt * catchStmt ) override final;
    279277        virtual Statement * mutate( FinallyStmt * finallyStmt ) override final;
    280         virtual Statement * mutate( SuspendStmt * suspendStmt ) override final;
    281278        virtual Statement * mutate( WaitForStmt * waitforStmt ) override final;
    282279        virtual Declaration * mutate( WithStmt * withStmt ) override final;
  • src/Common/PassVisitor.impl.h

    r6a490b2 rb7d6a36  
    15221522
    15231523//--------------------------------------------------------------------------
    1524 // SuspendStmt
    1525 template< typename pass_type >
    1526 void PassVisitor< pass_type >::visit( SuspendStmt * node ) {
    1527         VISIT_START( node );
    1528 
    1529         maybeAccept_impl( node->then  , *this );
    1530 
    1531         VISIT_END( node );
    1532 }
    1533 
    1534 template< typename pass_type >
    1535 void PassVisitor< pass_type >::visit( const SuspendStmt * node ) {
    1536         VISIT_START( node );
    1537 
    1538         maybeAccept_impl( node->then  , *this );
    1539 
    1540         VISIT_END( node );
    1541 }
    1542 
    1543 template< typename pass_type >
    1544 Statement * PassVisitor< pass_type >::mutate( SuspendStmt * node ) {
    1545         MUTATE_START( node );
    1546 
    1547         maybeMutate_impl( node->then  , *this );
    1548 
    1549         MUTATE_END( Statement, node );
    1550 }
    1551 
    1552 //--------------------------------------------------------------------------
    15531524// WaitForStmt
    15541525template< typename pass_type >
  • src/CompilationState.cc

    r6a490b2 rb7d6a36  
    2727        nopreludep = false,
    2828        genproto = false,
    29         deterministic_output = false,
    3029        nomainp = false,
    3130        parsep = false,
  • src/CompilationState.h

    r6a490b2 rb7d6a36  
    2828        nopreludep,
    2929        genproto,
    30         deterministic_output,
    3130        nomainp,
    3231        parsep,
  • src/Concurrency/Keywords.cc

    r6a490b2 rb7d6a36  
    1616#include "Concurrency/Keywords.h"
    1717
    18 #include <cassert>                        // for assert
    19 #include <string>                         // for string, operator==
    20 
    21 #include "Common/PassVisitor.h"           // for PassVisitor
    22 #include "Common/SemanticError.h"         // for SemanticError
    23 #include "Common/utility.h"               // for deleteAll, map_range
    24 #include "CodeGen/OperatorTable.h"        // for isConstructor
    25 #include "ControlStruct/LabelGenerator.h" // for LebelGenerator
    26 #include "InitTweak/InitTweak.h"          // for getPointerBase
    27 #include "SynTree/LinkageSpec.h"          // for Cforall
    28 #include "SynTree/Constant.h"             // for Constant
    29 #include "SynTree/Declaration.h"          // for StructDecl, FunctionDecl, ObjectDecl
    30 #include "SynTree/Expression.h"           // for VariableExpr, ConstantExpr, Untype...
    31 #include "SynTree/Initializer.h"          // for SingleInit, ListInit, Initializer ...
    32 #include "SynTree/Label.h"                // for Label
    33 #include "SynTree/Statement.h"            // for CompoundStmt, DeclStmt, ExprStmt
    34 #include "SynTree/Type.h"                 // for StructInstType, Type, PointerType
    35 #include "SynTree/Visitor.h"              // for Visitor, acceptAll
     18#include <cassert>                 // for assert
     19#include <string>                  // for string, operator==
     20
     21#include "Common/PassVisitor.h"    // for PassVisitor
     22#include "Common/SemanticError.h"  // for SemanticError
     23#include "Common/utility.h"        // for deleteAll, map_range
     24#include "CodeGen/OperatorTable.h" // for isConstructor
     25#include "InitTweak/InitTweak.h"   // for getPointerBase
     26#include "SynTree/LinkageSpec.h"   // for Cforall
     27#include "SynTree/Constant.h"      // for Constant
     28#include "SynTree/Declaration.h"   // for StructDecl, FunctionDecl, ObjectDecl
     29#include "SynTree/Expression.h"    // for VariableExpr, ConstantExpr, Untype...
     30#include "SynTree/Initializer.h"   // for SingleInit, ListInit, Initializer ...
     31#include "SynTree/Label.h"         // for Label
     32#include "SynTree/Statement.h"     // for CompoundStmt, DeclStmt, ExprStmt
     33#include "SynTree/Type.h"          // for StructInstType, Type, PointerType
     34#include "SynTree/Visitor.h"       // for Visitor, acceptAll
    3635
    3736class Attribute;
     
    8988        //      int data;                                  int data;
    9089        //      a_struct_t more_data;                      a_struct_t more_data;
    91         //                                =>             $thread __thrd_d;
     90        //                                =>             thread_desc __thrd_d;
    9291        // };                                        };
    93         //                                           static inline $thread * get_thread( MyThread * this ) { return &this->__thrd_d; }
     92        //                                           static inline thread_desc * get_thread( MyThread * this ) { return &this->__thrd_d; }
    9493        //
    9594        class ThreadKeyword final : public ConcurrentSueKeyword {
     
    9796
    9897                ThreadKeyword() : ConcurrentSueKeyword(
    99                         "$thread",
     98                        "thread_desc",
    10099                        "__thrd",
    101100                        "get_thread",
     
    121120        //      int data;                                  int data;
    122121        //      a_struct_t more_data;                      a_struct_t more_data;
    123         //                                =>             $coroutine __cor_d;
     122        //                                =>             coroutine_desc __cor_d;
    124123        // };                                        };
    125         //                                           static inline $coroutine * get_coroutine( MyCoroutine * this ) { return &this->__cor_d; }
     124        //                                           static inline coroutine_desc * get_coroutine( MyCoroutine * this ) { return &this->__cor_d; }
    126125        //
    127126        class CoroutineKeyword final : public ConcurrentSueKeyword {
     
    129128
    130129                CoroutineKeyword() : ConcurrentSueKeyword(
    131                         "$coroutine",
     130                        "coroutine_desc",
    132131                        "__cor",
    133132                        "get_coroutine",
     
    148147        };
    149148
    150 
    151 
    152149        //-----------------------------------------------------------------------------
    153150        //Handles monitor type declarations :
     
    155152        //      int data;                                  int data;
    156153        //      a_struct_t more_data;                      a_struct_t more_data;
    157         //                                =>             $monitor __mon_d;
     154        //                                =>             monitor_desc __mon_d;
    158155        // };                                        };
    159         //                                           static inline $monitor * get_coroutine( MyMonitor * this ) { return &this->__cor_d; }
     156        //                                           static inline monitor_desc * get_coroutine( MyMonitor * this ) { return &this->__cor_d; }
    160157        //
    161158        class MonitorKeyword final : public ConcurrentSueKeyword {
     
    163160
    164161                MonitorKeyword() : ConcurrentSueKeyword(
    165                         "$monitor",
     162                        "monitor_desc",
    166163                        "__mon",
    167164                        "get_monitor",
     
    183180
    184181        //-----------------------------------------------------------------------------
    185         //Handles generator type declarations :
    186         // generator MyGenerator {                   struct MyGenerator {
    187         //      int data;                                  int data;
    188         //      a_struct_t more_data;                      a_struct_t more_data;
    189         //                                =>             int __gen_next;
    190         // };                                        };
    191         //
    192         class GeneratorKeyword final : public ConcurrentSueKeyword {
    193           public:
    194 
    195                 GeneratorKeyword() : ConcurrentSueKeyword(
    196                         "$generator",
    197                         "__generator_state",
    198                         "get_generator",
    199                         "Unable to find builtin type $generator\n",
    200                         true,
    201                         AggregateDecl::Generator
    202                 )
    203                 {}
    204 
    205                 virtual ~GeneratorKeyword() {}
    206 
    207                 virtual bool is_target( StructDecl * decl ) override final { return decl->is_generator(); }
    208 
    209                 static void implement( std::list< Declaration * > & translationUnit ) {
    210                         PassVisitor< GeneratorKeyword > impl;
    211                         mutateAll( translationUnit, impl );
    212                 }
    213         };
    214 
    215 
    216         //-----------------------------------------------------------------------------
    217         class SuspendKeyword final : public WithStmtsToAdd, public WithGuards {
    218         public:
    219                 SuspendKeyword() = default;
    220                 virtual ~SuspendKeyword() = default;
    221 
    222                 void  premutate( FunctionDecl * );
    223                 DeclarationWithType * postmutate( FunctionDecl * );
    224 
    225                 Statement * postmutate( SuspendStmt * );
    226 
    227                 static void implement( std::list< Declaration * > & translationUnit ) {
    228                         PassVisitor< SuspendKeyword > impl;
    229                         mutateAll( translationUnit, impl );
    230                 }
    231 
    232         private:
    233                 DeclarationWithType * is_main( FunctionDecl * );
    234                 bool is_real_suspend( FunctionDecl * );
    235 
    236                 Statement * make_generator_suspend( SuspendStmt * );
    237                 Statement * make_coroutine_suspend( SuspendStmt * );
    238 
    239                 struct LabelPair {
    240                         Label obj;
    241                         int   idx;
    242                 };
    243 
    244                 LabelPair make_label() {
    245                         labels.push_back( gen.newLabel("generator") );
    246                         return { labels.back(), int(labels.size()) };
    247                 }
    248 
    249                 DeclarationWithType * in_generator = nullptr;
    250                 FunctionDecl * decl_suspend = nullptr;
    251                 std::vector<Label> labels;
    252                 ControlStruct::LabelGenerator & gen = *ControlStruct::LabelGenerator::getGenerator();
    253         };
    254 
    255         //-----------------------------------------------------------------------------
    256182        //Handles mutex routines definitions :
    257183        // void foo( A * mutex a, B * mutex b,  int i ) {                  void foo( A * a, B * b,  int i ) {
    258         //                                                                       $monitor * __monitors[] = { get_monitor(a), get_monitor(b) };
     184        //                                                                       monitor_desc * __monitors[] = { get_monitor(a), get_monitor(b) };
    259185        //                                                                       monitor_guard_t __guard = { __monitors, 2 };
    260186        //    /*Some code*/                                       =>           /*Some code*/
     
    295221        //Handles mutex routines definitions :
    296222        // void foo( A * mutex a, B * mutex b,  int i ) {                  void foo( A * a, B * b,  int i ) {
    297         //                                                                       $monitor * __monitors[] = { get_monitor(a), get_monitor(b) };
     223        //                                                                       monitor_desc * __monitors[] = { get_monitor(a), get_monitor(b) };
    298224        //                                                                       monitor_guard_t __guard = { __monitors, 2 };
    299225        //    /*Some code*/                                       =>           /*Some code*/
     
    325251                CoroutineKeyword        ::implement( translationUnit );
    326252                MonitorKeyword  ::implement( translationUnit );
    327                 GeneratorKeyword  ::implement( translationUnit );
    328                 SuspendKeyword    ::implement( translationUnit );
    329253        }
    330254
     
    382306        Expression * ConcurrentSueKeyword::postmutate( KeywordCastExpr * cast ) {
    383307                if ( cast_target == cast->target ) {
    384                         // convert (thread &)t to ($thread &)*get_thread(t), etc.
     308                        // convert (thread &)t to (thread_desc &)*get_thread(t), etc.
    385309                        if( !type_decl ) SemanticError( cast, context_error );
    386310                        if( !dtor_decl ) SemanticError( cast, context_error );
     
    453377                        get_type,
    454378                        nullptr,
    455                         { new Attribute("const") },
     379                        noAttributes,
    456380                        Type::Inline
    457381                );
     
    522446
    523447                declsToAddAfter.push_back( get_decl );
    524         }
    525 
    526         //=============================================================================================
    527         // Suspend keyword implementation
    528         //=============================================================================================
    529         DeclarationWithType * SuspendKeyword::is_main( FunctionDecl * func) {
    530                 if(func->name != "main") return nullptr;
    531                 if(func->type->parameters.size() != 1) return nullptr;
    532 
    533                 auto param = func->type->parameters.front();
    534 
    535                 auto type  = dynamic_cast<ReferenceType * >(param->get_type());
    536                 if(!type) return nullptr;
    537 
    538                 auto obj   = dynamic_cast<StructInstType *>(type->base);
    539                 if(!obj) return nullptr;
    540 
    541                 if(!obj->baseStruct->is_generator()) return nullptr;
    542 
    543                 return param;
    544         }
    545 
    546         bool SuspendKeyword::is_real_suspend( FunctionDecl * func ) {
    547                 if(isMangled(func->linkage)) return false; // the real suspend isn't mangled
    548                 if(func->name != "__cfactx_suspend") return false; // the real suspend has a specific name
    549                 if(func->type->parameters.size() != 0) return false; // Too many parameters
    550                 if(func->type->returnVals.size() != 0) return false; // Too many return values
    551 
    552                 return true;
    553         }
    554 
    555         void SuspendKeyword::premutate( FunctionDecl * func ) {
    556                 GuardValue(in_generator);
    557                 in_generator = nullptr;
    558 
    559                 // Is this the real suspend?
    560                 if(is_real_suspend(func)) {
    561                         decl_suspend = decl_suspend ? decl_suspend : func;
    562                         return;
    563                 }
    564 
    565                 // Is this the main of a generator?
    566                 auto param = is_main( func );
    567                 if(!param) return;
    568 
    569                 if(func->type->returnVals.size() != 0) SemanticError(func->location, "Generator main must return void");
    570 
    571                 in_generator = param;
    572                 GuardValue(labels);
    573                 labels.clear();
    574         }
    575 
    576         DeclarationWithType * SuspendKeyword::postmutate( FunctionDecl * func ) {
    577                 if( !func->statements ) return func; // Not the actual definition, don't do anything
    578                 if( !in_generator     ) return func; // Not in a generator, don't do anything
    579                 if( labels.empty()    ) return func; // Generator has no states, nothing to do, could throw a warning
    580 
    581                 // This is a generator main, we need to add the following code to the top
    582                 // static void * __generator_labels[] = {&&s0, &&s1, ...};
    583                 // goto * __generator_labels[gen.__generator_state];
    584                 const auto & loc = func->location;
    585 
    586                 const auto first_label = gen.newLabel("generator");
    587 
    588                 // for each label add to declaration
    589                 std::list<Initializer*> inits = { new SingleInit( new LabelAddressExpr( first_label ) ) };
    590                 for(const auto & label : labels) {
    591                         inits.push_back(
    592                                 new SingleInit(
    593                                         new LabelAddressExpr( label )
    594                                 )
    595                         );
    596                 }
    597                 auto init = new ListInit(std::move(inits), noDesignators, true);
    598                 labels.clear();
    599 
    600                 // create decl
    601                 auto decl = new ObjectDecl(
    602                         "__generator_labels",
    603                         Type::StorageClasses( Type::Static ),
    604                         LinkageSpec::AutoGen,
    605                         nullptr,
    606                         new ArrayType(
    607                                 Type::Qualifiers(),
    608                                 new PointerType(
    609                                         Type::Qualifiers(),
    610                                         new VoidType( Type::Qualifiers() )
    611                                 ),
    612                                 nullptr,
    613                                 false, false
    614                         ),
    615                         init
    616                 );
    617 
    618                 // create the goto
    619                 assert(in_generator);
    620 
    621                 auto go_decl = new ObjectDecl(
    622                         "__generator_label",
    623                         noStorageClasses,
    624                         LinkageSpec::AutoGen,
    625                         nullptr,
    626                         new PointerType(
    627                                 Type::Qualifiers(),
    628                                 new VoidType( Type::Qualifiers() )
    629                         ),
    630                         new SingleInit(
    631                                 new UntypedExpr(
    632                                         new NameExpr("?[?]"),
    633                                         {
    634                                                 new NameExpr("__generator_labels"),
    635                                                 new UntypedMemberExpr(
    636                                                         new NameExpr("__generator_state"),
    637                                                         new VariableExpr( in_generator )
    638                                                 )
    639                                         }
    640                                 )
    641                         )
    642                 );
    643                 go_decl->location = loc;
    644 
    645                 auto go = new BranchStmt(
    646                         new VariableExpr( go_decl ),
    647                         BranchStmt::Goto
    648                 );
    649                 go->location = loc;
    650                 go->computedTarget->location = loc;
    651 
    652                 auto noop = new NullStmt({ first_label });
    653                 noop->location = loc;
    654 
    655                 // wrap everything in a nice compound
    656                 auto body = new CompoundStmt({
    657                         new DeclStmt( decl ),
    658                         new DeclStmt( go_decl ),
    659                         go,
    660                         noop,
    661                         func->statements
    662                 });
    663                 body->location   = loc;
    664                 func->statements = body;
    665 
    666                 return func;
    667         }
    668 
    669         Statement * SuspendKeyword::postmutate( SuspendStmt * stmt ) {
    670                 SuspendStmt::Type type = stmt->type;
    671                 if(type == SuspendStmt::None) {
    672                         // This suspend has a implicit target, find it
    673                         type = in_generator ? SuspendStmt::Generator : SuspendStmt::Coroutine;
    674                 }
    675 
    676                 // Check that the target makes sense
    677                 if(!in_generator && type == SuspendStmt::Generator) SemanticError( stmt->location, "'suspend generator' must be used inside main of generator type.");
    678 
    679                 // Act appropriately
    680                 switch(type) {
    681                         case SuspendStmt::Generator: return make_generator_suspend(stmt);
    682                         case SuspendStmt::Coroutine: return make_coroutine_suspend(stmt);
    683                         default: abort();
    684                 }
    685         }
    686 
    687         Statement * SuspendKeyword::make_generator_suspend( SuspendStmt * stmt ) {
    688                 assert(in_generator);
    689                 // Target code is :
    690                 //   gen.__generator_state = X;
    691                 //   { THEN }
    692                 //   return;
    693                 //   __gen_X:;
    694 
    695                 // Save the location and delete the old statement, we only need the location from this point on
    696                 auto loc = stmt->location;
    697 
    698                 // Build the label and get its index
    699                 auto label = make_label();
    700 
    701                 // Create the context saving statement
    702                 auto save = new ExprStmt( new UntypedExpr(
    703                         new NameExpr( "?=?" ),
    704                         {
    705                                 new UntypedMemberExpr(
    706                                         new NameExpr("__generator_state"),
    707                                         new VariableExpr( in_generator )
    708                                 ),
    709                                 new ConstantExpr(
    710                                         Constant::from_int( label.idx )
    711                                 )
    712                         }
    713                 ));
    714                 assert(save->expr);
    715                 save->location = loc;
    716                 stmtsToAddBefore.push_back( save );
    717 
    718                 // if we have a then add it here
    719                 auto then = stmt->then;
    720                 stmt->then = nullptr;
    721                 delete stmt;
    722                 if(then) stmtsToAddBefore.push_back( then );
    723 
    724                 // Create the return statement
    725                 auto ret = new ReturnStmt( nullptr );
    726                 ret->location = loc;
    727                 stmtsToAddBefore.push_back( ret );
    728 
    729                 // Create the null statement with the created label
    730                 auto noop = new NullStmt({ label.obj });
    731                 noop->location = loc;
    732 
    733                 // Return the null statement to take the place of the previous statement
    734                 return noop;
    735         }
    736 
    737         Statement * SuspendKeyword::make_coroutine_suspend( SuspendStmt * stmt ) {
    738                 if(stmt->then) SemanticError( stmt->location, "Compound statement following coroutines is not implemented.");
    739 
    740                 // Save the location and delete the old statement, we only need the location from this point on
    741                 auto loc = stmt->location;
    742                 delete stmt;
    743 
    744                 // Create the call expression
    745                 if(!decl_suspend) SemanticError( loc, "suspend keyword applied to coroutines requires coroutines to be in scope, add #include <coroutine.hfa>\n");
    746                 auto expr = new UntypedExpr( VariableExpr::functionPointer( decl_suspend ) );
    747                 expr->location = loc;
    748 
    749                 // Change this statement into a regular expr
    750                 assert(expr);
    751                 auto nstmt = new ExprStmt( expr );
    752                 nstmt->location = loc;
    753                 return nstmt;
    754         }
    755 
     448
     449                // get_decl->fixUniqueId();
     450        }
    756451
    757452        //=============================================================================================
     
    821516        void MutexKeyword::postvisit(StructDecl* decl) {
    822517
    823                 if( decl->name == "$monitor" && decl->body ) {
     518                if( decl->name == "monitor_desc" && decl->body ) {
    824519                        assert( !monitor_decl );
    825520                        monitor_decl = decl;
     
    917612                );
    918613
    919                 //$monitor * __monitors[] = { get_monitor(a), get_monitor(b) };
     614                //monitor_desc * __monitors[] = { get_monitor(a), get_monitor(b) };
    920615                body->push_front( new DeclStmt( monitors) );
    921616        }
     
    978673                );
    979674
    980                 //$monitor * __monitors[] = { get_monitor(a), get_monitor(b) };
     675                //monitor_desc * __monitors[] = { get_monitor(a), get_monitor(b) };
    981676                body->push_front( new DeclStmt( monitors) );
    982677        }
     
    986681        //=============================================================================================
    987682        void ThreadStarter::previsit( StructDecl * decl ) {
    988                 if( decl->name == "$thread" && decl->body ) {
     683                if( decl->name == "thread_desc" && decl->body ) {
    989684                        assert( !thread_decl );
    990685                        thread_decl = decl;
  • src/Concurrency/Waitfor.cc

    r6a490b2 rb7d6a36  
    244244                        decl_mask = decl;
    245245                }
    246                 else if( decl->name == "$monitor" ) {
     246                else if( decl->name == "monitor_desc" ) {
    247247                        assert( !decl_monitor );
    248248                        decl_monitor = decl;
  • src/ControlStruct/ExceptTranslate.cc

    r6a490b2 rb7d6a36  
    99// Author           : Andrew Beach
    1010// Created On       : Wed Jun 14 16:49:00 2017
    11 // Last Modified By : Andrew Beach
    12 // Last Modified On : Fri Mar 27 11:58:00 2020
    13 // Update Count     : 13
     11// Last Modified By : Peter A. Buhr
     12// Last Modified On : Fri Dec 13 23:40:15 2019
     13// Update Count     : 12
    1414//
    1515
     
    211211                        ThrowStmt *throwStmt ) {
    212212                // __throw_terminate( `throwStmt->get_name()` ); }
    213                 return create_given_throw( "__cfaehm_throw_terminate", throwStmt );
     213                return create_given_throw( "__cfaabi_ehm__throw_terminate", throwStmt );
    214214        }
    215215
     
    232232                        ) ) );
    233233                result->push_back( new ExprStmt(
    234                         new UntypedExpr( new NameExpr( "__cfaehm_rethrow_terminate" ) )
     234                        new UntypedExpr( new NameExpr( "__cfaabi_ehm__rethrow_terminate" ) )
    235235                        ) );
    236236                delete throwStmt;
     
    241241                        ThrowStmt *throwStmt ) {
    242242                // __throw_resume( `throwStmt->get_name` );
    243                 return create_given_throw( "__cfaehm_throw_resume", throwStmt );
     243                return create_given_throw( "__cfaabi_ehm__throw_resume", throwStmt );
    244244        }
    245245
     
    309309                        local_except->get_attributes().push_back( new Attribute(
    310310                                "cleanup",
    311                                 { new NameExpr( "__cfaehm_cleanup_terminate" ) }
     311                                { new NameExpr( "__cfaabi_ehm__cleanup_terminate" ) }
    312312                                ) );
    313313
     
    429429                        FunctionDecl * terminate_catch,
    430430                        FunctionDecl * terminate_match ) {
    431                 // { __cfaehm_try_terminate(`try`, `catch`, `match`); }
     431                // { __cfaabi_ehm__try_terminate(`try`, `catch`, `match`); }
    432432
    433433                UntypedExpr * caller = new UntypedExpr( new NameExpr(
    434                         "__cfaehm_try_terminate" ) );
     434                        "__cfaabi_ehm__try_terminate" ) );
    435435                std::list<Expression *>& args = caller->get_args();
    436436                args.push_back( nameOf( try_wrapper ) );
     
    486486
    487487                // struct __try_resume_node __resume_node
    488                 //      __attribute__((cleanup( __cfaehm_try_resume_cleanup )));
     488                //      __attribute__((cleanup( __cfaabi_ehm__try_resume_cleanup )));
    489489                // ** unwinding of the stack here could cause problems **
    490490                // ** however I don't think that can happen currently **
    491                 // __cfaehm_try_resume_setup( &__resume_node, resume_handler );
     491                // __cfaabi_ehm__try_resume_setup( &__resume_node, resume_handler );
    492492
    493493                std::list< Attribute * > attributes;
     
    495495                        std::list< Expression * > attr_params;
    496496                        attr_params.push_back( new NameExpr(
    497                                 "__cfaehm_try_resume_cleanup" ) );
     497                                "__cfaabi_ehm__try_resume_cleanup" ) );
    498498                        attributes.push_back( new Attribute( "cleanup", attr_params ) );
    499499                }
     
    514514
    515515                UntypedExpr *setup = new UntypedExpr( new NameExpr(
    516                         "__cfaehm_try_resume_setup" ) );
     516                        "__cfaabi_ehm__try_resume_setup" ) );
    517517                setup->get_args().push_back( new AddressExpr( nameOf( obj ) ) );
    518518                setup->get_args().push_back( nameOf( resume_handler ) );
     
    539539        ObjectDecl * ExceptionMutatorCore::create_finally_hook(
    540540                        FunctionDecl * finally_wrapper ) {
    541                 // struct __cfaehm_cleanup_hook __finally_hook
     541                // struct __cfaabi_ehm__cleanup_hook __finally_hook
    542542                //      __attribute__((cleanup( finally_wrapper )));
    543543
     
    593593                        // Skip children?
    594594                        return;
    595                 } else if ( structDecl->get_name() == "__cfaehm_base_exception_t" ) {
     595                } else if ( structDecl->get_name() == "__cfaabi_ehm__base_exception_t" ) {
    596596                        assert( nullptr == except_decl );
    597597                        except_decl = structDecl;
    598598                        init_func_types();
    599                 } else if ( structDecl->get_name() == "__cfaehm_try_resume_node" ) {
     599                } else if ( structDecl->get_name() == "__cfaabi_ehm__try_resume_node" ) {
    600600                        assert( nullptr == node_decl );
    601601                        node_decl = structDecl;
    602                 } else if ( structDecl->get_name() == "__cfaehm_cleanup_hook" ) {
     602                } else if ( structDecl->get_name() == "__cfaabi_ehm__cleanup_hook" ) {
    603603                        assert( nullptr == hook_decl );
    604604                        hook_decl = structDecl;
  • src/Parser/ParseNode.h

    r6a490b2 rb7d6a36  
    428428Statement * build_asm( bool voltile, Expression * instruction, ExpressionNode * output = nullptr, ExpressionNode * input = nullptr, ExpressionNode * clobber = nullptr, LabelNode * gotolabels = nullptr );
    429429Statement * build_directive( std::string * directive );
    430 SuspendStmt * build_suspend( StatementNode *, SuspendStmt::Type = SuspendStmt::None);
    431430WaitForStmt * build_waitfor( ExpressionNode * target, StatementNode * stmt, ExpressionNode * when );
    432431WaitForStmt * build_waitfor( ExpressionNode * target, StatementNode * stmt, ExpressionNode * when, WaitForStmt * existing );
  • src/Parser/StatementNode.cc

    r6a490b2 rb7d6a36  
    249249} // build_finally
    250250
    251 SuspendStmt * build_suspend( StatementNode * then, SuspendStmt::Type type ) {
    252         auto node = new SuspendStmt();
    253 
    254         node->type = type;
    255 
    256         std::list< Statement * > stmts;
    257         buildMoveList< Statement, StatementNode >( then, stmts );
    258         if(!stmts.empty()) {
    259                 assert( stmts.size() == 1 );
    260                 node->then = dynamic_cast< CompoundStmt * >( stmts.front() );
    261         }
    262 
    263         return node;
    264 }
    265 
    266251WaitForStmt * build_waitfor( ExpressionNode * targetExpr, StatementNode * stmt, ExpressionNode * when ) {
    267252        auto node = new WaitForStmt();
  • src/Parser/TypeData.cc

    r6a490b2 rb7d6a36  
    769769          case AggregateDecl::Struct:
    770770          case AggregateDecl::Coroutine:
    771           case AggregateDecl::Generator:
    772771          case AggregateDecl::Monitor:
    773772          case AggregateDecl::Thread:
  • src/Parser/lex.ll

    r6a490b2 rb7d6a36  
    6565#define FLOATXX(v) KEYWORD_RETURN(v);
    6666#else
    67 #define FLOATXX(v) IDENTIFIER_RETURN();
     67#define FLOATXX(v) IDENTIFIER_RETURN(); 
    6868#endif // HAVE_KEYWORDS_FLOATXX
    6969
     
    301301_Static_assert  { KEYWORD_RETURN(STATICASSERT); }               // C11
    302302struct                  { KEYWORD_RETURN(STRUCT); }
    303 suspend                 { KEYWORD_RETURN(SUSPEND); }                    // CFA
     303        /* suspend                      { KEYWORD_RETURN(SUSPEND); }                    // CFA */
    304304switch                  { KEYWORD_RETURN(SWITCH); }
    305305thread                  { KEYWORD_RETURN(THREAD); }                             // C11
  • src/Parser/parser.yy

    r6a490b2 rb7d6a36  
    1010// Created On       : Sat Sep  1 20:22:55 2001
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Apr 27 12:25:42 2020
    13 // Update Count     : 4483
     12// Last Modified On : Sun Feb 16 08:22:14 2020
     13// Update Count     : 4461
    1414//
    1515
     
    278278%token OTYPE FTYPE DTYPE TTYPE TRAIT                                    // CFA
    279279%token SIZEOF OFFSETOF
    280 // %token RESUME                                                                        // CFA
    281 %token SUSPEND                                                                  // CFA
     280// %token SUSPEND RESUME                                                                        // CFA
    282281%token ATTRIBUTE EXTENSION                                                              // GCC
    283282%token IF ELSE SWITCH CASE DEFAULT DO WHILE FOR BREAK CONTINUE GOTO RETURN
     
    919918        conditional_expression
    920919        | unary_expression assignment_operator assignment_expression
    921                 {
    922                         if ( $2 == OperKinds::AtAssn ) {
    923                                 SemanticError( yylloc, "C @= assignment is currently unimplemented." ); $$ = nullptr;
    924                         } else {
    925                                 $$ = new ExpressionNode( build_binary_val( $2, $1, $3 ) );
    926                         } // if
    927                 }
     920                { $$ = new ExpressionNode( build_binary_val( $2, $1, $3 ) ); }
    928921        | unary_expression '=' '{' initializer_list_opt comma_opt '}'
    929922                { SemanticError( yylloc, "Initializer assignment is currently unimplemented." ); $$ = nullptr; }
     
    966959
    967960tuple_expression_list:
    968         assignment_expression
    969         | '@'                                                                                           // CFA
    970                 { SemanticError( yylloc, "Eliding tuple element with '@' is currently unimplemented." ); $$ = nullptr; }
    971         | tuple_expression_list ',' assignment_expression
     961        assignment_expression_opt
     962        | tuple_expression_list ',' assignment_expression_opt
    972963                { $$ = (ExpressionNode *)($1->set_last( $3 )); }
    973         | tuple_expression_list ',' '@'
    974                 { SemanticError( yylloc, "Eliding tuple element with '@' is currently unimplemented." ); $$ = nullptr; }
    975964        ;
    976965
     
    12701259        | RETURN '{' initializer_list_opt comma_opt '}' ';'
    12711260                { SemanticError( yylloc, "Initializer return is currently unimplemented." ); $$ = nullptr; }
    1272         | SUSPEND ';'
    1273                 { $$ = new StatementNode( build_suspend( nullptr ) ); }
    1274         | SUSPEND compound_statement
    1275                 { $$ = new StatementNode( build_suspend( $2 ) ); }
    1276         | SUSPEND COROUTINE ';'
    1277                 { $$ = new StatementNode( build_suspend( nullptr, SuspendStmt::Coroutine ) ); }
    1278         | SUSPEND COROUTINE compound_statement
    1279                 { $$ = new StatementNode( build_suspend( $3, SuspendStmt::Coroutine ) ); }
    1280         | SUSPEND GENERATOR ';'
    1281                 { $$ = new StatementNode( build_suspend( nullptr, SuspendStmt::Generator ) ); }
    1282         | SUSPEND GENERATOR compound_statement
    1283                 { $$ = new StatementNode( build_suspend( $3, SuspendStmt::Generator ) ); }
     1261        // | SUSPEND ';'
     1262        //      { SemanticError( yylloc, "Suspend expression is currently unimplemented." ); $$ = nullptr; }
     1263        // | SUSPEND compound_statement ';'
     1264        //      { SemanticError( yylloc, "Suspend expression is currently unimplemented." ); $$ = nullptr; }
    12841265        | THROW assignment_expression_opt ';'                           // handles rethrow
    12851266                { $$ = new StatementNode( build_throw( $2 ) ); }
     
    16081589                // type_specifier can resolve to just TYPEDEFname (e.g., typedef int T; int f( T );). Therefore this must be
    16091590                // flattened to allow lookahead to the '(' without having to reduce identifier_or_type_name.
    1610         cfa_abstract_tuple identifier_or_type_name '(' push cfa_parameter_ellipsis_list_opt pop ')' attribute_list_opt
     1591        cfa_abstract_tuple identifier_or_type_name '(' push cfa_parameter_ellipsis_list_opt pop ')'
    16111592                // To obtain LR(1 ), this rule must be factored out from function return type (see cfa_abstract_declarator).
    1612                 { $$ = DeclarationNode::newFunction( $2, $1, $5, 0 )->addQualifiers( $8 ); }
    1613         | cfa_function_return identifier_or_type_name '(' push cfa_parameter_ellipsis_list_opt pop ')' attribute_list_opt
    1614                 { $$ = DeclarationNode::newFunction( $2, $1, $5, 0 )->addQualifiers( $8 ); }
     1593                { $$ = DeclarationNode::newFunction( $2, $1, $5, 0 ); }
     1594        | cfa_function_return identifier_or_type_name '(' push cfa_parameter_ellipsis_list_opt pop ')'
     1595                { $$ = DeclarationNode::newFunction( $2, $1, $5, 0 ); }
    16151596        ;
    16161597
     
    20962077aggregate_control:                                                                              // CFA
    20972078        GENERATOR
    2098                 { yyy = true; $$ = AggregateDecl::Generator; }
    2099         | MONITOR GENERATOR
    2100                 { SemanticError( yylloc, "monitor generator is currently unimplemented." ); $$ = AggregateDecl::NoAggregate; }
     2079                { yyy = true; $$ = AggregateDecl::Coroutine; }
    21012080        | COROUTINE
    21022081                { yyy = true; $$ = AggregateDecl::Coroutine; }
    21032082        | MONITOR
    21042083                { yyy = true; $$ = AggregateDecl::Monitor; }
    2105         | MONITOR COROUTINE
    2106                 { SemanticError( yylloc, "monitor coroutine is currently unimplemented." ); $$ = AggregateDecl::NoAggregate; }
    21072084        | THREAD
    21082085                { yyy = true; $$ = AggregateDecl::Thread; }
    2109         | MONITOR THREAD
    2110                 { SemanticError( yylloc, "monitor thread is currently unimplemented." ); $$ = AggregateDecl::NoAggregate; }
    21112086        ;
    21122087
  • src/ResolvExpr/AlternativeFinder.cc

    r6a490b2 rb7d6a36  
    12921292
    12931293                try {
    1294                         // Attempt 1 : turn (thread&)X into ($thread&)X.__thrd
     1294                        // Attempt 1 : turn (thread&)X into (thread_desc&)X.__thrd
    12951295                        // Clone is purely for memory management
    12961296                        std::unique_ptr<Expression> tech1 { new UntypedMemberExpr(new NameExpr(castExpr->concrete_target.field), castExpr->arg->clone()) };
     
    13031303                } catch(SemanticErrorException & ) {}
    13041304
    1305                 // Fallback : turn (thread&)X into ($thread&)get_thread(X)
     1305                // Fallback : turn (thread&)X into (thread_desc&)get_thread(X)
    13061306                std::unique_ptr<Expression> fallback { UntypedExpr::createDeref( new UntypedExpr(new NameExpr(castExpr->concrete_target.getter), { castExpr->arg->clone() })) };
    13071307                // don't prune here, since it's guaranteed all alternatives will have the same type
  • src/ResolvExpr/Resolver.cc

    r6a490b2 rb7d6a36  
    99// Author           : Aaron B. Moss
    1010// Created On       : Sun May 17 12:17:01 2015
    11 // Last Modified By : Andrew Beach
    12 // Last Modified On : Fri Mar 27 11:58:00 2020
    13 // Update Count     : 242
     11// Last Modified By : Aaron B. Moss
     12// Last Modified On : Wed May 29 11:00:00 2019
     13// Update Count     : 241
    1414//
    1515
     
    560560                // TODO: Replace *exception type with &exception type.
    561561                if ( throwStmt->get_expr() ) {
    562                         const StructDecl * exception_decl = indexer.lookupStruct( "__cfaehm_base_exception_t" );
     562                        const StructDecl * exception_decl = indexer.lookupStruct( "__cfaabi_ehm__base_exception_t" );
    563563                        assert( exception_decl );
    564564                        Type * exceptType = new PointerType( noQualifiers, new StructInstType( noQualifiers, const_cast<StructDecl *>(exception_decl) ) );
     
    14771477                if ( throwStmt->expr ) {
    14781478                        const ast::StructDecl * exceptionDecl =
    1479                                 symtab.lookupStruct( "__cfaehm_base_exception_t" );
     1479                                symtab.lookupStruct( "__cfaabi_ehm__base_exception_t" );
    14801480                        assert( exceptionDecl );
    14811481                        ast::ptr< ast::Type > exceptType =
  • src/ResolvExpr/TypeEnvironment.cc

    r6a490b2 rb7d6a36  
    2020#include <utility>                     // for pair, move
    2121
    22 #include "CompilationState.h"          // for deterministic_output
    2322#include "Common/utility.h"            // for maybeClone
    2423#include "SynTree/Type.h"              // for Type, FunctionType, Type::Fora...
     
    107106
    108107        void EqvClass::print( std::ostream &os, Indenter indent ) const {
    109                 if( !deterministic_output ) {
    110                         os << "( ";
    111                         std::copy( vars.begin(), vars.end(), std::ostream_iterator< std::string >( os, " " ) );
    112                         os << ")";
    113                 }
     108                os << "( ";
     109                std::copy( vars.begin(), vars.end(), std::ostream_iterator< std::string >( os, " " ) );
     110                os << ")";
    114111                if ( type ) {
    115112                        os << " -> ";
     
    238235                // check safely bindable
    239236                if ( r.type && occursIn( r.type, s.vars.begin(), s.vars.end(), *this ) ) return false;
    240 
     237               
    241238                // merge classes in
    242239                r.vars.insert( s.vars.begin(), s.vars.end() );
  • src/SynTree/Declaration.h

    r6a490b2 rb7d6a36  
    302302
    303303        bool is_coroutine() { return kind == Coroutine; }
    304         bool is_generator() { return kind == Generator; }
    305         bool is_monitor  () { return kind == Monitor  ; }
    306         bool is_thread   () { return kind == Thread   ; }
     304        bool is_monitor() { return kind == Monitor; }
     305        bool is_thread() { return kind == Thread; }
    307306
    308307        virtual StructDecl * clone() const override { return new StructDecl( *this ); }
  • src/SynTree/LinkageSpec.cc

    r6a490b2 rb7d6a36  
    99// Author           : Rodolfo G. Esteves
    1010// Created On       : Sat May 16 13:22:09 2015
    11 // Last Modified By : Andrew Beach
    12 // Last Modified On : Mon Mar  2 16:13:00 2020
    13 // Update Count     : 29
     11// Last Modified By : Peter A. Buhr
     12// Last Modified On : Mon Dec 16 15:02:29 2019
     13// Update Count     : 28
    1414//
    1515
     
    2020
    2121#include "LinkageSpec.h"
    22 #include "Common/CodeLocation.h"
    2322#include "Common/SemanticError.h"
    2423
  • src/SynTree/LinkageSpec.h

    r6a490b2 rb7d6a36  
    99// Author           : Rodolfo G. Esteves
    1010// Created On       : Sat May 16 13:24:28 2015
    11 // Last Modified By : Andrew Beach
    12 // Last Modified On : Mon Mar  2 16:13:00 2020
    13 // Update Count     : 21
     11// Last Modified By : Peter A. Buhr
     12// Last Modified On : Mon Dec 16 15:03:43 2019
     13// Update Count     : 20
    1414//
    1515
     
    1818#include <string>
    1919
    20 struct CodeLocation;
     20#include "Common/CodeLocation.h"
    2121
    2222namespace LinkageSpec {
  • src/SynTree/Mutator.h

    r6a490b2 rb7d6a36  
    5151        virtual Statement * mutate( CatchStmt * catchStmt ) = 0;
    5252        virtual Statement * mutate( FinallyStmt * catchStmt ) = 0;
    53         virtual Statement * mutate( SuspendStmt * suspendStmt ) = 0;
    5453        virtual Statement * mutate( WaitForStmt * waitforStmt ) = 0;
    5554        virtual Declaration * mutate( WithStmt * withStmt ) = 0;
  • src/SynTree/Statement.cc

    r6a490b2 rb7d6a36  
    420420}
    421421
    422 SuspendStmt::SuspendStmt( const SuspendStmt & other )
    423         : Statement( other )
    424         , then( maybeClone(other.then) )
    425 {}
    426 
    427 SuspendStmt::~SuspendStmt() {
    428         delete then;
    429 }
    430 
    431 void SuspendStmt::print( std::ostream & os, Indenter indent ) const {
    432         os << "Suspend Statement";
    433         switch (type) {
    434                 case None     : os << " with implicit target"; break;
    435                 case Generator: os << " for generator"       ; break;
    436                 case Coroutine: os << " for coroutine"       ; break;
    437         }
    438         os << endl;
    439         indent += 1;
    440 
    441         if(then) {
    442                 os << indent << " with post statement :" << endl;
    443                 then->print( os, indent + 1);
    444         }
    445 }
    446 
    447422WaitForStmt::WaitForStmt() : Statement() {
    448423        timeout.time      = nullptr;
  • src/SynTree/Statement.h

    r6a490b2 rb7d6a36  
    422422};
    423423
    424 class SuspendStmt : public Statement {
    425   public:
    426         CompoundStmt * then = nullptr;
    427         enum Type { None, Coroutine, Generator } type = None;
    428 
    429         SuspendStmt() = default;
    430         SuspendStmt( const SuspendStmt & );
    431         virtual ~SuspendStmt();
    432 
    433         virtual SuspendStmt * clone() const override { return new SuspendStmt( *this ); }
    434         virtual void accept( Visitor & v ) override { v.visit( this ); }
    435         virtual void accept( Visitor & v ) const override { v.visit( this ); }
    436         virtual Statement * acceptMutator( Mutator & m )  override { return m.mutate( this ); }
    437         virtual void print( std::ostream & os, Indenter indent = {} ) const override;
    438 };
    439 
    440424class WaitForStmt : public Statement {
    441425  public:
  • src/SynTree/SynTree.h

    r6a490b2 rb7d6a36  
    5454class CatchStmt;
    5555class FinallyStmt;
    56 class SuspendStmt;
    5756class WaitForStmt;
    5857class WithStmt;
  • src/SynTree/Visitor.h

    r6a490b2 rb7d6a36  
    7878        virtual void visit( FinallyStmt * node ) { visit( const_cast<const FinallyStmt *>(node) ); }
    7979        virtual void visit( const FinallyStmt * finallyStmt ) = 0;
    80         virtual void visit( SuspendStmt * node ) { visit( const_cast<const SuspendStmt *>(node) ); }
    81         virtual void visit( const SuspendStmt * suspendStmt ) = 0;
    8280        virtual void visit( WaitForStmt * node ) { visit( const_cast<const WaitForStmt *>(node) ); }
    8381        virtual void visit( const WaitForStmt * waitforStmt ) = 0;
  • src/main.cc

    r6a490b2 rb7d6a36  
    443443
    444444
    445 static const char optstring[] = ":c:ghlLmNnpdP:S:twW:D:";
     445static const char optstring[] = ":c:ghlLmNnpP:S:twW:D:";
    446446
    447447enum { PreludeDir = 128 };
     
    456456        { "no-prelude", no_argument, nullptr, 'n' },
    457457        { "prototypes", no_argument, nullptr, 'p' },
    458         { "deterministic-out", no_argument, nullptr, 'd' },
    459458        { "print", required_argument, nullptr, 'P' },
    460459        { "prelude-dir", required_argument, nullptr, PreludeDir },
     
    477476        "do not read prelude",                                // -n
    478477        "generate prototypes for prelude functions",            // -p
    479         "don't print output that isn't deterministic",        // -d
    480478        "print",                                              // -P
    481479        "<directory> prelude directory for debug/nodebug",      // no flag
     
    582580                        genproto = true;
    583581                        break;
    584                   case 'd':                                     // don't print non-deterministic output
    585                     deterministic_output = true;
    586                         break;
    587582                  case 'P':                                                                             // print options
    588583                        for ( int i = 0;; i += 1 ) {
  • tests/.expect/alloc-ERROR.txt

    r6a490b2 rb7d6a36  
    1 alloc.cfa:362:1 error: No reasonable alternatives for expression Applying untyped:
     1alloc.cfa:310:1 error: No reasonable alternatives for expression Applying untyped:
    22  Name: ?=?
    33...to:
    4   Name: ip
     4  Name: p
    55  Applying untyped:
    66    Name: realloc
     
    1919
    2020
    21 alloc.cfa:363:1 error: No reasonable alternatives for expression Applying untyped:
     21alloc.cfa:311:1 error: No reasonable alternatives for expression Applying untyped:
    2222  Name: ?=?
    2323...to:
    24   Name: ip
     24  Name: p
     25  Applying untyped:
     26    Name: alloc
     27  ...to:
     28    Name: stp
     29    Applying untyped:
     30      Name: ?*?
     31    ...to:
     32      Name: dim
     33      Sizeof Expression on: Applying untyped:
     34          Name: *?
     35        ...to:
     36          Name: stp
     37
     38
     39
     40
     41alloc.cfa:312:1 error: No reasonable alternatives for expression Applying untyped:
     42  Name: ?=?
     43...to:
     44  Name: p
    2545  Applying untyped:
    2646    Name: memset
     
    3050
    3151
    32 alloc.cfa:364:1 error: No reasonable alternatives for expression Applying untyped:
     52alloc.cfa:313:1 error: No reasonable alternatives for expression Applying untyped:
    3353  Name: ?=?
    3454...to:
    35   Name: ip
     55  Name: p
    3656  Applying untyped:
    3757    Name: memcpy
  • tests/.expect/alloc.txt

    r6a490b2 rb7d6a36  
    23230xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0xefefefef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101
    2424
    25 CFA realloc array alloc
     25CFA resize array alloc
    26260xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef
    27 CFA realloc array alloc
     27CFA resize array alloc
    28280xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101
    29 CFA realloc array alloc
     29CFA resize array alloc
    30300xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef
    31 CFA realloc array alloc, fill
     31CFA resize array alloc
    32320xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede
    33 CFA realloc array alloc, fill
     33CFA resize array alloc
    34340xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef
    35 CFA realloc array alloc, fill
     35CFA resize array alloc, fill
    36360xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0xdeadbeef 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0x1010101 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede 0xdededede
    3737
  • tests/Makefile.am

    r6a490b2 rb7d6a36  
    4141        -quiet @CFA_FLAGS@ \
    4242        -DIN_DIR="${abs_srcdir}/.in/"
    43 
    44 AM_CFAFLAGS = -XCFA --deterministic-out
    4543
    4644# get the desired cfa to test
  • tests/Makefile.in

    r6a490b2 rb7d6a36  
    408408        -DIN_DIR="${abs_srcdir}/.in/"
    409409
    410 AM_CFAFLAGS = -XCFA --deterministic-out
    411410
    412411# get the desired cfa to test
  • tests/alloc.cfa

    r6a490b2 rb7d6a36  
    1010// Created On       : Wed Feb  3 07:56:22 2016
    1111// Last Modified By : Peter A. Buhr
    12 // Last Modified On : Mon Apr  6 21:08:23 2020
    13 // Update Count     : 428
     12// Last Modified On : Sun Feb 16 09:21:13 2020
     13// Update Count     : 405
    1414//
    1515
     
    2828        size_t dim = 10;
    2929        char fill = '\xde';
    30         int * ip, * ip1;
     30        int * p, * p1;
    3131
    3232        // allocation, non-array types
    3333
    34         ip = (int *)malloc( sizeof(*ip) );                                      // C malloc, type unsafe
    35         *ip = 0xdeadbeef;
    36         printf( "C   malloc %#x\n", *ip );
    37         free( ip );
    38 
    39         ip = malloc();                                                                          // CFA malloc, type safe
    40         *ip = 0xdeadbeef;
    41         printf( "CFA malloc %#x\n", *ip );
    42         free( ip );
    43 
    44         ip = alloc();                                                                           // CFA alloc, type safe
    45         *ip = 0xdeadbeef;
    46         printf( "CFA alloc %#x\n", *ip );
    47         free( ip );
    48 
    49         ip = alloc_set( fill );                                                         // CFA alloc, fill
    50         printf( "CFA alloc, fill %08x\n", *ip );
    51         free( ip );
    52 
    53         ip = alloc_set( 3 );                                                            // CFA alloc, fill
    54         printf( "CFA alloc, fill %d\n", *ip );
    55         free( ip );
     34        p = (int *)malloc( sizeof(*p) );                                        // C malloc, type unsafe
     35        *p = 0xdeadbeef;
     36        printf( "C   malloc %#x\n", *p );
     37        free( p );
     38
     39        p = malloc();                                       // CFA malloc, type safe
     40        *p = 0xdeadbeef;
     41        printf( "CFA malloc %#x\n", *p );
     42        free( p );
     43
     44        p = alloc();                                        // CFA alloc, type safe
     45        *p = 0xdeadbeef;
     46        printf( "CFA alloc %#x\n", *p );
     47        free( p );
     48
     49        p = alloc_set( fill );                                                          // CFA alloc, fill
     50        printf( "CFA alloc, fill %08x\n", *p );
     51        free( p );
     52
     53        p = alloc_set( 3 );                                                                     // CFA alloc, fill
     54        printf( "CFA alloc, fill %d\n", *p );
     55        free( p );
    5656
    5757
     
    5959        printf( "\n" );
    6060
    61         ip = (int *)calloc( dim, sizeof( *ip ) );                       // C array calloc, type unsafe
     61        p = (int *)calloc( dim, sizeof( *p ) );                         // C array calloc, type unsafe
    6262        printf( "C   array calloc, fill 0\n" );
    63         for ( i; dim ) { printf( "%#x ", ip[i] ); }
    64         printf( "\n" );
    65         free( ip );
    66 
    67         ip = calloc( dim );                                                                     // CFA array calloc, type safe
     63        for ( i; dim ) { printf( "%#x ", p[i] ); }
     64        printf( "\n" );
     65        free( p );
     66
     67        p = calloc( dim );                                  // CFA array calloc, type safe
    6868        printf( "CFA array calloc, fill 0\n" );
    69         for ( i; dim ) { printf( "%#x ", ip[i] ); }
    70         printf( "\n" );
    71         free( ip );
    72 
    73         ip = alloc( dim );                                                                      // CFA array alloc, type safe
    74         for ( i; dim ) { ip[i] = 0xdeadbeef; }
     69        for ( i; dim ) { printf( "%#x ", p[i] ); }
     70        printf( "\n" );
     71        free( p );
     72
     73        p = alloc( dim );                                   // CFA array alloc, type safe
     74        for ( i; dim ) { p[i] = 0xdeadbeef; }
    7575        printf( "CFA array alloc, no fill\n" );
    76         for ( i; dim ) { printf( "%#x ", ip[i] ); }
    77         printf( "\n" );
    78         free( ip );
    79 
    80         ip = alloc_set( 2 * dim, fill );                                        // CFA array alloc, fill
     76        for ( i; dim ) { printf( "%#x ", p[i] ); }
     77        printf( "\n" );
     78        free( p );
     79
     80        p = alloc_set( 2 * dim, fill );                                         // CFA array alloc, fill
    8181        printf( "CFA array alloc, fill %#hhx\n", fill );
    82         for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
    83         printf( "\n" );
    84         free( ip );
    85 
    86         ip = alloc_set( 2 * dim, 0xdeadbeef );                          // CFA array alloc, fill
     82        for ( i; 2 * dim ) { printf( "%#x ", p[i] ); }
     83        printf( "\n" );
     84        free( p );
     85
     86        p = alloc_set( 2 * dim, 0xdeadbeef );                           // CFA array alloc, fill
    8787        printf( "CFA array alloc, fill %#hhx\n", 0xdeadbeef );
    88         for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
    89         printf( "\n" );
    90         // do not free
    91 
    92         ip1 = alloc_set( 2 * dim, ip );                                         // CFA array alloc, fill
     88        for ( i; 2 * dim ) { printf( "%#x ", p[i] ); }
     89        printf( "\n" );
     90        // do not free
     91
     92        p1 = alloc_set( 2 * dim, p );                                           // CFA array alloc, fill
    9393        printf( "CFA array alloc, fill from array\n" );
    94         for ( i; 2 * dim ) { printf( "%#x %#x, ", ip[i], ip1[i] ); }
    95         free( ip1 );
    96         printf( "\n" );
    97 
    98 
    99         // realloc, non-array types
    100         printf( "\n" );
    101 
    102         ip = (int *)realloc( ip, dim * sizeof(*ip) );           // C realloc
     94        for ( i; 2 * dim ) { printf( "%#x %#x, ", p[i], p1[i] ); }
     95        free( p1 );
     96        printf( "\n" );
     97
     98
     99        // resize, non-array types
     100        printf( "\n" );
     101
     102        p = (int *)realloc( p, dim * sizeof(*p) );                      // C realloc
    103103        printf( "C realloc\n" );
    104         for ( i; dim ) { printf( "%#x ", ip[i] ); }
    105         printf( "\n" );
    106         // do not free
    107 
    108         ip = realloc( ip, 2 * dim * sizeof(*ip) );                      // CFA realloc
    109         for ( i; dim ~ 2 * dim ) { ip[i] = 0x1010101; }
     104        for ( i; dim ) { printf( "%#x ", p[i] ); }
     105        printf( "\n" );
     106        // do not free
     107
     108        p = realloc( p, 2 * dim * sizeof(*p) );             // CFA realloc
     109        for ( i; dim ~ 2 * dim ) { p[i] = 0x1010101; }
    110110        printf( "CFA realloc\n" );
    111         for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
    112         printf( "\n" );
    113         // do not free
    114 
    115 
    116         // realloc, array types
    117         printf( "\n" );
    118 
    119         ip = alloc( ip, dim );                                                          // CFA realloc array alloc
    120         for ( i; dim ) { ip[i] = 0xdeadbeef; }
    121         printf( "CFA realloc array alloc\n" );
    122         for ( i; dim ) { printf( "%#x ", ip[i] ); }
    123         printf( "\n" );
    124         // do not free
    125 
    126         ip = alloc( ip, 2 * dim );                                                      // CFA realloc array alloc
    127         for ( i; dim ~ 2 * dim ) { ip[i] = 0x1010101; }         // fill upper part
    128         printf( "CFA realloc array alloc\n" );
    129         for ( i; 2 * dim ) { printf( "%#x ", ip[i] ); }
    130         printf( "\n" );
    131         // do not free
    132 
    133         ip = alloc( ip, dim );                                                          // CFA realloc array alloc
    134         printf( "CFA realloc array alloc\n" );
    135         for ( i; dim ) { printf( "%#x ", ip[i] ); }
    136         printf( "\n" );
    137         // do not free
    138 
    139         ip = alloc_set( ip, 3 * dim, fill );                            // CFA realloc array alloc, fill
    140         printf( "CFA realloc array alloc, fill\n" );
    141         for ( i; 3 * dim ) { printf( "%#x ", ip[i] ); }
    142         printf( "\n" );
    143         // do not free
    144 
    145         ip = alloc_set( ip, dim, fill );                                        // CFA realloc array alloc, fill
    146         printf( "CFA realloc array alloc, fill\n" );
    147         for ( i; dim ) { printf( "%#x ", ip[i] ); }
    148         printf( "\n" );
    149         // do not free
    150 
    151         ip = alloc_set( ip, 3 * dim, fill );                            // CFA realloc array alloc, fill
    152         printf( "CFA realloc array alloc, fill\n" );
    153         for ( i; 3 * dim ) { printf( "%#x ", ip[i] ); }
    154         printf( "\n" );
    155         // do not free
    156 #if 0 // FIX ME
    157         ip = alloc_set( ip, 5 * dim, 5 );                                       // CFA realloc array alloc, 5
    158         printf( "CFA realloc array alloc, 5\n" );
    159         for ( i; 5 * dim ) { printf( "%#x ", ip[i] ); }
    160         printf( "\n" );
    161         // do not free
    162 
    163         ip = alloc_set( ip, dim, 5 );                                           // CFA realloc array alloc, 5
    164         printf( "CFA realloc array alloc, 5\n" );
    165         for ( i; dim ) { printf( "%#x ", ip[i] ); }
    166         printf( "\n" );
    167         // do not free
    168 
    169         ip = alloc_set( ip, 5 * dim, 5 );                                       // CFA realloc array alloc, 5
    170         printf( "CFA realloc array alloc, 5\n" );
    171         for ( i; 5 * dim ) { printf( "%#x ", ip[i] ); }
    172         printf( "\n" );
    173 #endif // 0
    174         free( ip );
    175 
    176         // resize, non-array types
    177 
    178         struct S {
    179                 int a[5];
    180         };
    181 
    182     ip = alloc();
    183         *ip = 5;
    184     double * dp = alloc( ip );
    185         *dp = 5.5;
    186     S * sp = alloc( dp );
    187         *sp = (S){ {0, 1, 2, 3, 4} };
    188     ip = alloc( sp );
    189         *ip = 3;
    190     free( ip );
     111        for ( i; 2 * dim ) { printf( "%#x ", p[i] ); }
     112        printf( "\n" );
     113        // do not free
    191114
    192115
    193116        // resize, array types
    194 
    195     ip = alloc( 5 );
    196         for ( i; 5 ) { ip[i] = 5; }
    197     dp = alloc( ip, 5 );
    198         for ( i; 5 ) { dp[i] = 5.5; }
    199     sp = alloc( dp, 5 );
    200         for ( i; 5 ) { sp[i] = (S){ {0, 1, 2, 3, 4} }; }
    201     ip = alloc( sp, 3 );
    202         for ( i; 3 ) { ip[i] = 3; }
    203     ip = alloc( ip, 7 );
    204         for ( i; 7 ) { ip[i] = 7; }
    205     ip = alloc( ip, 7, false );
    206         for ( i; 7 ) { ip[i] = 7; }
    207     free( ip );
     117        printf( "\n" );
     118
     119        p = alloc( p, dim );                                // CFA resize array alloc
     120        for ( i; dim ) { p[i] = 0xdeadbeef; }
     121        printf( "CFA resize array alloc\n" );
     122        for ( i; dim ) { printf( "%#x ", p[i] ); }
     123        printf( "\n" );
     124        // do not free
     125
     126        p = alloc( p, 2 * dim );                            // CFA resize array alloc
     127        for ( i; dim ~ 2 * dim ) { p[i] = 0x1010101; }          // fill upper part
     128        printf( "CFA resize array alloc\n" );
     129        for ( i; 2 * dim ) { printf( "%#x ", p[i] ); }
     130        printf( "\n" );
     131        // do not free
     132
     133        p = alloc( p, dim );                                // CFA resize array alloc
     134        printf( "CFA resize array alloc\n" );
     135        for ( i; dim ) { printf( "%#x ", p[i] ); }
     136        printf( "\n" );
     137        // do not free
     138
     139        p = alloc_set( p, 3 * dim, fill );                                      // CFA resize array alloc, fill
     140        printf( "CFA resize array alloc\n" );
     141        for ( i; 3 * dim ) { printf( "%#x ", p[i] ); }
     142        printf( "\n" );
     143        // do not free
     144
     145        p = alloc_set( p, dim, fill );                                          // CFA resize array alloc, fill
     146        printf( "CFA resize array alloc\n" );
     147        for ( i; dim ) { printf( "%#x ", p[i] ); }
     148        printf( "\n" );
     149        // do not free
     150
     151        p = alloc_set( p, 3 * dim, fill );                                      // CFA resize array alloc, fill
     152        printf( "CFA resize array alloc, fill\n" );
     153        for ( i; 3 * dim ) { printf( "%#x ", p[i] );; }
     154        printf( "\n" );
     155        free( p );
    208156
    209157
     
    220168        free( stp );
    221169
    222         stp = &(*memalign( Alignment )){ 42, 42.5 };            // CFA memalign
     170        stp = &(*memalign( Alignment )){ 42, 42.5 };          // CFA memalign
    223171        assert( (uintptr_t)stp % Alignment == 0 );
    224172        printf( "CFA memalign %d %g\n", stp->x, stp->y );
     
    352300        free( fp - 1 );
    353301
    354         ip = foo( bar( baz( malloc(), 0 ), 0 ), 0 );
    355         *ip = 0xdeadbeef;
    356         printf( "CFA deep malloc %#x\n", *ip );
    357         free( ip );
     302        p = foo( bar( baz( malloc(), 0 ), 0 ), 0 );
     303        *p = 0xdeadbeef;
     304        printf( "CFA deep malloc %#x\n", *p );
     305        free( p );
    358306
    359307#ifdef ERR1
    360308        stp = malloc();
    361309        printf( "\nSHOULD FAIL\n" );
    362         ip = realloc( stp, dim * sizeof( *stp ) );
    363         ip = memset( stp, 10 );
    364         ip = memcpy( &st1, &st );
     310        p = realloc( stp, dim * sizeof( *stp ) );
     311        p = alloc( stp, dim * sizeof( *stp ) );
     312        p = memset( stp, 10 );
     313        p = memcpy( &st1, &st );
    365314#endif
    366315} // main
  • tests/concurrent/.expect/monitor.txt

    r6a490b2 rb7d6a36  
    1 3000000
     14000000
  • tests/concurrent/coroutineYield.cfa

    r6a490b2 rb7d6a36  
    3333                        sout | "Coroutine 2";
    3434                #endif
    35                 suspend;
     35                suspend();
    3636        }
    3737}
  • tests/concurrent/monitor.cfa

    r6a490b2 rb7d6a36  
    2929
    3030void main( MyThread & this ) {
    31         for(int i = 0; i < 750_000; i++) {
     31        for(int i = 0; i < 1_000_000; i++) {
    3232                increment( global );
    3333        }
  • tests/concurrent/multi-monitor.cfa

    r6a490b2 rb7d6a36  
    1111
    1212void increment( monitor_t & mutex p1, monitor_t & mutex p2, int & value ) {
    13         assert(active_thread() == get_monitor(p1)->owner);
    14         assert(active_thread() == get_monitor(p2)->owner);
    1513        value += 1;
    16         assert(active_thread() == get_monitor(p1)->owner);
    17         assert(active_thread() == get_monitor(p2)->owner);
    1814}
    1915
  • tests/concurrent/signal/block.cfa

    r6a490b2 rb7d6a36  
    3333
    3434monitor global_data_t {
    35         $thread * last_thread;
    36         $thread * last_signaller;
     35        thread_desc * last_thread;
     36        thread_desc * last_signaller;
    3737};
    3838
     
    8282        if( !is_empty( cond ) ) {
    8383
    84                 $thread * next = front( cond );
     84                thread_desc * next = front( cond );
    8585
    8686                if( ! signal_block( cond ) ) {
  • tests/coroutine/.expect/fmtLines.txt

    r6a490b2 rb7d6a36  
    4848{                                                         // f  or n  ewli 
    4949ne c  hara  cter  s                                     su 
    50 spen  d;                                        i  f (   fmt. 
    51 ch !  = '\  n' )   bre  ak;      
    52         //   igno  re n  ewli  ne                
    53                 }   // f  or                            so  ut | 
    54  fmt  .ch;                                                      /  / pr 
    55 int   char  acte  r                       } // 
    56  for                    s  out   | "    ";       
    57                                                         /  / pr  int   bloc 
    58 k se  para  tor         } /  / fo 
    59 r               s  out   | nl  ;                                                         
    60                 //   pri  nt g  roup   sep 
    61 arat  or        }   //   for}   //  
    62 main  void   prt  ( Fo  rmat 
    63  & f  mt,   char   ch   ) {   
    64    f  mt.c  h =   ch;      r 
    65 esum  e( f  mt )  ;} /  / pr 
    66 tint   mai  n()   {     Fo  rmat 
    67  fmt  ; ch  ar c  h;    f  or ( 
    68  ;;   ) {               sin   | c  h;            
    69                                                                   // r  ead   one  
    70 char  acte  r       if (   eof 
    71 ( si  n )   ) br  eak;                                   
    72                         /  / eo  f ?            prt  ( fm 
    73 t, c  h );      } /  / fo  r} / 
    74 / ma  in//   Loc  al V  aria 
    75 bles  : //  // t  ab-w  idth 
    76 : 4   ////   com  pile  -com 
    77 mand  : "c  fa f  mtLi  nes. 
    78 cfa"   ///  / En  d: /  /
     50spen  d();                                      if   ( fm 
     51t.ch   !=   '\n'   ) b  reak 
     52;               /  / ig  nore   new  line 
     53                                  } //   for                              sout 
     54 | f  mt.c  h;                                                  //  
     55prin  t ch  arac  ter                   }  
     56// f  or                        sou  t |   "  " 
     57;                                                               //   prin  t bl 
     58ock   sepa  rato  r             }   //  
     59for             sou  t |   nl;                                   
     60                                  // p  rint   gro  up s 
     61epar  ator      } /  / fo  r} / 
     62/ ma  invo  id p  rt(   Form 
     63at &   fmt  , ch  ar c  h )   
     64{      fmt  .ch   = ch  ;    
     65 res  ume(   fmt   );}   //  
     66prti  nt m  ain(  ) {     Form 
     67at f  mt;         char   ch;    for 
     68 ( ;  ; )   {           s  in |   ch; 
     69                                                                                //   rea  d on 
     70e ch  arac  ter     if   ( e 
     71of(   sin   ) )   brea  k;               
     72                                        //   eof   ?            p  rt(  
     73fmt,   ch   );  }   //   for} 
     74 //   main  // L  ocal   Var 
     75iabl  es:   ////   tab  -wid 
     76th:   4 //  // c  ompi  le-c 
     77omma  nd:   "cfa   fmt  Line 
     78s.cf  a" /  ///   End:   //
  • tests/coroutine/.in/fmtLines.txt

    r6a490b2 rb7d6a36  
    3535                        for ( fmt.b = 0; fmt.b < 4; fmt.b += 1 ) {      // blocks of 4 characters
    3636                                for ( ;; ) {                                                    // for newline characters
    37                                         suspend;
     37                                        suspend();
    3838                                        if ( fmt.ch != '\n' ) break;            // ignore newline
    3939                                } // for
  • tests/coroutine/cntparens.cfa

    r6a490b2 rb7d6a36  
    1 //
     1// 
    22// Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
    33//
    44// The contents of this file are covered under the licence agreement in the
    55// file "LICENCE" distributed with Cforall.
    6 //
     6// 
    77// cntparens.cfa -- match left/right parenthesis
    8 //
     8// 
    99// Author           : Peter A. Buhr
    1010// Created On       : Sat Apr 20 11:04:45 2019
     
    1212// Last Modified On : Sat Apr 20 11:06:21 2019
    1313// Update Count     : 1
    14 //
     14// 
    1515
    1616#include <fstream.hfa>
     
    2626void main( CntParens & cpns ) with( cpns ) {
    2727        for ( ; ch == '('; cnt += 1 ) {                                         // left parenthesis
    28                 suspend;
     28                suspend();
    2929        }
    3030        for ( ; ch == ')' && cnt > 1; cnt -= 1 ) {                      // right parenthesis
    31                 suspend;
     31                suspend();
    3232        }
    3333        status = ch == ')' ? Match : Error;
    3434} // main
    35 
     35       
    3636void ?{}( CntParens & cpns ) with( cpns ) { status = Cont; cnt = 0; }
    3737
  • tests/coroutine/devicedriver.cfa

    r6a490b2 rb7d6a36  
    1 //
     1// 
    22// Cforall Version 1.0.0 Copyright (C) 2017 University of Waterloo
    33//
    44// The contents of this file are covered under the licence agreement in the
    55// file "LICENCE" distributed with Cforall.
    6 //
    7 // devicedriver.cfa --
    8 //
     6// 
     7// devicedriver.cfa -- 
     8// 
    99// Author           : Peter A. Buhr
    1010// Created On       : Sat Mar 16 15:30:34 2019
     
    1212// Last Modified On : Sat Apr 20 09:07:19 2019
    1313// Update Count     : 90
    14 //
     14// 
    1515
    1616#include <fstream.hfa>
     
    2929
    3030void checkCRC( Driver & d, unsigned int sum ) with( d ) {
    31         suspend;
     31        suspend();
    3232        unsigned short int crc = byte << 8;                                     // sign extension over written
    33         suspend;
     33        suspend();
    3434        // prevent sign extension for signed char
    3535        status = (crc | (unsigned char)byte) == sum ? MSG : ECRC;
     
    4141                status = CONT;
    4242                unsigned int lnth = 0, sum = 0;
    43                 while ( byte != STX ) suspend;
     43                while ( byte != STX ) suspend();
    4444          emsg: for () {
    45                         suspend;
     45                        suspend();
    4646                        choose ( byte ) {                                                       // process byte
    4747                          case STX:
    48                                 status = ESTX; suspend; continue msg;
     48                                status = ESTX; suspend(); continue msg;
    4949                          case ETX:
    5050                                break emsg;
    5151                          case ESC:
    52                                 suspend;
     52                                suspend();
    5353                        } // choose
    5454                        if ( lnth >= MaxMsg ) {                                         // buffer full ?
    55                                 status = ELNTH; suspend; continue msg;
     55                                status = ELNTH; suspend(); continue msg;
    5656                        } // if
    5757                        msg[lnth++] = byte;
     
    6060                msg[lnth] = '\0';                                                               // terminate string
    6161                checkCRC( d, sum );                                                             // refactor CRC check
    62                 suspend;
     62                suspend();
    6363        } // for
    6464} // main
  • tests/coroutine/fibonacci.cfa

    r6a490b2 rb7d6a36  
    2222        int fn1, fn2;                                                                           // retained between resumes
    2323        fn = 0;  fn1 = fn;                                                                      // 1st case
    24         suspend;                                                                                        // restart last resume
     24        suspend();                                                                                      // restart last resume
    2525        fn = 1;  fn2 = fn1;  fn1 = fn;                                          // 2nd case
    26         suspend;                                                                                        // restart last resume
     26        suspend();                                                                                      // restart last resume
    2727        for () {
    2828                fn = fn1 + fn2;  fn2 = fn1;  fn1 = fn;                  // general case
    29                 suspend;                                                                                // restart last resume
     29                suspend();                                                                              // restart last resume
    3030        } // for
    3131}
  • tests/coroutine/fibonacci_1.cfa

    r6a490b2 rb7d6a36  
    1212// Last Modified On : Thu Mar 21 08:10:45 2019
    1313// Update Count     : 25
    14 //
     14// 
    1515
    1616#include <fstream.hfa>
     
    2323        [fn1, fn] = [0, 1];                                                                     // precompute first two states
    2424        for () {
    25                 suspend;                                                                                // restart last resume
     25                suspend();                                                                              // restart last resume
    2626                [fn1, fn] = [fn, fn1 + fn];                                             // general case
    2727        } // for
  • tests/coroutine/fmtLines.cfa

    r6a490b2 rb7d6a36  
    2727                        for ( b = 0; b < 4; b += 1 ) {                          // blocks of 4 characters
    2828                                for () {                                                                // for newline characters
    29                                         suspend;
     29                                        suspend();
    3030                                  if ( ch != '\n' ) break;                              // ignore newline
    3131                                } // for
  • tests/coroutine/raii.cfa

    r6a490b2 rb7d6a36  
    3939        Raii raii = { "Coroutine" };
    4040        sout | "Before Suspend";
    41         suspend;
     41        suspend();
    4242        sout | "After Suspend";
    4343}
  • tests/coroutine/runningTotal.cfa

    r6a490b2 rb7d6a36  
    2525void update( RunTotal & rntl, int input ) with( rntl ) { // helper
    2626        total += input;                                                                         // remember between activations
    27         suspend;                                                                                        // inactivate on stack
     27        suspend();                                                                                      // inactivate on stack
    2828}
    2929
  • tests/coroutine/suspend_then.cfa

    r6a490b2 rb7d6a36  
    1515
    1616#include <fstream.hfa>
     17#include <coroutine.hfa>
    1718
    18 generator Fibonacci {
    19         int fn;                                                                         // used for communication
    20         int fn1, fn2;                                                           // retained between resumes
    21 };
     19void then() {
     20        sout | "Then!";
     21}
     22
     23coroutine Fibonacci { int fn; };                                                // used for communication
    2224
    2325void main( Fibonacci & fib ) with( fib ) {                              // called on first resume
     26        int fn1, fn2;                                                           // retained between resumes
    2427        fn = 0;  fn1 = fn;                                                      // 1st case
    25         suspend { sout | "Then!"; }                                             // restart last resume
     28        suspend_then(then);                                                     // restart last resume
    2629        fn = 1;  fn2 = fn1;  fn1 = fn;                                  // 2nd case
    27         suspend { sout | "Then!"; }                                             // restart last resume
     30        suspend_then(then);                                                     // restart last resume
    2831        for () {
    2932                fn = fn1 + fn2;  fn2 = fn1;  fn1 = fn;                  // general case
    30                 suspend { sout | "Then!"; }                                     // restart last resume
     33                suspend_then(then);                                             // restart last resume
    3134        } // for
    3235}
  • tests/errors/.expect/completeType.txt

    r6a490b2 rb7d6a36  
    2727    void
    2828  )
    29   Environment: -> instance of struct A with body 0 (no widening)
     29  Environment:( _83_4_DT ) -> instance of struct A with body 0 (no widening)
    3030
    3131
     
    5050    void
    5151  )
    52   Environment: -> instance of struct B with body 1 (no widening)
     52  Environment:( _83_4_DT ) -> instance of struct B with body 1 (no widening)
    5353
    5454
     
    127127          void
    128128        )
    129         Environment: -> instance of type T (not function type) (no widening)
     129        Environment:( _102_0_T ) -> instance of type T (not function type) (no widening)
    130130
    131131      Could not satisfy assertion:
    132132?=?: pointer to function
    133133        ... with parameters
    134           reference to instance of type _104_0_T (not function type)
    135           instance of type _104_0_T (not function type)
     134          reference to instance of type _102_0_T (not function type)
     135          instance of type _102_0_T (not function type)
    136136        ... returning
    137           _retval__operator_assign: instance of type _104_0_T (not function type)
     137          _retval__operator_assign: instance of type _102_0_T (not function type)
    138138          ... with attributes:
    139139            Attribute with name: unused
  • tests/manipulatorsOutput1.cfa

    r6a490b2 rb7d6a36  
    77// Created On       : Sat Jun  8 18:04:11 2019
    88// Last Modified By : Peter A. Buhr
    9 // Last Modified On : Fri May  1 11:51:44 2020
    10 // Update Count     : 9
     9// Last Modified On : Mon Jun 10 12:37:28 2019
     10// Update Count     : 8
    1111//
    1212
     
    1717        signed char sc = -12;
    1818        printf( "%hhd %2hhd %5.2hhd %-5.2hhd %hho %#hho %hhx %#hhx %#8hhx %#8.10hhx %#8.3hhX %+-8.3hhd %08hhd\n", sc, sc, sc, sc, sc, sc, sc, sc, sc, sc, sc, sc, sc );
    19         sout | sc | wd(2,sc) | wd(5,2,sc) | left(wd(5,2,sc)) | nobase(oct(sc)) | oct(sc) | nonl;
    20         sout | nobase(hex(sc)) | hex(sc) | wd(8,hex(sc)) | wd(8,10,hex(sc)) | upcase(wd(8,3,hex(sc))) | nonl;
    21         sout | left(sign(upcase(wd(8,3,sc)))) | pad0(wd(8,sc));
     19        sout | sc | wd(2,sc) | wd(5,2,sc) | left(wd(5,2,sc)) | nobase(oct(sc)) | oct(sc) | nobase(hex(sc)) | hex(sc) | wd(8,hex(sc)) | wd(8,10,hex(sc)) | upcase(wd(8,3,hex(sc))) | left(sign(upcase(wd(8,3,sc)))) | pad0(wd(8,sc));
    2220
    2321        sout | "unsigned char";
    2422        unsigned char usc = 12;
    2523        printf( "%hhu %2hhu %5.2hhu %-5.2hhu %hho %#hho %hhx %#hhx %#8hhx %#8.10hhx %#8.3hhX %-8.3hhu %08hhu\n", usc, usc, usc, usc, usc, usc, usc, usc, usc, usc, usc, usc, usc );
    26         sout | usc | wd(2,usc) | wd(5,2,usc) | left(wd(5,2,usc)) | nobase(oct(usc)) | oct(usc) | nonl;
    27         sout | nobase(hex(usc)) | hex(usc) | wd(8,hex(usc)) | wd(8,10,hex(usc)) | upcase(wd(8,3,hex(usc))) | nonl;
    28         sout | left(upcase(wd(8,3,usc))) | pad0(wd(8,usc));
     24        sout | usc | wd(2,usc) | wd(5,2,usc) | left(wd(5,2,usc)) | nobase(oct(usc)) | oct(usc) | nobase(hex(usc)) | hex(usc) | wd(8,hex(usc)) | wd(8,10,hex(usc)) | upcase(wd(8,3,hex(usc))) | left(upcase(wd(8,3,usc))) | pad0(wd(8,usc));
    2925
    3026        sout | "signed short int";
    3127        signed short int si = -12;
    3228        printf( "%hd %2hd %5.2hd %-5.2hd %ho %#ho %hx %#hx %#8hx %#8.10hx %#8.3hX %+-8.3hd %08hd\n", si, si, si, si, si, si, si, si, si, si, si, si, si );
    33         sout | si | wd(2,si) | wd(5,2,si) | left(wd(5,2,si)) | nobase(oct(si)) | oct(si) | nonl;
    34         sout | nobase(hex(si)) | hex(si) | wd(8,hex(si)) | wd(8,10,hex(si)) | upcase(wd(8,3,hex(si))) | nonl;
    35         sout | left(sign(upcase(wd(8,3,si)))) | pad0(wd(8,si));
     29        sout | si | wd(2,si) | wd(5,2,si) | left(wd(5,2,si)) | nobase(oct(si)) | oct(si) | nobase(hex(si)) | hex(si) | wd(8,hex(si)) | wd(8,10,hex(si)) | upcase(wd(8,3,hex(si))) | left(sign(upcase(wd(8,3,si)))) | pad0(wd(8,si));
    3630
    3731        sout | "unsigned short int";
    3832        unsigned short int usi = 12;
    3933        printf( "%hu %2hu %5.2hu %-5.2hu %ho %#ho %hx %#hx %#8hx %#8.10hx %#8.3hX %-8.3hu %08hu\n", usi, usi, usi, usi, usi, usi, usi, usi, usi, usi, usi, usi, usi );
    40         sout | usi | wd(2,usi) | wd(5,2,usi) | left(wd(5,2,usi)) | nobase(oct(usi)) | oct(usi) | nonl;
    41         sout | nobase(hex(usi)) | hex(usi) | wd(8,hex(usi)) | wd(8,10,hex(usi)) | upcase(wd(8,3,hex(usi))) | nonl;
    42         sout | left(upcase(wd(8,3,usi))) | pad0(wd(8,usi));
     34        sout | usi | wd(2,usi) | wd(5,2,usi) | left(wd(5,2,usi)) | nobase(oct(usi)) | oct(usi) | nobase(hex(usi)) | hex(usi) | wd(8,hex(usi)) | wd(8,10,hex(usi)) | upcase(wd(8,3,hex(usi))) | left(upcase(wd(8,3,usi))) | pad0(wd(8,usi));
    4335
    4436        sout | "signed int";
    4537        signed int i = -12;
    4638        printf( "%d %2d %5.2d %-5.2d %o %#o %x %#x %#8x %#8.10x %#8.3X %+-8.3d %08d\n", i, i, i, i, i, i, i, i, i, i, i, i, i );
    47         sout | i | wd(2,i) | wd(5,2,i) | left(wd(5,2,i)) | nobase(oct(i)) | oct(i) | nonl;
    48         sout | nobase(hex(i)) | hex(i) | wd(8,hex(i)) | wd(8,10,hex(i)) | upcase(wd(8,3,hex(i))) | nonl;
    49         sout | left(sign(upcase(wd(8,3,i)))) | pad0(wd(8,i));
     39        sout | i | wd(2,i) | wd(5,2,i) | left(wd(5,2,i)) | nobase(oct(i)) | oct(i) | nobase(hex(i)) | hex(i) | wd(8,hex(i)) | wd(8,10,hex(i)) | upcase(wd(8,3,hex(i))) | left(sign(upcase(wd(8,3,i)))) | pad0(wd(8,i));
    5040
    5141        sout | "unsigned int";
    5242        unsigned int ui = 12;
    5343        printf( "%u %2u %5.2u %-5.2u %o %#o %x %#x %#8x %#8.10x %#8.3X %-8.3u %08u\n", ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui, ui );
    54         sout | ui | wd(2,ui) | wd(5,2,ui) | left(wd(5,2,ui)) | nobase(oct(ui)) | oct(ui) | nonl;
    55         sout | nobase(hex(ui)) | hex(ui) | wd(8,hex(ui)) | wd(8,10,hex(ui)) | upcase(wd(8,3,hex(ui))) | nonl;
    56         sout | left(upcase(wd(8,3,ui))) | pad0(wd(8,ui));
     44        sout | ui | wd(2,ui) | wd(5,2,ui) | left(wd(5,2,ui)) | nobase(oct(ui)) | oct(ui) | nobase(hex(ui)) | hex(ui) | wd(8,hex(ui)) | wd(8,10,hex(ui)) | upcase(wd(8,3,hex(ui))) | left(upcase(wd(8,3,ui))) | pad0(wd(8,ui));
    5745
    5846        sout | "signed long long int";
    5947        signed long long int lli = -12;
    6048        printf( "%lld %2lld %5.2lld %-5.2lld %llo %#llo %llx %#llx %#8llx %#8.10llx %#8.3llX %+-8.3lld %08lld\n", lli, lli, lli, lli, lli, lli, lli, lli, lli, lli, lli, lli, lli );
    61         sout | lli | wd(2,lli) | wd(5,2,lli) | left(wd(5,2,lli)) | nobase(oct(lli)) | oct(lli) | nonl;
    62         sout | nobase(hex(lli)) | hex(lli) | wd(8,hex(lli)) | wd(8,10,hex(lli)) | upcase(wd(8,3,hex(lli))) | nonl;
    63         sout | left(sign(upcase(wd(8,3,lli)))) | pad0(wd(8,lli));
     49        sout | lli | wd(2,lli) | wd(5,2,lli) | left(wd(5,2,lli)) | nobase(oct(lli)) | oct(lli) | nobase(hex(lli)) | hex(lli) | wd(8,hex(lli)) | wd(8,10,hex(lli)) | upcase(wd(8,3,hex(lli))) | left(sign(upcase(wd(8,3,lli)))) | pad0(wd(8,lli));
    6450
    6551        sout | "unsigned long long int";
    6652        unsigned long long int ulli = 12;
    6753        printf( "%llu %2llu %5.2llu %-5.2llu %llo %#llo %llx %#llx %#8llx %#8.10llx %#8.3llX %-8.3llu %08llu\n", ulli, ulli, ulli, ulli, ulli, ulli, ulli, ulli, ulli, ulli, ulli, ulli, ulli );
    68         sout | ulli | wd(2,ulli) | wd(5,2,ulli) | left(wd(5,2,ulli)) | nobase(oct(ulli)) | oct(ulli) | nonl;
    69         sout | nobase(hex(ulli)) | hex(ulli) | wd(8,hex(ulli)) | wd(8,10,hex(ulli)) | upcase(wd(8,3,hex(ulli))) | nonl;
    70         sout | left(upcase(wd(8,3,ulli))) | pad0(wd(8,ulli));
     54        sout | ulli | wd(2,ulli) | wd(5,2,ulli) | left(wd(5,2,ulli)) | nobase(oct(ulli)) | oct(ulli) | nobase(hex(ulli)) | hex(ulli) | wd(8,hex(ulli)) | wd(8,10,hex(ulli)) | upcase(wd(8,3,hex(ulli))) | left(upcase(wd(8,3,ulli))) | pad0(wd(8,ulli));
    7155
    7256        sout | nl | "binary integral";
    73         sout | bin(0) | bin(13) | upcase(bin(13)) | nobase(bin(13)) | left(wd(8,bin(13))) | wd(8,bin(13)) | nonl;
    74         sout | pad0(left(wd(8,bin(13)))) | pad0(wd(8,bin(13))) | pad0(wd(8,10,bin(13))) | pad0(wd(8,6,bin(13)));
     57        sout | bin(0) | bin(13) | upcase(bin(13)) | nobase(bin(13)) | left(wd(8,bin(13))) | wd(8,bin(13)) | pad0(left(wd(8,bin(13)))) | pad0(wd(8,bin(13))) | pad0(wd(8,10,bin(13))) | pad0(wd(8,6,bin(13)));
    7558
    7659
     
    7962        printf( "%g  %8g %#8g %g %8g %8.0g %#8.0g %8.2g %#8.2g %-8.2g %-8.2g %-#8.2g %-+8.2g %-+#8.2g %08.2g %8.2E %8.2a %#8.2A %#8.2e\n",
    8063                    0.0,3.0F,3.0F, f,  f,    f,     f,    f,     f,  3.0F,      f,      f,      f,       f,     f,    f,    f,     f,     f );
    81         sout | 0.0 | wd(8, 3.0F) | nodp(wd(8, 3.0F)) | f | wd(8, f) | ws(8,0, f) | nodp(ws(8,0, f)) | ws(8,2, f) | nodp(ws(8,2, f)) | nonl;
    82         sout | left(ws(8,2, 3.0F)) | left(ws(8,2, f)) | left(nodp(ws(8,2, f))) | left(sign(ws(8,2, f))) | left(sign(nodp(ws(8,2, f)))) | nonl;
    83         sout | pad0(ws(8,2, f)) | upcase(wd(8,2, sci(f))) | wd(8,2, hex(f)) | upcase(wd(8,2, hex(f))) | nodp(wd(8,2, sci(f)));
     64        sout | 0.0 | wd(8, 3.0F) | nodp(wd(8, 3.0F)) | f | wd(8, f) | ws(8,0, f) | nodp(ws(8,0, f)) | ws(8,2, f) | nodp(ws(8,2, f)) | left(ws(8,2, 3.0F)) | left(ws(8,2, f)) | left(nodp(ws(8,2, f))) | left(sign(ws(8,2, f))) | left(sign(nodp(ws(8,2, f)))) | pad0(ws(8,2, f)) | upcase(wd(8,2, sci(f))) | wd(8,2, hex(f)) | upcase(wd(8,2, hex(f))) | nodp(wd(8,2, sci(f)));
    8465
    8566        sout | "double";
     
    8768        printf( "%g  %#8f %g %8f %#8.0f %8.0f %8.2f %-8.2f %-+#8.2f %08.2F %8.2E %8.2a %8.2A %8.2e\n",
    8869                        0.0,  3.0, d,  d,     d,    d,    d,     d,       d,     d,    d,    d,    d,    d );
    89         sout | 0.0 | wd(8, 3.0) | d | wd(8, d) | nodp(wd(8,0, d)) | wd(8,0, d) | wd(8,2, d) | nonl;
    90         sout | left(wd(8,2, d)) | left(sign(wd(8,2, d))) | pad0(upcase(wd(8,2, d))) | upcase(wd(8,2, sci(d))) | wd(8,2, hex(d)) | upcase(wd(8,2, hex(d))) | wd(8,2, sci(d));
     70        sout | 0.0 | wd(8, 3.0) | d | wd(8, d) | nodp(wd(8,0, d)) | wd(8,0, d) | wd(8,2, d) | left(wd(8,2, d)) | left(sign(wd(8,2, d))) | pad0(upcase(wd(8,2, d))) | upcase(wd(8,2, sci(d))) | wd(8,2, hex(d)) | upcase(wd(8,2, hex(d))) | wd(8,2, sci(d));
    9171
    9272        sout | "long double";
     
    9474        printf( "%Lg  %#8Lf %Lg %8Lf %#8.0Lf %8.0Lf %8.2Lf %-8.2Lf %-+#8.2Lf %08.2LF %8.2LE %8.2La %8.2LA %8.2Le\n",
    9575                        0.0L,  3.0L, ld,  ld,     ld,    ld,    ld,     ld,       ld,     ld,    ld,    ld,    ld,    ld );
    96         sout | 0.0L | wd(8, 3.0L) | ld | wd(8, ld) | nodp(wd(8,0, ld)) | wd(8,0, ld) | wd(8,2, ld) | nonl;
    97         sout | left(wd(8,2, ld)) | left(sign(wd(8,2, ld))) | pad0(upcase(wd(8,2, ld))) | upcase(wd(8,2, sci(ld))) | wd(8,2, hex(ld)) | upcase(wd(8,2, hex(ld))) | wd(8,2, sci(ld));
     76        sout | 0.0L | wd(8, 3.0L) | ld | wd(8, ld) | nodp(wd(8,0, ld)) | wd(8,0, ld) | wd(8,2, ld) | left(wd(8,2, ld)) | left(sign(wd(8,2, ld))) | pad0(upcase(wd(8,2, ld))) | upcase(wd(8,2, sci(ld))) | wd(8,2, hex(ld)) | upcase(wd(8,2, hex(ld))) | wd(8,2, sci(ld));
    9877
    9978
     
    10180        char c = 'a';
    10281        printf( "%c %2c %5c %-5c %hho %#hho %hhx %#hhx %#8hhx %#8hhX %-8c %8c\n", c, c, c, c, c, c, c, c, c, c, c, c );
    103         sout | c | ' ' | wd(2,c) | wd(5,c) | left(wd(5,c)) | nobase(oct(c)) | oct(c) | nonl;
    104         sout | nobase(hex(c)) | hex(c) | wd(8,hex(c)) | upcase(wd(8,hex(c))) | left(wd(8,c)) | wd(8,c);
     82        sout | c | ' ' | wd(2,c) | wd(5,c) | left(wd(5,c)) | nobase(oct(c)) | oct(c) | nobase(hex(c)) | hex(c) | wd(8,hex(c)) | upcase(wd(8,hex(c))) | left(wd(8,c)) | wd(8,c);
    10583
    10684        sout | nl | "string";
  • tests/pybin/settings.py

    r6a490b2 rb7d6a36  
    2323class Architecture:
    2424        KnownArchitectures = {
    25                 'x64'         : 'x64',
    26                 'x86-64'      : 'x64',
    27                 'x86_64'      : 'x64',
    28                 'x86'         : 'x86',
    29                 'aarch64'     : 'arm',
    30                 'i386'        : 'x86',
    31                 'i486'        : 'x86',
    32                 'i686'        : 'x86',
    33                 'Intel 80386' : 'x86',
    34                 'arm'         : 'arm',
    35                 'ARM'         : 'arm',
     25                'x64'           : 'x64',
     26                'x86-64'        : 'x64',
     27                'x86_64'        : 'x64',
     28                'x86'           : 'x86',
     29                'aarch64'       : 'arm',
     30                'i386'          : 'x86',
     31                'i486'          : 'x86',
     32                'i686'          : 'x86',
     33                'Intel 80386'   : 'x86',
     34                'arm'           : 'arm',
     35                'ARM'           : 'arm',
    3636        }
    3737
     
    7777                        print("updated to %s" % self.target)
    7878
    79         def filter(self, tests):
    80                 return [test for test in tests if not test.arch or self.target == test.arch]
     79        def match(self, arch):
    8180                return True if not arch else self.target == arch
    8281
    83         @staticmethod
    84         def make_canonical(arch):
     82        @classmethod
     83        def make_canonical(_, arch):
    8584                return Architecture.KnownArchitectures[arch]
    8685
     
    105104                self.total  = Timeouts.check(tg)
    106105
    107         @staticmethod
    108         def check(value):
     106        @classmethod
     107        def check(_, value):
    109108                if value < 1:
    110109                        print("Timeouts must be at least 1 second", file=sys.stderr)
     
    114113
    115114def init( options ):
    116         global all_arch
    117         global all_debug
    118         global all_install
    119115        global arch
    120116        global archive
    121         global continue_
    122117        global debug
     118        global distcc
    123119        global dry_run
    124120        global generating
     
    127123        global output_width
    128124        global timeout
    129         global timeout2gdb
    130125
    131         all_arch     = [Architecture(o) for o in list(dict.fromkeys(options.arch   ))] if options.arch else [Architecture(None)]
    132         all_debug    = [Debug(o)        for o in list(dict.fromkeys(options.debug  ))]
    133         all_install  = [Install(o)      for o in list(dict.fromkeys(options.install))]
     126        arch         = Architecture(options.arch)
    134127        archive      = os.path.abspath(os.path.join(original_path, options.archive_errors)) if options.archive_errors else None
    135         continue_    = options.continue_
     128        debug        = Debug(options.debug)
    136129        dry_run      = options.dry_run # must be called before tools.config_hash()
     130        distcc       = "DISTCC_CFA_PATH=~/.cfadistcc/%s/cfa" % tools.config_hash()
    137131        generating   = options.regenerate_expected
     132        install      = Install(options.install)
    138133        make         = ['make']
    139134        output_width = 24
    140135        timeout      = Timeouts(options.timeout, options.global_timeout)
    141         timeout2gdb  = options.timeout_with_gdb
    142136
    143137        # if we distribute, distcc errors will fail tests, use log file for distcc
     
    152146
    153147def validate():
    154         """Validate the current configuration and update globals"""
    155 
    156         global distcc
    157         distcc       = "DISTCC_CFA_PATH=~/.cfadistcc/%s/cfa" % tools.config_hash()
    158148        errf = os.path.join(BUILDDIR, ".validate.err")
    159149        make_ret, out = tools.make( ".validate", error_file = errf, output_file=subprocess.DEVNULL, error=subprocess.DEVNULL )
  • tests/pybin/test_run.py

    r6a490b2 rb7d6a36  
    4040                return os.path.normpath( os.path.join(settings.BUILDDIR, self.path, self.name) )
    4141
    42         @staticmethod
    43         def valid_name(name):
     42        @classmethod
     43        def valid_name(_, name):
    4444                return not name.endswith( ('.c', '.cc', '.cpp', '.cfa') )
    4545
    46         @staticmethod
    47         def new_target(target, arch):
     46        @classmethod
     47        def from_target(_, target):
    4848                test = Test()
    4949                test.name = os.path.basename(target)
    5050                test.path = os.path.relpath (os.path.dirname(target), settings.SRCDIR)
    51                 test.arch = arch.target if arch else ''
     51                test.arch = settings.arch.target if settings.arch.cross_compile else ''
    5252                return test
    5353
     
    7272                return text
    7373
    74         @staticmethod
    75         def fmtDur( duration ):
     74        @classmethod
     75        def fmtDur( cls, duration ):
    7676                if duration :
    7777                        hours, rem = divmod(duration, 3600)
  • tests/pybin/tools.py

    r6a490b2 rb7d6a36  
    7575                                        return proc.returncode, out.decode("utf-8") if out else None
    7676                                except subprocess.TimeoutExpired:
    77                                         if settings.timeout2gdb:
    78                                                 print("Process {} timeout".format(proc.pid))
    79                                                 proc.communicate()
    80                                                 return 124, str(None)
    81                                         else:
    82                                                 proc.send_signal(signal.SIGABRT)
    83                                                 proc.communicate()
    84                                                 return 124, str(None)
     77                                        proc.send_signal(signal.SIGABRT)
     78                                        proc.communicate()
     79                                        return 124, str(None)
    8580
    8681        except Exception as ex:
     
    327322        raise argparse.ArgumentTypeError(msg)
    328323
    329 # Convert a function that converts a string to one that converts comma separated string.
    330 def comma_separated(elements):
    331     return lambda string: [elements(part) for part in string.split(',')]
    332 
    333324def fancy_print(text):
    334325        column = which('column')
  • tests/test.py

    r6a490b2 rb7d6a36  
    66
    77import argparse
    8 import itertools
    98import re
    109import sys
     
    3029                        test.path = match.group(1)
    3130                        test.arch = match.group(3)[1:] if match.group(3) else None
    32                         expected.append(test)
     31                        if settings.arch.match(test.arch):
     32                                expected.append(test)
    3333
    3434        path_walk( match_test )
     
    5353                ]
    5454
    55         # sort the test alphabetically for convenience
    56         test_list.sort(key=lambda t: ('~' if t.arch else '') + t.target() + (t.arch if t.arch else ''))
    57 
    5855        return test_list
    5956
     
    6764                for testname in options.tests :
    6865                        testname = canonical_path( testname )
    69                         # first check if this is a valid name to regenerate
    7066                        if Test.valid_name(testname):
    71                                 # this is a valid name, let's check if it already exists
    7267                                found = [test for test in all_tests if canonical_path( test.target() ) == testname]
    73                                 if not found:
    74                                         # it's a new name, create it according to the name and specified architecture
    75                                         if options.arch:
    76                                                 # user specified one or multiple architectures, assume the tests will have architecture specific results
    77                                                 tests.extend( [Test.new_target(testname, arch) for arch in settings.all_arch] )
    78                                         else:
    79                                                 # user didn't specify an architecture, just create a cross platform test
    80                                                 tests.append( Test.new_target( testname, None ) )
    81                                 elif len(found) == 1 and not found[0].arch:
    82                                         # we found a single test, the user better be wanting to create a cross platform test
    83                                         if options.arch:
    84                                                 print('ERROR: "%s", test has no specified architecture but --arch was specified, ignoring it' % testname, file=sys.stderr)
    85                                         else:
    86                                                 tests.append( found[0] )
    87                                 else:
    88                                         # this test is already cross platform, just add a test for each platform the user asked
    89                                         tests.extend( [Test.new_target(testname, arch) for arch in settings.all_arch] )
    90 
    91                                         # print a warning if it users didn't ask for a specific architecture
    92                                         if not options.arch:
    93                                                 print('WARNING: "%s", test has architecture specific expected files but --arch was not specified, regenerating only for current host' % testname, file=sys.stderr)
    94 
     68                                tests.append( found[0] if len(found) == 1 else Test.from_target(testname) )
    9569                        else :
    9670                                print('ERROR: "%s", tests are not allowed to end with a C/C++/CFA extension, ignoring it' % testname, file=sys.stderr)
     
    10276
    10377                        if test :
    104                                 tests.extend( test )
     78                                tests.append( test[0] )
    10579                        else :
    10680                                print('ERROR: No expected file for test %s, ignoring it' % testname, file=sys.stderr)
     
    11286        # create a parser with the arguments for the tests script
    11387        parser = argparse.ArgumentParser(description='Script which runs cforall tests')
    114         parser.add_argument('--debug', help='Run all tests in debug or release', type=comma_separated(yes_no), default='yes')
    115         parser.add_argument('--install', help='Run all tests based on installed binaries or tree binaries', type=comma_separated(yes_no), default='no')
    116         parser.add_argument('--arch', help='Test for specific architecture', type=comma_separated(str), default=None)
    117         parser.add_argument('--continue', help='When multiple specifications are passed (debug/install/arch), sets whether or not to continue if the last specification failed', type=yes_no, default='yes', dest='continue_')
     88        parser.add_argument('--debug', help='Run all tests in debug or release', type=yes_no, default='yes')
     89        parser.add_argument('--install', help='Run all tests based on installed binaries or tree binaries', type=yes_no, default='no')
     90        parser.add_argument('--arch', help='Test for specific architecture', type=str, default='')
    11891        parser.add_argument('--timeout', help='Maximum duration in seconds after a single test is considered to have timed out', type=int, default=60)
    11992        parser.add_argument('--global-timeout', help='Maximum cumulative duration in seconds after the ALL tests are considered to have timed out', type=int, default=7200)
    120         parser.add_argument('--timeout-with-gdb', help='Instead of killing the command when it times out, orphan it and print process id to allow gdb to attach', type=yes_no, default="no")
    12193        parser.add_argument('--dry-run', help='Don\'t run the tests, only output the commands', action='store_true')
    12294        parser.add_argument('--list', help='List all test available', action='store_true')
     
    206178
    207179                else:
    208                         if os.stat(out_file).st_size < 1048576:
    209                                 with open (out_file, "r") as myfile:
    210                                         error = myfile.read()
    211                         else:
    212                                 error = "Output log can't be read, file is bigger than 1MB, see {} for actual error\n".format(out_file)
     180                        with open (out_file, "r") as myfile:
     181                                error = myfile.read()
    213182
    214183                        ret, info = core_info(exe_file)
     
    246215                return False, ""
    247216        except Exception as ex:
    248                 print("Unexpected error in worker thread running {}: {}".format(t.target(), ex), file=sys.stderr)
     217                print("Unexpected error in worker thread: %s" % ex, file=sys.stderr)
    249218                sys.stderr.flush()
    250219                return False, ""
     
    309278        make('clean', output_file=subprocess.DEVNULL, error=subprocess.DEVNULL)
    310279
    311         return failed
     280        return 1 if failed else 0
    312281
    313282
     
    323292        settings.init( options )
    324293
     294        # fetch the liest of all valid tests
     295        all_tests = list_tests( options.include, options.exclude )
     296
     297
     298        # if user wants all tests than no other treatement of the test list is required
     299        if options.all or options.list or options.list_comp or options.include :
     300                tests = all_tests
     301
     302        #otherwise we need to validate that the test list that was entered is valid
     303        else :
     304                tests = valid_tests( options )
     305
     306        # make sure we have at least some test to run
     307        if not tests :
     308                print('ERROR: No valid test to run', file=sys.stderr)
     309                sys.exit(1)
     310
     311
     312        # sort the test alphabetically for convenience
     313        tests.sort(key=lambda t: (t.arch if t.arch else '') + t.target())
     314
    325315        # users may want to simply list the tests
    326316        if options.list_comp :
    327                 # fetch the liest of all valid tests
    328                 tests = list_tests( None, None )
    329 
    330                 # print the possible options
    331                 print("-h --help --debug --dry-run --list --arch --all --regenerate-expected --archive-errors --install --timeout --global-timeout --timeout-with-gdb -j --jobs -I --include -E --exclude --continue ", end='')
     317                print("-h --help --debug --dry-run --list --arch --all --regenerate-expected --archive-errors --install --timeout --global-timeout -j --jobs ", end='')
    332318                print(" ".join(map(lambda t: "%s" % (t.target()), tests)))
    333319
    334320        elif options.list :
    335                 # fetch the liest of all valid tests
    336                 tests = list_tests( options.include, options.exclude )
    337 
    338                 # print the available tests
     321                print("Listing for %s:%s"% (settings.arch.string, settings.debug.string))
    339322                fancy_print("\n".join(map(lambda t: t.toString(), tests)))
    340323
    341324        else :
    342                 # fetch the liest of all valid tests
    343                 all_tests = list_tests( options.include, options.exclude )
    344 
    345                 # if user wants all tests than no other treatement of the test list is required
    346                 if options.all or options.include :
    347                         tests = all_tests
    348 
    349                 #otherwise we need to validate that the test list that was entered is valid
    350                 else :
    351                         tests = valid_tests( options )
    352 
    353                 # make sure we have at least some test to run
    354                 if not tests :
    355                         print('ERROR: No valid test to run', file=sys.stderr)
    356                         sys.exit(1)
    357 
    358                 # prep invariants
     325                # check the build configuration works
    359326                settings.prep_output(tests)
    360                 failed = 0
    361 
    362                 # for each build configurations, run the test
    363                 for arch, debug, install in itertools.product(settings.all_arch, settings.all_debug, settings.all_install):
    364                         settings.arch    = arch
    365                         settings.debug   = debug
    366                         settings.install = install
    367 
    368                         # filter out the tests for a different architecture
    369                         # tests are the same across debug/install
    370                         local_tests = settings.arch.filter( tests )
    371                         options.jobs, forceJobs = job_count( options, local_tests )
    372                         settings.update_make_cmd(forceJobs, options.jobs)
    373 
    374                         # check the build configuration works
    375                         settings.validate()
    376 
    377                         # print configuration
    378                         print('%s %i tests on %i cores (%s:%s)' % (
    379                                 'Regenerating' if settings.generating else 'Running',
    380                                 len(local_tests),
    381                                 options.jobs,
    382                                 settings.arch.string,
    383                                 settings.debug.string
    384                         ))
    385 
    386                         # otherwise run all tests and make sure to return the correct error code
    387                         failed = run_tests(local_tests, options.jobs)
    388                         if failed:
    389                                 result = 1
    390                                 if not settings.continue_:
    391                                         break
    392 
    393 
    394                 sys.exit( failed )
     327                settings.validate()
     328
     329                options.jobs, forceJobs = job_count( options, tests )
     330                settings.update_make_cmd(forceJobs, options.jobs)
     331
     332                print('%s %i tests on %i cores (%s:%s)' % (
     333                        'Regenerating' if settings.generating else 'Running',
     334                        len(tests),
     335                        options.jobs,
     336                        settings.arch.string,
     337                        settings.debug.string
     338                ))
     339
     340                # otherwise run all tests and make sure to return the correct error code
     341                sys.exit( run_tests(tests, options.jobs) )
  • tests/vector.cfa

    r6a490b2 rb7d6a36  
    1414//
    1515
     16#include <fstream.hfa>
    1617#include <vector.hfa>
    17 #include <fstream.hfa>
    1818
    1919#undef assert
     
    2828int main() {
    2929        vector( int ) iv;
    30 
    31         assert( ((uintptr_t)&iv.storage.storage ) == (((uintptr_t)&iv)) );
    32         assert( ((uintptr_t)&iv.storage.capacity) == (((uintptr_t)&iv) + sizeof(void *)) );
    33         assert( ((uintptr_t)&iv.size            ) == (((uintptr_t)&iv) + sizeof(void *) + sizeof(size_t)) );
    3430
    3531        assert( empty( &iv ) );
  • tools/build/push2dist.sh

    r6a490b2 rb7d6a36  
    1919# echo "Copying to machines : ${hosts} (hash=${hash})"
    2020
    21 files="../../../driver/cfa ../../../driver/cfa-cpp ../../../driver/cc1 ../../../driver/as defines.hfa $(find . -name '*.c*' | tr '\n' ' ')"
     21files="../../../driver/cfa ../../../driver/cfa-cpp ../../../driver/cc1 ../../../driver/as $(find . -name '*.c*' | tr '\n' ' ')"
    2222# echo "Files ${files}"
    2323
  • tools/cfa.nanorc

    r6a490b2 rb7d6a36  
    1414
    1515# Declarations
    16 color brightgreen "\<(struct|union|typedef|trait|coroutine|generator)\>"
    17 color brightgreen "\<(monitor|thread|with)\>"
     16color brightgreen "\<(struct|union|typedef|trait|coroutine|monitor|thread)\>"
     17color brightgreen "\<(with)\>"
    1818
    1919# Control Flow Structures
    2020color brightyellow "\<(if|else|while|do|for|switch|choose|case|default)\>"
    21 color brightyellow "\<(disable|enable|waitfor|when|timeout|suspend)\>"
     21color brightyellow "\<(disable|enable|waitfor|when|timeout)\>"
    2222color brightyellow "\<(try|catch(Resume)?|finally)\>"
    2323
     
    2626
    2727# Escaped Keywords, now Identifiers.
    28 color white "``\w+"
     28color white "`\w+`"
    2929
    3030# Operator Names
     
    3737## Update/Redistribute
    3838# GCC builtins
    39 color cyan "__attribute__[[:space:]]*\(\(([^)]|[^)]\))*\)\)"
     39color cyan "__attribute__[[:space:]]*\(\([^()]*(\([^()]*\)[^()]*)*\)\)"
    4040##color cyan "__(aligned|asm|builtin|hidden|inline|packed|restrict|section|typeof|weak)__"
    4141
  • tools/vscode/uwaterloo.cforall-0.1.0/package.json

    r6a490b2 rb7d6a36  
    22        "name": "cforall",
    33        "version": "0.1.0",
    4         "displayName": "Cāˆ€ (C-for-all) Language Support",
     4        "displayName": "Cforall Language Support",
    55        "description": "Cforall - colorizer, grammar and snippets.",
    66        "publisher": "uwaterloo",
     
    99                "vscode": "^1.5.0"
    1010        },
    11         "icon": "images/icon.png",
     11        "icon": "images/icon.svg",
    1212        "categories": [
    13                 "Programming Languages",
     13                "Languages",
    1414                "Linters",
    1515                "Other"
    1616        ],
    17         "activationEvents": [
    18                 "onLanguage:cforall"
    19         ],
    20         "main": "./client/main.js",
    2117        "contributes": {
    2218                "languages": [
     
    2521                                "aliases": [
    2622                                        "Cāˆ€",
     23                                        "Cforall",
    2724                                        "CForAll",
    28                                         "Cforall",
    2925                                        "cforall"
    3026                                ],
    3127                                "extensions": [
    32                                         ".cfa",
    33                                         ".hfa",
    34                                         ".ifa"
     28                                        ".cf"
    3529                                ],
    3630                                "configuration": "./cforall.configuration.json"
     
    4034                        {
    4135                                "language": "cforall",
    42                                 "scopeName": "source.cfa",
    43                                 "path": "./syntaxes/cfa.tmLanguage.json"
     36                                "scopeName": "source.cf",
     37                                "path": "./syntaxes/cfa.tmLanguage"
    4438                        }
    45                 ],
    46                 "configuration": {
    47                         "type": "object",
    48                         "title": "Example configuration",
    49                         "properties": {
    50                                 "cforall.maxNumberOfProblems": {
    51                                         "scope": "resource",
    52                                         "type": "number",
    53                                         "default": 100,
    54                                         "description": "Controls the maximum number of problems produced by the server."
    55                                 },
    56                                 "cforall.trace.server": {
    57                                         "scope": "window",
    58                                         "type": "string",
    59                                         "enum": [
    60                                                 "off",
    61                                                 "messages",
    62                                                 "verbose"
    63                                         ],
    64                                         "default": "off",
    65                                         "description": "Traces the communication between VS Code and the language server."
    66                                 }
    67                         }
    68                 }
    69         },
    70         "dependencies": {
    71                 "vscode-languageclient": "^4.1.4"
    72         },
    73         "devDependencies": {
    74                 "vscode-languageclient": "^4.1.4"
     39                ]
    7540        }
    7641}
Note: See TracChangeset for help on using the changeset viewer.