Changes in / [71d6bd8:7030dab]


Files: 151 added, 35 deleted, 291 edited

  • Jenkinsfile_disabled

    r71d6bd8 r7030dab  
    126126                        }
    127127
    128                         sh "${SrcDir}/configure CXX=${Settings.Compiler.CXX} CC=${Settings.Compiler.CC} ${Settings.Architecture.flags} ${targets} --quiet"
     128                        sh "${SrcDir}/configure CXX=${Settings.Compiler.CXX} CC=${Settings.Compiler.CC} ${Settings.Architecture.flags} AR=gcc-ar RANLIB=gcc-ranlib ${targets} --quiet --prefix=${BuildDir}"
    129129
    130130                        // Configure libcfa
     
    155155                dir (BuildDir) {
    156156                        sh "make -j 8 --no-print-directory -C libcfa/${Settings.Architecture.name}-nodebug"
     157                }
     158        }
     159
     160        build_stage('Build : install', true) {
     161                // Build outside of the src tree to ease cleaning
     162                dir (BuildDir) {
     163                        sh "make -j 8 --no-print-directory install"
    157164                }
    158165        }
     
    179186                echo "Archiving core dumps"
    180187                dir (BuildDir) {
    181                         archiveArtifacts artifacts: "tests/crashes/**/*", fingerprint: true
     188                        archiveArtifacts artifacts: "tests/crashes/**/*,lib/**/lib*.so*", fingerprint: true
    182189                }
    183190                throw err
     
    215222
    216223                //Then publish the results
    217                 do_plot(Settings.RunBenchmark && Settings.Publish, 'compile'       , groupCompile    , false, 'Compilation')
    218                 do_plot(Settings.RunBenchmark && Settings.Publish, 'compile.diff'  , groupCompile    , true , 'Compilation (relative)')
    219                 do_plot(Settings.RunBenchmark && Settings.Publish, 'ctxswitch'     , groupConcurrency, false, 'Context Switching')
    220                 do_plot(Settings.RunBenchmark && Settings.Publish, 'ctxswitch.diff', groupConcurrency, true , 'Context Switching (relative)')
    221                 do_plot(Settings.RunBenchmark && Settings.Publish, 'mutex'         , groupConcurrency, false, 'Mutual Exclusion')
    222                 do_plot(Settings.RunBenchmark && Settings.Publish, 'mutex.diff'    , groupConcurrency, true , 'Mutual Exclusion (relative)')
    223                 do_plot(Settings.RunBenchmark && Settings.Publish, 'signal'        , groupConcurrency, false, 'Internal and External Scheduling')
    224                 do_plot(Settings.RunBenchmark && Settings.Publish, 'signal.diff'   , groupConcurrency, true , 'Internal and External Scheduling (relative)')
     224                do_plot(Settings.RunBenchmark && Settings.Publish, 'compile'        , groupCompile    , false, 'Compilation')
     225                do_plot(Settings.RunBenchmark && Settings.Publish, 'compile.diff'   , groupCompile    , true , 'Compilation (relative)')
     226                do_plot(Settings.RunBenchmark && Settings.Publish, 'ctxswitch'      , groupConcurrency, false, 'Context Switching')
     227                do_plot(Settings.RunBenchmark && Settings.Publish, 'ctxswitch.diff' , groupConcurrency, true , 'Context Switching (relative)')
     228                do_plot(Settings.RunBenchmark && Settings.Publish, 'mutex'          , groupConcurrency, false, 'Mutual Exclusion')
     229                do_plot(Settings.RunBenchmark && Settings.Publish, 'mutex.diff'     , groupConcurrency, true , 'Mutual Exclusion (relative)')
     230                do_plot(Settings.RunBenchmark && Settings.Publish, 'scheduling'     , groupConcurrency, false, 'Internal and External Scheduling')
     231                do_plot(Settings.RunBenchmark && Settings.Publish, 'scheduling.diff', groupConcurrency, true , 'Internal and External Scheduling (relative)')
    225232        }
    226233}
     
    325332        public String CXX
    326333        public String CC
    327 
    328         CC_Desc(String name, String CXX, String CC) {
     334        public String lto
     335
     336        CC_Desc(String name, String CXX, String CC, String lto) {
    329337                this.name = name
    330338                this.CXX = CXX
    331                 this.CC = CC
     339                this.CC  = CC
     340                this.lto = lto
    332341        }
    333342}
     
    364373                switch( param.Compiler ) {
    365374                        case 'gcc-9':
    366                                 this.Compiler = new CC_Desc('gcc-9', 'g++-9', 'gcc-9')
     375                                this.Compiler = new CC_Desc('gcc-9', 'g++-9', 'gcc-9', '-flto=auto')
    367376                        break
    368377                        case 'gcc-8':
    369                                 this.Compiler = new CC_Desc('gcc-8', 'g++-8', 'gcc-8')
     378                                this.Compiler = new CC_Desc('gcc-8', 'g++-8', 'gcc-8', '-flto=auto')
    370379                        break
    371380                        case 'gcc-7':
    372                                 this.Compiler = new CC_Desc('gcc-7', 'g++-7', 'gcc-7')
     381                                this.Compiler = new CC_Desc('gcc-7', 'g++-7', 'gcc-7', '-flto=auto')
    373382                        break
    374383                        case 'gcc-6':
    375                                 this.Compiler = new CC_Desc('gcc-6', 'g++-6', 'gcc-6')
     384                                this.Compiler = new CC_Desc('gcc-6', 'g++-6', 'gcc-6', '-flto=auto')
    376385                        break
    377386                        case 'gcc-5':
    378                                 this.Compiler = new CC_Desc('gcc-5', 'g++-5', 'gcc-5')
     387                                this.Compiler = new CC_Desc('gcc-5', 'g++-5', 'gcc-5', '-flto=auto')
    379388                        break
    380389                        case 'gcc-4.9':
    381                                 this.Compiler = new CC_Desc('gcc-4.9', 'g++-4.9', 'gcc-4.9')
     390                                this.Compiler = new CC_Desc('gcc-4.9', 'g++-4.9', 'gcc-4.9', '-flto=auto')
    382391                        break
    383392                        case 'clang':
    384                                 this.Compiler = new CC_Desc('clang', 'clang++-6.0', 'gcc-6')
     393                                this.Compiler = new CC_Desc('clang', 'clang++-6.0', 'gcc-6', '-flto=thin -flto-jobs=0')
    385394                        break
    386395                        default :
     
    439448        // prepare the properties
    440449        properties ([                                                                                                   \
     450                buildDiscarder(logRotator(                                                                              \
     451                        artifactDaysToKeepStr: '',                                                                      \
     452                        artifactNumToKeepStr: '',                                                                       \
     453                        daysToKeepStr: '730',                                                                           \
     454                        numToKeepStr: '1000'                                                                            \
     455                )),                                                                                                             \
    441456                [$class: 'ParametersDefinitionProperty',                                                                \
    442457                        parameterDefinitions: [                                                                         \
  • benchmark/Makefile.am

    r71d6bd8 r7030dab  
    1111## Created On       : Sun May 31 09:08:15 2015
    1212## Last Modified By : Peter A. Buhr
    13 ## Last Modified On : Mon Jul 29 18:02:19 2019
    14 ## Update Count     : 54
     13## Last Modified On : Tue Mar 10 11:41:18 2020
     14## Update Count     : 258
    1515###############################################################################
    1616
     
    2828BENCH_V_CFA = $(__bench_v_CFA_$(__quiet))
    2929BENCH_V_CXX = $(__bench_v_CXX_$(__quiet))
     30BENCH_V_UPP = $(__bench_v_UPP_$(__quiet))
    3031BENCH_V_GOC = $(__bench_v_GOC_$(__quiet))
     32BENCH_V_PY = $(__bench_v_PY_$(__quiet))
     33BENCH_V_RUSTC = $(__bench_v_RUSTC_$(__quiet))
     34BENCH_V_NODEJS = $(__bench_v_NODEJS_$(__quiet))
    3135BENCH_V_JAVAC = $(__bench_v_JAVAC_$(__quiet))
    32 BENCH_V_UPP = $(__bench_v_UPP_$(__quiet))
    3336
    3437__quiet = verbose
     
    3639__bench_v_CFA_quiet = @
    3740__bench_v_CXX_quiet = @
     41__bench_v_UPP_quiet = @
    3842__bench_v_GOC_quiet = @
     43__bench_v_RUSTC_quiet = @
    3944__bench_v_JAVAC_quiet = @
    40 __bench_v_UPP_quiet = @
    4145__bench_v_CC_verbose = $(AM_V_CC)
    4246__bench_v_CFA_verbose = $(AM_V_CFA)
    4347__bench_v_CXX_verbose = $(AM_V_CXX)
     48__bench_v_UPP_verbose = $(AM_V_UPP)
    4449__bench_v_GOC_verbose = $(AM_V_GOC)
     50__bench_v_PY_verbose = $(AM_V_PY)
     51__bench_v_RUSTC_verbose = $(AM_V_RUST)
     52__bench_v_NODEJS_verbose = $(AM_V_NODEJS)
    4553__bench_v_JAVAC_verbose = $(AM_V_JAVAC)
    46 __bench_v_UPP_verbose = $(AM_V_UPP)
    4754
    4855
     
    5158STATS    = ${abs_top_srcdir}/tools/stat.py
    5259# NEED AT LEAST 4 DATA VALUES FOR BENCHMARKS BECAUSE THE MAX AND MIN VALUES ARE REMOVED
    53 repeats  = 5 # 31 for benchmarks
     60repeats  = 13 # 31 for benchmarks
    5461arch     = x64
    5562skipcompile = no
     
    6269
    6370dummyC.c:
    64         @echo "int main() { return 0; }" > ${@}
     71        echo "int main() { return 0; }" > ${@}
    6572
    6673dummyCXX.cpp:
    67         @echo "int main() { return 0; }" > ${@}
    68 
     74        echo "int main() { return 0; }" > ${@}
     75
     76.SILENT:                # do not print recipe
    6977.NOTPARALLEL:
    70 .PHONY: compile.csv ctxswitch.csv mutex.csv signal.csv
    71 
    72 ## =========================================================================================================
    73 all : ctxswitch$(EXEEXT) mutex$(EXEEXT) signal$(EXEEXT) waitfor$(EXEEXT) creation$(EXEEXT)
     78.PHONY: jenkins cleancsv
     79
     80## =========================================================================================================
     81
     82all : basic$(EXEEXT) ctxswitch$(EXEEXT) mutex$(EXEEXT) schedint$(EXEEXT) schedext$(EXEEXT) creation$(EXEEXT)
     83
     84basic_loop_DURATION = 15000000000
     85basic_function_DURATION = 10000000000
     86basic_tls_fetch_add_DURATION = 10000000000
     87basic_DURATION = 250000000
     88
     89ctxswitch_pthread_DURATION = 25000000
     90ctxswitch_rust_thread_DURATION = $(ctxswitch_pthread_DURATION)
     91ctxswitch_cfa_generator_DURATION = 5000000000
     92ctxswitch_nodejs_await_DURATION = 5000000
     93ctxswitch_DURATION = 100000000
     94
     95#mutex_java_DURATION = 10000000
     96mutex_DURATION = 50000000
     97
     98schedint_pthread_DURATION = 1000000
     99schedint_java_DURATION = $(schedint_pthread_DURATION)
     100schedint_rust_DURATION = $(schedint_pthread_DURATION)
     101schedint_DURATION = 10000000
     102
     103schedext_DURATION = 10000000
     104
     105creation_pthread_DURATION = 250000
     106creation_rust_thread_DURATION = ${creation_pthread_DURATION}
     107creation_java_thread_DURATION = ${creation_pthread_DURATION}
     108creation_cfa_coroutine_DURATION = 100000000
     109creation_cfa_coroutine_eager_DURATION = 10000000
     110creation_cfa_generator_DURATION = 1000000000
     111creation_upp_coroutine_DURATION = ${creation_cfa_coroutine_eager_DURATION}
     112creation_cfa_thread_DURATION = 10000000
     113creation_upp_thread_DURATION = ${creation_cfa_thread_DURATION}
     114creation_DURATION = 10000000
    74115
    75116%.run : %$(EXEEXT) ${REPEAT}
    76         @rm -f .result.log
    77         @echo "------------------------------------------------------"
    78         @echo $<
    79         @${REPEAT} ${repeats} ./a.out | tee -a .result.log
    80         @${STATS} .result.log
    81         @echo "------------------------------------------------------"
    82         @rm -f a.out .result.log *.class
     117        rm -f .result.log
     118        echo "------------------------------------------------------"
     119        echo $<
     120        ${REPEAT} ${repeats} -- ./a.out\
     121                $(if ${$(subst -,_,$(basename $@))_DURATION},\
     122                        ${$(subst -,_,$(basename $@))_DURATION},\
     123                        ${$(firstword $(subst -, ,$(basename $@)))_DURATION}) | tee -a .result.log
     124        ${STATS} .result.log
     125        echo "------------------------------------------------------"
     126        rm -f a.out .result.log *.class
     127
     128#       ${REPEAT} ${repeats} -- /usr/bin/time -f "%Uu %Ss %Er %Mkb" ./a.out
    83129
    84130%.runquiet :
    85         @+make $(basename $@) CFLAGS="-w" __quiet=quiet
    86         @taskset -c 1 ./a.out
    87         @rm -f a.out
     131        +make $(basename $@) CFLAGS="-w" __quiet=quiet
     132        taskset -c 1 ./a.out
     133        rm -f a.out
    88134
    89135%.make :
    90         @printf "${PRINT_FORMAT}" $(basename $(subst compile-,,$@))
    91         @+/usr/bin/time -f ${TIME_FORMAT} make $(basename $@) 2>&1
     136        printf "${PRINT_FORMAT}" $(basename $(subst compile-,,$@))
     137        +/usr/bin/time -f ${TIME_FORMAT} make $(basename $@) 2>&1
    92138
    93139${REPEAT} :
    94         @+make -C ${abs_top_builddir}/tools repeat
     140        +make -C ${abs_top_builddir}/tools repeat
    95141
    96142## =========================================================================================================
     
    98144FIX_NEW_LINES = cat $@ | tr "\n" "\t" | sed -r 's/\t,/,/' | tr "\t" "\n" > $@
    99145
    100 jenkins$(EXEEXT):
     146cleancsv:
     147        rm -f compile.csv basic.csv ctxswitch.csv mutex.csv scheduling.csv
     148
     149jenkins$(EXEEXT): cleancsv
    101150@DOifskipcompile@
    102         @+make compile.csv
    103         @-+make compile.diff.csv
     151        +make compile.csv
     152        -+make compile.diff.csv
    104153@DOendif@
    105         @+make ctxswitch.csv
    106         @-+make ctxswitch.diff.csv
    107         @+make mutex.csv
    108         @-+make mutex.diff.csv
    109         @+make signal.csv
    110         @-+make signal.diff.csv
     154        +make ctxswitch.csv
     155        -+make ctxswitch.diff.csv
     156        +make mutex.csv
     157        -+make mutex.diff.csv
     158        +make scheduling.csv
     159        -+make scheduling.diff.csv
    111160@DOifskipcompile@
    112161        cat compile.csv
     
    117166        cat mutex.csv
    118167        -cat mutex.diff.csv
    119         cat signal.csv
    120         -cat signal.diff.csv
     168        cat scheduling.csv
     169        -cat scheduling.diff.csv
    121170
    122171compile.csv:
    123         @echo "array,attributes,empty,expression,io,monitor,operators,typeof" > $@
    124         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-array.make >> $@
    125         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-attributes.make >> $@
    126         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-empty.make >> $@
    127         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-expression.make >> $@
    128         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-io.make >> $@
    129         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-monitor.make >> $@
    130         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-operators.make >> $@
    131         @+make TIME_FORMAT='%e' PRINT_FORMAT='' compile-typeof.make >> $@
    132         @$(srcdir)/fixcsv.sh $@
     172        echo "building $@"
     173        echo "array,attributes,empty,expression,io,monitor,operators,typeof" > $@
     174        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-array.make >> $@
     175        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-attributes.make >> $@
     176        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-empty.make >> $@
     177        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-expression.make >> $@
     178        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-io.make >> $@
     179        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-monitor.make >> $@
     180        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-operators.make >> $@
     181        +make TIME_FORMAT='%e' PRINT_FORMAT='' compile-typeof.make >> $@
     182        $(srcdir)/fixcsv.sh $@
    133183
    134184ctxswitch.csv:
    135         @echo "generator,coroutine,thread" > $@
    136         @+make ctxswitch-cfa_generator.runquiet >> $@ && echo -n ',' >> $@
    137         @+make ctxswitch-cfa_coroutine.runquiet >> $@ && echo -n ',' >> $@
    138         @+make ctxswitch-cfa_thread.runquiet >> $@
    139         @$(srcdir)/fixcsv.sh $@
     185        echo "building $@"
     186        echo "generator,coroutine,thread" > $@
     187        +make ctxswitch-cfa_generator.runquiet >> $@ && echo -n ',' >> $@
     188        +make ctxswitch-cfa_coroutine.runquiet >> $@ && echo -n ',' >> $@
     189        +make ctxswitch-cfa_thread.runquiet >> $@
     190        $(srcdir)/fixcsv.sh $@
    140191
    141192mutex.csv:
    142         @echo "1-monitor,2-monitor" > $@
    143         @+make mutex-cfa1.runquiet >> $@ && echo -n ',' >> $@
    144         @+make mutex-cfa2.runquiet >> $@
    145         @$(srcdir)/fixcsv.sh $@
    146 
    147 signal.csv:
    148         @echo "signal-1,signal-2,waitfor-1,waitfor-2" > $@
    149         @+make signal-cfa1.runquiet >> $@ && echo -n ',' >> $@
    150         @+make signal-cfa2.runquiet >> $@ && echo -n ',' >> $@
    151         @+make waitfor-cfa1.runquiet >> $@ && echo -n ',' >> $@
    152         @+make waitfor-cfa2.runquiet >> $@
    153         @$(srcdir)/fixcsv.sh $@
     193        echo "building $@"
     194        echo "1-monitor,2-monitor" > $@
     195        +make mutex-cfa1.runquiet >> $@ && echo -n ',' >> $@
     196        +make mutex-cfa2.runquiet >> $@
     197        $(srcdir)/fixcsv.sh $@
     198
     199scheduling.csv:
     200        echo "building $@"
     201        echo "schedint-1,schedint-2,schedext-1,schedext-2" > $@
     202        +make schedint-cfa1.runquiet >> $@ && echo -n ',' >> $@
     203        +make schedint-cfa2.runquiet >> $@ && echo -n ',' >> $@
     204        +make schedext-cfa1.runquiet >> $@ && echo -n ',' >> $@
     205        +make schedext-cfa2.runquiet >> $@
     206        $(srcdir)/fixcsv.sh $@
    154207
    155208%.diff.csv: %.csv
    156         @test -e $(srcdir)/baselines/$(arch)/$< || (echo "Error : Missing baseline for ${<}" && false)
    157         @$(srcdir)/baselines/calc.py $(srcdir)/baselines/$(arch)/$(<) $(<) > $@
    158 
    159 
    160 ## =========================================================================================================
    161 loop$(EXEEXT):
    162         $(BENCH_V_CC)$(COMPILE) -DBENCH_N=5000000000 $(srcdir)/loop.c
    163 
    164 function$(EXEEXT):
    165         $(BENCH_V_CC)$(COMPILE) -DBENCH_N=5000000000 $(srcdir)/function.c
    166 
    167 fetch_add$(EXEEXT):
    168         $(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/fetch_add.c
    169 
    170 ttst_lock$(EXEEXT):
    171         $(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/ttst_lock.c
    172 
    173 tls-fetch_add$(EXEEXT):
    174         $(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/tls-fetch_add.c
    175 
    176 ## =========================================================================================================
    177 CTXSWITCH_DEPEND  =                 \
    178         loop.run                                \
    179         function.run                    \
    180         fetch_add.run                   \
    181         ttst_lock.run                   \
    182         tls-fetch_add.run                       \
    183         ctxswitch-pthread.run           \
     209        test -e $(srcdir)/baselines/$(arch)/$< || (echo "Error : Missing baseline for ${<}" && false)
     210        $(srcdir)/baselines/calc.py $(srcdir)/baselines/$(arch)/$(<) $(<) > $@
     211
     212## =========================================================================================================
     213
     214BASIC_DEPEND  =                                 \
     215        basic-loop.run                          \
     216        basic-function.run                      \
     217        basic-fetch_add.run                     \
     218        basic-ttst_lock.run                     \
     219        basic-tls-fetch_add.run
     220
     221basic-loop$(EXEEXT):
     222        $(BENCH_V_CC)$(COMPILE) $(srcdir)/basic/loop.c
     223
     224basic-function$(EXEEXT):
     225        $(BENCH_V_CC)$(COMPILE) $(srcdir)/basic/function.c
     226
     227basic-fetch_add$(EXEEXT):
     228        $(BENCH_V_CC)$(COMPILE) $(srcdir)/basic/fetch_add.c
     229
     230basic-ttst_lock$(EXEEXT):
     231        $(BENCH_V_CC)$(COMPILE) $(srcdir)/basic/ttst_lock.c
     232
     233basic-tls-fetch_add$(EXEEXT):
     234        $(BENCH_V_CC)$(COMPILE) $(srcdir)/basic/tls_fetch_add.c
     235
     236basic$(EXEEXT): $(BASIC_DEPEND)
     237
     238## =========================================================================================================
     239
     240CTXSWITCH_DEPEND  =                     \
    184241        ctxswitch-cfa_generator.run     \
    185242        ctxswitch-cfa_coroutine.run     \
     
    188245        ctxswitch-upp_coroutine.run     \
    189246        ctxswitch-upp_thread.run        \
    190         ctxswitch-goroutine.run         \
    191         ctxswitch-java_thread.run
    192 
     247        ctxswitch-python_coroutine.run  \
     248        ctxswitch-nodejs_coroutine.run  \
     249        ctxswitch-nodejs_await.run      \
     250        ctxswitch-goroutine_thread.run  \
     251        ctxswitch-rust_thread.run       \
     252        ctxswitch-nodejs_coroutine.run  \
     253        ctxswitch-java_thread.run       \
     254        ctxswitch-pthread.run
    193255
    194256if WITH_LIBFIBRE
    195 CTXSWITCH_DEPEND  +=           \
    196         ctxswitch-kos_fibre.run  \
     257CTXSWITCH_DEPEND  +=                    \
     258        ctxswitch-kos_fibre.run         \
    197259        ctxswitch-kos_fibre2.run
    198 
    199260
    200261ctxswitch-kos_fibre$(EXEEXT):
     
    207268ctxswitch$(EXEEXT): $(CTXSWITCH_DEPEND)
    208269
    209 ctxswitch-pthread$(EXEEXT):
    210         $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=50000000 $(srcdir)/ctxswitch/pthreads.c
    211 
    212270ctxswitch-cfa_generator$(EXEEXT):
    213         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_gen.cfa
     271        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/ctxswitch/cfa_gen.cfa
    214272
    215273ctxswitch-cfa_coroutine$(EXEEXT):
    216         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_cor.cfa
     274        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/ctxswitch/cfa_cor.cfa
    217275
    218276ctxswitch-cfa_thread$(EXEEXT):
    219         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_thrd.cfa
     277        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/ctxswitch/cfa_thrd.cfa
    220278
    221279ctxswitch-cfa_thread2$(EXEEXT):
    222         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_thrd2.cfa
     280        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/ctxswitch/cfa_thrd2.cfa
    223281
    224282ctxswitch-upp_coroutine$(EXEEXT):
    225         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/upp_cor.cc
     283        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/ctxswitch/upp_cor.cc
    226284
    227285ctxswitch-upp_thread$(EXEEXT):
    228         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/upp_thrd.cc
    229 
    230 ctxswitch-goroutine$(EXEEXT):
     286        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/ctxswitch/upp_thrd.cc
     287
     288ctxswitch-python_coroutine$(EXEEXT):
     289        $(BENCH_V_PY)echo "#!/bin/sh" > a.out
     290        echo "python3.7 $(srcdir)/ctxswitch/python_cor.py" >> a.out
     291        chmod a+x a.out
     292
     293ctxswitch-nodejs_coroutine$(EXEEXT):
     294        $(BENCH_V_NODEJS)echo "#!/bin/sh" > a.out
     295        echo "nodejs $(srcdir)/ctxswitch/node_cor.js" >> a.out
     296        chmod a+x a.out
     297
     298ctxswitch-nodejs_await$(EXEEXT):
     299        $(BENCH_V_NODEJS)echo "#!/bin/sh" > a.out
     300        echo "nodejs $(srcdir)/ctxswitch/node_await.js" >> a.out
     301        chmod a+x a.out
     302
     303ctxswitch-goroutine_thread$(EXEEXT):
    231304        $(BENCH_V_GOC)go build -o a.out $(srcdir)/ctxswitch/goroutine.go
     305
     306ctxswitch-rust_thread$(EXEEXT):
     307        $(BENCH_V_RUSTC)rustc -C opt-level=3 -o a.out $(srcdir)/ctxswitch/rust_thrd.rs
    232308
    233309ctxswitch-java_thread$(EXEEXT):
    234310        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/ctxswitch/JavaThread.java
    235         @echo "#!/bin/sh" > a.out
    236         @echo "java JavaThread" >> a.out
    237         @chmod a+x a.out
    238 
    239 ## =========================================================================================================
    240 mutex$(EXEEXT) :\
    241         loop.run                        \
    242         function.run            \
    243         fetch_add.run           \
    244         mutex-pthread_lock.run  \
    245         mutex-upp.run           \
     311        echo "#!/bin/sh" > a.out
     312        echo "java JavaThread" >> a.out
     313        chmod a+x a.out
     314
     315ctxswitch-pthread$(EXEEXT):
     316        $(BENCH_V_CC)$(COMPILE) $(srcdir)/ctxswitch/pthreads.c
     317
     318## =========================================================================================================
     319
     320mutex$(EXEEXT) :                \
    246321        mutex-cfa1.run          \
    247322        mutex-cfa2.run          \
    248323        mutex-cfa4.run          \
    249         mutex-java_thread.run
    250 
    251 mutex-pthread_lock$(EXEEXT):
    252         $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=50000000 $(srcdir)/mutex/pthreads.c
     324        mutex-upp.run           \
     325        mutex-go.run            \
     326        mutex-rust.run          \
     327        mutex-java.run          \
     328        mutex-pthread.run
     329
     330mutex-pthread$(EXEEXT):
     331        $(BENCH_V_CC)$(COMPILE) $(srcdir)/mutex/pthreads.c
     332
     333mutex-cfa1$(EXEEXT):
     334        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/mutex/cfa1.cfa
     335
     336mutex-cfa2$(EXEEXT):
     337        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/mutex/cfa2.cfa
     338
     339mutex-cfa4$(EXEEXT):
     340        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/mutex/cfa4.cfa
    253341
    254342mutex-upp$(EXEEXT):
    255         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/mutex/upp.cc
    256 
    257 mutex-cfa1$(EXEEXT):
    258         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa1.cfa
    259 
    260 mutex-cfa2$(EXEEXT):
    261         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa2.cfa
    262 
    263 mutex-cfa4$(EXEEXT):
    264         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa4.cfa
    265 
    266 mutex-java_thread$(EXEEXT):
     343        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/mutex/upp.cc
     344
     345mutex-go$(EXEEXT):
     346        $(BENCH_V_GOC)go build -o a.out $(srcdir)/mutex/goroutine.go
     347
     348mutex-rust$(EXEEXT):
     349        $(BENCH_V_RUSTC)rustc -C opt-level=3 -o a.out $(srcdir)/mutex/rust.rs
     350
     351mutex-java$(EXEEXT):
    267352        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/mutex/JavaThread.java
    268         @echo "#!/bin/sh" > a.out
    269         @echo "java JavaThread" >> a.out
    270         @chmod a+x a.out
    271 
    272 ## =========================================================================================================
    273 signal$(EXEEXT) :\
    274         signal-pthread_cond.run \
    275         signal-upp.run          \
    276         signal-cfa1.run         \
    277         signal-cfa2.run         \
    278         signal-cfa4.run         \
    279         signal-java_thread.run
    280 
    281 signal-pthread_cond$(EXEEXT):
    282         $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=500000  $(srcdir)/schedint/pthreads.c
    283 
    284 signal-upp$(EXEEXT):
    285         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=5000000 $(srcdir)/schedint/upp.cc
    286 
    287 signal-cfa1$(EXEEXT):
    288         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa1.cfa
    289 
    290 signal-cfa2$(EXEEXT):
    291         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa2.cfa
    292 
    293 signal-cfa4$(EXEEXT):
    294         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa4.cfa
    295 
    296 signal-java_thread$(EXEEXT):
     353        echo "#!/bin/sh" > a.out
     354        echo "java JavaThread" >> a.out
     355        chmod a+x a.out
     356
     357## =========================================================================================================
     358
     359schedint$(EXEEXT) :             \
     360        schedint-cfa1.run       \
     361        schedint-cfa2.run       \
     362        schedint-cfa4.run       \
     363        schedint-upp.run        \
     364        schedint-rust.run       \
     365        schedint-java.run       \
     366        schedint-pthread.run
     367
     368schedint-cfa1$(EXEEXT):
     369        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/schedint/cfa1.cfa
     370
     371schedint-cfa2$(EXEEXT):
     372        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/schedint/cfa2.cfa
     373
     374schedint-cfa4$(EXEEXT):
     375        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/schedint/cfa4.cfa
     376
     377schedint-upp$(EXEEXT):
     378        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/schedint/upp.cc
     379
     380schedint-rust$(EXEEXT):
     381        $(BENCH_V_RUSTC)rustc -C opt-level=3 -o a.out $(srcdir)/schedint/rust.rs
     382
     383schedint-java$(EXEEXT):
    297384        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/schedint/JavaThread.java
    298         @echo "#!/bin/sh" > a.out
    299         @echo "java JavaThread" >> a.out
    300         @chmod a+x a.out
    301 
    302 
    303 ## =========================================================================================================
    304 waitfor$(EXEEXT) :\
    305         waitfor-upp.run         \
    306         waitfor-cfa1.run                \
    307         waitfor-cfa2.run                \
    308         waitfor-cfa4.run
    309 
    310 waitfor-upp$(EXEEXT):
    311         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=5000000 $(srcdir)/schedext/upp.cc
    312 
    313 waitfor-cfa1$(EXEEXT):
    314         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa1.cfa
    315 
    316 waitfor-cfa2$(EXEEXT):
    317         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa2.cfa
    318 
    319 waitfor-cfa4$(EXEEXT):
    320         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa4.cfa
    321 
    322 ## =========================================================================================================
    323 creation$(EXEEXT) :\
    324         creation-pthread.run                    \
     385        echo "#!/bin/sh" > a.out
     386        echo "java JavaThread" >> a.out
     387        chmod a+x a.out
     388
     389schedint-pthread$(EXEEXT):
     390        $(BENCH_V_CC)$(COMPILE) $(srcdir)/schedint/pthreads.c
     391
     392## =========================================================================================================
     393
     394schedext$(EXEEXT) :             \
     395        schedext-cfa1.run       \
     396        schedext-cfa2.run       \
     397        schedext-cfa4.run       \
     398        schedext-upp.run        \
     399        schedext-goroutine.run
     400
     401schedext-cfa1$(EXEEXT):
     402        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/schedext/cfa1.cfa
     403
     404schedext-cfa2$(EXEEXT):
     405        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/schedext/cfa2.cfa
     406
     407schedext-cfa4$(EXEEXT):
     408        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/schedext/cfa4.cfa
     409
     410schedext-upp$(EXEEXT):
     411        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/schedext/upp.cc
     412
     413schedext-goroutine$(EXEEXT):
     414        $(BENCH_V_GOC)go build -o a.out $(srcdir)/schedext/goroutine.go
     415
     416
     417## =========================================================================================================
     418
     419creation$(EXEEXT) :                             \
     420        creation-cfa_generator.run              \
    325421        creation-cfa_coroutine.run              \
    326422        creation-cfa_coroutine_eager.run        \
     
    328424        creation-upp_coroutine.run              \
    329425        creation-upp_thread.run                 \
    330         creation-goroutine.run                  \
    331         creation-java_thread.run
     426        creation-python_coroutine.run           \
     427        creation-nodejs_coroutine.run           \
     428        creation-goroutine_thread.run           \
     429        creation-rust_thread.run                \
     430        creation-java_thread.run                \
     431        creation-pthread.run
     432
     433creation-cfa_generator$(EXEEXT):
     434        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/creation/cfa_gen.cfa
    332435
    333436creation-cfa_coroutine$(EXEEXT):
    334         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_cor.cfa
     437        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/creation/cfa_cor.cfa
    335438
    336439creation-cfa_coroutine_eager$(EXEEXT):
    337         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_cor.cfa  -DEAGER
     440        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/creation/cfa_cor.cfa  -DEAGER
    338441
    339442creation-cfa_thread$(EXEEXT):
    340         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_thrd.cfa
     443        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/creation/cfa_thrd.cfa
    341444
    342445creation-upp_coroutine$(EXEEXT):
    343         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/creation/upp_cor.cc
     446        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/creation/upp_cor.cc
    344447
    345448creation-upp_thread$(EXEEXT):
    346         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/creation/upp_thrd.cc
    347 
    348 creation-pthread$(EXEEXT):
    349         $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=250000   $(srcdir)/creation/pthreads.c
    350 
    351 creation-goroutine$(EXEEXT):
     449        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/creation/upp_thrd.cc
     450
     451creation-python_coroutine$(EXEEXT):
     452        $(BENCH_V_PY)echo "#!/bin/sh" > a.out
     453        echo "python3.7 $(srcdir)/creation/python_cor.py" >> a.out
     454        chmod a+x a.out
     455
     456creation-nodejs_coroutine$(EXEEXT):
     457        $(BENCH_V_NODEJS)echo "#!/bin/sh" > a.out
     458        echo "nodejs $(srcdir)/creation/node_cor.js" >> a.out
     459        chmod a+x a.out
     460
     461creation-goroutine_thread$(EXEEXT):
    352462        $(BENCH_V_GOC)go build -o a.out $(srcdir)/creation/goroutine.go
     463
     464creation-rust_thread$(EXEEXT):
     465        $(BENCH_V_RUSTC)rustc -C opt-level=3 -o a.out $(srcdir)/creation/rust_thrd.rs
    353466
    354467creation-java_thread$(EXEEXT):
    355468        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/creation/JavaThread.java
    356         @echo "#!/bin/sh" > a.out
    357         @echo "java JavaThread" >> a.out
    358         @chmod a+x a.out
    359 
    360 ## =========================================================================================================
    361 
    362 compile$(EXEEXT) :\
     469        echo "#!/bin/sh" > a.out
     470        echo "java JavaThread" >> a.out
     471        chmod a+x a.out
     472
     473creation-pthread$(EXEEXT):
     474        $(BENCH_V_CC)$(COMPILE) $(srcdir)/creation/pthreads.c
     475
     476## =========================================================================================================
     477
     478compile$(EXEEXT) :              \
    363479        compile-array.make      \
    364480        compile-attributes.make \
     
    370486        compile-typeof.make
    371487
    372 
    373488testdir = $(top_srcdir)/tests
    374489
    375490compile-array$(EXEEXT):
    376         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/array.cfa
     491        $(CFACOMPILE) -fsyntax-only -w $(testdir)/array.cfa
    377492
    378493compile-attributes$(EXEEXT):
    379         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/attributes.cfa
     494        $(CFACOMPILE) -fsyntax-only -w $(testdir)/attributes.cfa
    380495
    381496compile-empty$(EXEEXT):
    382         @$(CFACOMPILE) -fsyntax-only -w $(srcdir)/compile/empty.cfa
     497        $(CFACOMPILE) -fsyntax-only -w $(srcdir)/compile/empty.cfa
    383498
    384499compile-expression$(EXEEXT):
    385         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/expression.cfa
     500        $(CFACOMPILE) -fsyntax-only -w $(testdir)/expression.cfa
    386501
    387502compile-io$(EXEEXT):
    388         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/io1.cfa
     503        $(CFACOMPILE) -fsyntax-only -w $(testdir)/io1.cfa
    389504
    390505compile-monitor$(EXEEXT):
    391         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/monitor.cfa
     506        $(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/monitor.cfa
    392507
    393508compile-operators$(EXEEXT):
    394         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/operators.cfa
     509        $(CFACOMPILE) -fsyntax-only -w $(testdir)/operators.cfa
    395510
    396511compile-thread$(EXEEXT):
    397         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/thread.cfa
     512        $(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/thread.cfa
    398513
    399514compile-typeof$(EXEEXT):
    400         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
     515        $(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
     516
     517## =========================================================================================================
     518
     519size$(EXEEXT) : size-cfa.runquiet
     520
     521size-cfa$(EXEEXT):
     522        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/size/size.cfa
  • benchmark/Makefile.in

    r71d6bd8 r7030dab  
    9393EXTRA_PROGRAMS = dummy$(EXEEXT)
    9494@WITH_LIBFIBRE_TRUE@am__append_1 = \
    95 @WITH_LIBFIBRE_TRUE@    ctxswitch-kos_fibre.run  \
     95@WITH_LIBFIBRE_TRUE@    ctxswitch-kos_fibre.run         \
    9696@WITH_LIBFIBRE_TRUE@    ctxswitch-kos_fibre2.run
    9797
     
    352352LTCFACOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
    353353        $(LIBTOOLFLAGS) --mode=compile $(CFACC) $(DEFS) \
    354         $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(CFAFLAGS) \
    355         $(AM_CFLAGS) $(CFLAGS)
     354        $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CFAFLAGS) $(AM_CFLAGS) $(CFAFLAGS) $(CFLAGS)
    356355
    357356AM_V_CFA = $(am__v_CFA_@AM_V@)
     
    359358am__v_CFA_0 = @echo "  CFA     " $@;
    360359am__v_CFA_1 =
    361 AM_V_JAVAC = $(am__v_JAVAC_@AM_V@)
    362 am__v_JAVAC_ = $(am__v_JAVAC_@AM_DEFAULT_V@)
    363 am__v_JAVAC_0 = @echo "  JAVAC   " $@;
    364 am__v_JAVAC_1 =
    365 AM_V_GOC = $(am__v_GOC_@AM_V@)
    366 am__v_GOC_ = $(am__v_GOC_@AM_DEFAULT_V@)
    367 am__v_GOC_0 = @echo "  GOC     " $@;
    368 am__v_GOC_1 =
    369360UPPCC = u++
    370361UPPCOMPILE = $(UPPCC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_UPPFLAGS) $(UPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_CFLAGS) $(CFLAGS)
     
    373364am__v_UPP_0 = @echo "  UPP     " $@;
    374365am__v_UPP_1 =
     366AM_V_GOC = $(am__v_GOC_@AM_V@)
     367am__v_GOC_ = $(am__v_GOC_@AM_DEFAULT_V@)
     368am__v_GOC_0 = @echo "  GOC     " $@;
     369am__v_GOC_1 =
     370AM_V_PY = $(am__v_PY_@AM_V@)
     371am__v_PY_ = $(am__v_PY_@AM_DEFAULT_V@)
     372am__v_PY_0 = @echo "  PYTHON  " $@;
     373am__v_PY_1 =
     374AM_V_RUST = $(am__v_RUST_@AM_V@)
     375am__v_RUST_ = $(am__v_RUST_@AM_DEFAULT_V@)
     376am__v_RUST_0 = @echo "  RUST    " $@;
     377am__v_RUST_1 =
     378AM_V_NODEJS = $(am__v_NODEJS_@AM_V@)
     379am__v_NODEJS_ = $(am__v_NODEJS_@AM_DEFAULT_V@)
     380am__v_NODEJS_0 = @echo "  NODEJS  " $@;
     381am__v_NODEJS_1 =
     382AM_V_JAVAC = $(am__v_JAVAC_@AM_V@)
     383am__v_JAVAC_ = $(am__v_JAVAC_@AM_DEFAULT_V@)
     384am__v_JAVAC_0 = @echo "  JAVAC   " $@;
     385am__v_JAVAC_1 =
    375386
    376387# applies to both programs
     
    381392BENCH_V_CFA = $(__bench_v_CFA_$(__quiet))
    382393BENCH_V_CXX = $(__bench_v_CXX_$(__quiet))
     394BENCH_V_UPP = $(__bench_v_UPP_$(__quiet))
    383395BENCH_V_GOC = $(__bench_v_GOC_$(__quiet))
     396BENCH_V_PY = $(__bench_v_PY_$(__quiet))
     397BENCH_V_RUSTC = $(__bench_v_RUSTC_$(__quiet))
     398BENCH_V_NODEJS = $(__bench_v_NODEJS_$(__quiet))
    384399BENCH_V_JAVAC = $(__bench_v_JAVAC_$(__quiet))
    385 BENCH_V_UPP = $(__bench_v_UPP_$(__quiet))
    386400__quiet = verbose
    387401__bench_v_CC_quiet = @
    388402__bench_v_CFA_quiet = @
    389403__bench_v_CXX_quiet = @
     404__bench_v_UPP_quiet = @
    390405__bench_v_GOC_quiet = @
     406__bench_v_RUSTC_quiet = @
    391407__bench_v_JAVAC_quiet = @
    392 __bench_v_UPP_quiet = @
    393408__bench_v_CC_verbose = $(AM_V_CC)
    394409__bench_v_CFA_verbose = $(AM_V_CFA)
    395410__bench_v_CXX_verbose = $(AM_V_CXX)
     411__bench_v_UPP_verbose = $(AM_V_UPP)
    396412__bench_v_GOC_verbose = $(AM_V_GOC)
     413__bench_v_PY_verbose = $(AM_V_PY)
     414__bench_v_RUSTC_verbose = $(AM_V_RUST)
     415__bench_v_NODEJS_verbose = $(AM_V_NODEJS)
    397416__bench_v_JAVAC_verbose = $(AM_V_JAVAC)
    398 __bench_v_UPP_verbose = $(AM_V_UPP)
    399417TOOLSDIR = ${abs_top_builddir}/tools/
    400418REPEAT = ${abs_top_builddir}/tools/repeat
    401419STATS = ${abs_top_srcdir}/tools/stat.py
    402420# NEED AT LEAST 4 DATA VALUES FOR BENCHMARKS BECAUSE THE MAX AND MIN VALUES ARE REMOVED
    403 repeats = 5 # 31 for benchmarks
     421repeats = 13 # 31 for benchmarks
    404422arch = x64
    405423skipcompile = no
     
    407425PRINT_FORMAT = %20s: #Comments needed for spacing
    408426dummy_SOURCES = dummyC.c dummyCXX.cpp
     427basic_loop_DURATION = 15000000000
     428basic_function_DURATION = 10000000000
     429basic_tls_fetch_add_DURATION = 10000000000
     430basic_DURATION = 250000000
     431ctxswitch_pthread_DURATION = 25000000
     432ctxswitch_rust_thread_DURATION = $(ctxswitch_pthread_DURATION)
     433ctxswitch_cfa_generator_DURATION = 5000000000
     434ctxswitch_nodejs_await_DURATION = 5000000
     435ctxswitch_DURATION = 100000000
     436
     437#mutex_java_DURATION = 10000000
     438mutex_DURATION = 50000000
     439schedint_pthread_DURATION = 1000000
     440schedint_java_DURATION = $(schedint_pthread_DURATION)
     441schedint_rust_DURATION = $(schedint_pthread_DURATION)
     442schedint_DURATION = 10000000
     443schedext_DURATION = 10000000
     444creation_pthread_DURATION = 250000
     445creation_rust_thread_DURATION = ${creation_pthread_DURATION}
     446creation_java_thread_DURATION = ${creation_pthread_DURATION}
     447creation_cfa_coroutine_DURATION = 100000000
     448creation_cfa_coroutine_eager_DURATION = 10000000
     449creation_cfa_generator_DURATION = 1000000000
     450creation_upp_coroutine_DURATION = ${creation_cfa_coroutine_eager_DURATION}
     451creation_cfa_thread_DURATION = 10000000
     452creation_upp_thread_DURATION = ${creation_cfa_thread_DURATION}
     453creation_DURATION = 10000000
    409454FIX_NEW_LINES = cat $@ | tr "\n" "\t" | sed -r 's/\t,/,/' | tr "\t" "\n" > $@
    410 CTXSWITCH_DEPEND = loop.run function.run fetch_add.run ttst_lock.run \
    411         tls-fetch_add.run ctxswitch-pthread.run \
    412         ctxswitch-cfa_generator.run ctxswitch-cfa_coroutine.run \
    413         ctxswitch-cfa_thread.run ctxswitch-cfa_thread2.run \
    414         ctxswitch-upp_coroutine.run ctxswitch-upp_thread.run \
    415         ctxswitch-goroutine.run ctxswitch-java_thread.run \
    416         $(am__append_1)
     455BASIC_DEPEND = \
     456        basic-loop.run                          \
     457        basic-function.run                      \
     458        basic-fetch_add.run                     \
     459        basic-ttst_lock.run                     \
     460        basic-tls-fetch_add.run
     461
     462CTXSWITCH_DEPEND = ctxswitch-cfa_generator.run \
     463        ctxswitch-cfa_coroutine.run ctxswitch-cfa_thread.run \
     464        ctxswitch-cfa_thread2.run ctxswitch-upp_coroutine.run \
     465        ctxswitch-upp_thread.run ctxswitch-python_coroutine.run \
     466        ctxswitch-nodejs_coroutine.run ctxswitch-nodejs_await.run \
     467        ctxswitch-goroutine_thread.run ctxswitch-rust_thread.run \
     468        ctxswitch-nodejs_coroutine.run ctxswitch-java_thread.run \
     469        ctxswitch-pthread.run $(am__append_1)
    417470testdir = $(top_srcdir)/tests
    418471all: all-am
     
    733786
    734787dummyC.c:
    735         @echo "int main() { return 0; }" > ${@}
     788        echo "int main() { return 0; }" > ${@}
    736789
    737790dummyCXX.cpp:
    738         @echo "int main() { return 0; }" > ${@}
    739 
     791        echo "int main() { return 0; }" > ${@}
     792
     793.SILENT:                # do not print recipe
    740794.NOTPARALLEL:
    741 .PHONY: compile.csv ctxswitch.csv mutex.csv signal.csv
    742 
    743 all : ctxswitch$(EXEEXT) mutex$(EXEEXT) signal$(EXEEXT) waitfor$(EXEEXT) creation$(EXEEXT)
     795.PHONY: jenkins cleancsv
     796
     797all : basic$(EXEEXT) ctxswitch$(EXEEXT) mutex$(EXEEXT) schedint$(EXEEXT) schedext$(EXEEXT) creation$(EXEEXT)
    744798
    745799%.run : %$(EXEEXT) ${REPEAT}
    746         @rm -f .result.log
    747         @echo "------------------------------------------------------"
    748         @echo $<
    749         @${REPEAT} ${repeats} ./a.out | tee -a .result.log
    750         @${STATS} .result.log
    751         @echo "------------------------------------------------------"
    752         @rm -f a.out .result.log *.class
     800        rm -f .result.log
     801        echo "------------------------------------------------------"
     802        echo $<
     803        ${REPEAT} ${repeats} -- ./a.out\
     804                $(if ${$(subst -,_,$(basename $@))_DURATION},\
     805                        ${$(subst -,_,$(basename $@))_DURATION},\
     806                        ${$(firstword $(subst -, ,$(basename $@)))_DURATION}) | tee -a .result.log
     807        ${STATS} .result.log
     808        echo "------------------------------------------------------"
     809        rm -f a.out .result.log *.class
     810
     811#       ${REPEAT} ${repeats} -- /usr/bin/time -f "%Uu %Ss %Er %Mkb" ./a.out
    753812
    754813%.runquiet :
    755         @+make $(basename $@) CFLAGS="-w" __quiet=quiet
    756         @taskset -c 1 ./a.out
    757         @rm -f a.out
     814        +make $(basename $@) CFLAGS="-w" __quiet=quiet
     815        taskset -c 1 ./a.out
     816        rm -f a.out
    758817
    759818%.make :
    760         @printf "${PRINT_FORMAT}" $(basename $(subst compile-,,$@))
    761         @+/usr/bin/time -f ${TIME_FORMAT} make $(basename $@) 2>&1
     819        printf "${PRINT_FORMAT}" $(basename $(subst compile-,,$@))
     820        +/usr/bin/time -f ${TIME_FORMAT} make $(basename $@) 2>&1
    762821
    763822${REPEAT} :
    764         @+make -C ${abs_top_builddir}/tools repeat
    765 
    766 jenkins$(EXEEXT):
     823        +make -C ${abs_top_builddir}/tools repeat
     824
     825cleancsv:
     826        rm -f compile.csv basic.csv ctxswitch.csv mutex.csv scheduling.csv
     827
     828jenkins$(EXEEXT): cleancsv
    767829@DOifskipcompile@
    768         @+make compile.csv
    769         @-+make compile.diff.csv
     830        +make compile.csv
     831        -+make compile.diff.csv
    770832@DOendif@
    771         @+make ctxswitch.csv
    772         @-+make ctxswitch.diff.csv
    773         @+make mutex.csv
    774         @-+make mutex.diff.csv
    775         @+make signal.csv
    776         @-+make signal.diff.csv
     833        +make ctxswitch.csv
     834        -+make ctxswitch.diff.csv
     835        +make mutex.csv
     836        -+make mutex.diff.csv
     837        +make scheduling.csv
     838        -+make scheduling.diff.csv
    777839@DOifskipcompile@
    778840        cat compile.csv
     
    783845        cat mutex.csv
    784846        -cat mutex.diff.csv
    785         cat signal.csv
    786         -cat signal.diff.csv
     847        cat scheduling.csv
     848        -cat scheduling.diff.csv
    787849
    788850compile.csv:
    789         @echo "array,attributes,empty,expression,io,monitor,operators,typeof" > $@
    790         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-array.make >> $@
    791         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-attributes.make >> $@
    792         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-empty.make >> $@
    793         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-expression.make >> $@
    794         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-io.make >> $@
    795         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-monitor.make >> $@
    796         @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-operators.make >> $@
    797         @+make TIME_FORMAT='%e' PRINT_FORMAT='' compile-typeof.make >> $@
    798         @$(srcdir)/fixcsv.sh $@
     851        echo "building $@"
     852        echo "array,attributes,empty,expression,io,monitor,operators,typeof" > $@
     853        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-array.make >> $@
     854        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-attributes.make >> $@
     855        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-empty.make >> $@
     856        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-expression.make >> $@
     857        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-io.make >> $@
     858        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-monitor.make >> $@
     859        +make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-operators.make >> $@
     860        +make TIME_FORMAT='%e' PRINT_FORMAT='' compile-typeof.make >> $@
     861        $(srcdir)/fixcsv.sh $@
    799862
    800863ctxswitch.csv:
    801         @echo "generator,coroutine,thread" > $@
    802         @+make ctxswitch-cfa_generator.runquiet >> $@ && echo -n ',' >> $@
    803         @+make ctxswitch-cfa_coroutine.runquiet >> $@ && echo -n ',' >> $@
    804         @+make ctxswitch-cfa_thread.runquiet >> $@
    805         @$(srcdir)/fixcsv.sh $@
     864        echo "building $@"
     865        echo "generator,coroutine,thread" > $@
     866        +make ctxswitch-cfa_generator.runquiet >> $@ && echo -n ',' >> $@
     867        +make ctxswitch-cfa_coroutine.runquiet >> $@ && echo -n ',' >> $@
     868        +make ctxswitch-cfa_thread.runquiet >> $@
     869        $(srcdir)/fixcsv.sh $@
    806870
    807871mutex.csv:
    808         @echo "1-monitor,2-monitor" > $@
    809         @+make mutex-cfa1.runquiet >> $@ && echo -n ',' >> $@
    810         @+make mutex-cfa2.runquiet >> $@
    811         @$(srcdir)/fixcsv.sh $@
    812 
    813 signal.csv:
    814         @echo "signal-1,signal-2,waitfor-1,waitfor-2" > $@
    815         @+make signal-cfa1.runquiet >> $@ && echo -n ',' >> $@
    816         @+make signal-cfa2.runquiet >> $@ && echo -n ',' >> $@
    817         @+make waitfor-cfa1.runquiet >> $@ && echo -n ',' >> $@
    818         @+make waitfor-cfa2.runquiet >> $@
    819         @$(srcdir)/fixcsv.sh $@
     872        echo "building $@"
     873        echo "1-monitor,2-monitor" > $@
     874        +make mutex-cfa1.runquiet >> $@ && echo -n ',' >> $@
     875        +make mutex-cfa2.runquiet >> $@
     876        $(srcdir)/fixcsv.sh $@
     877
     878scheduling.csv:
     879        echo "building $@"
     880        echo "schedint-1,schedint-2,schedext-1,schedext-2" > $@
     881        +make schedint-cfa1.runquiet >> $@ && echo -n ',' >> $@
     882        +make schedint-cfa2.runquiet >> $@ && echo -n ',' >> $@
     883        +make schedext-cfa1.runquiet >> $@ && echo -n ',' >> $@
     884        +make schedext-cfa2.runquiet >> $@
     885        $(srcdir)/fixcsv.sh $@
    820886
    821887%.diff.csv: %.csv
    822         @test -e $(srcdir)/baselines/$(arch)/$< || (echo "Error : Missing baseline for ${<}" && false)
    823         @$(srcdir)/baselines/calc.py $(srcdir)/baselines/$(arch)/$(<) $(<) > $@
    824 
    825 loop$(EXEEXT):
    826         $(BENCH_V_CC)$(COMPILE) -DBENCH_N=5000000000 $(srcdir)/loop.c
    827 
    828 function$(EXEEXT):
    829         $(BENCH_V_CC)$(COMPILE) -DBENCH_N=5000000000 $(srcdir)/function.c
    830 
    831 fetch_add$(EXEEXT):
    832         $(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/fetch_add.c
    833 
    834 ttst_lock$(EXEEXT):
    835         $(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/ttst_lock.c
    836 
    837 tls-fetch_add$(EXEEXT):
    838         $(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/tls-fetch_add.c
     888        test -e $(srcdir)/baselines/$(arch)/$< || (echo "Error : Missing baseline for ${<}" && false)
     889        $(srcdir)/baselines/calc.py $(srcdir)/baselines/$(arch)/$(<) $(<) > $@
     890
     891basic-loop$(EXEEXT):
     892        $(BENCH_V_CC)$(COMPILE) $(srcdir)/basic/loop.c
     893
     894basic-function$(EXEEXT):
     895        $(BENCH_V_CC)$(COMPILE) $(srcdir)/basic/function.c
     896
     897basic-fetch_add$(EXEEXT):
     898        $(BENCH_V_CC)$(COMPILE) $(srcdir)/basic/fetch_add.c
     899
     900basic-ttst_lock$(EXEEXT):
     901        $(BENCH_V_CC)$(COMPILE) $(srcdir)/basic/ttst_lock.c
     902
     903basic-tls-fetch_add$(EXEEXT):
     904        $(BENCH_V_CC)$(COMPILE) $(srcdir)/basic/tls_fetch_add.c
     905
     906basic$(EXEEXT): $(BASIC_DEPEND)
    839907
    840908@WITH_LIBFIBRE_TRUE@ctxswitch-kos_fibre$(EXEEXT):
     
    846914ctxswitch$(EXEEXT): $(CTXSWITCH_DEPEND)
    847915
    848 ctxswitch-pthread$(EXEEXT):
    849         $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=50000000 $(srcdir)/ctxswitch/pthreads.c
    850 
    851916ctxswitch-cfa_generator$(EXEEXT):
    852         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_gen.cfa
     917        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/ctxswitch/cfa_gen.cfa
    853918
    854919ctxswitch-cfa_coroutine$(EXEEXT):
    855         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_cor.cfa
     920        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/ctxswitch/cfa_cor.cfa
    856921
    857922ctxswitch-cfa_thread$(EXEEXT):
    858         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_thrd.cfa
     923        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/ctxswitch/cfa_thrd.cfa
    859924
    860925ctxswitch-cfa_thread2$(EXEEXT):
    861         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_thrd2.cfa
     926        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/ctxswitch/cfa_thrd2.cfa
    862927
    863928ctxswitch-upp_coroutine$(EXEEXT):
    864         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/upp_cor.cc
     929        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/ctxswitch/upp_cor.cc
    865930
    866931ctxswitch-upp_thread$(EXEEXT):
    867         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/upp_thrd.cc
    868 
    869 ctxswitch-goroutine$(EXEEXT):
     932        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/ctxswitch/upp_thrd.cc
     933
     934ctxswitch-python_coroutine$(EXEEXT):
     935        $(BENCH_V_PY)echo "#!/bin/sh" > a.out
     936        echo "python3.7 $(srcdir)/ctxswitch/python_cor.py" >> a.out
     937        chmod a+x a.out
     938
     939ctxswitch-nodejs_coroutine$(EXEEXT):
     940        $(BENCH_V_NODEJS)echo "#!/bin/sh" > a.out
     941        echo "nodejs $(srcdir)/ctxswitch/node_cor.js" >> a.out
     942        chmod a+x a.out
     943
     944ctxswitch-nodejs_await$(EXEEXT):
     945        $(BENCH_V_NODEJS)echo "#!/bin/sh" > a.out
     946        echo "nodejs $(srcdir)/ctxswitch/node_await.js" >> a.out
     947        chmod a+x a.out
     948
     949ctxswitch-goroutine_thread$(EXEEXT):
    870950        $(BENCH_V_GOC)go build -o a.out $(srcdir)/ctxswitch/goroutine.go
     951
     952ctxswitch-rust_thread$(EXEEXT):
     953        $(BENCH_V_RUSTC)rustc -C opt-level=3 -o a.out $(srcdir)/ctxswitch/rust_thrd.rs
    871954
    872955ctxswitch-java_thread$(EXEEXT):
    873956        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/ctxswitch/JavaThread.java
    874         @echo "#!/bin/sh" > a.out
    875         @echo "java JavaThread" >> a.out
    876         @chmod a+x a.out
    877 
    878 mutex$(EXEEXT) :\
    879         loop.run                        \
    880         function.run            \
    881         fetch_add.run           \
    882         mutex-pthread_lock.run  \
    883         mutex-upp.run           \
     957        echo "#!/bin/sh" > a.out
     958        echo "java JavaThread" >> a.out
     959        chmod a+x a.out
     960
     961ctxswitch-pthread$(EXEEXT):
     962        $(BENCH_V_CC)$(COMPILE) $(srcdir)/ctxswitch/pthreads.c
     963
     964mutex$(EXEEXT) :                \
    884965        mutex-cfa1.run          \
    885966        mutex-cfa2.run          \
    886967        mutex-cfa4.run          \
    887         mutex-java_thread.run
    888 
    889 mutex-pthread_lock$(EXEEXT):
    890         $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=50000000 $(srcdir)/mutex/pthreads.c
     968        mutex-upp.run           \
     969        mutex-go.run            \
     970        mutex-rust.run          \
     971        mutex-java.run          \
     972        mutex-pthread.run
     973
     974mutex-pthread$(EXEEXT):
     975        $(BENCH_V_CC)$(COMPILE) $(srcdir)/mutex/pthreads.c
     976
     977mutex-cfa1$(EXEEXT):
     978        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/mutex/cfa1.cfa
     979
     980mutex-cfa2$(EXEEXT):
     981        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/mutex/cfa2.cfa
     982
     983mutex-cfa4$(EXEEXT):
     984        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/mutex/cfa4.cfa
    891985
    892986mutex-upp$(EXEEXT):
    893         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/mutex/upp.cc
    894 
    895 mutex-cfa1$(EXEEXT):
    896         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa1.cfa
    897 
    898 mutex-cfa2$(EXEEXT):
    899         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa2.cfa
    900 
    901 mutex-cfa4$(EXEEXT):
    902         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa4.cfa
    903 
    904 mutex-java_thread$(EXEEXT):
     987        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/mutex/upp.cc
     988
     989mutex-go$(EXEEXT):
     990        $(BENCH_V_GOC)go build -o a.out $(srcdir)/mutex/goroutine.go
     991
     992mutex-rust$(EXEEXT):
     993        $(BENCH_V_RUSTC)rustc -C opt-level=3 -o a.out $(srcdir)/mutex/rust.rs
     994
     995mutex-java$(EXEEXT):
    905996        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/mutex/JavaThread.java
    906         @echo "#!/bin/sh" > a.out
    907         @echo "java JavaThread" >> a.out
    908         @chmod a+x a.out
    909 
    910 signal$(EXEEXT) :\
    911         signal-pthread_cond.run \
    912         signal-upp.run          \
    913         signal-cfa1.run         \
    914         signal-cfa2.run         \
    915         signal-cfa4.run         \
    916         signal-java_thread.run
    917 
    918 signal-pthread_cond$(EXEEXT):
    919         $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=500000  $(srcdir)/schedint/pthreads.c
    920 
    921 signal-upp$(EXEEXT):
    922         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=5000000 $(srcdir)/schedint/upp.cc
    923 
    924 signal-cfa1$(EXEEXT):
    925         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa1.cfa
    926 
    927 signal-cfa2$(EXEEXT):
    928         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa2.cfa
    929 
    930 signal-cfa4$(EXEEXT):
    931         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa4.cfa
    932 
    933 signal-java_thread$(EXEEXT):
     997        echo "#!/bin/sh" > a.out
     998        echo "java JavaThread" >> a.out
     999        chmod a+x a.out
     1000
     1001schedint$(EXEEXT) :             \
     1002        schedint-cfa1.run       \
     1003        schedint-cfa2.run       \
     1004        schedint-cfa4.run       \
     1005        schedint-upp.run        \
     1006        schedint-rust.run       \
     1007        schedint-java.run       \
     1008        schedint-pthread.run
     1009
     1010schedint-cfa1$(EXEEXT):
     1011        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/schedint/cfa1.cfa
     1012
     1013schedint-cfa2$(EXEEXT):
     1014        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/schedint/cfa2.cfa
     1015
     1016schedint-cfa4$(EXEEXT):
     1017        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/schedint/cfa4.cfa
     1018
     1019schedint-upp$(EXEEXT):
     1020        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/schedint/upp.cc
     1021
     1022schedint-rust$(EXEEXT):
     1023        $(BENCH_V_RUSTC)rustc -C opt-level=3 -o a.out $(srcdir)/schedint/rust.rs
     1024
     1025schedint-java$(EXEEXT):
    9341026        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/schedint/JavaThread.java
    935         @echo "#!/bin/sh" > a.out
    936         @echo "java JavaThread" >> a.out
    937         @chmod a+x a.out
    938 
    939 waitfor$(EXEEXT) :\
    940         waitfor-upp.run         \
    941         waitfor-cfa1.run                \
    942         waitfor-cfa2.run                \
    943         waitfor-cfa4.run
    944 
    945 waitfor-upp$(EXEEXT):
    946         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=5000000 $(srcdir)/schedext/upp.cc
    947 
    948 waitfor-cfa1$(EXEEXT):
    949         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa1.cfa
    950 
    951 waitfor-cfa2$(EXEEXT):
    952         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa2.cfa
    953 
    954 waitfor-cfa4$(EXEEXT):
    955         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa4.cfa
    956 
    957 creation$(EXEEXT) :\
    958         creation-pthread.run                    \
     1027        echo "#!/bin/sh" > a.out
     1028        echo "java JavaThread" >> a.out
     1029        chmod a+x a.out
     1030
     1031schedint-pthread$(EXEEXT):
     1032        $(BENCH_V_CC)$(COMPILE) $(srcdir)/schedint/pthreads.c
     1033
     1034schedext$(EXEEXT) :             \
     1035        schedext-cfa1.run       \
     1036        schedext-cfa2.run       \
     1037        schedext-cfa4.run       \
     1038        schedext-upp.run        \
     1039        schedext-goroutine.run
     1040
     1041schedext-cfa1$(EXEEXT):
     1042        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/schedext/cfa1.cfa
     1043
     1044schedext-cfa2$(EXEEXT):
     1045        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/schedext/cfa2.cfa
     1046
     1047schedext-cfa4$(EXEEXT):
     1048        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/schedext/cfa4.cfa
     1049
     1050schedext-upp$(EXEEXT):
     1051        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/schedext/upp.cc
     1052
     1053schedext-goroutine$(EXEEXT):
     1054        $(BENCH_V_GOC)go build -o a.out $(srcdir)/schedext/goroutine.go
     1055
     1056creation$(EXEEXT) :                             \
     1057        creation-cfa_generator.run              \
    9591058        creation-cfa_coroutine.run              \
    9601059        creation-cfa_coroutine_eager.run        \
     
    9621061        creation-upp_coroutine.run              \
    9631062        creation-upp_thread.run                 \
    964         creation-goroutine.run                  \
    965         creation-java_thread.run
     1063        creation-python_coroutine.run           \
     1064        creation-nodejs_coroutine.run           \
     1065        creation-goroutine_thread.run           \
     1066        creation-rust_thread.run                \
     1067        creation-java_thread.run                \
     1068        creation-pthread.run
     1069
     1070creation-cfa_generator$(EXEEXT):
     1071        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/creation/cfa_gen.cfa
    9661072
    9671073creation-cfa_coroutine$(EXEEXT):
    968         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_cor.cfa
     1074        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/creation/cfa_cor.cfa
    9691075
    9701076creation-cfa_coroutine_eager$(EXEEXT):
    971         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_cor.cfa  -DEAGER
     1077        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/creation/cfa_cor.cfa  -DEAGER
    9721078
    9731079creation-cfa_thread$(EXEEXT):
    974         $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_thrd.cfa
     1080        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/creation/cfa_thrd.cfa
    9751081
    9761082creation-upp_coroutine$(EXEEXT):
    977         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/creation/upp_cor.cc
     1083        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/creation/upp_cor.cc
    9781084
    9791085creation-upp_thread$(EXEEXT):
    980         $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/creation/upp_thrd.cc
    981 
    982 creation-pthread$(EXEEXT):
    983         $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=250000   $(srcdir)/creation/pthreads.c
    984 
    985 creation-goroutine$(EXEEXT):
     1086        $(BENCH_V_UPP)$(UPPCOMPILE) $(srcdir)/creation/upp_thrd.cc
     1087
     1088creation-python_coroutine$(EXEEXT):
     1089        $(BENCH_V_PY)echo "#!/bin/sh" > a.out
     1090        echo "python3.7 $(srcdir)/creation/python_cor.py" >> a.out
     1091        chmod a+x a.out
     1092
     1093creation-nodejs_coroutine$(EXEEXT):
     1094        $(BENCH_V_NODEJS)echo "#!/bin/sh" > a.out
     1095        echo "nodejs $(srcdir)/creation/node_cor.js" >> a.out
     1096        chmod a+x a.out
     1097
     1098creation-goroutine_thread$(EXEEXT):
    9861099        $(BENCH_V_GOC)go build -o a.out $(srcdir)/creation/goroutine.go
     1100
     1101creation-rust_thread$(EXEEXT):
     1102        $(BENCH_V_RUSTC)rustc -C opt-level=3 -o a.out $(srcdir)/creation/rust_thrd.rs
    9871103
    9881104creation-java_thread$(EXEEXT):
    9891105        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/creation/JavaThread.java
    990         @echo "#!/bin/sh" > a.out
    991         @echo "java JavaThread" >> a.out
    992         @chmod a+x a.out
    993 
    994 compile$(EXEEXT) :\
     1106        echo "#!/bin/sh" > a.out
     1107        echo "java JavaThread" >> a.out
     1108        chmod a+x a.out
     1109
     1110creation-pthread$(EXEEXT):
     1111        $(BENCH_V_CC)$(COMPILE) $(srcdir)/creation/pthreads.c
     1112
     1113compile$(EXEEXT) :              \
    9951114        compile-array.make      \
    9961115        compile-attributes.make \
     
    10031122
    10041123compile-array$(EXEEXT):
    1005         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/array.cfa
     1124        $(CFACOMPILE) -fsyntax-only -w $(testdir)/array.cfa
    10061125
    10071126compile-attributes$(EXEEXT):
    1008         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/attributes.cfa
     1127        $(CFACOMPILE) -fsyntax-only -w $(testdir)/attributes.cfa
    10091128
    10101129compile-empty$(EXEEXT):
    1011         @$(CFACOMPILE) -fsyntax-only -w $(srcdir)/compile/empty.cfa
     1130        $(CFACOMPILE) -fsyntax-only -w $(srcdir)/compile/empty.cfa
    10121131
    10131132compile-expression$(EXEEXT):
    1014         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/expression.cfa
     1133        $(CFACOMPILE) -fsyntax-only -w $(testdir)/expression.cfa
    10151134
    10161135compile-io$(EXEEXT):
    1017         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/io1.cfa
     1136        $(CFACOMPILE) -fsyntax-only -w $(testdir)/io1.cfa
    10181137
    10191138compile-monitor$(EXEEXT):
    1020         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/monitor.cfa
     1139        $(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/monitor.cfa
    10211140
    10221141compile-operators$(EXEEXT):
    1023         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/operators.cfa
     1142        $(CFACOMPILE) -fsyntax-only -w $(testdir)/operators.cfa
    10241143
    10251144compile-thread$(EXEEXT):
    1026         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/thread.cfa
     1145        $(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/thread.cfa
    10271146
    10281147compile-typeof$(EXEEXT):
    1029         @$(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
     1148        $(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
     1149
     1150size$(EXEEXT) : size-cfa.runquiet
     1151
     1152size-cfa$(EXEEXT):
     1153        $(BENCH_V_CFA)$(CFACOMPILE) $(srcdir)/size/size.cfa
    10301154
    10311155# Tell versions [3.59,3.63) of GNU make to not export all variables.
  • benchmark/bench.h

    r71d6bd8 r7030dab  
    55#endif
    66        #include <stdlib.h>
    7         #include <unistd.h>                                     // sysconf
     7        #include <stdint.h>                             // uint64_t
     8        #include <unistd.h>                             // sysconf
    89#if ! defined(__cforall)
    910        #include <time.h>
     
    1516
    1617
    17 static inline unsigned long long int bench_time() {
    18     struct timespec ts;
    19     clock_gettime(
    20 #if defined( __linux__ )
    21          CLOCK_THREAD_CPUTIME_ID,
    22 #elif defined( __freebsd__ )
    23          CLOCK_PROF,
    24 #elif defined( __solaris__ )
    25          CLOCK_HIGHRES,
    26 #else
    27     #error uC++ : internal error, unsupported architecture
    28 #endif
    29          &ts );
    30     return 1000000000LL * ts.tv_sec + ts.tv_nsec;
    31 } // Time
     18static inline uint64_t bench_time() {
     19        struct timespec ts;
     20        clock_gettime( CLOCK_THREAD_CPUTIME_ID, &ts );
     21        return 1000000000LL * ts.tv_sec + ts.tv_nsec;
     22} // bench_time
    3223
    3324#ifndef BENCH_N
    34 #define BENCH_N 500 //10000000
     25#define BENCH_N 10000000
    3526#endif
    3627
     28size_t times = BENCH_N;
     29
     30#define BENCH_START()                           \
     31        if ( argc > 2 ) exit( EXIT_FAILURE );   \
     32        if ( argc == 2 ) {                      \
     33                times = atoi( argv[1] );        \
     34        }
     35
    3736#define BENCH(statement, output)                \
    38         size_t n = BENCH_N;                     \
    39         if( argc > 2 ) return 1;                \
    40         if( argc == 2 ) {                               \
    41                 n = atoi(argv[1]);              \
    42         }                                               \
    43         long long int StartTime, EndTime;       \
     37        uint64_t StartTime, EndTime;            \
    4438        StartTime = bench_time();               \
    45         statement;                                      \
     39        statement;                              \
    4640        EndTime = bench_time();                 \
    47         double output =         \
    48             (double)( EndTime - StartTime ) / n;
     41        double output = (double)( EndTime - StartTime ) / times;
     42
    4943
    5044#if defined(__cforall)
     
    5347}
    5448#endif
     49#if defined(__U_CPLUSPLUS__)
     50unsigned int uDefaultPreemption() {
     51        return 0;
     52}
     53#endif
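
Taken together, these bench.h changes replace the per-benchmark -DBENCH_N compile-time counts with a single runtime-adjustable `times` variable: BENCH_START() reads an optional command-line argument, and BENCH() now divides the elapsed time by `times`. A minimal usage sketch under the new interface (the `work()` helper and the explicit <stdio.h> include are illustrative additions; the macro usage follows bench.h above and the pthreads benchmarks below):

        #include <stdio.h>
        #include "bench.h"

        // placeholder workload; each real benchmark substitutes its own statement
        static void __attribute__((noinline)) work() {}

        int main( int argc, char * argv[] ) {
                BENCH_START()                   // argv[1], when given, overrides the BENCH_N default stored in `times`
                BENCH(
                        for ( size_t i = 0; i < times; i++ ) {
                                work();
                        },
                        result                  // declared by the macro: nanoseconds per iteration
                )
                printf( "%g\n", result );       // same output format as the benchmarks below
        }
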
  • benchmark/creation/JavaThread.java

    r71d6bd8 r7030dab  
    2626        static int x = 2;
    2727
    28         static private final int NoOfTimes = Integer.parseInt("10000") ;
     28        static private int times = Integer.parseInt("10000") ;
    2929
    3030        public static class MyThread extends Thread {
     
    3333        }
    3434        public static void helper() throws InterruptedException {
    35                 for(int i = 1; i <= NoOfTimes; i += 1) {
     35                for(int i = 1; i <= times; i += 1) {
    3636                        MyThread m = new MyThread();
    3737                        x = nextRandom( x );
     
    4444                helper();
    4545                long end = System.nanoTime();
    46                 System.out.println( (end - start) / NoOfTimes );
     46                System.out.println( (end - start) / times );
    4747        }
    4848        public static void main(String[] args) throws InterruptedException {
    49                 for (int n = Integer.parseInt("5"); --n >= 0 ; ) {
     49                if ( args.length > 2 ) System.exit( 1 );
     50                if ( args.length == 2 ) { times = Integer.parseInt(args[1]); }
     51
     52                for (int i = Integer.parseInt("5"); --i >= 0 ; ) {
    5053                        InnerMain();
    51                         Thread.sleep(2000);     // 2 seconds
     54                        Thread.sleep(2000);             // 2 seconds
    5255                        x = nextRandom(x);
    5356                }
     
    5558        }
    5659}
     60
     61// Local Variables: //
     62// tab-width: 4 //
     63// End: //
  • benchmark/creation/cfa_cor.cfa

    r71d6bd8 r7030dab  
    55
    66coroutine MyCoroutine {};
    7 void ?{} (MyCoroutine & this) {
     7void ?{}( MyCoroutine & this ) {
    88#ifdef EAGER
    9         resume(this);
     9        resume( this );
    1010#endif
    1111}
    12 void main(MyCoroutine &) {}
     12void main( MyCoroutine & ) {}
    1313
    14 int main(int argc, char* argv[]) {
     14int main( int argc, char * argv[] ) {
     15        BENCH_START()
    1516        BENCH(
    16                 for ( i; n ) {
    17                         MyCoroutine m;
     17                for ( times ) {
     18                        MyCoroutine c;
    1819                },
    1920                result
    2021        )
     22        printf( "%g\n", result );
     23}
    2124
    22         printf("%g\n", result);
    23 }
     25// Local Variables: //
     26// tab-width: 4 //
     27// End: //
  • benchmark/creation/cfa_thrd.cfa

    r71d6bd8 r7030dab  
    77void main(MyThread &) {}
    88
    9 int main(int argc, char* argv[]) {
     9int main( int argc, char * argv[] ) {
     10        BENCH_START()
    1011        BENCH(
    11                 for ( i; n ) {
     12                for ( times ) {
    1213                        MyThread m;
    1314                },
    1415                result
    1516        )
     17        printf( "%g\n", result );
     18}
    1619
    17         printf("%g\n", result);
    18 }
     20// Local Variables: //
     21// tab-width: 4 //
     22// End: //
  • benchmark/creation/goroutine.go

    r71d6bd8 r7030dab  
    22
    33import (
    4     "fmt"
    5     "time"
     4        "fmt"
     5        "time"
     6        "os"
     7        "strconv"
    68)
    79
     
    1719
    1820func main() {
    19         const NoOfTimes = 500000
     21        var times int = 10000000
     22        if len( os.Args ) > 2 { os.Exit( 1 ) }
     23        if len( os.Args ) == 2 { times, _ = strconv.Atoi(os.Args[1]) }
     24
    2025        start := time.Now()
    21         for i := 1; i <= NoOfTimes; i += 1 {
     26        for i := 1; i <= times; i += 1 {
    2227                go noop()               // creation
     28                <- shake                // wait for completion
    2329        }
    2430        end := time.Now()
    25         fmt.Printf("%d\n", end.Sub(start) / time.Duration(NoOfTimes))
    26         <- shake
     31        fmt.Printf( "%d\n", end.Sub(start) / time.Duration(times) )
    2732}
     33
     34// Local Variables: //
     35// tab-width: 4 //
     36// End: //
  • benchmark/creation/pthreads.c

    r71d6bd8 r7030dab  
    44#include "bench.h"
    55
    6 static void *foo(void *arg) {
     6static void * foo(void *arg) {
    77    return arg;
    88}
    99
    10 int main(int argc, char* argv[]) {
     10int main( int argc, char * argv[] ) {
     11        BENCH_START()
    1112        BENCH(
    12                 for (size_t i = 0; i < n; i++) {
     13                for (size_t i = 0; i < times; i++) {
    1314                        pthread_t thread;
    1415                        if (pthread_create(&thread, NULL, foo, NULL) < 0) {
     
    1617                                return 1;
    1718                        }
    18 
    1919                        if (pthread_join( thread, NULL) < 0) {
    2020                                perror( "failure" );
     
    2424                result
    2525        )
     26        printf( "%g\n", result );
     27}
    2628
    27         printf("%g\n", result);
    28 }
     29// Local Variables: //
     30// tab-width: 4 //
     31// End: //
  • benchmark/creation/upp_cor.cc

    r71d6bd8 r7030dab  
    55_Coroutine MyCor {
    66        void main() {}
     7  public:
     8        MyCor() { resume(); }
    79};
    810
    9 int main(int argc, char* argv[]) {
     11int main( int argc, char * argv[] ) {
     12        BENCH_START()
    1013        BENCH(
    11                 for (size_t i = 0; i < n; i++) {
     14                for (size_t i = 0; i < times; i++) {
    1215                        MyCor m;
    1316                },
    1417                result
    1518        )
     19        printf( "%g\n", result );
     20}
    1621
    17         printf("%g\n", result);
    18 }
     22// Local Variables: //
     23// tab-width: 4 //
     24// End: //
  • benchmark/creation/upp_thrd.cc

    r71d6bd8 r7030dab  
    77};
    88
    9 int main(int argc, char* argv[]) {
     9int main( int argc, char * argv[] ) {
     10        BENCH_START()
    1011        BENCH(
    11                 for (size_t i = 0; i < n; i++) {
     12                for (size_t i = 0; i < times; i++) {
    1213                        MyThread m;
    1314                },
    1415                result
    1516        )
     17        printf( "%g\n", result );
     18}
    1619
    17         printf("%g\n", result);
    18 }
     20// Local Variables: //
     21// tab-width: 4 //
     22// End: //
  • benchmark/ctxswitch/JavaThread.java

    r71d6bd8 r7030dab  
    2626        static int x = 2;
    2727
    28         static private final int NoOfTimes = Integer.parseInt("1000000") ;
     28        static private int times = Integer.parseInt("100000");
    2929
    3030        public static void helper() {
    31                 for(int i = 1; i <= NoOfTimes; i += 1) {
     31                for(int i = 1; i <= times; i += 1) {
    3232                        Thread.yield();
    3333                }
     
    3737                helper();
    3838                long end = System.nanoTime();
    39                 System.out.println( (end - start) / NoOfTimes );
     39                System.out.println( (end - start) / times );
    4040        }
    4141        public static void main(String[] args) throws InterruptedException {
    42                 for (int n = Integer.parseInt("5"); --n >= 0 ; ) {
     42                if ( args.length > 2 ) System.exit( 1 );
     43                if ( args.length == 2 ) { times = Integer.parseInt(args[1]); }
     44
     45                for (int i = Integer.parseInt("5"); --i >= 0 ; ) {
    4346                        InnerMain();
    44                         Thread.sleep(2000);     // 2 seconds
     47                        Thread.sleep(2000);     // 2 seconds
    4548                        x = nextRandom(x);
    4649                }
     
    4851        }
    4952}
     53
     54// Local Variables: //
     55// tab-width: 4 //
     56// End: //
  • benchmark/ctxswitch/cfa_cor.cfa

    r71d6bd8 r7030dab  
    22#include <thread.hfa>
    33
    4 #include "bench.h"
     4#include "../bench.h"
    55
    6 coroutine GreatSuspender {};
    7 
    8 void ?{}( GreatSuspender & this ) {
    9         prime(this);
    10 }
    11 
    12 void main( __attribute__((unused)) GreatSuspender & this ) {
    13         while( true ) {
    14                 suspend();
     6coroutine C {};
     7void main( __attribute__((unused)) C & ) {
     8        for () {
     9                suspend;
    1510        }
    1611}
    17 
    18 int main(int argc, char* argv[]) {
    19         GreatSuspender s;
    20 
     12int main( int argc, char * argv[] ) {
     13        C c;
     14        BENCH_START()
    2115        BENCH(
    22                 for ( i; n ) {
    23                         resume( s );
     16                for ( times ) {
     17                        resume( c );
    2418                },
    2519                result
    2620        )
     21        printf( "%g\n", result );
     22}
    2723
    28         printf("%g\n", result);
    29 }
     24// Local Variables: //
     25// tab-width: 4 //
     26// End: //
  • benchmark/ctxswitch/cfa_cor_then.cfa

    r71d6bd8 r7030dab  
    66void noOp(void) {}
    77
    8 coroutine GreatSuspender {};
     8coroutine C {} c;
    99
    10 void ?{}( GreatSuspender & this ) {
     10void ?{}( C & this ) {
    1111        prime(this);
    1212}
    1313
    14 void main( __attribute__((unused)) GreatSuspender & this ) {
    15         while( true ) {
     14void main( __attribute__((unused)) C & this ) {
     15        while () {
    1616                suspend_then(noOp);
    1717        }
    1818}
    1919
    20 int main(int argc, char* argv[]) {
    21         GreatSuspender s;
    22 
     20int main( int argc, char * argv[] ) {
     21        BENCH_START()
    2322        BENCH(
    24                 for ( i; n ) {
    25                         resume( s );
     23                for ( times ) {
     24                        resume( c );
    2625                },
    2726                result
    2827        )
     28        printf( "%g\n", result );
     29}
    2930
    30         printf("%g\n", result);
    31 }
     31// Local Variables: //
     32// tab-width: 4 //
     33// End: //
  • benchmark/ctxswitch/cfa_gen.cfa

    r71d6bd8 r7030dab  
    11#include "../bench.h"
    22
    3 typedef struct {
    4         void * next;
    5 } GreatSuspender;
    6 
    7 void comain( GreatSuspender * this ) {
    8     if ( __builtin_expect(this->next != 0, 1) ) goto *(this->next);
    9     this->next = &&s1;
     3generator G {};
     4void main( G & ) {
    105        for () {
    11             return;
    12           s1: ;
     6                suspend;
    137        }
    148}
    159
    16 int main(int argc, char* argv[]) {
    17     GreatSuspender s = { 0 };
    18 
     10int main( int argc, char * argv[] ) {
     11        G g;
     12        BENCH_START()
    1913        BENCH(
    20                 for ( i; n ) {
    21                         comain( &s );
     14                for ( times ) {
     15                        resume( g );
    2216                },
    2317                result
    2418        )
     19        printf( "%g\n", result );
     20}
    2521
    26         printf("%g\n", result);
    27 }
     22// Local Variables: //
     23// tab-width: 4 //
     24// End: //
  • benchmark/ctxswitch/cfa_thrd.cfa

    r71d6bd8 r7030dab  
    33#include "bench.h"
    44
    5 int main(int argc, char* argv[]) {
     5int main( int argc, char * argv[] ) {
     6        BENCH_START()
    67        BENCH(
    7                 for ( i; n ) {
     8                for ( times ) {
    89                        yield();
    910                },
    1011                result
    1112        )
     13        printf( "%g\n", result );
     14}
    1215
    13         printf("%g\n", result);
    14 }
     16// Local Variables: //
     17// tab-width: 4 //
     18// End: //
  • benchmark/ctxswitch/cfa_thrd2.cfa

    r71d6bd8 r7030dab  
    88
    99void main(__attribute__((unused)) Fibre & this) {
    10         while(!done) {
     10        while ( ! done ) {
    1111                yield();
    1212        }
    1313}
    1414
    15 int main(int argc, char* argv[]) {
     15int main( int argc, char * argv[] ) {
     16        BENCH_START()
    1617        Fibre f1;
    1718        BENCH(
    18                 for ( i; n ) {
     19                for ( times ) {
    1920                        yield();
    2021                },
    2122                result
    2223        )
     24        printf( "%g\n", result );
     25        done = true;
     26}
    2327
    24         printf("%g\n", result);
    25         done = true;
    26         return 0;
    27 }
     28// Local Variables: //
     29// tab-width: 4 //
     30// End: //
  • benchmark/ctxswitch/goroutine.go

    r71d6bd8 r7030dab  
    22
    33import (
    4     "fmt"
    5     "runtime"
    6     "time"
     4        "fmt"
     5        "time"
     6        "os"
     7        "strconv"
     8        "runtime"
    79)
    810
     
    2830
    2931func main() {
    30         const NoOfTimes = 10000000
    31         go ContextSwitch( NoOfTimes )           // context switch
     32        var times int = 10000000
     33        if len( os.Args ) > 2 { os.Exit( 1 ) }
     34        if len( os.Args ) == 2 { times, _ = strconv.Atoi(os.Args[1]) }
     35        go ContextSwitch( times )               // context switch
    3236        <- shake
    3337}
     38
     39// Local Variables: //
     40// tab-width: 4 //
     41// End: //
  • benchmark/ctxswitch/kos_fibre.cpp

    r71d6bd8 r7030dab  
    33#include "bench.h"
    44
    5 int main(int argc, char* argv[]) {
     5int main( int argc, char * argv[] ) {
     6        BENCH_START()
    67        BENCH(
    7                 for (size_t i = 0; i < n; i++) {
     8                for (size_t i = 0; i < times; i++) {
    89                        Fibre::yield();
    910                },
    1011                result
    1112        )
    12         printf("%g\n", result);
    13         return 0;
     13        printf( "%g\n", result );
    1414}
     15
     16// Local Variables: //
     17// tab-width: 4 //
     18// End: //
  • benchmark/ctxswitch/kos_fibre2.cpp

    r71d6bd8 r7030dab  
    1111}
    1212
    13 int main(int argc, char* argv[]) {
     13int main( int argc, char * argv[] ) {
     14        BENCH_START()
    1415        Fibre* f1 = (new Fibre)->run(f1main);
    1516        BENCH(
    16                 for (size_t i = 0; i < n; i++) {
     17                for (size_t i = 0; i < times; i++) {
    1718                        Fibre::yield();
    1819                },
    1920                result
    2021        )
    21         printf("%g\n", result);
     22        printf( "%g\n", result );
    2223        done = true;
    2324        Fibre::yield();
    2425        f1->join();
    25         return 0;
    2626}
     27
     28// Local Variables: //
     29// tab-width: 4 //
     30// End: //
  • benchmark/ctxswitch/pthreads.c

    r71d6bd8 r7030dab  
    66#include "bench.h"
    77
    8 int main(int argc, char* argv[]) {
     8int main( int argc, char * argv[] ) {
     9        BENCH_START()
    910        BENCH(
    10                 for (size_t i = 0; i < n; i++) {
     11                for (size_t i = 0; i < times; i++) {
    1112                        sched_yield();
    1213                },
    1314                result
    1415        )
    15 
    16         printf("%g\n", result);
     16        printf( "%g\n", result );
    1717}
  • benchmark/ctxswitch/upp_cor.cc

    r71d6bd8 r7030dab  
    33#include "bench.h"
    44
    5 _Coroutine GreatSuspender {
    6 public:
    7         GreatSuspender() {
    8                 resume();
    9         }
    10 
    11         void do_resume() {
    12                 resume();
    13         }
    14 private:
     5_Coroutine C {
    156        void main() {
    167                while( true ) {
     
    189                }
    1910        }
    20 };
    21 
    22 int main(int argc, char* argv[]) {
    23         GreatSuspender s;
    24 
     11  public:
     12        void do_resume() {
     13                resume();
     14        }
     15} c;
     16int main( int argc, char * argv[] ) {
     17        BENCH_START()
    2518        BENCH(
    26                 for (size_t i = 0; i < n; i++) {
    27                         s.do_resume();
     19                for (size_t i = 0; i < times; i++) {
     20                        c.do_resume();
    2821                },
    2922                result
    3023        )
     24        printf( "%g\n", result );
     25}
    3126
    32         printf("%g\n", result);
    33 }
     27// Local Variables: //
     28// tab-width: 4 //
     29// End: //
  • benchmark/ctxswitch/upp_thrd.cc

    r71d6bd8 r7030dab  
    33#include "bench.h"
    44
    5 int main(int argc, char* argv[]) {
     5int main( int argc, char * argv[] ) {
     6        BENCH_START()
    67        BENCH(
    7                 for (size_t i = 0; i < n; i++) {
     8                for (size_t i = 0; i < times; i++) {
    89                        uThisTask().yield();
    910                },
    1011                result
    1112        )
     13        printf( "%g\n", result );
     14}
    1215
    13         printf("%g\n", result);
    14 }
     16// Local Variables: //
     17// tab-width: 4 //
     18// End: //
  • benchmark/mutex/JavaThread.java

    r71d6bd8 r7030dab  
    2626        static int x = 2;
    2727
    28         static private final int NoOfTimes = Integer.parseInt("100000000") ;
     28        static private int times = Integer.parseInt("100000000");
    2929
    3030        public synchronized void noop() {
     
    3535                // Inhibit biased locking ...
    3636                x = (j.hashCode() ^ System.identityHashCode(j)) | 1 ;     
    37                 for(int i = 1; i <= NoOfTimes; i += 1) {
     37                for(int i = 1; i <= times; i += 1) {
    3838                        x = nextRandom(x);
    3939                        j.noop();
     
    4444                helper();
    4545                long end = System.nanoTime();
    46                 System.out.println( (end - start) / NoOfTimes );
     46                System.out.println( (end - start) / times );
    4747        }
    4848        public static void main(String[] args) throws InterruptedException {
     49                if ( args.length > 2 ) System.exit( 1 );
     50                if ( args.length == 2 ) { times = Integer.parseInt(args[1]); }
     51
    4952                for (int n = Integer.parseInt("5"); --n >= 0 ; ) {
    5053                        InnerMain();
     
    5558        }
    5659}
     60
     61// Local Variables: //
     62// tab-width: 4 //
     63// End: //
  • benchmark/mutex/cfa1.cfa

    r71d6bd8 r7030dab  
    44#include "bench.h"
    55
    6 monitor M {};
    7 void __attribute__((noinline)) call( M & mutex m ) {}
     6monitor M {} m1;
     7void __attribute__((noinline)) call( M & mutex p1 ) {}
    88
    9 int main(int argc, char* argv[]) {
    10         M m;
     9int main( int argc, char * argv[] ) {
     10        BENCH_START()
    1111        BENCH(
    12                 for ( i; n ) {
    13                         call(m);
     12                for ( times ) {
     13                        call( m1 );
    1414                },
    1515                result
    1616        )
     17        printf( "%g\n", result );
     18}
    1719
    18         printf("%g\n", result);
    19 }
     20// Local Variables: //
     21// tab-width: 4 //
     22// End: //
  • benchmark/mutex/cfa2.cfa

    r71d6bd8 r7030dab  
    44#include "bench.h"
    55
    6 monitor M {};
    7 void __attribute__((noinline)) call( M & mutex m1, M & mutex m2 ) {}
     6monitor M {} m1, m2;
    87
    9 int main(int argc, char* argv[]) {
    10         M m1, m2;
     8void __attribute__((noinline)) call( M & mutex p1, M & mutex p2 ) {}
     9
     10int main( int argc, char * argv[] ) {
     11        BENCH_START()
    1112        BENCH(
    12                 for ( i; n ) {
    13                         call(m1, m2);
     13                for ( times ) {
     14                        call( m1, m2 );
    1415                },
    1516                result
    1617        )
     18        printf( "%g\n", result );
     19}
    1720
    18         printf("%g\n", result);
    19 }
     21// Local Variables: //
     22// tab-width: 4 //
     23// End: //
  • benchmark/mutex/cfa4.cfa

    r71d6bd8 r7030dab  
    55
    66
    7 monitor M {};
    8 void __attribute__((noinline)) call( M & mutex m1, M & mutex m2, M & mutex m3, M & mutex m4 ) {}
     7monitor M {} m1, m2, m3, m4;
     8void __attribute__((noinline)) call( M & mutex p1, M & mutex p2, M & mutex p3, M & mutex p4 ) {}
    99
    10 int main(int argc, char* argv[]) {
    11         M m1, m2, m3, m4;
     10int main( int argc, char * argv[] ) {
     11        BENCH_START()
    1212        BENCH(
    13                 for ( i; n ) {
    14                         call(m1, m2, m3, m4);
     13                for ( times ) {
     14                        call( m1, m2, m3, m4 );
    1515                },
    1616                result
    1717        )
     18        printf( "%g\n", result );
     19}
    1820
    19         printf("%g\n", result);
    20 }
     21// Local Variables: //
     22// tab-width: 4 //
     23// End: //
  • benchmark/mutex/pthreads.c

    r71d6bd8 r7030dab  
    77
    88void __attribute__((noinline)) call() {
    9          pthread_mutex_lock  (&mutex);
    10          pthread_mutex_unlock(&mutex);
     9         pthread_mutex_lock( &mutex );
     10         pthread_mutex_unlock( &mutex );
    1111}
    12 
    13 int main(int argc, char* argv[]) {
     12int main( int argc, char * argv[] ) {
     13        BENCH_START()
    1414        BENCH(
    15                 for (size_t i = 0; i < n; i++) {
     15                for ( size_t i = 0; i < times; i++ ) {
    1616                        call();
    1717                },
    1818                result
    1919        )
     20        printf( "%g\n", result );
     21}
    2022
    21         printf("%g\n", result);
    22 }
     23// Local Variables: //
     24// tab-width: 4 //
     25// End: //
  • benchmark/mutex/upp.cc

    r71d6bd8 r7030dab  
    88};
    99
    10 int main(int argc, char* argv[]) {
     10int main( int argc, char * argv[] ) {
     11        BENCH_START()
    1112        MyMonitor m;
    1213        BENCH(
    13                 for (size_t i = 0; i < n; i++) {
     14                for ( size_t i = 0; i < times; i++ ) {
    1415                        m.call();
    1516                },
    1617                result
    1718        )
     19        printf( "%g\n", result );
     20}
    1821
    19         printf("%g\n", result);
    20 }
     22// Local Variables: //
     23// tab-width: 4 //
     24// End: //
  • benchmark/schedext/cfa1.cfa

    r71d6bd8 r7030dab  
    44#include <stdio.h>
    55
    6 #include "bench.h"
     6#include "../bench.h"
    77
    8 int argc;
    9 char** argv;
    10 volatile int go = 0;
     8monitor M {} m1;
    119
    12 monitor M {};
    13 M m1;
    14 
    15 void __attribute__((noinline)) call( M & mutex a1 ) {}
    16 
    17 int  __attribute__((noinline)) wait( M & mutex a1 ) {
    18         go = 1;
    19         BENCH(
    20                 for ( i; n ) {
    21                         waitfor(call, a1);
    22                 },
    23                 result
    24         )
    25 
    26         printf("%g\n", result);
    27         go = 0;
    28         return 0;
     10void __attribute__((noinline)) call( M & mutex p1 ) {}
     11void __attribute__((noinline)) wait( M & mutex p1 ) {
     12        for ( times ) {
     13                waitfor( call : p1 );
     14        }
    2915}
    3016
    3117thread T {};
    32 void ^?{}( T & mutex this ) {}
    3318void main( T & ) {
    34         while(go == 0) { yield(); }
    35         while(go == 1) { call(m1); }
    36 
     19        BENCH(
     20                for ( times ) { call( m1 ); },
     21                result
     22        )
     23        printf( "%g\n", result );
    3724}
    3825
    39 int main(__attribute__((unused)) int argc, __attribute__((unused)) char* argv[]) {
     26int main( int argc, char * argv[] ) {
     27        BENCH_START()
    4028        T t;
    41         return wait(m1);
     29        wait( m1 );
    4230}
     31
     32// Local Variables: //
     33// tab-width: 4 //
     34// End: //
  • benchmark/schedext/cfa2.cfa

    r71d6bd8 r7030dab  
    44#include <stdio.h>
    55
    6 #include "bench.h"
     6#include "../bench.h"
    77
    8 int argc;
    9 char** argv;
    10 volatile int go = 0;
     8monitor M {} m1, m2;
    119
    12 monitor M {};
    13 M m1, m2;
    14 
    15 void __attribute__((noinline)) call( M & mutex a1, M & mutex a2 ) {}
    16 
    17 int  __attribute__((noinline)) wait( M & mutex a1, M & mutex a2 ) {
    18         go = 1;
     10void __attribute__((noinline)) call( M & mutex p1, M & mutex p2 ) {}
     11void __attribute__((noinline)) wait( M & mutex p1, M & mutex p2 ) {
     12        for ( times ) {
     13                waitfor( call : p1, p2 );
     14        }
     15}
     16thread T {};
     17void main( T & ) {
    1918        BENCH(
    20                 for ( i; n ) {
    21                         waitfor(call, a1, a2);
     19                for ( times ) {
     20                        call( m1, m2 );
    2221                },
    2322                result
    2423        )
    25 
    26         printf("%g\n", result);
    27         go = 0;
    28         return 0;
     24        printf( "%g\n", result );
    2925}
    3026
    31 thread T {};
    32 void ^?{}( T & mutex this ) {}
    33 void main( T & ) {
    34         while(go == 0) { yield(); }
    35         while(go == 1) { call(m1, m2); }
    36 
     27int main( int argc, char * argv[] ) {
     28        BENCH_START()
     29        T t;
     30        wait( m1, m2 );
    3731}
    3832
    39 int main(__attribute__((unused)) int argc, __attribute__((unused)) char* argv[]) {
    40         T t;
    41         return wait(m1, m2);
    42 }
     33// Local Variables: //
     34// tab-width: 4 //
     35// End: //
  • benchmark/schedext/cfa4.cfa

    r71d6bd8 r7030dab  
    44#include <stdio.h>
    55
    6 #include "bench.h"
     6#include "../bench.h"
    77
    8 int argc;
    9 char** argv;
    10 volatile int go = 0;
     8monitor M {} m1, m2, m3, m4;
    119
    12 monitor M {};
    13 M m1, m2, m3, m4;
    14 
    15 void __attribute__((noinline)) call( M & mutex a1, M & mutex a2, M & mutex a3, M & mutex a4 ) {}
    16 
    17 int  __attribute__((noinline)) wait( M & mutex a1, M & mutex a2, M & mutex a3, M & mutex a4 ) {
    18         go = 1;
     10void __attribute__((noinline)) call( M & mutex p1, M & mutex p2, M & mutex p3, M & mutex p4 ) {}
     11void __attribute__((noinline)) wait( M & mutex p1, M & mutex p2, M & mutex p3, M & mutex p4 ) {
     12        for ( times ) {
     13                waitfor( call : p1, p2, p3, p4 );
     14        }
     15}
     16thread T {};
     17void main( T & ) {
    1918        BENCH(
    20                 for ( i; n ) {
    21                         waitfor(call, a1, a2, a3, a4);
     19                for ( times ) {
     20                        call( m1, m2, m3, m4 );
    2221                },
    2322                result
    2423        )
    25 
    26         printf("%g\n", result);
    27         go = 0;
    28         return 0;
     24        printf( "%g\n", result );
    2925}
    3026
    31 thread T {};
    32 void ^?{}( T & mutex this ) {}
    33 void main( T & ) {
    34         while(go == 0) { yield(); }
    35         while(go == 1) { call(m1, m2, m3, m4); }
    36 
     27int main( int argc, char * argv[] ) {
     28        BENCH_START()
     29        T t;
     30        wait( m1, m2, m3, m4 );
    3731}
    3832
    39 int main(__attribute__((unused)) int argc, __attribute__((unused)) char* argv[]) {
    40         T t;
    41         return wait(m1, m2, m3, m4);
    42 }
     33// Local Variables: //
     34// tab-width: 4 //
     35// End: //
  • benchmark/schedext/upp.cc

    r71d6bd8 r7030dab  
    33#include "bench.h"
    44
    5 int argc;
    6 char** argv;
    7 volatile int go = 0;
    8 
    95_Monitor M {
    106public:
    117        void __attribute__((noinline)) call() {}
     8        void __attribute__((noinline)) wait() {
     9                for ( size_t i = 0; i < times; i++ ) {
     10                        _Accept(call);
     11                }
     12        }
     13} m;
    1214
    13         int __attribute__((noinline)) wait() {
    14                 go = 1;
     15_Task T {
     16        void main() {
    1517                BENCH(
    16                         for (size_t i = 0; i < n; i++) {
    17                                 _Accept(call);
     18                        for ( size_t i = 0; i < times; i++ ) {
     19                                m.call();
    1820                        },
    1921                        result
    2022                )
    21 
    22                 printf("%g\n", result);
    23                 go = 0;
    24                 return 0;
     23                printf( "%g\n", result );
    2524        }
    2625};
    2726
    28 M m;
     27int main( int argc, char * argv[] ) {
     28        BENCH_START()
     29        T t;
     30        m.wait();
     31}
    2932
    30 _Task T {
    31         void main() {
    32                 while(go == 0) { yield(); }
    33                 while(go == 1) { m.call(); }
    34 
    35         }
    36 };
    37 
    38 int main(int margc, char* margv[]) {
    39         argc = margc;
    40         argv = margv;
    41         T t;
    42         return m.wait();
    43 }
     33// Local Variables: //
     34// tab-width: 4 //
     35// End: //
  • benchmark/schedint/JavaThread.java

    r71d6bd8 r7030dab  
    4949        static int x = 2;
    5050
    51         static private final int NoOfTimes = Integer.parseInt("1000000") ;
     51        static private int times = Integer.parseInt("1000000");
    5252
    5353        public static void helper( Monitor m ) throws InterruptedException {
    54                 for(int i = 1; i <= NoOfTimes; i += 1) {
     54                for(int i = 1; i <= times; i += 1) {
     5555                        m.wait();               // release monitor lock
    5656                        m.next = true;
     
    6363                synchronized(m) {
    6464                        s.start();
    65                         while( !Monitor.go ) {
     65                        while( ! Monitor.go ) { // waiter must start first
    6666                                Thread.yield();
    6767                        }
     
    7272                Monitor.go = false;
    7373                s.join();
    74                 System.out.println( (end - start) / NoOfTimes);
     74                System.out.println( (end - start) / times);
    7575        }
    7676        public static void main(String[] args) throws InterruptedException {
     77                if ( args.length > 2 ) System.exit( 1 );
     78                if ( args.length == 2 ) { times = Integer.parseInt(args[1]); }
     79
    7780                for (int n = Integer.parseInt("5"); --n >= 0 ; ) {
    7881                        InnerMain();
     
    8386        }
    8487}
     88
     89// Local Variables: //
     90// tab-width: 4 //
     91// End: //
  • benchmark/schedint/cfa1.cfa

    r71d6bd8 r7030dab  
    44#include <stdio.h>
    55
    6 #include "bench.h"
     6#include "../bench.h"
    77
    8 int argc;
    9 char** argv;
    108volatile int go = 0;
    119
    1210condition c;
    13 monitor M {};
    14 M m1;
     11monitor M {} m1;
    1512
    16 void __attribute__((noinline)) call( M & mutex a1 ) {
    17         signal(c);
     13void __attribute__((noinline)) call( M & mutex p1 ) {
     14        signal( c );
    1815}
    19 
    20 int  __attribute__((noinline)) wait( M & mutex a1 ) {
     16void __attribute__((noinline)) wait( M & mutex p1 ) {
    2117        go = 1;
    22         BENCH(
    23                 for ( i; n ) {
    24                         wait(c);
    25                 },
    26                 result
    27         )
    28 
    29         printf("%g\n", result);
    30         go = 0;
    31         return 0;
     18        for ( times ) {
     19                wait( c );
     20        }
    3221}
    3322
    3423thread T {};
    35 void ^?{}( T & mutex ) {}
    3624void main( T & ) {
    37         while(go == 0) { yield(); }
    38         while(go == 1) { call(m1); }
    39 
     25        while ( go == 0 ) { yield(); } // waiter must start first
     26        BENCH(
     27                for ( times ) { call( m1 ); },
     28                result
     29        )
     30        printf( "%g\n", result );
    4031}
    4132
    42 int main(__attribute__((unused)) int argc, __attribute__((unused)) char* argv[]) {
     33int main( int argc, char * argv[] ) {
     34        BENCH_START()
    4335        T t;
    44         return wait(m1);
     36        wait( m1 );
    4537}
     38
     39// Local Variables: //
     40// tab-width: 4 //
     41// End: //
  • benchmark/schedint/cfa2.cfa

    r71d6bd8 r7030dab  
    44#include <stdio.h>
    55
    6 #include "bench.h"
     6#include "../bench.h"
    77
    8 int argc;
    9 char** argv;
    108volatile int go = 0;
    119
    1210condition c;
    13 monitor M {};
    14 M m1, m2;
     11monitor M {} m1, m2;
    1512
    16 void __attribute__((noinline)) call( M & mutex a1, M & mutex a2 ) {
    17         signal(c);
     13void __attribute__((noinline)) call( M & mutex p1, M & mutex p2 ) {
     14        signal( c );
    1815}
    19 
    20 int  __attribute__((noinline)) wait( M & mutex a1, M & mutex a2 ) {
     16void __attribute__((noinline)) wait( M & mutex p1, M & mutex p2 ) {
    2117        go = 1;
    22         BENCH(
    23                 for ( i; n ) {
    24                         wait(c);
    25                 },
    26                 result
    27         )
    28 
    29         printf("%g\n", result);
    30         go = 0;
    31         return 0;
     18        for ( times ) {
     19                wait( c );
     20        }
    3221}
    3322
    3423thread T {};
    35 void ^?{}( T & mutex this ) {}
    3624void main( T & ) {
    37         while(go == 0) { yield(); }
    38         while(go == 1) { call(m1, m2); }
    39 
     25        while ( go == 0 ) { yield(); } // waiter must start first
     26        BENCH(
     27                for ( times ) { call( m1, m2 ); },
     28                result
     29        )
     30        printf( "%g\n", result );
    4031}
    4132
    42 int main(__attribute__((unused)) int argc, __attribute__((unused)) char* argv[]) {
     33int main( int argc, char * argv[] ) {
     34        BENCH_START()
    4335        T t;
    44         return wait(m1, m2);
     36        wait( m1, m2 );
    4537}
     38
     39// Local Variables: //
     40// tab-width: 4 //
     41// End: //
  • benchmark/schedint/cfa4.cfa

    r71d6bd8 r7030dab  
    44#include <stdio.h>
    55
    6 #include "bench.h"
     6#include "../bench.h"
    77
    8 int argc;
    9 char** argv;
    108volatile int go = 0;
    119
    1210condition c;
    13 monitor M {};
    14 M m1, m2, m3, m4;
     11monitor M {} m1, m2, m3, m4;
    1512
    16 void __attribute__((noinline)) call( M & mutex a1, M & mutex a2, M & mutex a3, M & mutex a4 ) {
    17         signal(c);
     13void __attribute__((noinline)) call( M & mutex p1, M & mutex p2, M & mutex p3, M & mutex p4 ) {
     14        signal( c );
    1815}
    19 
    20 int  __attribute__((noinline)) wait( M & mutex a1, M & mutex a2, M & mutex a3, M & mutex a4 ) {
     16void __attribute__((noinline)) wait( M & mutex p1, M & mutex p2, M & mutex p3, M & mutex p4 ) {
    2117        go = 1;
    22         BENCH(
    23                 for ( i; n ) {
    24                         wait(c);
    25                 },
    26                 result
    27         )
    28 
    29         printf("%g\n", result);
    30         go = 0;
    31         return 0;
     18        for ( times ) {
     19                wait( c );
     20        }
    3221}
    3322
    3423thread T {};
    35 void ^?{}( T & mutex this ) {}
    3624void main( T & ) {
    37         while(go == 0) { yield(); }
    38         while(go == 1) { call(m1, m2, m3, m4); }
    39 
     25        while ( go == 0 ) { yield(); } // waiter must start first
     26        BENCH(
     27                for ( times ) { call( m1, m2, m3, m4 ); },
     28                result
     29        )
     30        printf( "%g\n", result );
    4031}
    4132
    42 int main(__attribute__((unused)) int argc, __attribute__((unused)) char* argv[]) {
     33int main( int argc, char * argv[] ) {
     34        BENCH_START()
    4335        T t;
    44         return wait(m1, m2, m3, m4);
     36        wait( m1, m2, m3, m4 );
    4537}
     38
     39// Local Variables: //
     40// tab-width: 4 //
     41// End: //
  • benchmark/schedint/pthreads.c

    r71d6bd8 r7030dab  
    44#include "bench.h"
    55
    6 int argc;
    7 char** argv;
    86volatile int go = 0;
    97
     8pthread_mutex_t m;
    109pthread_cond_t c;
    11 pthread_mutex_t m;
    1210
    1311void __attribute__((noinline)) call() {
    14         pthread_mutex_lock(&m);
    15         pthread_cond_signal(&c);
    16         pthread_mutex_unlock(&m);
     12        pthread_mutex_lock( &m );
     13        pthread_cond_signal( &c );
     14        pthread_mutex_unlock( &m );
    1715}
    1816
    19 int __attribute__((noinline)) wait() {
     17void __attribute__((noinline)) wait() {
    2018        pthread_mutex_lock(&m);
    2119        go = 1;
     20        for ( size_t i = 0; i < times; i++ ) {
     21                pthread_cond_wait( &c, &m );
     22        }
     23        go = 0;
     24        pthread_mutex_unlock( &m );
     25}
     26
     27void * thread_main( __attribute__((unused)) void * arg ) {
     28        while ( go == 0 ) { sched_yield(); } // waiter must start first
     29        // barging for lock acquire => may not execute N times
    2230        BENCH(
    23                 for (size_t i = 0; i < n; i++) {
    24                         pthread_cond_wait(&c, &m);
    25                 },
     31                while ( go == 1 ) { call(); },
    2632                result
    2733        )
    28 
    29         printf("%g\n", result);
    30         go = 0;
    31         pthread_mutex_unlock(&m);
    32         return 0;
    33 }
    34 
    35 void* thread_main(__attribute__((unused)) void * arg ) {
    36         while(go == 0) { sched_yield(); }
    37         while(go == 1) { call(); }
     34        printf( "%g\n", result );
    3835        return NULL;
    3936}
    4037
    41 int main(__attribute__((unused)) int argc, __attribute__((unused)) char* argv[]) {
     38int main( int argc, char * argv[] ) {
     39        BENCH_START()
    4240        pthread_t thread;
    43         if (pthread_create(&thread, NULL, thread_main, NULL) < 0) {
     41        if ( pthread_create( &thread, NULL, thread_main, NULL ) < 0 ) {
    4442                perror( "failure" );
    4543                return 1;
    4644        }
    4745        wait();
    48         if (pthread_join( thread, NULL) < 0) {
     46        if ( pthread_join( thread, NULL ) < 0 ) {
    4947                perror( "failure" );
    5048                return 1;
    5149        }
    52         return 0;
    5350}
     51
     52// Local Variables: //
     53// tab-width: 4 //
     54// End: //
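
For readers following the benchmark changes without bench.h at hand, the reworked pthreads signalling benchmark can be read as the self-contained C sketch below. Only the waiter/signaller structure is meant to be faithful to the diff above; the default `times` count, the argv parsing, and the renamed `wait_loop` are stand-ins for what BENCH_START()/BENCH(...) presumably provide, and the timing of the signaller loop is elided.

    #include <pthread.h>
    #include <sched.h>
    #include <stdio.h>
    #include <stdlib.h>

    static size_t times = 100000;                  // stand-in for the count parsed by BENCH_START()
    static volatile int go = 0;

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  c = PTHREAD_COND_INITIALIZER;

    static void call( void ) {                     // signaller: wake the waiter once
        pthread_mutex_lock( &m );
        pthread_cond_signal( &c );
        pthread_mutex_unlock( &m );
    }

    static void wait_loop( void ) {                // waiter: block `times` times (renamed from wait() to avoid POSIX wait)
        pthread_mutex_lock( &m );
        go = 1;                                    // signaller may start now
        for ( size_t i = 0; i < times; i += 1 ) {
            pthread_cond_wait( &c, &m );
        }
        go = 0;                                    // tell the signaller to stop
        pthread_mutex_unlock( &m );
    }

    static void * thread_main( void * arg ) {
        (void)arg;
        while ( go == 0 ) { sched_yield(); }       // waiter must start first
        while ( go == 1 ) { call(); }              // BENCH(...) would time this loop and print the result
        return NULL;
    }

    int main( int argc, char * argv[] ) {
        if ( argc > 1 ) times = strtoul( argv[1], NULL, 10 );
        pthread_t thread;
        if ( pthread_create( &thread, NULL, thread_main, NULL ) != 0 ) { perror( "create" ); return 1; }
        wait_loop();
        if ( pthread_join( thread, NULL ) != 0 ) { perror( "join" ); return 1; }
    }

As the diff's own comment notes, barging on lock acquisition means the signaller loop may execute more than `times` iterations.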
  • benchmark/schedint/upp.cc

    r71d6bd8 r7030dab  
    33#include "bench.h"
    44
    5 int argc;
    6 char** argv;
    75volatile int go = 0;
    86
     
    1311                cond.signal();
    1412        }
     13        void __attribute__((noinline)) wait() {
     14                go = 1;
     15                for ( size_t i = 0; i < times; i++ ) {
     16                        cond.wait();
     17                }
     18        }
     19} m;
    1520
    16         int __attribute__((noinline)) wait() {
    17                 go = 1;
     21_Task T {
     22        void main() {
     23                while ( go == 0 ) { yield(); } // waiter must start first
    1824                BENCH(
    19                         for (size_t i = 0; i < n; i++) {
    20                                 cond.wait();
     25                        for ( size_t i = 0; i < times; i++ ) {
     26                                m.call();
    2127                        },
    2228                        result
    2329                )
    24 
    25                 printf("%g\n", result);
    26                 go = 0;
    27                 return 0;
     30                printf( "%g\n", result );
    2831        }
    2932};
    3033
    31 M m;
     34int main( int argc, char * argv[] ) {
     35        BENCH_START()
     36        T t;
     37        m.wait();
     38}
    3239
    33 _Task T {
    34         void main() {
    35                 while(go == 0) { yield(); }
    36                 while(go == 1) { m.call(); }
    37 
    38         }
    39 };
    40 
    41 int main(__attribute__((unused)) int argc, __attribute__((unused)) char* argv[]) {
    42         T t;
    43         return m.wait();
    44 }
     40// Local Variables: //
     41// tab-width: 4 //
     42// End: //
  • configure

    r71d6bd8 r7030dab  
     25572557# don't use the default CFLAGS as they unconditionally add -O2
    25582558: ${CFLAGS=""}
     2559: ${CXXFLAGS=""}
    25592560
    25602561am__api_version='1.15'
  • configure.ac

    r71d6bd8 r7030dab  
     1414# don't use the default CFLAGS as they unconditionally add -O2
    1515: ${CFLAGS=""}
     16: ${CXXFLAGS=""}
    1617
    1718AM_INIT_AUTOMAKE([subdir-objects])
  • doc/bibliography/pl.bib

    r71d6bd8 r7030dab  
    99%    Predefined journal names:
    1010%  acmcs: Computing Surveys             acta: Acta Infomatica
    11 @string{acta="Acta Infomatica"}
    1211%  cacm: Communications of the ACM
    1312%  ibmjrd: IBM J. Research & Development ibmsj: IBM Systems Journal
     
    2221%  tcs: Theoretical Computer Science
    2322
     23@string{acta="Acta Infomatica"}
    2424string{ieeepds="IEEE Transactions on Parallel and Distributed Systems"}
    2525@string{ieeepds="IEEE Trans. Parallel Distrib. Syst."}
     
    124124    series      = {ACM Distinguished Dissertations},
    125125    year        = 1983,
     126}
     127
     128@article{Zhang19,
     129    keywords    = {Algebraic effects, dynamic scoping, exceptions, parametricity, type systems},
     130    author      = {Zhang, Yizhou and Myers, Andrew C.},
     131    title       = {Abstraction-safe Effect Handlers via Tunneling},
     132    journal     = {Proc. ACM Program. Lang.},
     133    issue_date  = {January 2019},
     134    volume      = {3},
     135    number      = {POPL},
     136    month       = jan,
     137    year        = {2019},
     138    issn        = {2475-1421},
     139    pages       = {5:1--5:29},
     140    articleno   = {5},
     141    publisher   = {ACM},
     142    address     = {New York, NY, USA},
     143}
     144
     145@inproceedings{Zhang16,
     146    keywords    = {Exception tunneling, Genus, exception handling},
     147    author      = {Zhang, Yizhou and Salvaneschi, Guido and Beightol, Quinn and Liskov, Barbara and Myers, Andrew C.},
     148    title       = {Accepting Blame for Safe Tunneled Exceptions},
     149    booktitle   = {Proceedings of the 37th ACM SIGPLAN Conference on Programming Language Design and Implementation},
     150    series      = {PLDI'16},
     151    year        = {2016},
     152    location    = {Santa Barbara, CA, USA},
     153    pages       = {281--295},
     154    publisher   = {ACM},
     155    address     = {New York, NY, USA},
    126156}
    127157
     
    398428    journal     = sigplan,
    399429    year        = 1981,
    400     month       = feb, volume = 16, number = 2, pages = {48-52},
     430    month       = feb,
     431    volume      = 16,
     432    number      = 2,
     433    pages       = {48-52},
    401434    comment     = {
    402435        A one-pass, top-down algorithm for overload resolution.  Input is a
     
    477510    title       = {An Alternative to Subclassing},
    478511    journal     = sigplan,
    479     volume      = {21},    number = {11},
     512    volume      = {21},
     513    number      = {11},
    480514    pages       = {424-428},
    481     month       = nov, year = 1986,
     515    month       = nov,
     516    year        = 1986,
    482517    comment     = {
    483518        The Smalltalk class hierarchy has three uses: factoring out code;
     
    533568    isbn        = {3-540-66538-2},
    534569    location    = {Toulouse, France},
    535     doi         = {http://doi.acm.org/10.1145/318773.319251},
    536570    publisher   = {Springer},
    537571    address     = {London, UK},
     
    631665    year        = 2010,
    632666    pages       = {39--50},
    633     numpages    = {12},
    634667    publisher   = {IEEE Computer Society},
    635668    address     = {Washington, DC, USA},
     
    922955}
    923956
     957@manual{C99,
     958    keywords    = {ISO/IEC C 9899},
     959    contributer = {pabuhr@plg},
     960    key         = {C99},
     961    title       = {C Programming Language {ISO/IEC} 9899:1999(E)},
     962    edition     = {2nd},
     963    publisher   = {International Standard Organization},
     964    address     = {\href{https://webstore.ansi.org/Standards/INCITS/INCITSISOIEC98991999R2005}{https://webstore.ansi.org/\-Standards/\-INCITS/\-INCITSISOIEC98991999R2005}},
     965    year        = 1999,
     966}
     967
    924968@manual{C11,
    925969    keywords    = {ISO/IEC C 11},
     
    13051349    location    = {London, United Kingdom},
    13061350    pages       = {41--53},
    1307     numpages    = {13},
    1308     url         = {http://doi.acm.org/10.1145/360204.360207},
    1309     doi         = {10.1145/360204.360207},
    1310     acmid       = {360207},
    13111351    publisher   = {ACM},
    13121352    address     = {New York, NY, USA},
     
    24082448    year        = 1993,
    24092449    pages       = {201--208},
    2410     url         = {http://doi.acm.org/10.1145/155360.155580},
    24112450    publisher   = {ACM},
    24122451    address     = {New York, NY, USA},
     
    26062645    location    = {Boulder, Colorado, USA},
    26072646    pages       = {91--97},
    2608     numpages    = {7},
    26092647    publisher   = {ACM},
    26102648    address     = {New York, NY, USA},
     
    26372675    issn        = {0004-5411},
    26382676    pages       = {215--225},
    2639     numpages    = {11},
    2640     url         = {http://doi.acm.org/10.1145/321879.321884},
    2641     doi         = {10.1145/321879.321884},
    2642     acmid       = {321884},
    26432677    publisher   = {ACM},
    26442678    address     = {New York, NY, USA},
     
    27082742}
    27092743
     2744@misc{Drepper13,
     2745    keywords    = {thread-local storage},
     2746    contributer = {pabuhr@plg},
     2747    author      = {Ulrich Drepper},
     2748    title       = {{ELF} Handling For Thread-Local Storage},
     2749    year        = 2013,
     2750    month       = aug,
     2751    note        = {WikipediA},
     2752    howpublished= {\href{http://www.akkadia.org/drepper/tls.pdf}
     2753                  {http://\-www.akkadia.org/\-drepper/\-tls.pdf}},
     2754}
     2755
    27102756@misc{Turley99,
    27112757    keywords    = {embedded system, micrprocessor},
     
    27182764    howpublished= {\href{https://www.eetimes.com/author.asp?sectionid=36&doc_id=1287712}
    27192765                  {https://\-www.eetimes.com/\-author.asp?sectionid=\-36&doc_id=1287712}},
     2766}
     2767
     2768@article{Xiao19,
     2769    keywords    = {bug classification, fault trigger, Linux operating system, regression bug},
     2770    contributer = {pabuhr@plg},
     2771    author      = {Guanping Xiao and Zheng Zheng and Beibei Yin and Kishor S. Trivedi and Xiaoting Du and Kai-Yuan Cai},
     2772    title       = {An Empirical Study of Fault Triggers in the Linux Operating System: An Evolutionary Perspective},
     2773    journal     = {IEEE Transactions on Reliability},
     2774    month       = dec,
     2775    year        = 2019,
     2776    volume      = 68,
     2777    number      = 4,
     2778    pages       = {1356-1383},
    27202779}
    27212780
     
    31373196}
    31383197
     3198@inproceedings{Palix11,
     3199    keywords    = {Linux, fault-finding tools},
     3200    contributer = {pabuhr@plg},
     3201    author      = {Nicolas Palix and Ga\"el Thomas and Suman Saha and Christophe Calv\`es and Julia Lawall and Gilles Muller},
     3202    title       = {Faults in Linux: Ten Years Later},
     3203    booktitle   = {Proc. of the 16 International Conf. on Arch. Support for Prog. Lang. and Oper. Sys.},
     3204    series      = {ASPLOS'11},
     3205    month       = mar,
     3206    year        = 2011,
     3207    location    = {Newport Beach, California, USA},
     3208    pages       = {305-318},
     3209    publisher   = {ACM},
     3210    address     = {New York, NY, USA},
     3211}
     3212
    31393213@article{Lamport87,
    31403214    keywords    = {software solutions, mutual exclusion, fast},
     
    32583332    issn        = {0001-0782},
    32593333    pages       = {107--115},
    3260     numpages    = {9},
    3261     url         = {http://doi.acm.org/10.1145/1538788.1538814},
    3262     doi         = {10.1145/1538788.1538814},
    3263     acmid       = {1538814},
    32643334    publisher   = {ACM},
    32653335    address     = {New York, NY, USA},
     
    36643734}
    36653735
     3736@mastersthesis{Radhakrishnan19,
     3737    author      = {Srihari Radhakrishnan},
     3738    title       = {High Performance Web Servers: A Study In Concurrent Programming Models},
     3739    school      = {School of Computer Sc., University of Waterloo},
     3740    year        = 2019,
     3741    optaddress  = {Waterloo, Ontario, Canada, N2L 3G1},
     3742    note        = {\href{https://uwspace.uwaterloo.ca/handle/10012/14706}{https://\-uwspace.uwaterloo.ca/\-handle/\-10012/\-14706}},
     3743}
     3744
    36663745@article{katzenelson83b,
    36673746    contributer = {gjditchfield@plg},
     
    36973776    pages       = {115-138},
    36983777    year        = 1971,
     3778}
     3779
     3780@inproceedings{Hagersten03,
     3781    keywords    = {cache storage, parallel architectures, performance evaluation, shared memory systems},
     3782    author      = {Zoran Radovi\'{c} and Erik Hagersten},
     3783    title       = {Hierarchical backoff locks for nonuniform communication architectures},
     3784    booktitle   = {Proceedings of the Ninth International Symposium on High-Performance Computer Architecture},
     3785    year        = {2003},
     3786    location    = {Anaheim, CA, USA},
     3787    pages       = {241-252},
     3788    publisher   = {IEEE},
    36993789}
    37003790
     
    43654455}
    43664456
     4457@misc{gccValueLabels,
     4458    keywords    = {gcc extension, value labels},
     4459    contributer = {pabuhr@plg},
     4460    key         = {Labels as Values},
     4461    author      = {{gcc Extension}},
     4462    title       = {Labels as Values},
     4463    year        = {since gcc-3},
     4464    howpublished= {\href{https://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html}
     4465                  {https:\-//gcc.gnu.org/\-onlinedocs/\-gcc/\-Labels-as-Values.html}},
     4466}
     4467
    43674468@mastersthesis{Clarke90,
    43684469    keywords    = {concurrency, postponing requests},
     
    44574558
    44584559@article{Pierce00,
    4459     keywords    = {Scala},
     4560    keywords    = {Scala, polymorphism, subtyping, type inference},
    44604561    contributer = {a3moss@uwaterloo.ca},
    44614562    author      = {Pierce, Benjamin C. and Turner, David N.},
     
    44694570    issn        = {0164-0925},
    44704571    pages       = {1--44},
    4471     numpages    = {44},
    4472     url         = {http://doi.acm.org/10.1145/345099.345100},
    4473     doi         = {10.1145/345099.345100},
    4474     acmid       = {345100},
    44754572    publisher   = {ACM},
    44764573    address     = {New York, NY, USA},
    4477     keywords    = {polymorphism, subtyping, type inference},
    44784574}
     4575
     4576@article{Dice15,
     4577    keywords    = {Concurrency, NUMA, hierarchical locks, locks, multicore, mutex, mutual exclusion, spin locks},
     4578    author      = {Dice, David and Marathe, Virendra J. and Shavit, Nir},
     4579    title       = {Lock Cohorting: A General Technique for Designing NUMA Locks},
     4580    journal     = {ACM Trans. Parallel Comput.},
     4581    issue_date  = {January 2015},
     4582    volume      = 1,
     4583    number      = 2,
     4584    month       = feb,
     4585    year        = 2015,
     4586    pages       = {13:1--13:42},
     4587    publisher   = {ACM},
     4588    address     = {New York, NY, USA},
     4589}
    44794590
    44804591@article{Sundell08,
     
    45544665    journal     = sigplan,
    45554666    year        = 1989,
    4556     month       = jun, volume = 24, number = 6, pages = {37-48},
     4667    month       = jun,
     4668    volume      = 24,
     4669    number      = 6,
     4670    pages       = {37-48},
    45574671    abstract    = {
    45584672        This paper describes a scheme we have used to manage a large
     
    49955109    year        = 1986,
    49965110    pages       = {313--326},
    4997     numpages    = {14},
    49985111    publisher   = {ACM},
    49995112    address     = {New York, NY, USA},
     
    50115124    year        = 1986,
    50125125    pages       = {327--348},
    5013     numpages    = {22},
    50145126    publisher   = {ACM},
    50155127    address     = {New York, NY, USA},
     
    52085320    year        = 2005,
    52095321    pages       = {146-196},
    5210     numpages    = {51},
    52115322    publisher   = {ACM},
    52125323    address     = {New York, NY, USA},
     
    53545465    year        = 2000,
    53555466    pages       = {29-46},
    5356     note        = {OOPSLA'00, Oct. 15--19, 2000, Minneapolis, Minnesota, U.S.A.},
     5467    note        = {OOPSLA'00, Oct. 15--19, 2000, Minneapolis, Minn., U.S.A.},
    53575468}
    53585469
     
    54685579    location    = {San Diego, California, USA},
    54695580    pages       = {101--112},
    5470     numpages    = {12},
    5471     url         = {http://doi.acm.org/10.1145/2535838.2535878},
    5472     doi         = {10.1145/2535838.2535878},
    5473     acmid       = {2535878},
    54745581    publisher   = {ACM},
    54755582    address     = {New York, NY, USA},
     
    55755682    issn        = {0362-1340},
    55765683    pages       = {30--42},
    5577     numpages    = {13},
    5578     url         = {http://doi.acm.org/10.1145/947586.947589},
    5579     doi         = {10.1145/947586.947589},
    55805684    publisher   = {ACM},
    55815685    address     = {New York, NY, USA}
     
    61126216    month       = 9,
    61136217    year        = 2005,
     6218}
     6219
     6220@article{Bauer15,
     6221    keywords    = {resumption exceptions, theory},
     6222    contributer = {pabuhr@plg},
     6223    author      = {Andrej Bauer and Matija Pretnar},
     6224    title       = {Programming with Algebraic Effects and Handlers},
     6225    journal     = {Journal of Logical and Algebraic Methods in Programming},
     6226    publisher   = {Elsevier BV},
     6227    volume      = 84,
     6228    number      = 1,
     6229    month       = jan,
     6230    year        = 2015,
     6231    pages       = {108-123},
    61146232}
    61156233
     
    64996617    issn        = {0164-0925},
    65006618    pages       = {429-475},
    6501     url         = {http://doi.acm.org/10.1145/1133651.1133653},
    6502     doi         = {10.1145/1133651.1133653},
    6503     acmid       = {1133653},
    65046619    publisher   = {ACM},
    65056620    address     = {New York, NY, USA},
     
    68796994    issn        = {0001-0782},
    68806995    pages       = {565--569},
    6881     numpages    = {5},
    6882     url         = {http://doi.acm.org/10.1145/359545.359566},
    6883     doi         = {10.1145/359545.359566},
    6884     acmid       = {359566},
    68856996    publisher   = {ACM},
    68866997    address     = {New York, NY, USA}
     
    69007011    issn        = {0362-1340},
    69017012    pages       = {145--147},
    6902     numpages    = {3},
    6903     url         = {http://doi.acm.org/10.1145/122598.122614},
    6904     doi         = {10.1145/122598.122614},
    6905     acmid       = {122614},
    69067013    publisher   = {ACM},
    69077014    address     = {New York, NY, USA},
     
    70067113    issn        = {0362-1340},
    70077114    pages       = {82--87},
    7008     numpages    = {6},
    7009     url         = {http://doi.acm.org/10.1145/947680.947688},
    7010     doi         = {10.1145/947680.947688},
    70117115    publisher   = {ACM},
    70127116    address     = {New York, NY, USA},
     
    71537257}
    71547258
     7259@article{Cascaval08,
     7260    author      = {Cascaval, Calin and Blundell, Colin and Michael, Maged and Cain, Harold W. and Wu, Peng and Chiras, Stefanie and Chatterjee, Siddhartha},
     7261    title       = {Software Transactional Memory: Why Is It Only a Research Toy?},
     7262    journal     = {Queue},
     7263    volume      = {6},
     7264    number      = {5},
     7265    month       = sep,
     7266    year        = {2008},
     7267    pages       = {40:46--40:58},
     7268    publisher   = {ACM},
     7269    address     = {New York, NY, USA},
     7270}
     7271
    71557272@article{Dijkstra65a,
    71567273    keywords    = {N-thread software-solution mutual exclusion},
     
    73637480    year        = 1974,
    73647481    pages       = {261-301},
    7365     issn        = {0360-0300},
    7366     doi         = {http://doi.acm.org/10.1145/356635.356640},
    73677482    publisher   = {ACM},
    73687483    address     = {New York, NY, USA},
     
    74547569    publisher   = {ACM Press},
    74557570    address     = {New York, NY, USA},
    7456     doi         = {http://doi.acm.org/10.1145/356586.356588},
    74577571}
    74587572
     
    77557869    howpublished= {\href{https://projects.eclipse.org/proposals/trace-compass}{https://\-projects.eclipse.org/\-proposals/\-trace-compass}},
    77567870}
    7757  
     7871
     7872@inproceedings{Boehm09,
     7873    author      = {Boehm, Hans-J.},
     7874    title       = {Transactional Memory Should Be an Implementation Technique, Not a Programming Interface},
     7875    booktitle   = {Proceedings of the First USENIX Conference on Hot Topics in Parallelism},
     7876    series      = {HotPar'09},
     7877    year        = {2009},
     7878    location    = {Berkeley, California},
     7879    publisher   = {USENIX Association},
     7880    address     = {Berkeley, CA, USA},
     7881}
     7882
    77587883@article{Leroy00,
    77597884    keywords    = {type-systems, exceptions},
     
    78057930    number      = {2},
    78067931    pages       = {204-214},
    7807     month       = apr, year = 1988,
     7932    month       = apr,
     7933    year        = 1988,
    78087934    comment     = {
    78097935        Extended record types add fields to their base record.  Assignment
     
    81108236    issn        = {0004-5411},
    81118237    pages       = {245--281},
    8112     numpages    = {37},
    8113     url         = {http://doi.acm.org/10.1145/62.2160},
    8114     doi         = {10.1145/62.2160},
    8115     acmid       = {2160},
    81168238    publisher   = {ACM},
    81178239    address     = {New York, NY, USA},
     
    81268248    contributer = {pabuhr@plg},
    81278249    author      = {Boehm, Hans-J. and Adve, Sarita V.},
    8128     title       = {You Don'T Know Jack About Shared Variables or Memory Models},
     8250    title       = {You Don't Know Jack About Shared Variables or Memory Models},
    81298251    journal     = cacm,
    81308252    volume      = 55,
  • doc/papers/concurrency/Paper.tex

    r71d6bd8 r7030dab  
    6161\newcommand{\CCseventeen}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}17\xspace} % C++17 symbolic name
    6262\newcommand{\CCtwenty}{\textrm{C}\kern-.1em\hbox{+\kern-.25em+}20\xspace} % C++20 symbolic name
    63 \newcommand{\Csharp}{C\raisebox{-0.7ex}{\Large$^\sharp$}\xspace} % C# symbolic name
     63\newcommand{\Csharp}{C\raisebox{-0.7ex}{\large$^\sharp$}\xspace} % C# symbolic name
    6464
    6565%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     
    127127\newcommand*{\etc}{%
    128128        \@ifnextchar{.}{\ETC}%
    129         {\ETC.\xspace}%
     129                {\ETC.\xspace}%
    130130}}{}%
    131131\@ifundefined{etal}{
    132132\newcommand{\ETAL}{\abbrevFont{et}~\abbrevFont{al}}
    133133\newcommand*{\etal}{%
    134         \@ifnextchar{.}{\protect\ETAL}%
    135                 {\protect\ETAL.\xspace}%
     134        \@ifnextchar{.}{\ETAL}%
     135                {\ETAL.\xspace}%
    136136}}{}%
    137137\@ifundefined{viz}{
     
    163163                __float80, float80, __float128, float128, forall, ftype, generator, _Generic, _Imaginary, __imag, __imag__,
    164164                inline, __inline, __inline__, __int128, int128, __label__, monitor, mutex, _Noreturn, one_t, or,
    165                 otype, restrict, __restrict, __restrict__, __signed, __signed__, _Static_assert, thread,
     165                otype, restrict, resume, __restrict, __restrict__, __signed, __signed__, _Static_assert, suspend, thread,
    166166                _Thread_local, throw, throwResume, timeout, trait, try, ttype, typeof, __typeof, __typeof__,
    167167                virtual, __volatile, __volatile__, waitfor, when, with, zero_t},
    168168        moredirectives={defined,include_next},
    169169        % replace/adjust listing characters that look bad in sanserif
    170         literate={-}{\makebox[1ex][c]{\raisebox{0.4ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptstyle\land\,$}}1
     170        literate={-}{\makebox[1ex][c]{\raisebox{0.5ex}{\rule{0.8ex}{0.1ex}}}}1 {^}{\raisebox{0.6ex}{$\scriptstyle\land\,$}}1
    171171                {~}{\raisebox{0.3ex}{$\scriptstyle\sim\,$}}1 % {`}{\ttfamily\upshape\hspace*{-0.1ex}`}1
    172172                {<}{\textrm{\textless}}1 {>}{\textrm{\textgreater}}1
     
    197197                _Else, _Enable, _Event, _Finally, _Monitor, _Mutex, _Nomutex, _PeriodicTask, _RealTimeTask,
    198198                _Resume, _Select, _SporadicTask, _Task, _Timeout, _When, _With, _Throw},
    199 }
    200 \lstdefinelanguage{Golang}{
    201         morekeywords=[1]{package,import,func,type,struct,return,defer,panic,recover,select,var,const,iota,},
    202         morekeywords=[2]{string,uint,uint8,uint16,uint32,uint64,int,int8,int16,int32,int64,
    203                 bool,float32,float64,complex64,complex128,byte,rune,uintptr, error,interface},
    204         morekeywords=[3]{map,slice,make,new,nil,len,cap,copy,close,true,false,delete,append,real,imag,complex,chan,},
    205         morekeywords=[4]{for,break,continue,range,goto,switch,case,fallthrough,if,else,default,},
    206         morekeywords=[5]{Println,Printf,Error,},
    207         sensitive=true,
    208         morecomment=[l]{//},
    209         morecomment=[s]{/*}{*/},
    210         morestring=[b]',
    211         morestring=[b]",
    212         morestring=[s]{`}{`},
    213199}
    214200
     
    241227{}
    242228\lstnewenvironment{uC++}[1][]
    243 {\lstset{#1}}
     229{\lstset{language=uC++,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
    244230{}
    245231\lstnewenvironment{Go}[1][]
     
    262248}
    263249
    264 \newbox\myboxA
    265 \newbox\myboxB
    266 \newbox\myboxC
    267 \newbox\myboxD
     250\newsavebox{\myboxA}
     251\newsavebox{\myboxB}
     252\newsavebox{\myboxC}
     253\newsavebox{\myboxD}
    268254
    269255\title{\texorpdfstring{Advanced Control-flow and Concurrency in \protect\CFA}{Advanced Control-flow in Cforall}}
     
    282268\CFA is a polymorphic, non-object-oriented, concurrent, backwards-compatible extension of the C programming language.
    283269This paper discusses the design philosophy and implementation of its advanced control-flow and concurrent/parallel features, along with the supporting runtime written in \CFA.
    284 These features are created from scratch as ISO C has only low-level and/or unimplemented concurrency, so C programmers continue to rely on library features like pthreads.
     270These features are created from scratch as ISO C has only low-level and/or unimplemented concurrency, so C programmers continue to rely on library approaches like pthreads.
    285271\CFA introduces modern language-level control-flow mechanisms, like generators, coroutines, user-level threading, and monitors for mutual exclusion and synchronization.
    286272% Library extension for executors, futures, and actors are built on these basic mechanisms.
     
    295281
    296282\begin{document}
    297 \linenumbers                                            % comment out to turn off line numbering
     283\linenumbers                            % comment out to turn off line numbering
    298284
    299285\maketitle
     
    302288\section{Introduction}
    303289
    304 This paper discusses the design philosophy and implementation of advanced language-level control-flow and concurrent/parallel features in \CFA~\cite{Moss18,Cforall} and its runtime, which is written entirely in \CFA.
    305 \CFA is a modern, polymorphic, non-object-oriented\footnote{
    306 \CFA has features often associated with object-oriented programming languages, such as constructors, destructors, virtuals and simple inheritance.
     290\CFA~\cite{Moss18,Cforall} is a modern, polymorphic, non-object-oriented\footnote{
     291\CFA has object-oriented features, such as constructors, destructors, virtuals and simple trait/interface inheritance.
     292% Go interfaces, Rust traits, Swift Protocols, Haskell Type Classes and Java Interfaces.
     293% "Trait inheritance" works for me. "Interface inheritance" might also be a good choice, and distinguish clearly from implementation inheritance.
     294% You'll want to be a little bit careful with terms like "structural" and "nominal" inheritance as well. CFA has structural inheritance (I think Go as well) -- it's inferred based on the structure of the code. Java, Rust, and Haskell (not sure about Swift) have nominal inheritance, where there needs to be a specific statement that "this type inherits from this type".
    307295However, functions \emph{cannot} be nested in structures, so there is no lexical binding between a structure and set of functions (member/method) implemented by an implicit \lstinline@this@ (receiver) parameter.},
    308296backwards-compatible extension of the C programming language.
    309 In many ways, \CFA is to C as Scala~\cite{Scala} is to Java, providing a \emph{research vehicle} for new typing and control-flow capabilities on top of a highly popular programming language allowing immediate dissemination.
    310 Within the \CFA framework, new control-flow features are created from scratch because ISO \Celeven defines only a subset of the \CFA extensions, where the overlapping features are concurrency~\cite[\S~7.26]{C11}.
    311 However, \Celeven concurrency is largely wrappers for a subset of the pthreads library~\cite{Butenhof97,Pthreads}, and \Celeven and pthreads concurrency is simple, based on thread fork/join in a function and mutex/condition locks, which is low-level and error-prone;
    312 no high-level language concurrency features are defined.
    313 Interestingly, almost a decade after publication of the \Celeven standard, neither gcc-8, clang-9 nor msvc-19 (most recent versions) support the \Celeven include @threads.h@, indicating little interest in the C11 concurrency approach (possibly because the effort to add concurrency to \CC).
    314 Finally, while the \Celeven standard does not state a threading model, the historical association with pthreads suggests implementations would adopt kernel-level threading (1:1)~\cite{ThreadModel}.
    315 
     297In many ways, \CFA is to C as Scala~\cite{Scala} is to Java, providing a \emph{research vehicle} for new typing and control-flow capabilities on top of a highly popular programming language\footnote{
     298The TIOBE index~\cite{TIOBE} for December 2019 ranks the top five \emph{popular} programming languages as Java 17\%, C 16\%, Python 10\%, and \CC 6\%, \Csharp 5\% = 54\%, and over the past 30 years, C has always ranked either first or second in popularity.}
     299allowing immediate dissemination.
     300This paper discusses the design philosophy and implementation of advanced language-level control-flow and concurrent/parallel features in \CFA and its runtime, which is written entirely in \CFA.
     301The \CFA control-flow framework extends ISO \Celeven~\cite{C11} with new call/return and concurrent/parallel control-flow.
     302
     303% The call/return extensions retain state between callee and caller versus losing the callee's state on return;
     304% the concurrency extensions allow high-level management of threads.
     305
     306Call/return control-flow with argument/parameter passing appeared in the first programming languages.
     307Over the past 50 years, call/return has been augmented with features like static/dynamic call, exceptions (multi-level return) and generators/coroutines (retain state between calls).
     308While \CFA has mechanisms for dynamic call (algebraic effects) and exceptions\footnote{
     309\CFA exception handling will be presented in a separate paper.
     310The key feature that dovetails with this paper is nonlocal exceptions allowing exceptions to be raised across stacks, with synchronous exceptions raised among coroutines and asynchronous exceptions raised among threads, similar to that in \uC~\cite[\S~5]{uC++}}, this work only discusses retaining state between calls via generators/coroutines.
     311\newterm{Coroutining} was introduced by Conway~\cite{Conway63} (1963), discussed by Knuth~\cite[\S~1.4.2]{Knuth73V1}, implemented in Simula67~\cite{Simula67}, formalized by Marlin~\cite{Marlin80}, and is now popular and appears in old and new programming languages: CLU~\cite{CLU}, \Csharp~\cite{Csharp}, Ruby~\cite{Ruby}, Python~\cite{Python}, JavaScript~\cite{JavaScript}, Lua~\cite{Lua}, \CCtwenty~\cite{C++20Coroutine19}.
     312Coroutining is sequential execution requiring direct handoff among coroutines, \ie only the programmer is controlling execution order.
     313If coroutines transfer to an internal event-engine for scheduling the next coroutines, the program transitions into the realm of concurrency~\cite[\S~3]{Buhr05a}.
     314Coroutines are only a stepping stone towards concurrency where the commonality is that coroutines and threads retain state between calls.
     315
     316\Celeven/\CCeleven define concurrency~\cite[\S~7.26]{C11}, but it is largely wrappers for a subset of the pthreads library~\cite{Pthreads}.\footnote{Pthreads concurrency is based on simple thread fork/join in a function and mutex/condition locks, which is low-level and error-prone}
     317Interestingly, almost a decade after the \Celeven standard, neither gcc-9, clang-9 nor msvc-19 (most recent versions) support the \Celeven include @threads.h@, indicating no interest in the C11 concurrency approach (possibly because of the recent effort to add concurrency to \CC).
     318While the \Celeven standard does not state a threading model, the historical association with pthreads suggests implementations would adopt kernel-level threading (1:1)~\cite{ThreadModel}, as for \CC.
    316319In contrast, there has been a renewed interest during the past decade in user-level (M:N, green) threading in old and new programming languages.
    317320As multi-core hardware became available in the 1980/90s, both user and kernel threading were examined.
    318321Kernel threading was chosen, largely because of its simplicity and fit with the simpler operating systems and hardware architectures at the time, which gave it a performance advantage~\cite{Drepper03}.
    319322Libraries like pthreads were developed for C, and the Solaris operating-system switched from user (JDK 1.1~\cite{JDK1.1}) to kernel threads.
    320 As a result, languages like Java, Scala, Objective-C~\cite{obj-c-book}, \CCeleven~\cite{C11}, and C\#~\cite{Csharp} adopt the 1:1 kernel-threading model, with a variety of presentation mechanisms.
    321 From 2000 onwards, languages like Go~\cite{Go}, Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, D~\cite{D}, and \uC~\cite{uC++,uC++book} have championed the M:N user-threading model, and many user-threading libraries have appeared~\cite{Qthreads,MPC,Marcel}, including putting green threads back into Java~\cite{Quasar}.
    322 The main argument for user-level threading is that it is lighter weight than kernel threading (locking and context switching do not cross the kernel boundary), so there is less restriction on programming styles that encourage large numbers of threads performing medium work units to facilitate load balancing by the runtime~\cite{Verch12}.
      323As a result, many current language implementations adopt the 1:1 kernel-threading model, like Java (Scala), Objective-C~\cite{obj-c-book}, \CCeleven~\cite{C11}, C\#~\cite{Csharp} and Rust~\cite{Rust}, with a variety of presentation mechanisms.
     324From 2000 onwards, several language implementations have championed the M:N user-threading model, like Go~\cite{Go}, Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, D~\cite{D}, and \uC~\cite{uC++,uC++book}, including putting green threads back into Java~\cite{Quasar}, and many user-threading libraries have appeared~\cite{Qthreads,MPC,Marcel}.
      325The main argument for user-level threading is that it is lighter weight than kernel threading (locking and context switching do not cross the kernel boundary), so there is less restriction on programming styles that encourage large numbers of threads performing medium-sized work to facilitate load balancing by the runtime~\cite{Verch12}.
    323326As well, user-threading facilitates a simpler concurrency approach using thread objects that leverage sequential patterns versus events with call-backs~\cite{Adya02,vonBehren03}.
    324327Finally, performant user-threading implementations (both time and space) meet or exceed direct kernel-threading implementations, while achieving the programming advantages of high concurrency levels and safety.
    325328
    326 A further effort over the past two decades is the development of language memory models to deal with the conflict between language features and compiler/hardware optimizations, \ie some language features are unsafe in the presence of aggressive sequential optimizations~\cite{Buhr95a,Boehm05}.
     329A further effort over the past two decades is the development of language memory models to deal with the conflict between language features and compiler/hardware optimizations, \eg some language features are unsafe in the presence of aggressive sequential optimizations~\cite{Buhr95a,Boehm05}.
    327330The consequence is that a language must provide sufficient tools to program around safety issues, as inline and library code is all sequential to the compiler.
    328331One solution is low-level qualifiers and functions (\eg @volatile@ and atomics) allowing \emph{programmers} to explicitly write safe (race-free~\cite{Boehm12}) programs.
    329 A safer solution is high-level language constructs so the \emph{compiler} knows the optimization boundaries, and hence, provides implicit safety.
    330 This problem is best known with respect to concurrency, but applies to other complex control-flow, like exceptions\footnote{
    331 \CFA exception handling will be presented in a separate paper.
    332 The key feature that dovetails with this paper is nonlocal exceptions allowing exceptions to be raised across stacks, with synchronous exceptions raised among coroutines and asynchronous exceptions raised among threads, similar to that in \uC~\cite[\S~5]{uC++}
    333 } and coroutines.
    334 Finally, language solutions allow matching constructs with language paradigm, \ie imperative and functional languages often have different presentations of the same concept to fit their programming model.
    335 
    336 Finally, it is important for a language to provide safety over performance \emph{as the default}, allowing careful reduction of safety for performance when necessary.
    337 Two concurrency violations of this philosophy are \emph{spurious wakeup} (random wakeup~\cite[\S~8]{Buhr05a}) and \emph{barging}\footnote{
    338 The notion of competitive succession instead of direct handoff, \ie a lock owner releases the lock and an arriving thread acquires it ahead of preexisting waiter threads.
      332A safer solution is high-level language constructs so the \emph{compiler} knows the concurrency boundaries (where mutual exclusion and synchronization are acquired/released) and provides implicit safety at and across these boundaries.
     333While the optimization problem is best known with respect to concurrency, it applies to other complex control-flow, like exceptions and coroutines.
     334As well, language solutions allow matching the language paradigm with the approach, \eg matching the functional paradigm with data-flow programming or the imperative paradigm with thread programming.
     335
     336Finally, it is important for a language to provide safety over performance \emph{as the default}, allowing careful reduction of safety (unsafe code) for performance when necessary.
     337Two concurrency violations of this philosophy are \emph{spurious wakeup} (random wakeup~\cite[\S~9]{Buhr05a}) and \emph{barging}\footnote{
     338Barging is competitive succession instead of direct handoff, \ie after a lock is released both arriving and preexisting waiter threads compete to acquire the lock.
     339Hence, an arriving thread can temporally \emph{barge} ahead of threads already waiting for an event, which can repeat indefinitely leading to starvation of waiter threads.
    339340} (signals-as-hints~\cite[\S~8]{Buhr05a}), where one is a consequence of the other, \ie once there is spurious wakeup, signals-as-hints follow.
    340 However, spurious wakeup is \emph{not} a foundational concurrency property~\cite[\S~8]{Buhr05a}, it is a performance design choice.
    341 Similarly, signals-as-hints are often a performance decision.
    342 We argue removing spurious wakeup and signals-as-hints make concurrent programming significantly safer because it removes local non-determinism and matches with programmer expectation.
    343 (Author experience teaching concurrency is that students are highly confused by these semantics.)
    344 Clawing back performance, when local non-determinism is unimportant, should be an option not the default.
    345 
    346 \begin{comment}
    347 Most augmented traditional (Fortran 18~\cite{Fortran18}, Cobol 14~\cite{Cobol14}, Ada 12~\cite{Ada12}, Java 11~\cite{Java11}) and new languages (Go~\cite{Go}, Rust~\cite{Rust}, and D~\cite{D}), except \CC, diverge from C with different syntax and semantics, only interoperate indirectly with C, and are not systems languages, for those with managed memory.
    348 As a result, there is a significant learning curve to move to these languages, and C legacy-code must be rewritten.
    349 While \CC, like \CFA, takes an evolutionary approach to extend C, \CC's constantly growing complex and interdependent features-set (\eg objects, inheritance, templates, etc.) mean idiomatic \CC code is difficult to use from C, and C programmers must expend significant effort learning \CC.
    350 Hence, rewriting and retraining costs for these languages, even \CC, are prohibitive for companies with a large C software-base.
    351 \CFA with its orthogonal feature-set, its high-performance runtime, and direct access to all existing C libraries circumvents these problems.
    352 \end{comment}
    353 
    354 \CFA embraces user-level threading, language extensions for advanced control-flow, and safety as the default.
    355 We present comparative examples so the reader can judge if the \CFA control-flow extensions are better and safer than those in other concurrent, imperative programming languages, and perform experiments to show the \CFA runtime is competitive with other similar mechanisms.
     341(Author experience teaching concurrency is that students are confused by these semantics.)
     342However, spurious wakeup is \emph{not} a foundational concurrency property~\cite[\S~9]{Buhr05a};
     343it is a performance design choice.
      344We argue removing spurious wakeup and signals-as-hints makes concurrent programming simpler and safer as there is less local non-determinism to manage.
      345If barging acquisition is allowed, its specialized performance advantage should be available as an option, not the default.
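
As a concrete illustration (ours, not the paper's text), these two semantics are why the conventional pthreads idiom must re-check its predicate in a loop; with barging-free signalling and no spurious wakeup, a single check would suffice.

    #include <pthread.h>

    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
    pthread_cond_t  c = PTHREAD_COND_INITIALIZER;
    int ready = 0;

    void consumer( void ) {
        pthread_mutex_lock( &m );
        while ( ! ready ) {                  // loop, not `if`: the wakeup may be spurious, or a
            pthread_cond_wait( &c, &m );     //   barging thread may have consumed the event first
        }
        ready = 0;                           // consume the event
        pthread_mutex_unlock( &m );
    }

    void producer( void ) {
        pthread_mutex_lock( &m );
        ready = 1;
        pthread_cond_signal( &c );           // only a hint: the signalled thread still races to re-acquire m
        pthread_mutex_unlock( &m );
    }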
     346
     347\CFA embraces language extensions for advanced control-flow, user-level threading, and safety as the default.
     348We present comparative examples to support our argument that the \CFA control-flow extensions are as expressive and safe as those in other concurrent imperative programming languages, and perform experiments to show the \CFA runtime is competitive with other similar mechanisms.
    356349The main contributions of this work are:
    357 \begin{itemize}[topsep=3pt,itemsep=1pt]
     350\begin{itemize}[topsep=3pt,itemsep=0pt]
    358351\item
    359 language-level generators, coroutines and user-level threading, which respect the expectations of C programmers.
     352a set of fundamental execution properties that dictate which language-level control-flow features need to be supported,
     353
    360354\item
    361 monitor synchronization without barging, and the ability to safely acquiring multiple monitors \emph{simultaneously} (deadlock free), while seamlessly integrating these capabilities with all monitor synchronization mechanisms.
     355integration of these language-level control-flow features, while respecting the style and expectations of C programmers,
     356
    362357\item
    363 providing statically type-safe interfaces that integrate with the \CFA polymorphic type-system and other language features.
      358monitor synchronization without barging, and the ability to safely acquire multiple monitors \emph{simultaneously} (deadlock free), while seamlessly integrating these capabilities with all monitor synchronization mechanisms,
     359
     360\item
     361providing statically type-safe interfaces that integrate with the \CFA polymorphic type-system and other language features,
     362
    364363% \item
    365364% library extensions for executors, futures, and actors built on the basic mechanisms.
     365
    366366\item
    367 a runtime system with no spurious wakeup.
      367a runtime system without spurious wake-up and with no performance loss,
     368
    368369\item
    369 a dynamic partitioning mechanism to segregate the execution environment for specialized requirements.
     370a dynamic partitioning mechanism to segregate groups of executing user and kernel threads performing specialized work (\eg web-server or compute engine) or requiring different scheduling (\eg NUMA or real-time).
     371
    370372% \item
    371373% a non-blocking I/O library
     374
    372375\item
    373 experimental results showing comparable performance of the new features with similar mechanisms in other programming languages.
     376experimental results showing comparable performance of the \CFA features with similar mechanisms in other languages.
    374377\end{itemize}
    375378
    376 Section~\ref{s:StatefulFunction} begins advanced control by introducing sequential functions that retain data and execution state between calls, which produces constructs @generator@ and @coroutine@.
    377 Section~\ref{s:Concurrency} begins concurrency, or how to create (fork) and destroy (join) a thread, which produces the @thread@ construct.
     379Section~\ref{s:FundamentalExecutionProperties} presents the compositional hierarchy of execution properties directing the design of control-flow features in \CFA.
      380Section~\ref{s:StatefulFunction} begins advanced control by introducing sequential functions that retain data and execution state between calls, producing the constructs @generator@ and @coroutine@.
      381Section~\ref{s:Concurrency} begins concurrency, or how to create (fork) and destroy (join) a thread, producing the @thread@ construct.
     378383Section~\ref{s:MutualExclusionSynchronization} discusses the two mechanisms to restrict nondeterminism when controlling shared access to resources (mutual exclusion) and timing relationships among threads (synchronization).
    379383Section~\ref{s:Monitor} shows how both mutual exclusion and synchronization are safely embedded in the @monitor@ and @thread@ constructs.
    380384Section~\ref{s:CFARuntimeStructure} describes the large-scale mechanism to structure (cluster) threads and virtual processors (kernel threads).
    381 Section~\ref{s:Performance} uses a series of microbenchmarks to compare \CFA threading with pthreads, Java OpenJDK-9, Go 1.12.6 and \uC 7.0.0.
     385Section~\ref{s:Performance} uses a series of microbenchmarks to compare \CFA threading with pthreads, Java 11.0.6, Go 1.12.6, Rust 1.37.0, Python 3.7.6, Node.js 12.14.1, and \uC 7.0.0.
     386
     387
     388\section{Fundamental Execution Properties}
     389\label{s:FundamentalExecutionProperties}
     390
     391The features in a programming language should be composed from a set of fundamental properties rather than an ad hoc collection chosen by the designers.
     392To this end, the control-flow features created for \CFA are based on the fundamental properties of any language with function-stack control-flow (see also \uC~\cite[pp.~140-142]{uC++}).
     393The fundamental properties are execution state, thread, and mutual-exclusion/synchronization (MES).
     394These independent properties can be used alone, in pairs, or in triplets to compose different language features, forming a compositional hierarchy where the most advanced feature has all the properties (state/thread/MES).
      395While it is possible for a language to only support the most advanced feature~\cite{Hermes90}, this unnecessarily complicates, and makes inefficient, solutions to certain classes of problems.
     396As is shown, each of the (non-rejected) composed features solves a particular set of problems, and hence, has a defensible position in a programming language.
      397If a compositional feature is missing, a programmer has too few/many fundamental properties, resulting in a complex and/or inefficient solution.
     398
     399In detail, the fundamental properties are:
     400\begin{description}[leftmargin=\parindent,topsep=3pt,parsep=0pt]
     401\item[\newterm{execution state}:]
     402is the state information needed by a control-flow feature to initialize, manage compute data and execution location(s), and de-initialize.
     403State is retained in fixed-sized aggregate structures and dynamic-sized stack(s), often allocated in the heap(s) managed by the runtime system.
     404The lifetime of the state varies with the control-flow feature, where longer life-time and dynamic size provide greater power but also increase usage complexity and cost.
      405Control-flow transfers among execution states occur in multiple ways, such as function call, context switch, asynchronous await, etc.
     406Because the programming language determines what constitutes an execution state, implicitly manages this state, and defines movement mechanisms among states, execution state is an elementary property of the semantics of a programming language.
     407% An execution-state is related to the notion of a process continuation \cite{Hieb90}.
     408
     409\item[\newterm{threading}:]
     410is execution of code that occurs independently of other execution, \ie the execution resulting from a thread is sequential.
     411Multiple threads provide \emph{concurrent execution};
     412concurrent execution becomes parallel when run on multiple processing units (hyper-threading, cores, sockets).
     413There must be language mechanisms to create, block/unblock, and join with a thread.
     414
     415\item[\newterm{MES}:]
      416is the set of concurrency mechanisms used to perform an action without interruption and to establish timing relationships among multiple threads.
     417These two properties are independent, \ie mutual exclusion cannot provide synchronization and vice versa without introducing additional threads~\cite[\S~4]{Buhr05a}.
      418Limiting MES, \eg no access to shared data, results in contrived solutions and inefficiency on multi-core von Neumann computers where shared memory is a foundational aspect of their design.
     419\end{description}
      420These properties are fundamental because they cannot be built from existing language features, \eg a basic programming language like C99~\cite{C99} cannot create new control-flow features or concurrency, nor provide MES using atomic hardware mechanisms.
     421
     422
     423\subsection{Execution Properties}
     424
      425Table~\ref{t:ExecutionPropertyComposition} shows how the three fundamental execution properties (state, thread, and mutual exclusion) compose a hierarchy of control-flow features needed in a programming language.
     426(When doing case analysis, not all combinations are meaningful.)
     427Note, basic von Neumann execution requires at least one thread and an execution state providing some form of call stack.
     428For table entries missing these minimal components, the property is borrowed from the invoker (caller).
     429
     430Case 1 is a function that borrows storage for its state (stack frame/activation) and a thread from its invoker and retains this state across \emph{callees}, \ie function local-variables are retained on the stack across calls.
     431Case 2 is case 1 with access to shared state so callers are restricted during update (mutual exclusion) and scheduling for other threads (synchronization).
     432Case 3 is a stateful function supporting resume/suspend along with call/return to retain state across \emph{callers}, but has some restrictions because the function's state is stackless.
     433Note, stackless functions still borrow the caller's stack and thread, where the stack is used to preserve state across its callees.
     434Case 4 is cases 2 and 3 with protection to shared state for stackless functions.
     435Cases 5 and 6 are the same as 3 and 4 but only the thread is borrowed as the function state is stackful, so resume/suspend is a context switch from the caller's to the function's stack.
     436Cases 7 and 8 are rejected because a function that is given a new thread must have its own stack where the thread begins and stack frames are stored for calls, \ie there is no stack to borrow.
     437Cases 9 and 10 are rejected because a thread with a fixed state (no stack) cannot accept calls, make calls, block, or be preempted, all of which require an unknown amount of additional dynamic state.
     438Hence, once started, this kind of thread must execute to completion, \ie computation only, which severely restricts runtime management.
     439Cases 11 and 12 have a stackful thread with and without safe access to shared state.
     440Execution properties increase the cost of creation and execution along with complexity of usage.
     441
     442\begin{table}
     443\caption{Execution property composition}
     444\centering
     445\label{t:ExecutionPropertyComposition}
     446\renewcommand{\arraystretch}{1.25}
     447%\setlength{\tabcolsep}{5pt}
     448\begin{tabular}{c|c||l|l}
     449\multicolumn{2}{c||}{execution properties} & \multicolumn{2}{c}{mutual exclusion / synchronization} \\
     450\hline
     451stateful                        & thread        & \multicolumn{1}{c|}{No} & \multicolumn{1}{c}{Yes} \\
     452\hline   
     453\hline   
     454No                                      & No            & \textbf{1}\ \ \ function                              & \textbf{2}\ \ \ @monitor@ function    \\
     455\hline   
     456Yes (stackless)         & No            & \textbf{3}\ \ \ @generator@                   & \textbf{4}\ \ \ @monitor@ @generator@ \\
     457\hline   
     458Yes (stackful)          & No            & \textbf{5}\ \ \ @coroutine@                   & \textbf{6}\ \ \ @monitor@ @coroutine@ \\
     459\hline   
     460No                                      & Yes           & \textbf{7}\ \ \ {\color{red}rejected} & \textbf{8}\ \ \ {\color{red}rejected} \\
     461\hline   
     462Yes (stackless)         & Yes           & \textbf{9}\ \ \ {\color{red}rejected} & \textbf{10}\ \ \ {\color{red}rejected} \\
     463\hline   
     464Yes (stackful)          & Yes           & \textbf{11}\ \ \ @thread@                             & \textbf{12}\ \ @monitor@ @thread@             \\
     465\end{tabular}
     466\end{table}
     467
     468Given the execution-properties taxonomy, programmers can now answer three basic questions: is state necessary across calls and how much, is a separate thread necessary, is access to shared state necessary.
      469The answers define the optimal language feature needed for implementing a programming problem.
      470The next sections discuss how \CFA fills in the table with language features, while other programming languages may only provide a subset of the table.
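
As a plain-C illustration of the taxonomy (our sketch, not \CFA syntax), a Case 1 function keeps its state only on the borrowed stack, whereas Case 3's stackless retention can be emulated by storing the execution state in an explicit structure that survives between calls:

    // Case 1: all state lives in the borrowed stack frame and vanishes on return.
    int square( int x ) { int r = x * x; return r; }

    // Case 3 (emulated): a stackless "generator" keeps its execution state in a
    // caller-visible structure, so each call resumes where the previous one stopped.
    typedef struct { int next; } Counter;          // retained execution state

    int resume_counter( Counter * g ) {            // borrows the caller's stack and thread
        return g->next++;                          // produce a value, then implicitly suspend
    }

    // usage:
    //   Counter gen = { 0 };
    //   resume_counter( &gen );   // 0
    //   resume_counter( &gen );   // 1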
     471
     472
     473\subsection{Design Requirements}
     474
     475The following design requirements largely stem from building \CFA on top of C.
     476\begin{itemize}[topsep=3pt,parsep=0pt]
     477\item
     478All communication must be statically type checkable for early detection of errors and efficient code generation.
     479This requirement is consistent with the fact that C is a statically-typed programming-language.
     480
     481\item
     482Direct interaction among language features must be possible allowing any feature to be selected without restricting comm\-unication.
     483For example, many concurrent languages do not provide direct communication (calls) among threads, \ie threads only communicate indirectly through monitors, channels, messages, and/or futures.
      484Indirect communication increases the number of objects, consuming more resources, and requires additional synchronization and possibly data transfer.
     485
     486\item
     487All communication is performed using function calls, \ie data is transmitted from argument to parameter and results are returned from function calls.
     488Alternative forms of communication, such as call-backs, message passing, channels, or communication ports, step outside of C's normal form of communication.
     489
     490\item
     491All stateful features must follow the same declaration scopes and lifetimes as other language data.
     492For C that means at program startup, during block and function activation, and on demand using dynamic allocation.
     493
     494\item
     495MES must be available implicitly in language constructs as well as explicitly for specialized requirements, because requiring programmers to build MES using low-level locks often leads to incorrect programs.
     496Furthermore, reducing synchronization scope by encapsulating it within language constructs further reduces errors in concurrent programs.
     497
     498\item
     499Both synchronous and asynchronous communication are needed.
     500However, we believe the best way to provide asynchrony, such as call-buffering/chaining and/or returning futures~\cite{multilisp}, is building it from expressive synchronous features.
     501
     502\item
      503Synchronization must be able to control the service order of requests, including prioritizing selection from different kinds of outstanding requests, and postponing a request for an unspecified time while continuing to accept new requests.
      504Otherwise, certain concurrency problems are difficult, \eg web server and disk scheduling, and the amount of concurrency is inhibited~\cite{Gentleman81}.
     505\end{itemize}
     506We have satisfied these requirements in \CFA while maintaining backwards compatibility with the huge body of legacy C programs.
     507% In contrast, other new programming languages must still access C programs (\eg operating-system service routines), but do so through fragile C interfaces.
     508
     509
     510\subsection{Asynchronous Await / Call}
     511
     512Asynchronous await/call is a caller mechanism for structuring programs and/or increasing concurrency, where the caller (client) postpones an action into the future, which is subsequently executed by a callee (server).
     513The caller detects the action's completion through a \newterm{future}/\newterm{promise}.
     514The benefit is asynchronous caller execution with respect to the callee until future resolution.
     515For single-threaded languages like JavaScript, an asynchronous call passes a callee action, which is queued in the event-engine, and continues execution with a promise.
     516When the caller needs the promise to be fulfilled, it executes @await@.
     517A promise-completion call-back can be part of the callee action or the caller is rescheduled;
      518in either case, the call-back is executed after the promise is fulfilled.
      519While asynchronous calls generate new callee (server) events, we contend this mechanism is insufficient for advanced control-flow mechanisms like generators or coroutines (which are discussed next).
      520Specifically, control between caller and callee occurs indirectly through the event-engine, precluding direct handoff and cycling among events, and requires complex resolution of a control promise and data.
      521Note, @async-await@ is just syntactic sugar over the event engine, so it does not solve these deficiencies.
     522For multi-threaded languages like Java, the asynchronous call queues a callee action with an executor (server), which subsequently executes the work by a thread in the executor thread-pool.
      523The problem is when concurrent work-units need to interact and/or block, as this affects the executor, \eg by stopping its threads.
      524While it is possible to extend this approach to support the necessary mechanisms, \eg message passing in Actors, we show monitors and threads provide an equally competitive approach that does not deviate from normal call communication and can be used to build asynchronous calls, as is done in Java.
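
For example, a future can be built from synchronous primitives.
The following is a minimal sketch using a POSIX mutex and condition variable, where the caller continues concurrently after @pthread_create@ and blocks in @await@ only when the result is needed; the names @future_t@, @fulfil@, @await@, and @worker@ are illustrative, not \CFA features.
\begin{cfa}
#include <stddef.h>
#include <pthread.h>
typedef struct {	$\C[3.5in]{// future: value plus synchronization}$
	pthread_mutex_t lock;  pthread_cond_t filled;
	int value, ready;
} future_t;
void fulfil( future_t * f, int v ) {	$\C{// callee side: resolve the future}$
	pthread_mutex_lock( &f->lock );
	f->value = v;  f->ready = 1;
	pthread_cond_signal( &f->filled );
	pthread_mutex_unlock( &f->lock );
}
int await( future_t * f ) {	$\C{// caller side: block until resolved}$
	pthread_mutex_lock( &f->lock );
	while ( ! f->ready ) pthread_cond_wait( &f->filled, &f->lock );
	int v = f->value;
	pthread_mutex_unlock( &f->lock );
	return v;
}
void * worker( void * arg ) {	$\C{// server: performs the postponed action}$
	fulfil( (future_t *)arg, 42 );
	return NULL;
}
int main() {
	future_t f = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0 };
	pthread_t t;
	pthread_create( &t, NULL, worker, &f );	$\C{// asynchronous call}$
	int result = await( &f );	$\C{// caller synchronizes only when the value is needed}$
	pthread_join( t, NULL );
	return result == 42 ? 0 : 1;	$\C{// use the result}\CRT$
}
\end{cfa}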
    382525
    383526
     
    385528\label{s:StatefulFunction}
    386529
    387 The stateful function is an old idea~\cite{Conway63,Marlin80} that is new again~\cite{C++20Coroutine19}, where execution is temporarily suspended and later resumed, \eg plugin, device driver, finite-state machine.
    388 Hence, a stateful function may not end when it returns to its caller, allowing it to be restarted with the data and execution location present at the point of suspension.
    389 This capability is accomplished by retaining a data/execution \emph{closure} between invocations.
    390 If the closure is fixed size, we call it a \emph{generator} (or \emph{stackless}), and its control flow is restricted, \eg suspending outside the generator is prohibited.
    391 If the closure is variable size, we call it a \emph{coroutine} (or \emph{stackful}), and as the names implies, often implemented with a separate stack with no programming restrictions.
    392 Hence, refactoring a stackless coroutine may require changing it to stackful.
    393 A foundational property of all \emph{stateful functions} is that resume/suspend \emph{do not} cause incremental stack growth, \ie resume/suspend operations are remembered through the closure not the stack.
    394 As well, activating a stateful function is \emph{asymmetric} or \emph{symmetric}, identified by resume/suspend (no cycles) and resume/resume (cycles).
    395 A fixed closure activated by modified call/return is faster than a variable closure activated by context switching.
    396 Additionally, any storage management for the closure (especially in unmanaged languages, \ie no garbage collection) must also be factored into design and performance.
    397 Therefore, selecting between stackless and stackful semantics is a tradeoff between programming requirements and performance, where stackless is faster and stackful is more general.
    398 Note, creation cost is amortized across usage, so activation cost is usually the dominant factor.
     530A \emph{stateful function} has the ability to remember state between calls, where state can be either data or execution, \eg plugin, device driver, finite-state machine (FSM).
     531A simple technique to retain data state between calls is @static@ declarations within a function, which is often implemented by hoisting the declarations to the global scope but hiding the names within the function using name mangling.
     532However, each call starts the function at the top making it difficult to determine the last point of execution in an algorithm, and requiring multiple flag variables and testing to reestablish the continuation point.
     533Hence, the next step of generalizing function state is implicitly remembering the return point between calls and reentering the function at this point rather than the top, called \emph{generators}\,/\,\emph{iterators} or \emph{stackless coroutines}.
     534For example, a Fibonacci generator retains data and execution state allowing it to remember prior values needed to generate the next value and the location in the algorithm to compute that value.
     535The next step of generalization is instantiating the function to allow multiple named instances, \eg multiple Fibonacci generators, where each instance has its own state, and hence, can generate an independent sequence of values.
     536Note, a subset of generator state is a function \emph{closure}, \ie the technique of capturing lexical references when returning a nested function.
     537A further generalization is adding a stack to a generator's state, called a \emph{coroutine}, so it can suspend outside of itself, \eg call helper functions to arbitrary depth before suspending back to its resumer without unwinding these calls.
     538For example, a coroutine iterator for a binary tree can stop the traversal at the visit point (pre, infix, post traversal), return the node value to the caller, and then continue the recursive traversal from the current node on the next call.
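
To make the first step concrete, a flag-based Fibonacci using @static@ declarations might look as follows (a sketch only; the names @fib@ and @restart@ are illustrative), showing both the single implicit instance and the flag testing needed to reestablish the continuation point.
\begin{cfa}
int fib( void ) {	$\C[3.5in]{// single implicit instance}$
	static int restart = 0, fn1, fn;	$\C{// data state retained across calls}$
	switch ( restart ) {	$\C{// flag reestablishes continuation point}$
	  case 0: restart = 1;  fn1 = 0;  return fn1;	$\C{// first call}$
	  case 1: restart = 2;  fn = 1;  return fn;	$\C{// second call}$
	  default: ;	$\C{// steady state falls through}$
	}
	int temp = fn1 + fn;  fn1 = fn;  fn = temp;
	return fn;	$\C{// subsequent calls}\CRT$
}
\end{cfa}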
     539
     540There are two styles of activating a stateful function, \emph{asymmetric} or \emph{symmetric}, identified by resume/suspend (no cycles) and resume/resume (cycles).
     541These styles \emph{do not} cause incremental stack growth, \eg a million resume/suspend or resume/resume cycles do not remember each cycle just the last resumer for each cycle.
     542Selecting between stackless/stackful semantics and asymmetric/symmetric style is a tradeoff between programming requirements, performance, and design, where stackless is faster and smaller (modified call/return between closures), stackful is more general but slower and larger (context switching between distinct stacks), and asymmetric is simpler control-flow than symmetric.
     543Additionally, storage management for the closure/stack (especially in unmanaged languages, \ie no garbage collection) must be factored into design and performance.
     544Note, creation cost (closure/stack) is amortized across usage, so activation cost (resume/suspend) is usually the dominant factor.
     545
     546% The stateful function is an old idea~\cite{Conway63,Marlin80} that is new again~\cite{C++20Coroutine19}, where execution is temporarily suspended and later resumed, \eg plugin, device driver, finite-state machine.
     547% Hence, a stateful function may not end when it returns to its caller, allowing it to be restarted with the data and execution location present at the point of suspension.
     548% If the closure is fixed size, we call it a \emph{generator} (or \emph{stackless}), and its control flow is restricted, \eg suspending outside the generator is prohibited.
     549% If the closure is variable size, we call it a \emph{coroutine} (or \emph{stackful}), and as the names implies, often implemented with a separate stack with no programming restrictions.
     550% Hence, refactoring a stackless coroutine may require changing it to stackful.
     551% A foundational property of all \emph{stateful functions} is that resume/suspend \emph{do not} cause incremental stack growth, \ie resume/suspend operations are remembered through the closure not the stack.
     552% As well, activating a stateful function is \emph{asymmetric} or \emph{symmetric}, identified by resume/suspend (no cycles) and resume/resume (cycles).
     553% A fixed closure activated by modified call/return is faster than a variable closure activated by context switching.
     554% Additionally, any storage management for the closure (especially in unmanaged languages, \ie no garbage collection) must also be factored into design and performance.
     555% Therefore, selecting between stackless and stackful semantics is a tradeoff between programming requirements and performance, where stackless is faster and stackful is more general.
      556% Note, creation cost is amortized across usage, so activation cost is usually the dominant factor.
     557
      558For example, Python presents asymmetric generators as a function object, \uC presents symmetric coroutines as a \lstinline[language=C++]|class|-like object, and many languages present threading using function pointers, \eg @pthreads@~\cite{Butenhof97}, \Csharp~\cite{Csharp}, Go~\cite{Go}, and Scala~\cite{Scala}.
     559\begin{center}
     560\begin{tabular}{@{}l|l|l@{}}
     561\multicolumn{1}{@{}c|}{Python asymmetric generator} & \multicolumn{1}{c|}{\uC symmetric coroutine} & \multicolumn{1}{c@{}}{Pthreads thread} \\
     562\hline
     563\begin{python}
     564`def Gen():` $\LstCommentStyle{\color{red}// function}$
     565        ... yield val ...
     566gen = Gen()
     567for i in range( 10 ):
     568        print( next( gen ) )
     569\end{python}
     570&
     571\begin{uC++}
     572`_Coroutine Cycle {` $\LstCommentStyle{\color{red}// class}$
     573        Cycle * p;
     574        void main() { p->cycle(); }
     575        void cycle() { resume(); }  `};`
     576Cycle c1, c2; c1.p=&c2; c2.p=&c1; c1.cycle();
     577\end{uC++}
     578&
     579\begin{cfa}
     580void * rtn( void * arg ) { ... }
     581int i = 3, rc;
     582pthread_t t; $\C{// thread id}$
     583$\LstCommentStyle{\color{red}// function pointer}$
      584rc=pthread_create(&t, NULL, `rtn`, (void *)i);
     585\end{cfa}
     586\end{tabular}
     587\end{center}
     588\CFA's preferred presentation model for generators/coroutines/threads is a hybrid of functions and classes, giving an object-oriented flavour.
     589Essentially, the generator/coroutine/thread function is semantically coupled with a generator/coroutine/thread custom type via the type's name.
     590The custom type solves several issues, while accessing the underlying mechanisms used by the custom types is still allowed for flexibility reasons.
     591Each custom type is discussed in detail in the following sections.
     592
     593
     594\subsection{Generator}
     595
     596Stackless generators (Table~\ref{t:ExecutionPropertyComposition} case 3) have the potential to be very small and fast, \ie as small and fast as function call/return for both creation and execution.
     597The \CFA goal is to achieve this performance target, possibly at the cost of some semantic complexity.
     598A series of different kinds of generators and their implementation demonstrate how this goal is accomplished.\footnote{
      599The \CFA operator syntax uses \lstinline|?| to denote operands, which allows precise definitions for pre, post, and infix operators, \eg \lstinline|++?|, \lstinline|?++|, and \lstinline|?+?|; in addition, \lstinline|?\{\}| denotes a constructor, as in \lstinline|foo `f` = `\{`...`\}`|, \lstinline|^?\{\}| denotes a destructor, and \lstinline|?()| is the \CC function-call operator \lstinline|operator()|.
     600Operator \lstinline+|+ is overloaded for printing, like bit-shift \lstinline|<<| in \CC.
     601The \CFA \lstinline|with| clause opens an aggregate scope making its fields directly accessible, like Pascal \lstinline|with|, but using parallel semantics;
     602multiple aggregates may be opened.
     603\CFA has rebindable references \lstinline|int i, & ip = i, j; `&ip = &j;`| and non-rebindable references \lstinline|int i, & `const` ip = i, j; `&ip = &j;` // disallowed|.
     604}%
    399605
    400606\begin{figure}
     
    410616
    411617
     618
     619
    412620        int fn = f->fn; f->fn = f->fn1;
    413621                f->fn1 = f->fn + fn;
    414622        return fn;
    415 
    416623}
    417624int main() {
     
    432639void `main(Fib & fib)` with(fib) {
    433640
     641
    434642        [fn1, fn] = [1, 0];
    435643        for () {
     
    451659\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    452660typedef struct {
    453         int fn1, fn;  void * `next`;
     661        int `restart`, fn1, fn;
    454662} Fib;
    455 #define FibCtor { 1, 0, NULL }
     663#define FibCtor { `0`, 1, 0 }
    456664Fib * comain( Fib * f ) {
    457         if ( f->next ) goto *f->next;
    458         f->next = &&s1;
     665        `static void * states[] = {&&s0, &&s1};`
     666        `goto *states[f->restart];`
     667  s0: f->`restart` = 1;
    459668        for ( ;; ) {
    460669                return f;
    461670          s1:; int fn = f->fn + f->fn1;
    462                         f->fn1 = f->fn; f->fn = fn;
     671                f->fn1 = f->fn; f->fn = fn;
    463672        }
    464673}
     
    472681\end{lrbox}
    473682
    474 \subfloat[C asymmetric generator]{\label{f:CFibonacci}\usebox\myboxA}
     683\subfloat[C]{\label{f:CFibonacci}\usebox\myboxA}
    475684\hspace{3pt}
    476685\vrule
    477686\hspace{3pt}
    478 \subfloat[\CFA asymmetric generator]{\label{f:CFAFibonacciGen}\usebox\myboxB}
     687\subfloat[\CFA]{\label{f:CFAFibonacciGen}\usebox\myboxB}
    479688\hspace{3pt}
    480689\vrule
    481690\hspace{3pt}
    482 \subfloat[C generator implementation]{\label{f:CFibonacciSim}\usebox\myboxC}
     691\subfloat[C generated code for \CFA version]{\label{f:CFibonacciSim}\usebox\myboxC}
    483692\caption{Fibonacci (output) asymmetric generator}
    484693\label{f:FibonacciAsymmetricGenerator}
     
    493702};
    494703void ?{}( Fmt & fmt ) { `resume(fmt);` } // constructor
    495 void ^?{}( Fmt & f ) with(f) { $\C[1.75in]{// destructor}$
     704void ^?{}( Fmt & f ) with(f) { $\C[2.25in]{// destructor}$
    496705        if ( g != 0 || b != 0 ) sout | nl; }
    497706void `main( Fmt & f )` with(f) {
     
    499708                for ( ; g < 5; g += 1 ) { $\C{// groups}$
    500709                        for ( ; b < 4; b += 1 ) { $\C{// blocks}$
    501                                 `suspend;` $\C{// wait for character}$
    502                                 while ( ch == '\n' ) `suspend;` // ignore
    503                                 sout | ch;                                              // newline
    504                         } sout | " ";  // block spacer
    505                 } sout | nl; // group newline
     710                                do { `suspend;` $\C{// wait for character}$
      711                                } while ( ch == '\n' ); // ignore newline
     712                                sout | ch;                      $\C{// print character}$
     713                        } sout | " ";  $\C{// block separator}$
     714                } sout | nl; $\C{// group separator}$
    506715        }
    507716}
     
    521730\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    522731typedef struct {
    523         void * next;
     732        int `restart`, g, b;
    524733        char ch;
    525         int g, b;
    526734} Fmt;
    527735void comain( Fmt * f ) {
    528         if ( f->next ) goto *f->next;
    529         f->next = &&s1;
     736        `static void * states[] = {&&s0, &&s1};`
     737        `goto *states[f->restart];`
     738  s0: f->`restart` = 1;
    530739        for ( ;; ) {
    531740                for ( f->g = 0; f->g < 5; f->g += 1 ) {
    532741                        for ( f->b = 0; f->b < 4; f->b += 1 ) {
    533                                 return;
    534                           s1:;  while ( f->ch == '\n' ) return;
     742                                do { return;  s1: ;
     743                                } while ( f->ch == '\n' );
    535744                                printf( "%c", f->ch );
    536745                        } printf( " " );
     
    539748}
    540749int main() {
    541         Fmt fmt = { NULL };  comain( &fmt ); // prime
     750        Fmt fmt = { `0` };  comain( &fmt ); // prime
    542751        for ( ;; ) {
    543752                scanf( "%c", &fmt.ch );
     
    550759\end{lrbox}
    551760
    552 \subfloat[\CFA asymmetric generator]{\label{f:CFAFormatGen}\usebox\myboxA}
    553 \hspace{3pt}
     761\subfloat[\CFA]{\label{f:CFAFormatGen}\usebox\myboxA}
     762\hspace{35pt}
    554763\vrule
    555764\hspace{3pt}
    556 \subfloat[C generator simulation]{\label{f:CFormatSim}\usebox\myboxB}
     765\subfloat[C generated code for \CFA version]{\label{f:CFormatGenImpl}\usebox\myboxB}
    557766\hspace{3pt}
    558767\caption{Formatter (input) asymmetric generator}
     
    560769\end{figure}
    561770
    562 Stateful functions appear as generators, coroutines, and threads, where presentations are based on function objects or pointers~\cite{Butenhof97, C++14, MS:VisualC++, BoostCoroutines15}.
    563 For example, Python presents generators as a function object:
    564 \begin{python}
    565 def Gen():
    566         ... `yield val` ...
    567 gen = Gen()
    568 for i in range( 10 ):
    569         print( next( gen ) )
    570 \end{python}
    571 Boost presents coroutines in terms of four functor object-types:
    572 \begin{cfa}
    573 asymmetric_coroutine<>::pull_type
    574 asymmetric_coroutine<>::push_type
    575 symmetric_coroutine<>::call_type
    576 symmetric_coroutine<>::yield_type
    577 \end{cfa}
    578 and many languages present threading using function pointers, @pthreads@~\cite{Butenhof97}, \Csharp~\cite{Csharp}, Go~\cite{Go}, and Scala~\cite{Scala}, \eg pthreads:
    579 \begin{cfa}
    580 void * rtn( void * arg ) { ... }
    581 int i = 3, rc;
    582 pthread_t t; $\C{// thread id}$
    583 `rc = pthread_create( &t, rtn, (void *)i );` $\C{// create and initialized task, type-unsafe input parameter}$
    584 \end{cfa}
    585 % void mycor( pthread_t cid, void * arg ) {
    586 %       int * value = (int *)arg;                               $\C{// type unsafe, pointer-size only}$
    587 %       // thread body
    588 % }
    589 % int main() {
    590 %       int input = 0, output;
    591 %       coroutine_t cid = coroutine_create( &mycor, (void *)&input ); $\C{// type unsafe, pointer-size only}$
    592 %       coroutine_resume( cid, (void *)input, (void **)&output ); $\C{// type unsafe, pointer-size only}$
    593 % }
    594 \CFA's preferred presentation model for generators/coroutines/threads is a hybrid of objects and functions, with an object-oriented flavour.
    595 Essentially, the generator/coroutine/thread function is semantically coupled with a generator/coroutine/thread custom type.
    596 The custom type solves several issues, while accessing the underlying mechanisms used by the custom types is still allowed.
    597 
    598 
    599 \subsection{Generator}
    600 
    601 Stackless generators have the potential to be very small and fast, \ie as small and fast as function call/return for both creation and execution.
    602 The \CFA goal is to achieve this performance target, possibly at the cost of some semantic complexity.
    603 A series of different kinds of generators and their implementation demonstrate how this goal is accomplished.
    604 
    605 Figure~\ref{f:FibonacciAsymmetricGenerator} shows an unbounded asymmetric generator for an infinite sequence of Fibonacci numbers written in C and \CFA, with a simple C implementation for the \CFA version.
      771Figure~\ref{f:FibonacciAsymmetricGenerator} shows an unbounded asymmetric generator for an infinite sequence of Fibonacci numbers written (left to right) in C and \CFA, along with the underlying C implementation for the \CFA version.
    606772This generator is an \emph{output generator}, producing a new result on each resumption.
    607773To compute Fibonacci, the previous two values in the sequence are retained to generate the next value, \ie @fn1@ and @fn@, plus the execution location where control restarts when the generator is resumed, \ie top or middle.
     
    611777The C version only has the middle execution state because the top execution state is declaration initialization.
    612778Figure~\ref{f:CFAFibonacciGen} shows the \CFA approach, which also has a manual closure, but replaces the structure with a custom \CFA @generator@ type.
    613 This generator type is then connected to a function that \emph{must be named \lstinline|main|},\footnote{
    614 The name \lstinline|main| has special meaning in C, specifically the function where a program starts execution.
    615 Hence, overloading this name for other starting points (generator/coroutine/thread) is a logical extension.}
    616 called a \emph{generator main},which takes as its only parameter a reference to the generator type.
     779Each generator type must have a function named \lstinline|main|,
     780% \footnote{
     781% The name \lstinline|main| has special meaning in C, specifically the function where a program starts execution.
     782% Leveraging starting semantics to this name for generator/coroutine/thread is a logical extension.}
     783called a \emph{generator main} (leveraging the starting semantics for program @main@ in C), which is connected to the generator type via its single reference parameter.
     617784The generator main contains @suspend@ statements that suspend execution without ending the generator, versus @return@, which ends it.
    618 For the Fibonacci generator-main,\footnote{
    619 The \CFA \lstinline|with| opens an aggregate scope making its fields directly accessible, like Pascal \lstinline|with|, but using parallel semantics.
    620 Multiple aggregates may be opened.}
     785For the Fibonacci generator-main,
    621786the top initialization state appears at the start and the middle execution state is denoted by statement @suspend@.
    622787Any local variables in @main@ \emph{are not retained} between calls;
     
    627792Resuming an ended (returned) generator is undefined.
    628793Function @resume@ returns its argument generator so it can be cascaded in an expression, in this case to print the next Fibonacci value @fn@ computed in the generator instance.
    629 Figure~\ref{f:CFibonacciSim} shows the C implementation of the \CFA generator only needs one additional field, @next@, to handle retention of execution state.
    630 The computed @goto@ at the start of the generator main, which branches after the previous suspend, adds very little cost to the resume call.
    631 Finally, an explicit generator type provides both design and performance benefits, such as multiple type-safe interface functions taking and returning arbitrary types.\footnote{
    632 The \CFA operator syntax uses \lstinline|?| to denote operands, which allows precise definitions for pre, post, and infix operators, \eg \lstinline|++?|, \lstinline|?++|, and \lstinline|?+?|, in addition \lstinline|?\{\}| denotes a constructor, as in \lstinline|foo `f` = `\{`...`\}`|, \lstinline|^?\{\}| denotes a destructor, and \lstinline|?()| is \CC function call \lstinline|operator()|.
    633 }%
     794Figure~\ref{f:CFibonacciSim} shows the C implementation of the \CFA asymmetric generator.
     795Only one execution-state field, @restart@, is needed to subscript the suspension points in the generator.
      796At the start of the generator main, the @static@ declaration, @states@, is initialized to the addresses of the N restart points in the generator (where operator @&&@ takes the address of a label~\cite{gccValueLabels}).
     797Next, the computed @goto@ selects the last suspend point and branches to it.
      798Setting @restart@ and branching via the computed @goto@ add very little cost to the suspend/resume calls.
     799
      800An advantage of the \CFA explicit generator type is that it allows multiple type-safe interface functions taking and returning arbitrary types.
    634801\begin{cfa}
    635802int ?()( Fib & fib ) { return `resume( fib )`.fn; } $\C[3.9in]{// function-call interface}$
    636 int ?()( Fib & fib, int N ) { for ( N - 1 ) `fib()`; return `fib()`; } $\C{// use function-call interface to skip N values}$
    637 double ?()( Fib & fib ) { return (int)`fib()` / 3.14159; } $\C{// different return type, cast prevents recursive call}\CRT$
    638 sout | (int)f1() | (double)f1() | f2( 2 ); // alternative interface, cast selects call based on return type, step 2 values
     803int ?()( Fib & fib, int N ) { for ( N - 1 ) `fib()`; return `fib()`; } $\C{// add parameter to skip N values}$
     804double ?()( Fib & fib ) { return (int)`fib()` / 3.14159; } $\C{// different return type, cast prevents recursive call}$
     805Fib f;  int i;  double d;
     806i = f();  i = f( 2 );  d = f();                                         $\C{// alternative interfaces}\CRT$
    639807\end{cfa}
    640808Now, the generator can be a separately compiled opaque-type only accessed through its interface functions.
     641809For contrast, Figure~\ref{f:PythonFibonacci} shows the equivalent Python Fibonacci generator, which does not use a generator type, and hence has only a single interface, but provides an implicit closure.
    642810
    643 Having to manually create the generator closure by moving local-state variables into the generator type is an additional programmer burden.
    644 (This restriction is removed by the coroutine in Section~\ref{s:Coroutine}.)
    645 This requirement follows from the generality of variable-size local-state, \eg local state with a variable-length array requires dynamic allocation because the array size is unknown at compile time.
     811\begin{figure}
     812%\centering
     813\newbox\myboxA
     814\begin{lrbox}{\myboxA}
     815\begin{python}[aboveskip=0pt,belowskip=0pt]
     816def Fib():
     817        fn1, fn = 0, 1
     818        while True:
     819                `yield fn1`
     820                fn1, fn = fn, fn1 + fn
     821f1 = Fib()
     822f2 = Fib()
     823for i in range( 10 ):
     824        print( next( f1 ), next( f2 ) )
     825
     826
     827
     828
     829
     830
     831
     832
     833
     834
     835\end{python}
     836\end{lrbox}
     837
     838\newbox\myboxB
     839\begin{lrbox}{\myboxB}
     840\begin{python}[aboveskip=0pt,belowskip=0pt]
     841def Fmt():
     842        try:
     843                while True:                                             $\C[2.5in]{\# until destructor call}$
     844                        for g in range( 5 ):            $\C{\# groups}$
     845                                for b in range( 4 ):    $\C{\# blocks}$
     846                                        while True:
     847                                                ch = (yield)    $\C{\# receive from send}$
     848                                                if '\n' not in ch: $\C{\# ignore newline}$
     849                                                        break
     850                                        print( ch, end='' )     $\C{\# print character}$
     851                                print( '  ', end='' )   $\C{\# block separator}$
     852                        print()                                         $\C{\# group separator}$
     853        except GeneratorExit:                           $\C{\# destructor}$
      854                if g != 0 or b != 0:                    $\C{\# special case}$
     855                        print()
     856fmt = Fmt()
     857`next( fmt )`                                                   $\C{\# prime, next prewritten}$
     858for i in range( 41 ):
     859        `fmt.send( 'a' );`                                      $\C{\# send to yield}$
     860\end{python}
     861\end{lrbox}
     862
     863\hspace{30pt}
     864\subfloat[Fibonacci]{\label{f:PythonFibonacci}\usebox\myboxA}
     865\hspace{3pt}
     866\vrule
     867\hspace{3pt}
     868\subfloat[Formatter]{\label{f:PythonFormatter}\usebox\myboxB}
     869\caption{Python generator}
     870\label{f:PythonGenerator}
     871\end{figure}
     872
     873Having to manually create the generator closure by moving local-state variables into the generator type is an additional programmer burden (removed by the coroutine in Section~\ref{s:Coroutine}).
     874This manual requirement follows from the generality of allowing variable-size local-state, \eg local state with a variable-length array requires dynamic allocation as the array size is unknown at compile time.
    646875However, dynamic allocation significantly increases the cost of generator creation/destruction and is a showstopper for embedded real-time programming.
    647876But more importantly, the size of the generator type is tied to the local state in the generator main, which precludes separate compilation of the generator main, \ie a generator must be inlined or local state must be dynamically allocated.
    648 With respect to safety, we believe static analysis can discriminate local state from temporary variables in a generator, \ie variable usage spanning @suspend@, and generate a compile-time error.
    649 Finally, our current experience is that most generator problems have simple data state, including local state, but complex execution state, so the burden of creating the generator type is small.
     877With respect to safety, we believe static analysis can discriminate persistent generator state from temporary generator-main state and raise a compile-time error for temporary usage spanning suspend points.
      878Our experience using generators is that most problems have simple data state, including local state, but complex execution state, so the burden of creating the generator type is small.
    650879As well, C programmers are not afraid of this kind of semantic programming requirement, if it results in very small, fast generators.
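
For example, in the C implementation style of Figure~\ref{f:CFibonacciSim}, a temporary spanning a suspend is unsafe because the generator-main stack frame disappears at each suspend; the variable @temp@ below is illustrative of the error such an analysis would catch.
\begin{cfa}
typedef struct { int restart, fn1, fn; } Fib;	$\C[3.5in]{// persistent closure, as in the figure}$
Fib * comain( Fib * f ) {
	static void * states[] = { &&s0, &&s1 };
	goto *states[f->restart];
  s0: f->restart = 1;
	for ( ;; ) {
		int temp = f->fn + f->fn1;	$\C{// temporary lives on comain's stack frame}$
		return f;	$\C{// suspend: the frame, and temp, are discarded}$
	  s1: f->fn1 = f->fn;  f->fn = temp;	$\C{// error: temp is indeterminate after reentry}\CRT$
	}
}
\end{cfa}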
    651880
     
    669898The example takes advantage of resuming a generator in the constructor to prime the loops so the first character sent for formatting appears inside the nested loops.
     670899The destructor prints a final newline if the formatted text ends with a partial line.
    671 Figure~\ref{f:CFormatSim} shows the C implementation of the \CFA input generator with one additional field and the computed @goto@.
    672 For contrast, Figure~\ref{f:PythonFormatter} shows the equivalent Python format generator with the same properties as the Fibonacci generator.
    673 
    674 Figure~\ref{f:DeviceDriverGen} shows a \emph{killer} asymmetric generator, a device-driver, because device drivers caused 70\%-85\% of failures in Windows/Linux~\cite{Swift05}.
    675 Device drives follow the pattern of simple data state but complex execution state, \ie finite state-machine (FSM) parsing a protocol.
    676 For example, the following protocol:
     900Figure~\ref{f:CFormatGenImpl} shows the C implementation of the \CFA input generator with one additional field and the computed @goto@.
      901For contrast, Figure~\ref{f:PythonFormatter} shows the equivalent Python format generator with the same properties as the Fibonacci generator.
     902
     903% https://dl-acm-org.proxy.lib.uwaterloo.ca/
     904
      905Figure~\ref{f:DeviceDriverGen} shows an important application for an asymmetric generator, a device driver, because device drivers are a significant source of operating-system errors: 85\% in Windows XP~\cite[p.~78]{Swift05} and 51.6\% in Linux~\cite[p.~1358]{Xiao19}. %\cite{Palix11}
      906Swift \etal~\cite[p.~86]{Swift05} restructure device drivers using the Extension Procedure Call (XPC) within the kernel via functions @nooks_driver_call@ and @nooks_kernel_call@, which have coroutine properties, context switching to separate stacks with explicit hand-off calls;
     907however, the calls do not retain execution state, and hence always start from the top.
      908The alternative approach for implementing device drivers is stack ripping.
     909However, Adya \etal~\cite{Adya02} argue against stack ripping in Section 3.2 and suggest a hybrid approach in Section 4 using cooperatively scheduled \emph{fibers}, which is coroutining.
     910
     911As an example, the following protocol:
    677912\begin{center}
    678913\ldots\, STX \ldots\, message \ldots\, ESC ETX \ldots\, message \ldots\, ETX 2-byte crc \ldots
    679914\end{center}
    680 is a network message beginning with the control character STX, ending with an ETX, and followed by a 2-byte cyclic-redundancy check.
     915is for a simple network message beginning with the control character STX, ending with an ETX, and followed by a 2-byte cyclic-redundancy check.
    681916Control characters may appear in a message if preceded by an ESC.
    682917When a message byte arrives, it triggers an interrupt, and the operating system services the interrupt by calling the device driver with the byte read from a hardware register.
    683 The device driver returns a status code of its current state, and when a complete message is obtained, the operating system knows the message is in the message buffer.
    684 Hence, the device driver is an input/output generator.
    685 
    686 Note, the cost of creating and resuming the device-driver generator, @Driver@, is virtually identical to call/return, so performance in an operating-system kernel is excellent.
    687 As well, the data state is small, where variables @byte@ and @msg@ are communication variables for passing in message bytes and returning the message, and variables @lnth@, @crc@, and @sum@ are local variable that must be retained between calls and are manually hoisted into the generator type.
    688 % Manually, detecting and hoisting local-state variables is easy when the number is small.
    689 In contrast, the execution state is large, with one @resume@ and seven @suspend@s.
    690 Hence, the key benefits of the generator are correctness, safety, and maintenance because the execution states are transcribed directly into the programming language rather than using a table-driven approach.
    691 Because FSMs can be complex and frequently occur in important domains, direct generator support is important in a system programming language.
      918The device driver returns a status code of its current state, and when a complete message is obtained, the operating system reads the message accumulated in the supplied buffer.
     919Hence, the device driver is an input/output generator, where the cost of resuming the device-driver generator is the same as call/return, so performance in an operating-system kernel is excellent.
     920The key benefits of using a generator are correctness, safety, and maintenance because the execution states are transcribed directly into the programming language rather than table lookup or stack ripping.
     921The conclusion is that FSMs are complex and occur in important domains, so direct generator support is important in a system programming language.
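
For contrast, a hand-written version of this driver encodes the execution state as data, with an explicit state variable and a @switch@, and hoists the per-message variables by hand.
The following sketch elides error handling and bounds checks; the names @DrvState@, @Driver@, and @driver_byte@ are illustrative.
\begin{cfa}
enum { STX = 0x02, ETX = 0x03, ESC = 0x1b };	$\C[3.5in]{// protocol control characters}$
typedef enum { WAIT, MSG, ESCAPE, CRC1, CRC2 } DrvState;
typedef struct {
	DrvState state;	$\C{// execution state encoded as data}$
	unsigned char msg[64];  int lnth;	$\C{// per-message variables hoisted by hand}$
	unsigned short crc;
} Driver;
int driver_byte( Driver * d, unsigned char byte ) {	$\C{// called per interrupt; returns 1 on complete message}$
	switch ( d->state ) {	$\C{// reestablish continuation point}$
	  case WAIT: if ( byte == STX ) { d->lnth = 0;  d->state = MSG; }  break;
	  case MSG:
		if ( byte == ESC ) d->state = ESCAPE;
		else if ( byte == ETX ) d->state = CRC1;
		else d->msg[d->lnth++] = byte;
		break;
	  case ESCAPE: d->msg[d->lnth++] = byte;  d->state = MSG;  break;
	  case CRC1: d->crc = byte << 8;  d->state = CRC2;  break;
	  case CRC2: d->crc |= byte;  d->state = WAIT;  return 1;	$\C{// message complete}\CRT$
	}
	return 0;
}
\end{cfa}
Each additional suspend point in the generator version becomes another enumerator, another @case@, and more manual bookkeeping in this form, which is the correctness and maintenance cost the generator removes.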
    692922
    693923\begin{figure}
    694924\centering
    695 \newbox\myboxA
    696 \begin{lrbox}{\myboxA}
    697