Changes in / [6a9d4b4:933f32f]


Ignore:
Files:
152 added
18 deleted
224 edited

Legend:

Unmodified
Added
Removed
  • .gitignore

    r6a9d4b4 r933f32f  
    1010config.py
    1111stamp-h1
     12libtool
    1213/Makefile
    1314**/Makefile
     
    4950libcfa/arm-nolib/
    5051
    51 
    5252# generated by bison and lex from parser.yy and lex.ll
    5353src/Parser/parser.output
  • Jenkins/FullBuild

    r6a9d4b4 r933f32f  
    1717
    1818                                parallel (
    19                                         gcc_6_x64: { trigger_build( 'gcc-6',   'x64', true ) },
    20                                         gcc_6_x86: { trigger_build( 'gcc-6',   'x86', true ) },
    21                                         gcc_5_x64: { trigger_build( 'gcc-5',   'x64', false ) },
    22                                         gcc_5_x86: { trigger_build( 'gcc-5',   'x86', false ) },
    23                                         clang_x64: { trigger_build( 'clang',   'x64', false ) },
    24                                         clang_x86: { trigger_build( 'clang',   'x86', false ) },
     19                                        gcc_6_x64: { trigger_build( 'gcc-6',   'x64' ) },
     20                                        gcc_6_x86: { trigger_build( 'gcc-6',   'x86' ) },
     21                                        gcc_5_x64: { trigger_build( 'gcc-5',   'x64' ) },
     22                                        gcc_5_x86: { trigger_build( 'gcc-5',   'x86' ) },
     23                                        clang_x64: { trigger_build( 'clang',   'x64' ) },
     24                                        clang_x86: { trigger_build( 'clang',   'x86' ) },
    2525                                )
    2626                        }
     27                }
    2728
    28                         //Push latest changes to do-lang repo
    29                         push_build()
    30                 }
     29                promote_email(true)
    3130        }
    3231
     
    4342
    4443                //Send email to notify the failure
    45                 promote_failure_email()
     44                promote_email(false)
    4645        }
    4746
     
    5756//===========================================================================================================
    5857
    59 def trigger_build(String cc, String arch, Boolean publish) {
     58def trigger_build(String cc, String arch) {
    6059        def result = build job: 'Cforall/master',               \
    6160                parameters: [                                           \
     
    7776                        [$class: 'BooleanParameterValue',               \
    7877                          name: 'Publish',                              \
    79                           value: publish],                              \
     78                          value: true],                                 \
    8079                        [$class: 'BooleanParameterValue',               \
    8180                          name: 'Silent',                               \
     
    8988                sh("wget -q -O - http://localhost:8084/jenkins/job/Cforall/job/master/${result.number}/consoleText")
    9089                error(result.result)
    91         }
    92 }
    93 
    94 def push_build() {
    95         //Don't use the build_stage function which outputs the compiler
    96         stage('Push') {
    97 
    98                 status_prefix = 'Push'
    99 
    100                 def out_dir = pwd tmp: true
    101                 sh "mkdir -p ${out_dir}"
    102 
    103                 //checkout the code to make sure this is a valid git repo
    104                 checkout scm
    105 
    106                 collect_git_info()
    107 
    108                 //parse git logs to find what changed
    109                 sh "git remote > ${out_dir}/GIT_REMOTE"
    110                 git_remote = readFile("${out_dir}/GIT_REMOTE")
    111                 remoteDoLangExists = git_remote.contains("DoLang")
    112 
    113                 if( !remoteDoLangExists ) {
    114                         sh 'git remote add DoLang git@gitlab.do-lang.org:internal/cfa-cc.git'
    115                 }
    116 
    117                 //sh "GIT_SSH_COMMAND=\"ssh -v\" git push DoLang ${gitRefNewValue}:master"
    118                 echo('BUILD NOT PUSH SINCE DO-LANG SERVER WAS DOWN')
    11990        }
    12091}
     
    141112
    142113//Email notification on a full build failure
    143 def promote_failure_email() {
     114def promote_email(boolean success) {
    144115        echo('notifying users')
     116
     117        def result = success ? "PROMOTE - SUCCESS" : "PROMOTE - FAILURE"
    145118
    146119        //Since tokenizer doesn't work, figure stuff out from the environnement variables and command line
    147120        //Configurations for email format
    148         def email_subject = "[cforall git][PROMOTE - FAILURE]"
    149         def email_body = """This is an automated email from the Jenkins build machine. It was
    150 generated because of a git hooks/post-receive script following
    151 a ref change was pushed to the repository containing
    152 the project "UNNAMED PROJECT".
     121        def email_subject = "[cforall git][${result}]"
     122        def email_body = """<p>This is an automated email from the Jenkins build machine. It was
     123generated following the result of the C\u2200 nightly build.</p>
    153124
    154 Check console output at ${env.BUILD_URL} to view the results.
     125<p>Check console output at ${env.BUILD_URL} to view the results.</p>
    155126
    156 - Status --------------------------------------------------------------
     127<p>- Status --------------------------------------------------------------</p>
    157128
    158 PROMOTE FAILURE
     129<p>${result}</p>
     130
     131<p>- Performance ---------------------------------------------------------</p>
     132
     133<img src="https://cforall.uwaterloo.ca/jenkins/job/Cforall/job/master/plot/Compilation/getPlot?index=0" >
     134
     135<p>- Logs ----------------------------------------------------------------</p>
    159136"""
    160137
     
    162139
    163140        //send email notification
    164         emailext body: email_body, subject: email_subject, to: email_to, attachLog: true
     141        emailext body: email_body, subject: email_subject, to: email_to, attachLog: !success
    165142}
  • Jenkinsfile

    r6a9d4b4 r933f32f  
    11#!groovy
     2
     3import groovy.transform.Field
    24
    35//===========================================================================================================
     
    2224                wrap([$class: 'TimestamperBuildWrapper']) {
    2325
    24                         notify_server(0)
    25 
    2626                        Settings = prepare_build()
    2727
     
    3434                                checkout()
    3535
    36                                 notify_server(0)
    37 
    3836                                build()
    3937
     
    5048                        BuildDir  = pwd tmp: true
    5149                        SrcDir    = pwd tmp: false
    52 
    53                         notify_server(45)
    5450                }
    5551        }
     
    7268        finally {
    7369                //Send email with final results if this is not a full build
    74                 if( Settings && !Settings.Silent ) {
    75                         email(log_needed, Settings.IsSandbox)
    76                 }
     70                email(log_needed)
    7771
    7872                echo 'Build Completed'
     
    116110                        //Also specify the compiler by hand
    117111                        targets=""
    118                         if( Settings.RunAllTests ) {
     112                        if( Settings.RunAllTests || Settings.RunBenchmark ) {
    119113                                targets="--with-target-hosts='host:debug,host:nodebug'"
    120114                        } else {
     
    153147                dir (BuildDir) {
    154148                        //Append bench results
    155                         sh "${SrcDir}/benchmark/jenkins.sh ${Settings.GitNewRef} ${Settings.Architecture} ${BuildDir}/bench.json"
     149                        sh "make --no-print-directory -C benchmark jenkins"
    156150                }
    157151        }
     
    176170        build_stage('Publish') {
    177171
    178                 if( !Settings.Publish ) return
     172                if( Settings.Publish && !Settings.RunBenchmark ) { echo 'No results to publish!!!' }
     173
     174                def groupCompile = new PlotGroup('Compilation', 'seconds', true)
     175                def groupConcurrency = new PlotGroup('Concurrency', 'nanoseconds', false)
    179176
    180177                //Then publish the results
    181                 sh 'curl --silent --show-error -H \'Content-Type: application/json\' --data @${BuildDir}/bench.json https://cforall.uwaterloo.ca:8082/jenkins/publish > /dev/null || true'
     178                do_plot(Settings.RunBenchmark && Settings.Publish, 'compile'  , groupCompile    , 'Compilation')
     179                do_plot(Settings.RunBenchmark && Settings.Publish, 'ctxswitch', groupConcurrency, 'Context Switching')
     180                do_plot(Settings.RunBenchmark && Settings.Publish, 'mutex'    , groupConcurrency, 'Mutual Exclusion')
     181                do_plot(Settings.RunBenchmark && Settings.Publish, 'signal'   , groupConcurrency, 'Internal and External Scheduling')
    182182        }
    183183}
     
    196196
    197197        return """
     198<pre>
    198199The branch ${env.BRANCH_NAME} has been updated.
    199200${gitUpdate}
    200 
    201 Check console output at ${env.BUILD_URL} to view the results.
    202 
    203 - Status --------------------------------------------------------------
    204 
    205 BUILD# ${env.BUILD_NUMBER} - ${currentBuild.result}
    206 
    207 - Log -----------------------------------------------------------------
     201</pre>
     202
     203<p>Check console output at ${env.BUILD_URL} to view the results.</p>
     204
     205<p>- Status --------------------------------------------------------------</p>
     206
     207<p>BUILD# ${env.BUILD_NUMBER} - ${currentBuild.result}</p>
     208
     209<p>- Log -----------------------------------------------------------------</p>
     210
     211<pre>
    208212${gitLog}
    209 -----------------------------------------------------------------------
     213</pre>
     214
     215<p>-----------------------------------------------------------------------</p>
     216<pre>
    210217Summary of changes:
    211218${gitDiff}
     219</pre>
    212220"""
    213221}
    214222
    215223//Standard build email notification
    216 def email(boolean log, boolean bIsSandbox) {
     224def email(boolean log) {
    217225        //Since tokenizer doesn't work, figure stuff out from the environnement variables and command line
    218226        //Configurations for email format
     
    221229        def project_name = (env.JOB_NAME =~ /(.+)\/.+/)[0][1].toLowerCase()
    222230        def email_subject = "[${project_name} git][BUILD# ${env.BUILD_NUMBER} - ${currentBuild.result}] - branch ${env.BRANCH_NAME}"
    223         def email_body = """This is an automated email from the Jenkins build machine. It was
     231        def email_body = """<p>This is an automated email from the Jenkins build machine. It was
    224232generated because of a git hooks/post-receive script following
    225 a ref change which was pushed to the Cforall repository.
     233a ref change which was pushed to the C\u2200 repository.</p>
    226234""" + GitLogMessage()
    227235
    228         def email_to = "cforall@lists.uwaterloo.ca"
    229 
    230         if( Settings && !Settings.IsSandbox ) {
     236        def email_to = !Settings.IsSandbox ? "cforall@lists.uwaterloo.ca" : "tdelisle@uwaterloo.ca"
     237
     238        if( Settings && !Settings.Silent ) {
    231239                //send email notification
    232240                emailext body: email_body, subject: email_subject, to: email_to, attachLog: log
     
    311319                }
    312320
     321                this.IsSandbox          = (branch == "jenkins-sandbox")
    313322                this.RunAllTests        = param.RunAllTests
    314323                this.RunBenchmark       = param.RunBenchmark
     
    316325                this.Publish            = param.Publish
    317326                this.Silent             = param.Silent
    318                 this.IsSandbox          = (branch == "jenkins-sandbox")
    319327
    320328                def full = param.RunAllTests ? " (Full)" : ""
     
    333341                this.GitNewRef = ''
    334342                this.GitOldRef = ''
     343        }
     344}
     345
     346class PlotGroup implements Serializable {
     347        public String name
     348        public String unit
     349        public boolean log
     350
     351        PlotGroup(String name, String unit, boolean log) {
     352                this.name = name
     353                this.unit = unit
     354                this.log = log
    335355        }
    336356}
     
    398418}
    399419
    400 def notify_server(int wait) {
    401         sh """curl --silent --show-error --data "wait=${wait}" -X POST https://cforall.uwaterloo.ca:8082/jenkins/notify > /dev/null || true"""
    402         return
    403 }
    404 
    405420def make_doc() {
    406421        def err = null
     
    417432        }
    418433}
     434
     435def do_plot(boolean new_data, String file, PlotGroup group, String title) {
     436
     437        if(new_data) {
     438                echo "Publishing new data"
     439        }
     440
     441        def series = new_data ? [[
     442                                file: "${file}.csv",
     443                                exclusionValues: '',
     444                                displayTableFlag: false,
     445                                inclusionFlag: 'OFF',
     446                                url: ''
     447                        ]] : [];
     448
     449        echo "file is ${BuildDir}/benchmark/${file}.csv, group ${group}, title ${title}"
     450        dir("${BuildDir}/benchmark/") {
     451                plot csvFileName: "cforall-${env.BRANCH_NAME}-${file}.csv",
     452                        csvSeries: series,
     453                        group: "${group.name}",
     454                        title: "${title}",
     455                        style: 'lineSimple',
     456                        exclZero: false,
     457                        keepRecords: false,
     458                        logarithmic: group.log,
     459                        numBuilds: '120',
     460                        useDescr: true,
     461                        yaxis: group.unit,
     462                        yaxisMaximum: '',
     463                        yaxisMinimum: ''
     464        }
     465}
  • Makefile.am

    r6a9d4b4 r933f32f  
    1111## Created On       : Sun May 31 22:14:18 2015
    1212## Last Modified By : Peter A. Buhr
    13 ## Last Modified On : Wed Dec 14 14:20:48 2016
    14 ## Update Count     : 15
     13## Last Modified On : Sat Feb  2 16:54:42 2019
     14## Update Count     : 21
    1515###############################################################################
    1616
     
    1818ACLOCAL_AMFLAGS  = -I automake
    1919
    20 MAINTAINERCLEANFILES = lib/* bin/* tests/.deps/* tests/.out/*
    21  # order important
     20MAINTAINERCLEANFILES = lib/* bin/* tests/.deps/* tests/.out/* # order important
    2221
    2322SUBDIRS = driver src . @LIBCFA_TARGET_DIRS@
  • Makefile.in

    r6a9d4b4 r933f32f  
    250250distcleancheck_listfiles = find . -type f -print
    251251ACLOCAL = @ACLOCAL@
    252 ALLOCA = @ALLOCA@
    253252AMTAR = @AMTAR@
    254253AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
     
    396395AUTOMAKE_OPTIONS = foreign    # do not require all the GNU file names
    397396ACLOCAL_AMFLAGS = -I automake
    398 MAINTAINERCLEANFILES = lib/* bin/* tests/.deps/* tests/.out/*
     397MAINTAINERCLEANFILES = lib/* bin/* tests/.deps/* tests/.out/* # order important
    399398SUBDIRS = driver src . @LIBCFA_TARGET_DIRS@
    400399noinst_DATA = @LIBCFA_TARGET_MAKEFILES@
     
    928927.PRECIOUS: Makefile
    929928
    930  # order important
    931929
    932930@LIBCFA_TARGET_MAKEFILES@ : Makefile $(srcdir)/libcfa/configure
  • automake/cfa.m4

    r6a9d4b4 r933f32f  
    8080        esac
    8181])
     82
     83# http://git.savannah.gnu.org/gitweb/?p=autoconf-archive.git;a=blob_plain;f=m4/ax_check_compile_flag.m4
     84AC_DEFUN([M4CFA_CHECK_COMPILE_FLAG],
     85[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF
     86AS_VAR_PUSHDEF([CACHEVAR],[m4cfa_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
     87AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
     88        m4cfa_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
     89        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
     90        AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
     91                [AS_VAR_SET(CACHEVAR,[yes])],
     92                [AS_VAR_SET(CACHEVAR,[no])])
     93        _AC_LANG_PREFIX[]FLAGS=$m4cfa_check_save_flags])
     94AS_VAR_IF(CACHEVAR,yes,
     95        [m4_default([$2], :)],
     96        [m4_default([$3], :)])
     97AS_VAR_POPDEF([CACHEVAR])dnl
     98])dnl M4CFA_CHECK_COMPILE_FLAGS
  • benchmark/Makefile.am

    r6a9d4b4 r933f32f  
    2121include $(top_srcdir)/src/cfa.make
    2222
    23 
    24 
    25 AM_CFLAGS = -O2 -Wall -I$(srcdir) -lrt -pthread
    26 AM_CFAFLAGS = -quiet -in-tree -nodebug
    27 AM_UPPFLAGS = -quiet -nodebug -multi
     23AM_CFLAGS = -O2 -Wall -Wextra -Werror -I$(srcdir) -lrt -pthread
     24AM_CFAFLAGS = -quiet -nodebug -in-tree
     25AM_UPPFLAGS = -quiet -nodebug -multi -std=c++14
     26
     27BENCH_V_CC = $(__bench_v_CC_$(__quiet))
     28BENCH_V_CFA = $(__bench_v_CFA_$(__quiet))
     29BENCH_V_CXX = $(__bench_v_CXX_$(__quiet))
     30BENCH_V_GOC = $(__bench_v_GOC_$(__quiet))
     31BENCH_V_JAVAC = $(__bench_v_JAVAC_$(__quiet))
     32BENCH_V_UPP = $(__bench_v_UPP_$(__quiet))
     33
     34__quiet = verbose
     35__bench_v_CC_quiet = @
     36__bench_v_CFA_quiet = @
     37__bench_v_CXX_quiet = @
     38__bench_v_GOC_quiet = @
     39__bench_v_JAVAC_quiet = @
     40__bench_v_UPP_quiet = @
     41__bench_v_CC_verbose = $(AM_V_CC)
     42__bench_v_CFA_verbose = $(AM_V_CFA)
     43__bench_v_CXX_verbose = $(AM_V_CXX)
     44__bench_v_GOC_verbose = $(AM_V_GOC)
     45__bench_v_JAVAC_verbose = $(AM_V_JAVAC)
     46__bench_v_UPP_verbose = $(AM_V_UPP)
     47
     48
    2849
    2950TOOLSDIR = ${abs_top_builddir}/tools/
     
    4667
    4768.NOTPARALLEL:
     69.PHONY: compile.csv ctxswitch.csv mutex.csv signal.csv
    4870
    4971## =========================================================================================================
     
    6082
    6183%.runquiet :
    62         @+make $(basename $@) CFLAGS="-w"
     84        @+make $(basename $@) CFLAGS="-w" __quiet=quiet
    6385        @taskset -c 1 ./a.out
    6486        @rm -f a.out
     
    7395## =========================================================================================================
    7496
     97FIX_NEW_LINES = cat $@ | tr "\n" "\t" | sed -r 's/\t,/,/' | tr "\t" "\n" > $@
     98
    7599jenkins$(EXEEXT):
    76         @echo "{"
    77         @echo -e '\t"githash": "'${githash}'",'
    78         @echo -e '\t"arch": "'   ${arch}   '",'
    79100@DOifskipcompile@
    80         @echo -e '\t"compile": {'
    81         @+make compile TIME_FORMAT='%e,' PRINT_FORMAT='\t\t\"%s\" :'
    82         @echo -e '\t\t"dummy" : {}'
    83         @echo -e '\t},'
     101        @+make compile.csv
    84102@DOendif@
    85         @echo -e '\t"ctxswitch": {'
    86         @echo -en '\t\t"coroutine":'
    87         @+make ctxswitch-cfa_coroutine.runquiet
    88         @echo -en '\t\t,"thread":'
    89         @+make ctxswitch-cfa_thread.runquiet
    90         @echo -e '\t},'
    91         @echo -e '\t"mutex": ['
    92         @echo -en '\t\t'
    93         @+make mutex-cfa1.runquiet
    94         @echo -en '\t\t,'
    95         @+make mutex-cfa2.runquiet
    96         @echo -e '\t],'
    97         @echo -e '\t"scheduling": ['
    98         @echo -en '\t\t'
    99         @+make signal-cfa1.runquiet
    100         @echo -en '\t\t,'
    101         @+make signal-cfa2.runquiet
    102         @echo -en '\t\t,'
    103         @+make waitfor-cfa1.runquiet
    104         @echo -en '\t\t,'
    105         @+make waitfor-cfa2.runquiet
    106         @echo -e '\n\t],'
    107         @echo -e '\t"epoch": ' $(shell date +%s)
    108         @echo "}"
     103        @+make ctxswitch.csv
     104        @+make mutex.csv
     105        @+make signal.csv
     106@DOifskipcompile@
     107        @cat compile.csv
     108@DOendif@
     109        @cat ctxswitch.csv
     110        @cat mutex.csv
     111        @cat signal.csv
     112
     113compile.csv:
     114        @echo "array,attributes,empty,expression,io,monitor,operators,typeof" > $@
     115        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-array.make >> $@
     116        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-attributes.make >> $@
     117        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-empty.make >> $@
     118        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-expression.make >> $@
     119        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-io.make >> $@
     120        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-monitor.make >> $@
     121        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-operators.make >> $@
     122        @+make TIME_FORMAT='%e' PRINT_FORMAT='' compile-typeof.make >> $@
     123        @$(srcdir)/fixcsv.sh $@
     124
     125ctxswitch.csv:
     126        @echo "coroutine,thread" > $@
     127        @+make ctxswitch-cfa_coroutine.runquiet >> $@ && echo -n ',' >> $@
     128        @+make ctxswitch-cfa_thread.runquiet >> $@
     129        @$(srcdir)/fixcsv.sh $@
     130
     131mutex.csv:
     132        @echo "1-monitor,2-monitor" > $@
     133        @+make mutex-cfa1.runquiet >> $@ && echo -n ',' >> $@
     134        @+make mutex-cfa2.runquiet >> $@
     135        @$(srcdir)/fixcsv.sh $@
     136
     137signal.csv:
     138        @echo "signal-1,signal-2,waitfor-1,waitfor-2" > $@
     139        @+make signal-cfa1.runquiet >> $@ && echo -n ',' >> $@
     140        @+make signal-cfa2.runquiet >> $@ && echo -n ',' >> $@
     141        @+make waitfor-cfa1.runquiet >> $@ && echo -n ',' >> $@
     142        @+make waitfor-cfa2.runquiet >> $@
     143        @$(srcdir)/fixcsv.sh $@
    109144
    110145## =========================================================================================================
    111146loop$(EXEEXT):
    112         $(AM_V_CC)$(COMPILE) -DBENCH_N=5000000000 $(srcdir)/loop.c
     147        $(BENCH_V_CC)$(COMPILE) -DBENCH_N=5000000000 $(srcdir)/loop.c
    113148
    114149function$(EXEEXT):
    115         $(AM_V_CC)$(COMPILE) -DBENCH_N=5000000000 $(srcdir)/function.c
     150        $(BENCH_V_CC)$(COMPILE) -DBENCH_N=5000000000 $(srcdir)/function.c
    116151
    117152fetch_add$(EXEEXT):
    118         $(AM_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/fetch_add.c
     153        $(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/fetch_add.c
     154
     155tls-fetch_add$(EXEEXT):
     156        $(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/tls-fetch_add.c
    119157
    120158## =========================================================================================================
     
    123161        function.run                    \
    124162        fetch_add.run                   \
     163        tls-fetch_add.run                       \
    125164        ctxswitch-pthread.run           \
    126165        ctxswitch-cfa_coroutine.run     \
     
    139178
    140179ctxswitch-kos_fibre$(EXEEXT):
    141         $(AM_V_CXX)$(CXXCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/kos_fibre.cpp  -I$(LIBFIBRE_DIR) -lfibre
     180        $(BENCH_V_CXX)$(CXXCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/kos_fibre.cpp  -I$(LIBFIBRE_DIR) -lfibre
    142181
    143182ctxswitch-kos_fibre2$(EXEEXT):
    144         $(AM_V_CXX)$(CXXCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/kos_fibre2.cpp -I$(LIBFIBRE_DIR) -lfibre
     183        $(BENCH_V_CXX)$(CXXCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/kos_fibre2.cpp -I$(LIBFIBRE_DIR) -lfibre
    145184endif
    146185
     
    148187
    149188ctxswitch-pthread$(EXEEXT):
    150         $(AM_V_CC)$(COMPILE)    -DBENCH_N=50000000 $(srcdir)/ctxswitch/pthreads.c
     189        $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=50000000 $(srcdir)/ctxswitch/pthreads.c
    151190
    152191ctxswitch-cfa_coroutine$(EXEEXT):
    153         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_cor.cfa
     192        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_cor.cfa
    154193
    155194ctxswitch-cfa_thread$(EXEEXT):
    156         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_thrd.cfa
     195        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_thrd.cfa
    157196
    158197ctxswitch-cfa_thread2$(EXEEXT):
    159         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_thrd2.cfa
     198        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_thrd2.cfa
    160199
    161200ctxswitch-upp_coroutine$(EXEEXT):
    162         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/upp_cor.cc
     201        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/upp_cor.cc
    163202
    164203ctxswitch-upp_thread$(EXEEXT):
    165         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/upp_thrd.cc
     204        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/upp_thrd.cc
    166205
    167206ctxswitch-goroutine$(EXEEXT):
    168         $(AM_V_GOC)go build -o a.out $(srcdir)/ctxswitch/goroutine.go
     207        $(BENCH_V_GOC)go build -o a.out $(srcdir)/ctxswitch/goroutine.go
    169208
    170209ctxswitch-java_thread$(EXEEXT):
    171         $(AM_V_JAVAC)javac -d $(builddir) $(srcdir)/ctxswitch/JavaThread.java
     210        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/ctxswitch/JavaThread.java
    172211        @echo "#!/bin/sh" > a.out
    173212        @echo "java JavaThread" >> a.out
     
    187226
    188227mutex-pthread_lock$(EXEEXT):
    189         $(AM_V_CC)$(COMPILE)    -DBENCH_N=50000000 $(srcdir)/mutex/pthreads.c
     228        $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=50000000 $(srcdir)/mutex/pthreads.c
    190229
    191230mutex-upp$(EXEEXT):
    192         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/mutex/upp.cc
     231        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/mutex/upp.cc
    193232
    194233mutex-cfa1$(EXEEXT):
    195         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa1.cfa
     234        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa1.cfa
    196235
    197236mutex-cfa2$(EXEEXT):
    198         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa2.cfa
     237        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa2.cfa
    199238
    200239mutex-cfa4$(EXEEXT):
    201         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa4.cfa
     240        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa4.cfa
    202241
    203242mutex-java_thread$(EXEEXT):
    204         $(AM_V_JAVAC)javac -d $(builddir) $(srcdir)/mutex/JavaThread.java
     243        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/mutex/JavaThread.java
    205244        @echo "#!/bin/sh" > a.out
    206245        @echo "java JavaThread" >> a.out
     
    217256
    218257signal-pthread_cond$(EXEEXT):
    219         $(AM_V_CC)$(COMPILE)    -DBENCH_N=500000  $(srcdir)/schedint/pthreads.c
     258        $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=500000  $(srcdir)/schedint/pthreads.c
    220259
    221260signal-upp$(EXEEXT):
    222         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=5000000 $(srcdir)/schedint/upp.cc
     261        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=5000000 $(srcdir)/schedint/upp.cc
    223262
    224263signal-cfa1$(EXEEXT):
    225         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa1.cfa
     264        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa1.cfa
    226265
    227266signal-cfa2$(EXEEXT):
    228         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa2.cfa
     267        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa2.cfa
    229268
    230269signal-cfa4$(EXEEXT):
    231         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa4.cfa
     270        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa4.cfa
    232271
    233272signal-java_thread$(EXEEXT):
    234         $(AM_V_JAVAC)javac -d $(builddir) $(srcdir)/schedint/JavaThread.java
     273        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/schedint/JavaThread.java
    235274        @echo "#!/bin/sh" > a.out
    236275        @echo "java JavaThread" >> a.out
     
    246285
    247286waitfor-upp$(EXEEXT):
    248         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=5000000 $(srcdir)/schedext/upp.cc
     287        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=5000000 $(srcdir)/schedext/upp.cc
    249288
    250289waitfor-cfa1$(EXEEXT):
    251         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa1.cfa
     290        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa1.cfa
    252291
    253292waitfor-cfa2$(EXEEXT):
    254         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa2.cfa
     293        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa2.cfa
    255294
    256295waitfor-cfa4$(EXEEXT):
    257         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa4.cfa
     296        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa4.cfa
    258297
    259298## =========================================================================================================
     
    269308
    270309creation-cfa_coroutine$(EXEEXT):
    271         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_cor.cfa
     310        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_cor.cfa
    272311
    273312creation-cfa_coroutine_eager$(EXEEXT):
    274         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_cor.cfa  -DEAGER
     313        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_cor.cfa  -DEAGER
    275314
    276315creation-cfa_thread$(EXEEXT):
    277         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_thrd.cfa
     316        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_thrd.cfa
    278317
    279318creation-upp_coroutine$(EXEEXT):
    280         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/creation/upp_cor.cc
     319        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/creation/upp_cor.cc
    281320
    282321creation-upp_thread$(EXEEXT):
    283         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/creation/upp_thrd.cc
     322        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/creation/upp_thrd.cc
    284323
    285324creation-pthread$(EXEEXT):
    286         $(AM_V_CC)$(COMPILE)    -DBENCH_N=250000   $(srcdir)/creation/pthreads.c
     325        $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=250000   $(srcdir)/creation/pthreads.c
    287326
    288327creation-goroutine$(EXEEXT):
    289         $(AM_V_GOC)go build -o a.out $(srcdir)/creation/goroutine.go
     328        $(BENCH_V_GOC)go build -o a.out $(srcdir)/creation/goroutine.go
    290329
    291330creation-java_thread$(EXEEXT):
    292         $(AM_V_JAVAC)javac -d $(builddir) $(srcdir)/creation/JavaThread.java
     331        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/creation/JavaThread.java
    293332        @echo "#!/bin/sh" > a.out
    294333        @echo "java JavaThread" >> a.out
     
    311350
    312351compile-array$(EXEEXT):
    313         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/array.cfa
     352        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/array.cfa
    314353
    315354compile-attributes$(EXEEXT):
    316         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/attributes.cfa
     355        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/attributes.cfa
    317356
    318357compile-empty$(EXEEXT):
    319         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(srcdir)/compile/empty.cfa
     358        @$(CFACOMPILE) -fsyntax-only -w $(srcdir)/compile/empty.cfa
    320359
    321360compile-expression$(EXEEXT):
    322         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/expression.cfa
     361        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/expression.cfa
    323362
    324363compile-io$(EXEEXT):
    325         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/io1.cfa
     364        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/io1.cfa
    326365
    327366compile-monitor$(EXEEXT):
    328         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/monitor.cfa
     367        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/monitor.cfa
    329368
    330369compile-operators$(EXEEXT):
    331         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/operators.cfa
     370        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/operators.cfa
    332371
    333372compile-thread$(EXEEXT):
    334         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/thread.cfa
     373        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/thread.cfa
    335374
    336375compile-typeof$(EXEEXT):
    337         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
    338 
     376        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
     377
  • benchmark/Makefile.in

    r6a9d4b4 r933f32f  
    200200DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
    201201ACLOCAL = @ACLOCAL@
    202 ALLOCA = @ALLOCA@
    203202AMTAR = @AMTAR@
    204203AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
     
    372371
    373372# applies to both programs
    374 AM_CFLAGS = -O2 -Wall -I$(srcdir) -lrt -pthread
    375 AM_CFAFLAGS = -quiet -in-tree -nodebug
    376 AM_UPPFLAGS = -quiet -nodebug -multi
     373AM_CFLAGS = -O2 -Wall -Wextra -Werror -I$(srcdir) -lrt -pthread
     374AM_CFAFLAGS = -quiet -nodebug -in-tree
     375AM_UPPFLAGS = -quiet -nodebug -multi -std=c++14
     376BENCH_V_CC = $(__bench_v_CC_$(__quiet))
     377BENCH_V_CFA = $(__bench_v_CFA_$(__quiet))
     378BENCH_V_CXX = $(__bench_v_CXX_$(__quiet))
     379BENCH_V_GOC = $(__bench_v_GOC_$(__quiet))
     380BENCH_V_JAVAC = $(__bench_v_JAVAC_$(__quiet))
     381BENCH_V_UPP = $(__bench_v_UPP_$(__quiet))
     382__quiet = verbose
     383__bench_v_CC_quiet = @
     384__bench_v_CFA_quiet = @
     385__bench_v_CXX_quiet = @
     386__bench_v_GOC_quiet = @
     387__bench_v_JAVAC_quiet = @
     388__bench_v_UPP_quiet = @
     389__bench_v_CC_verbose = $(AM_V_CC)
     390__bench_v_CFA_verbose = $(AM_V_CFA)
     391__bench_v_CXX_verbose = $(AM_V_CXX)
     392__bench_v_GOC_verbose = $(AM_V_GOC)
     393__bench_v_JAVAC_verbose = $(AM_V_JAVAC)
     394__bench_v_UPP_verbose = $(AM_V_UPP)
    377395TOOLSDIR = ${abs_top_builddir}/tools/
    378396REPEAT = ${abs_top_builddir}/tools/repeat
     
    383401PRINT_FORMAT = %20s: #Comments needed for spacing
    384402dummy_SOURCES = dummyC.c dummyCXX.cpp
     403FIX_NEW_LINES = cat $@ | tr "\n" "\t" | sed -r 's/\t,/,/' | tr "\t" "\n" > $@
    385404CTXSWITCH_DEPEND = loop.run function.run fetch_add.run \
    386         ctxswitch-pthread.run ctxswitch-cfa_coroutine.run \
    387         ctxswitch-cfa_thread.run ctxswitch-cfa_thread2.run \
    388         ctxswitch-upp_coroutine.run ctxswitch-upp_thread.run \
    389         ctxswitch-goroutine.run ctxswitch-java_thread.run \
    390         $(am__append_1)
     405        tls-fetch_add.run ctxswitch-pthread.run \
     406        ctxswitch-cfa_coroutine.run ctxswitch-cfa_thread.run \
     407        ctxswitch-cfa_thread2.run ctxswitch-upp_coroutine.run \
     408        ctxswitch-upp_thread.run ctxswitch-goroutine.run \
     409        ctxswitch-java_thread.run $(am__append_1)
    391410testdir = $(top_srcdir)/tests
    392411all: all-am
     
    713732
    714733.NOTPARALLEL:
     734.PHONY: compile.csv ctxswitch.csv mutex.csv signal.csv
    715735
    716736all : ctxswitch$(EXEEXT) mutex$(EXEEXT) signal$(EXEEXT) waitfor$(EXEEXT) creation$(EXEEXT)
     
    726746
    727747%.runquiet :
    728         @+make $(basename $@) CFLAGS="-w"
     748        @+make $(basename $@) CFLAGS="-w" __quiet=quiet
    729749        @taskset -c 1 ./a.out
    730750        @rm -f a.out
     
    738758
    739759jenkins$(EXEEXT):
    740         @echo "{"
    741         @echo -e '\t"githash": "'${githash}'",'
    742         @echo -e '\t"arch": "'   ${arch}   '",'
    743760@DOifskipcompile@
    744         @echo -e '\t"compile": {'
    745         @+make compile TIME_FORMAT='%e,' PRINT_FORMAT='\t\t\"%s\" :'
    746         @echo -e '\t\t"dummy" : {}'
    747         @echo -e '\t},'
     761        @+make compile.csv
    748762@DOendif@
    749         @echo -e '\t"ctxswitch": {'
    750         @echo -en '\t\t"coroutine":'
    751         @+make ctxswitch-cfa_coroutine.runquiet
    752         @echo -en '\t\t,"thread":'
    753         @+make ctxswitch-cfa_thread.runquiet
    754         @echo -e '\t},'
    755         @echo -e '\t"mutex": ['
    756         @echo -en '\t\t'
    757         @+make mutex-cfa1.runquiet
    758         @echo -en '\t\t,'
    759         @+make mutex-cfa2.runquiet
    760         @echo -e '\t],'
    761         @echo -e '\t"scheduling": ['
    762         @echo -en '\t\t'
    763         @+make signal-cfa1.runquiet
    764         @echo -en '\t\t,'
    765         @+make signal-cfa2.runquiet
    766         @echo -en '\t\t,'
    767         @+make waitfor-cfa1.runquiet
    768         @echo -en '\t\t,'
    769         @+make waitfor-cfa2.runquiet
    770         @echo -e '\n\t],'
    771         @echo -e '\t"epoch": ' $(shell date +%s)
    772         @echo "}"
     763        @+make ctxswitch.csv
     764        @+make mutex.csv
     765        @+make signal.csv
     766@DOifskipcompile@
     767        @cat compile.csv
     768@DOendif@
     769        @cat ctxswitch.csv
     770        @cat mutex.csv
     771        @cat signal.csv
     772
     773compile.csv:
     774        @echo "array,attributes,empty,expression,io,monitor,operators,typeof" > $@
     775        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-array.make >> $@
     776        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-attributes.make >> $@
     777        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-empty.make >> $@
     778        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-expression.make >> $@
     779        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-io.make >> $@
     780        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-monitor.make >> $@
     781        @+make TIME_FORMAT='%e,' PRINT_FORMAT='' compile-operators.make >> $@
     782        @+make TIME_FORMAT='%e' PRINT_FORMAT='' compile-typeof.make >> $@
     783        @$(srcdir)/fixcsv.sh $@
     784
     785ctxswitch.csv:
     786        @echo "coroutine,thread" > $@
     787        @+make ctxswitch-cfa_coroutine.runquiet >> $@ && echo -n ',' >> $@
     788        @+make ctxswitch-cfa_thread.runquiet >> $@
     789        @$(srcdir)/fixcsv.sh $@
     790
     791mutex.csv:
     792        @echo "1-monitor,2-monitor" > $@
     793        @+make mutex-cfa1.runquiet >> $@ && echo -n ',' >> $@
     794        @+make mutex-cfa2.runquiet >> $@
     795        @$(srcdir)/fixcsv.sh $@
     796
     797signal.csv:
     798        @echo "signal-1,signal-2,waitfor-1,waitfor-2" > $@
     799        @+make signal-cfa1.runquiet >> $@ && echo -n ',' >> $@
     800        @+make signal-cfa2.runquiet >> $@ && echo -n ',' >> $@
     801        @+make waitfor-cfa1.runquiet >> $@ && echo -n ',' >> $@
     802        @+make waitfor-cfa2.runquiet >> $@
     803        @$(srcdir)/fixcsv.sh $@
    773804
    774805loop$(EXEEXT):
    775         $(AM_V_CC)$(COMPILE) -DBENCH_N=5000000000 $(srcdir)/loop.c
     806        $(BENCH_V_CC)$(COMPILE) -DBENCH_N=5000000000 $(srcdir)/loop.c
    776807
    777808function$(EXEEXT):
    778         $(AM_V_CC)$(COMPILE) -DBENCH_N=5000000000 $(srcdir)/function.c
     809        $(BENCH_V_CC)$(COMPILE) -DBENCH_N=5000000000 $(srcdir)/function.c
    779810
    780811fetch_add$(EXEEXT):
    781         $(AM_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/fetch_add.c
     812        $(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/fetch_add.c
     813
     814tls-fetch_add$(EXEEXT):
     815        $(BENCH_V_CC)$(COMPILE) -DBENCH_N=500000000  $(srcdir)/tls-fetch_add.c
    782816
    783817@WITH_LIBFIBRE_TRUE@ctxswitch-kos_fibre$(EXEEXT):
    784 @WITH_LIBFIBRE_TRUE@    $(AM_V_CXX)$(CXXCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/kos_fibre.cpp  -I$(LIBFIBRE_DIR) -lfibre
     818@WITH_LIBFIBRE_TRUE@    $(BENCH_V_CXX)$(CXXCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/kos_fibre.cpp  -I$(LIBFIBRE_DIR) -lfibre
    785819
    786820@WITH_LIBFIBRE_TRUE@ctxswitch-kos_fibre2$(EXEEXT):
    787 @WITH_LIBFIBRE_TRUE@    $(AM_V_CXX)$(CXXCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/kos_fibre2.cpp -I$(LIBFIBRE_DIR) -lfibre
     821@WITH_LIBFIBRE_TRUE@    $(BENCH_V_CXX)$(CXXCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/kos_fibre2.cpp -I$(LIBFIBRE_DIR) -lfibre
    788822
    789823ctxswitch$(EXEEXT): $(CTXSWITCH_DEPEND)
    790824
    791825ctxswitch-pthread$(EXEEXT):
    792         $(AM_V_CC)$(COMPILE)    -DBENCH_N=50000000 $(srcdir)/ctxswitch/pthreads.c
     826        $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=50000000 $(srcdir)/ctxswitch/pthreads.c
    793827
    794828ctxswitch-cfa_coroutine$(EXEEXT):
    795         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_cor.cfa
     829        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_cor.cfa
    796830
    797831ctxswitch-cfa_thread$(EXEEXT):
    798         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_thrd.cfa
     832        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_thrd.cfa
    799833
    800834ctxswitch-cfa_thread2$(EXEEXT):
    801         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_thrd2.cfa
     835        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/cfa_thrd2.cfa
    802836
    803837ctxswitch-upp_coroutine$(EXEEXT):
    804         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/upp_cor.cc
     838        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/upp_cor.cc
    805839
    806840ctxswitch-upp_thread$(EXEEXT):
    807         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/upp_thrd.cc
     841        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/ctxswitch/upp_thrd.cc
    808842
    809843ctxswitch-goroutine$(EXEEXT):
    810         $(AM_V_GOC)go build -o a.out $(srcdir)/ctxswitch/goroutine.go
     844        $(BENCH_V_GOC)go build -o a.out $(srcdir)/ctxswitch/goroutine.go
    811845
    812846ctxswitch-java_thread$(EXEEXT):
    813         $(AM_V_JAVAC)javac -d $(builddir) $(srcdir)/ctxswitch/JavaThread.java
     847        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/ctxswitch/JavaThread.java
    814848        @echo "#!/bin/sh" > a.out
    815849        @echo "java JavaThread" >> a.out
     
    828862
    829863mutex-pthread_lock$(EXEEXT):
    830         $(AM_V_CC)$(COMPILE)    -DBENCH_N=50000000 $(srcdir)/mutex/pthreads.c
     864        $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=50000000 $(srcdir)/mutex/pthreads.c
    831865
    832866mutex-upp$(EXEEXT):
    833         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/mutex/upp.cc
     867        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/mutex/upp.cc
    834868
    835869mutex-cfa1$(EXEEXT):
    836         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa1.cfa
     870        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa1.cfa
    837871
    838872mutex-cfa2$(EXEEXT):
    839         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa2.cfa
     873        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa2.cfa
    840874
    841875mutex-cfa4$(EXEEXT):
    842         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa4.cfa
     876        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=5000000  $(srcdir)/mutex/cfa4.cfa
    843877
    844878mutex-java_thread$(EXEEXT):
    845         $(AM_V_JAVAC)javac -d $(builddir) $(srcdir)/mutex/JavaThread.java
     879        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/mutex/JavaThread.java
    846880        @echo "#!/bin/sh" > a.out
    847881        @echo "java JavaThread" >> a.out
     
    857891
    858892signal-pthread_cond$(EXEEXT):
    859         $(AM_V_CC)$(COMPILE)    -DBENCH_N=500000  $(srcdir)/schedint/pthreads.c
     893        $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=500000  $(srcdir)/schedint/pthreads.c
    860894
    861895signal-upp$(EXEEXT):
    862         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=5000000 $(srcdir)/schedint/upp.cc
     896        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=5000000 $(srcdir)/schedint/upp.cc
    863897
    864898signal-cfa1$(EXEEXT):
    865         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa1.cfa
     899        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa1.cfa
    866900
    867901signal-cfa2$(EXEEXT):
    868         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa2.cfa
     902        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa2.cfa
    869903
    870904signal-cfa4$(EXEEXT):
    871         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa4.cfa
     905        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedint/cfa4.cfa
    872906
    873907signal-java_thread$(EXEEXT):
    874         $(AM_V_JAVAC)javac -d $(builddir) $(srcdir)/schedint/JavaThread.java
     908        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/schedint/JavaThread.java
    875909        @echo "#!/bin/sh" > a.out
    876910        @echo "java JavaThread" >> a.out
     
    884918
    885919waitfor-upp$(EXEEXT):
    886         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=5000000 $(srcdir)/schedext/upp.cc
     920        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=5000000 $(srcdir)/schedext/upp.cc
    887921
    888922waitfor-cfa1$(EXEEXT):
    889         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa1.cfa
     923        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa1.cfa
    890924
    891925waitfor-cfa2$(EXEEXT):
    892         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa2.cfa
     926        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa2.cfa
    893927
    894928waitfor-cfa4$(EXEEXT):
    895         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa4.cfa
     929        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=500000  $(srcdir)/schedext/cfa4.cfa
    896930
    897931creation$(EXEEXT) :\
     
    906940
    907941creation-cfa_coroutine$(EXEEXT):
    908         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_cor.cfa
     942        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_cor.cfa
    909943
    910944creation-cfa_coroutine_eager$(EXEEXT):
    911         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_cor.cfa  -DEAGER
     945        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_cor.cfa  -DEAGER
    912946
    913947creation-cfa_thread$(EXEEXT):
    914         $(AM_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_thrd.cfa
     948        $(BENCH_V_CFA)$(CFACOMPILE) -DBENCH_N=10000000 $(srcdir)/creation/cfa_thrd.cfa
    915949
    916950creation-upp_coroutine$(EXEEXT):
    917         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/creation/upp_cor.cc
     951        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/creation/upp_cor.cc
    918952
    919953creation-upp_thread$(EXEEXT):
    920         $(AM_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/creation/upp_thrd.cc
     954        $(BENCH_V_UPP)$(UPPCOMPILE) -DBENCH_N=50000000 $(srcdir)/creation/upp_thrd.cc
    921955
    922956creation-pthread$(EXEEXT):
    923         $(AM_V_CC)$(COMPILE)    -DBENCH_N=250000   $(srcdir)/creation/pthreads.c
     957        $(BENCH_V_CC)$(COMPILE)    -DBENCH_N=250000   $(srcdir)/creation/pthreads.c
    924958
    925959creation-goroutine$(EXEEXT):
    926         $(AM_V_GOC)go build -o a.out $(srcdir)/creation/goroutine.go
     960        $(BENCH_V_GOC)go build -o a.out $(srcdir)/creation/goroutine.go
    927961
    928962creation-java_thread$(EXEEXT):
    929         $(AM_V_JAVAC)javac -d $(builddir) $(srcdir)/creation/JavaThread.java
     963        $(BENCH_V_JAVAC)javac -d $(builddir) $(srcdir)/creation/JavaThread.java
    930964        @echo "#!/bin/sh" > a.out
    931965        @echo "java JavaThread" >> a.out
     
    943977
    944978compile-array$(EXEEXT):
    945         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/array.cfa
     979        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/array.cfa
    946980
    947981compile-attributes$(EXEEXT):
    948         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/attributes.cfa
     982        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/attributes.cfa
    949983
    950984compile-empty$(EXEEXT):
    951         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(srcdir)/compile/empty.cfa
     985        @$(CFACOMPILE) -fsyntax-only -w $(srcdir)/compile/empty.cfa
    952986
    953987compile-expression$(EXEEXT):
    954         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/expression.cfa
     988        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/expression.cfa
    955989
    956990compile-io$(EXEEXT):
    957         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/io1.cfa
     991        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/io1.cfa
    958992
    959993compile-monitor$(EXEEXT):
    960         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/monitor.cfa
     994        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/monitor.cfa
    961995
    962996compile-operators$(EXEEXT):
    963         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/operators.cfa
     997        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/operators.cfa
    964998
    965999compile-thread$(EXEEXT):
    966         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/thread.cfa
     1000        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/concurrent/thread.cfa
    9671001
    9681002compile-typeof$(EXEEXT):
    969         $(AM_V_CFA)$(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
     1003        @$(CFACOMPILE) -fsyntax-only -w $(testdir)/typeof.cfa
    9701004
    9711005# Tell versions [3.59,3.63) of GNU make to not export all variables.
  • benchmark/ctxswitch/cfa_cor.cfa

    r6a9d4b4 r933f32f  
    1111}
    1212
    13 void main( GreatSuspender & this ) {
     13void main( __attribute__((unused)) GreatSuspender & this ) {
    1414        while( true ) {
    1515                suspend();
  • benchmark/ctxswitch/cfa_thrd2.cfa

    r6a9d4b4 r933f32f  
    88thread Fibre {};
    99
    10 void main(Fibre & this) {
     10void main(__attribute__((unused)) Fibre & this) {
    1111        while(!done) {
    1212                yield();
  • configure

    r6a9d4b4 r933f32f  
    637637LIBOBJS
    638638CFA_BACKEND_CC
    639 ALLOCA
     639WITH_LIBTCMALLOC_FALSE
     640WITH_LIBTCMALLOC_TRUE
     641WITH_LIBPROFILER_FALSE
     642WITH_LIBPROFILER_TRUE
    640643WITH_LIBFIBRE_FALSE
    641644WITH_LIBFIBRE_TRUE
     
    19611964} # ac_fn_cxx_try_link
    19621965
    1963 # ac_fn_c_check_type LINENO TYPE VAR INCLUDES
    1964 # -------------------------------------------
    1965 # Tests whether TYPE exists after having included INCLUDES, setting cache
    1966 # variable VAR accordingly.
    1967 ac_fn_c_check_type ()
    1968 {
    1969   as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
    1970   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
    1971 $as_echo_n "checking for $2... " >&6; }
    1972 if eval \${$3+:} false; then :
    1973   $as_echo_n "(cached) " >&6
    1974 else
    1975   eval "$3=no"
    1976   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    1977 /* end confdefs.h.  */
    1978 $4
    1979 int
    1980 main ()
    1981 {
    1982 if (sizeof ($2))
    1983          return 0;
    1984   ;
    1985   return 0;
    1986 }
    1987 _ACEOF
    1988 if ac_fn_c_try_compile "$LINENO"; then :
    1989   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    1990 /* end confdefs.h.  */
    1991 $4
    1992 int
    1993 main ()
    1994 {
    1995 if (sizeof (($2)))
    1996             return 0;
    1997   ;
    1998   return 0;
    1999 }
    2000 _ACEOF
    2001 if ac_fn_c_try_compile "$LINENO"; then :
    2002 
    2003 else
    2004   eval "$3=yes"
    2005 fi
    2006 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    2007 fi
    2008 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    2009 fi
    2010 eval ac_res=\$$3
    2011                { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
    2012 $as_echo "$ac_res" >&6; }
    2013   eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
    2014 
    2015 } # ac_fn_c_check_type
    2016 
    20171966# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
    20181967# -------------------------------------------------------
     
    21062055} # ac_fn_c_check_header_mongrel
    21072056
    2108 # ac_fn_c_find_intX_t LINENO BITS VAR
    2109 # -----------------------------------
    2110 # Finds a signed integer type with width BITS, setting cache variable VAR
    2111 # accordingly.
    2112 ac_fn_c_find_intX_t ()
     2057# ac_fn_c_check_type LINENO TYPE VAR INCLUDES
     2058# -------------------------------------------
     2059# Tests whether TYPE exists after having included INCLUDES, setting cache
     2060# variable VAR accordingly.
     2061ac_fn_c_check_type ()
    21132062{
    21142063  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
    2115   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for int$2_t" >&5
    2116 $as_echo_n "checking for int$2_t... " >&6; }
     2064  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
     2065$as_echo_n "checking for $2... " >&6; }
    21172066if eval \${$3+:} false; then :
    21182067  $as_echo_n "(cached) " >&6
    21192068else
    21202069  eval "$3=no"
    2121      # Order is important - never check a type that is potentially smaller
    2122      # than half of the expected target width.
    2123      for ac_type in int$2_t 'int' 'long int' \
    2124          'long long int' 'short int' 'signed char'; do
    2125        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
     2070  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    21262071/* end confdefs.h.  */
    2127 $ac_includes_default
    2128              enum { N = $2 / 2 - 1 };
     2072$4
    21292073int
    21302074main ()
    21312075{
    2132 static int test_array [1 - 2 * !(0 < ($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 1))];
    2133 test_array [0] = 0;
    2134 return test_array [0];
    2135 
     2076if (sizeof ($2))
     2077         return 0;
    21362078  ;
    21372079  return 0;
     
    21412083  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    21422084/* end confdefs.h.  */
    2143 $ac_includes_default
    2144                 enum { N = $2 / 2 - 1 };
     2085$4
    21452086int
    21462087main ()
    21472088{
    2148 static int test_array [1 - 2 * !(($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 1)
    2149                  < ($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 2))];
    2150 test_array [0] = 0;
    2151 return test_array [0];
    2152 
     2089if (sizeof (($2)))
     2090            return 0;
    21532091  ;
    21542092  return 0;
     
    21582096
    21592097else
    2160   case $ac_type in #(
    2161   int$2_t) :
    2162     eval "$3=yes" ;; #(
    2163   *) :
    2164     eval "$3=\$ac_type" ;;
    2165 esac
     2098  eval "$3=yes"
    21662099fi
    21672100rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    21682101fi
    21692102rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    2170        if eval test \"x\$"$3"\" = x"no"; then :
    2171 
    2172 else
    2173   break
    2174 fi
    2175      done
    21762103fi
    21772104eval ac_res=\$$3
     
    21802107  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
    21812108
    2182 } # ac_fn_c_find_intX_t
    2183 
    2184 # ac_fn_c_find_uintX_t LINENO BITS VAR
    2185 # ------------------------------------
    2186 # Finds an unsigned integer type with width BITS, setting cache variable VAR
    2187 # accordingly.
    2188 ac_fn_c_find_uintX_t ()
    2189 {
    2190   as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
    2191   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for uint$2_t" >&5
    2192 $as_echo_n "checking for uint$2_t... " >&6; }
    2193 if eval \${$3+:} false; then :
    2194   $as_echo_n "(cached) " >&6
    2195 else
    2196   eval "$3=no"
    2197      # Order is important - never check a type that is potentially smaller
    2198      # than half of the expected target width.
    2199      for ac_type in uint$2_t 'unsigned int' 'unsigned long int' \
    2200          'unsigned long long int' 'unsigned short int' 'unsigned char'; do
    2201        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    2202 /* end confdefs.h.  */
    2203 $ac_includes_default
    2204 int
    2205 main ()
    2206 {
    2207 static int test_array [1 - 2 * !((($ac_type) -1 >> ($2 / 2 - 1)) >> ($2 / 2 - 1) == 3)];
    2208 test_array [0] = 0;
    2209 return test_array [0];
    2210 
    2211   ;
    2212   return 0;
    2213 }
    2214 _ACEOF
    2215 if ac_fn_c_try_compile "$LINENO"; then :
    2216   case $ac_type in #(
    2217   uint$2_t) :
    2218     eval "$3=yes" ;; #(
    2219   *) :
    2220     eval "$3=\$ac_type" ;;
    2221 esac
    2222 fi
    2223 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    2224        if eval test \"x\$"$3"\" = x"no"; then :
    2225 
    2226 else
    2227   break
    2228 fi
    2229      done
    2230 fi
    2231 eval ac_res=\$$3
    2232                { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
    2233 $as_echo "$ac_res" >&6; }
    2234   eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
    2235 
    2236 } # ac_fn_c_find_uintX_t
     2109} # ac_fn_c_check_type
    22372110cat >config.log <<_ACEOF
    22382111This file contains any messages produced by compilers while
     
    26672540
    26682541
     2542# http://git.savannah.gnu.org/gitweb/?p=autoconf-archive.git;a=blob_plain;f=m4/ax_check_compile_flag.m4
     2543
    26692544
    26702545# don't use the default CFLAGS as they unconditionally add -O2
     
    35273402                "debug") ;;
    35283403                "nolib") ;;
     3404                "profile") ;;
    35293405                *)
    35303406                        >&2 echo "Configuration must be 'debug', 'nodebug' or 'nolib'"
     
    51835059
    51845060
    5185         # deprecated
    51865061# These are often not installed and people miss seeing the "no", so stop the configure.
    51875062for ac_prog in 'bison -y' byacc
     
    1673416609
    1673516610
    16736 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5
    16737 $as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; }
    16738 set x ${MAKE-make}
    16739 ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
    16740 if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then :
    16741   $as_echo_n "(cached) " >&6
    16742 else
    16743   cat >conftest.make <<\_ACEOF
    16744 SHELL = /bin/sh
    16745 all:
    16746         @echo '@@@%%%=$(MAKE)=@@@%%%'
    16747 _ACEOF
    16748 # GNU make sometimes prints "make[1]: Entering ...", which would confuse us.
    16749 case `${MAKE-make} -f conftest.make 2>/dev/null` in
    16750   *@@@%%%=?*=@@@%%%*)
    16751     eval ac_cv_prog_make_${ac_make}_set=yes;;
    16752   *)
    16753     eval ac_cv_prog_make_${ac_make}_set=no;;
    16754 esac
    16755 rm -f conftest.make
    16756 fi
    16757 if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then
    16758   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
    16759 $as_echo "yes" >&6; }
    16760   SET_MAKE=
    16761 else
    16762   { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
    16763 $as_echo "no" >&6; }
    16764   SET_MAKE="MAKE=${MAKE-make}"
    16765 fi
    16766 
    1676716611
    1676816612# Checks for libraries.
     
    1681816662
    1681916663
    16820 # Checks for header files.
    16821 ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default"
    16822 if test "x$ac_cv_type_size_t" = xyes; then :
    16823 
    16824 else
    16825 
    16826 cat >>confdefs.h <<_ACEOF
    16827 #define size_t unsigned int
    16828 _ACEOF
    16829 
    16830 fi
    16831 
    16832 # The Ultrix 4.2 mips builtin alloca declared by alloca.h only works
    16833 # for constant arguments.  Useless!
    16834 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working alloca.h" >&5
    16835 $as_echo_n "checking for working alloca.h... " >&6; }
    16836 if ${ac_cv_working_alloca_h+:} false; then :
     16664{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ProfilingIsEnabledForAllThreads in -lprofiler" >&5
     16665$as_echo_n "checking for ProfilingIsEnabledForAllThreads in -lprofiler... " >&6; }
     16666if ${ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads+:} false; then :
    1683716667  $as_echo_n "(cached) " >&6
    1683816668else
    16839   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
     16669  ac_check_lib_save_LIBS=$LIBS
     16670LIBS="-lprofiler  $LIBS"
     16671cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    1684016672/* end confdefs.h.  */
    16841 #include <alloca.h>
     16673
     16674/* Override any GCC internal prototype to avoid an error.
     16675   Use char because int might match the return type of a GCC
     16676   builtin and then its argument prototype would still apply.  */
     16677#ifdef __cplusplus
     16678extern "C"
     16679#endif
     16680char ProfilingIsEnabledForAllThreads ();
    1684216681int
    1684316682main ()
    1684416683{
    16845 char *p = (char *) alloca (2 * sizeof (int));
    16846                           if (p) return 0;
     16684return ProfilingIsEnabledForAllThreads ();
    1684716685  ;
    1684816686  return 0;
     
    1685016688_ACEOF
    1685116689if ac_fn_c_try_link "$LINENO"; then :
    16852   ac_cv_working_alloca_h=yes
    16853 else
    16854   ac_cv_working_alloca_h=no
     16690  ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads=yes
     16691else
     16692  ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads=no
    1685516693fi
    1685616694rm -f core conftest.err conftest.$ac_objext \
    1685716695    conftest$ac_exeext conftest.$ac_ext
    16858 fi
    16859 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_working_alloca_h" >&5
    16860 $as_echo "$ac_cv_working_alloca_h" >&6; }
    16861 if test $ac_cv_working_alloca_h = yes; then
    16862 
    16863 $as_echo "#define HAVE_ALLOCA_H 1" >>confdefs.h
    16864 
    16865 fi
    16866 
    16867 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for alloca" >&5
    16868 $as_echo_n "checking for alloca... " >&6; }
    16869 if ${ac_cv_func_alloca_works+:} false; then :
     16696LIBS=$ac_check_lib_save_LIBS
     16697fi
     16698{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads" >&5
     16699$as_echo "$ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads" >&6; }
     16700if test "x$ac_cv_lib_profiler_ProfilingIsEnabledForAllThreads" = xyes; then :
     16701  HAVE_LIBPROFILER=1
     16702else
     16703  HAVE_LIBPROFILER=0
     16704fi
     16705
     16706 if test "$HAVE_LIBPROFILER" -eq 1; then
     16707  WITH_LIBPROFILER_TRUE=
     16708  WITH_LIBPROFILER_FALSE='#'
     16709else
     16710  WITH_LIBPROFILER_TRUE='#'
     16711  WITH_LIBPROFILER_FALSE=
     16712fi
     16713
     16714
     16715{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for malloc in -ltcmalloc" >&5
     16716$as_echo_n "checking for malloc in -ltcmalloc... " >&6; }
     16717if ${ac_cv_lib_tcmalloc_malloc+:} false; then :
    1687016718  $as_echo_n "(cached) " >&6
    1687116719else
    16872   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
     16720  ac_check_lib_save_LIBS=$LIBS
     16721LIBS="-ltcmalloc  $LIBS"
     16722cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    1687316723/* end confdefs.h.  */
    16874 #ifdef __GNUC__
    16875 # define alloca __builtin_alloca
    16876 #else
    16877 # ifdef _MSC_VER
    16878 #  include <malloc.h>
    16879 #  define alloca _alloca
    16880 # else
    16881 #  ifdef HAVE_ALLOCA_H
    16882 #   include <alloca.h>
    16883 #  else
    16884 #   ifdef _AIX
    16885  #pragma alloca
    16886 #   else
    16887 #    ifndef alloca /* predefined by HP cc +Olibcalls */
    16888 void *alloca (size_t);
    16889 #    endif
    16890 #   endif
    16891 #  endif
    16892 # endif
     16724
     16725/* Override any GCC internal prototype to avoid an error.
     16726   Use char because int might match the return type of a GCC
     16727   builtin and then its argument prototype would still apply.  */
     16728#ifdef __cplusplus
     16729extern "C"
    1689316730#endif
    16894 
     16731char malloc ();
    1689516732int
    1689616733main ()
    1689716734{
    16898 char *p = (char *) alloca (1);
    16899                                     if (p) return 0;
     16735return malloc ();
    1690016736  ;
    1690116737  return 0;
     
    1690316739_ACEOF
    1690416740if ac_fn_c_try_link "$LINENO"; then :
    16905   ac_cv_func_alloca_works=yes
    16906 else
    16907   ac_cv_func_alloca_works=no
     16741  ac_cv_lib_tcmalloc_malloc=yes
     16742else
     16743  ac_cv_lib_tcmalloc_malloc=no
    1690816744fi
    1690916745rm -f core conftest.err conftest.$ac_objext \
    1691016746    conftest$ac_exeext conftest.$ac_ext
    16911 fi
    16912 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_alloca_works" >&5
    16913 $as_echo "$ac_cv_func_alloca_works" >&6; }
    16914 
    16915 if test $ac_cv_func_alloca_works = yes; then
    16916 
    16917 $as_echo "#define HAVE_ALLOCA 1" >>confdefs.h
    16918 
    16919 else
    16920   # The SVR3 libPW and SVR4 libucb both contain incompatible functions
    16921 # that cause trouble.  Some versions do not even contain alloca or
    16922 # contain a buggy version.  If you still want to use their alloca,
    16923 # use ar to extract alloca.o from them instead of compiling alloca.c.
    16924 
    16925 ALLOCA=\${LIBOBJDIR}alloca.$ac_objext
    16926 
    16927 $as_echo "#define C_ALLOCA 1" >>confdefs.h
    16928 
    16929 
    16930 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether \`alloca.c' needs Cray hooks" >&5
    16931 $as_echo_n "checking whether \`alloca.c' needs Cray hooks... " >&6; }
    16932 if ${ac_cv_os_cray+:} false; then :
    16933   $as_echo_n "(cached) " >&6
    16934 else
    16935   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    16936 /* end confdefs.h.  */
    16937 #if defined CRAY && ! defined CRAY2
    16938 webecray
    16939 #else
    16940 wenotbecray
    16941 #endif
    16942 
    16943 _ACEOF
    16944 if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
    16945   $EGREP "webecray" >/dev/null 2>&1; then :
    16946   ac_cv_os_cray=yes
    16947 else
    16948   ac_cv_os_cray=no
    16949 fi
    16950 rm -f conftest*
    16951 
    16952 fi
    16953 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_os_cray" >&5
    16954 $as_echo "$ac_cv_os_cray" >&6; }
    16955 if test $ac_cv_os_cray = yes; then
    16956   for ac_func in _getb67 GETB67 getb67; do
    16957     as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
    16958 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
    16959 if eval test \"x\$"$as_ac_var"\" = x"yes"; then :
    16960 
    16961 cat >>confdefs.h <<_ACEOF
    16962 #define CRAY_STACKSEG_END $ac_func
    16963 _ACEOF
    16964 
    16965     break
    16966 fi
    16967 
    16968   done
    16969 fi
    16970 
    16971 { $as_echo "$as_me:${as_lineno-$LINENO}: checking stack direction for C alloca" >&5
    16972 $as_echo_n "checking stack direction for C alloca... " >&6; }
    16973 if ${ac_cv_c_stack_direction+:} false; then :
    16974   $as_echo_n "(cached) " >&6
    16975 else
    16976   if test "$cross_compiling" = yes; then :
    16977   ac_cv_c_stack_direction=0
    16978 else
    16979   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    16980 /* end confdefs.h.  */
    16981 $ac_includes_default
    16982 int
    16983 find_stack_direction (int *addr, int depth)
    16984 {
    16985   int dir, dummy = 0;
    16986   if (! addr)
    16987     addr = &dummy;
    16988   *addr = addr < &dummy ? 1 : addr == &dummy ? 0 : -1;
    16989   dir = depth ? find_stack_direction (addr, depth - 1) : 0;
    16990   return dir + dummy;
    16991 }
    16992 
    16993 int
    16994 main (int argc, char **argv)
    16995 {
    16996   return find_stack_direction (0, argc + !argv + 20) < 0;
    16997 }
    16998 _ACEOF
    16999 if ac_fn_c_try_run "$LINENO"; then :
    17000   ac_cv_c_stack_direction=1
    17001 else
    17002   ac_cv_c_stack_direction=-1
    17003 fi
    17004 rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
    17005   conftest.$ac_objext conftest.beam conftest.$ac_ext
    17006 fi
    17007 
    17008 fi
    17009 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_stack_direction" >&5
    17010 $as_echo "$ac_cv_c_stack_direction" >&6; }
    17011 cat >>confdefs.h <<_ACEOF
    17012 #define STACK_DIRECTION $ac_cv_c_stack_direction
    17013 _ACEOF
    17014 
    17015 
    17016 fi
    17017 
    17018 for ac_header in fenv.h float.h inttypes.h libintl.h limits.h malloc.h stddef.h stdlib.h string.h unistd.h
     16747LIBS=$ac_check_lib_save_LIBS
     16748fi
     16749{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_tcmalloc_malloc" >&5
     16750$as_echo "$ac_cv_lib_tcmalloc_malloc" >&6; }
     16751if test "x$ac_cv_lib_tcmalloc_malloc" = xyes; then :
     16752  HAVE_LIBTCMALLOC=1
     16753else
     16754  HAVE_LIBTCMALLOC=0
     16755fi
     16756
     16757 if test "$HAVE_LIBTCMALLOC" -eq 1; then
     16758  WITH_LIBTCMALLOC_TRUE=
     16759  WITH_LIBTCMALLOC_FALSE='#'
     16760else
     16761  WITH_LIBTCMALLOC_TRUE='#'
     16762  WITH_LIBTCMALLOC_FALSE=
     16763fi
     16764
     16765
     16766# Checks for header files.
     16767for ac_header in libintl.h malloc.h unistd.h
    1701916768do :
    1702016769  as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
     
    1702516774_ACEOF
    1702616775
     16776else
     16777  echo "Error: Missing required header"; exit 1
    1702716778fi
    1702816779
     
    1703116782
    1703216783# Checks for typedefs, structures, and compiler characteristics.
    17033 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for stdbool.h that conforms to C99" >&5
    17034 $as_echo_n "checking for stdbool.h that conforms to C99... " >&6; }
    17035 if ${ac_cv_header_stdbool_h+:} false; then :
     16784ac_fn_c_check_type "$LINENO" "_Float32" "ac_cv_type__Float32" "
     16785"
     16786if test "x$ac_cv_type__Float32" = xyes; then :
     16787
     16788cat >>confdefs.h <<_ACEOF
     16789#define HAVE__FLOAT32 1
     16790_ACEOF
     16791
     16792
     16793$as_echo "#define HAVE_KEYWORDS_FLOATXX /**/" >>confdefs.h
     16794
     16795fi
     16796
     16797
     16798# Checks for compiler flags.
     16799{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -Wcast-function-type" >&5
     16800$as_echo_n "checking whether C compiler accepts -Wcast-function-type... " >&6; }
     16801if ${m4cfa_cv_check_cflags___Wcast_function_type+:} false; then :
    1703616802  $as_echo_n "(cached) " >&6
    1703716803else
    17038   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
     16804
     16805        m4cfa_check_save_flags=$CFLAGS
     16806        CFLAGS="$CFLAGS  -Wcast-function-type"
     16807        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    1703916808/* end confdefs.h.  */
    17040 
    17041              #include <stdbool.h>
    17042              #ifndef bool
    17043               "error: bool is not defined"
    17044              #endif
    17045              #ifndef false
    17046               "error: false is not defined"
    17047              #endif
    17048              #if false
    17049               "error: false is not 0"
    17050              #endif
    17051              #ifndef true
    17052               "error: true is not defined"
    17053              #endif
    17054              #if true != 1
    17055               "error: true is not 1"
    17056              #endif
    17057              #ifndef __bool_true_false_are_defined
    17058               "error: __bool_true_false_are_defined is not defined"
    17059              #endif
    17060 
    17061              struct s { _Bool s: 1; _Bool t; } s;
    17062 
    17063              char a[true == 1 ? 1 : -1];
    17064              char b[false == 0 ? 1 : -1];
    17065              char c[__bool_true_false_are_defined == 1 ? 1 : -1];
    17066              char d[(bool) 0.5 == true ? 1 : -1];
    17067              /* See body of main program for 'e'.  */
    17068              char f[(_Bool) 0.0 == false ? 1 : -1];
    17069              char g[true];
    17070              char h[sizeof (_Bool)];
    17071              char i[sizeof s.t];
    17072              enum { j = false, k = true, l = false * true, m = true * 256 };
    17073              /* The following fails for
    17074                 HP aC++/ANSI C B3910B A.05.55 [Dec 04 2003]. */
    17075              _Bool n[m];
    17076              char o[sizeof n == m * sizeof n[0] ? 1 : -1];
    17077              char p[-1 - (_Bool) 0 < 0 && -1 - (bool) 0 < 0 ? 1 : -1];
    17078              /* Catch a bug in an HP-UX C compiler.  See
    17079                 http://gcc.gnu.org/ml/gcc-patches/2003-12/msg02303.html
    17080                 http://lists.gnu.org/archive/html/bug-coreutils/2005-11/msg00161.html
    17081               */
    17082              _Bool q = true;
    17083              _Bool *pq = &q;
    1708416809
    1708516810int
    1708616811main ()
    1708716812{
    17088 
    17089              bool e = &s;
    17090              *pq |= q;
    17091              *pq |= ! q;
    17092              /* Refer to every declared value, to avoid compiler optimizations.  */
    17093              return (!a + !b + !c + !d + !e + !f + !g + !h + !i + !!j + !k + !!l
    17094                      + !m + !n + !o + !p + !q + !pq);
    1709516813
    1709616814  ;
     
    1709916817_ACEOF
    1710016818if ac_fn_c_try_compile "$LINENO"; then :
    17101   ac_cv_header_stdbool_h=yes
    17102 else
    17103   ac_cv_header_stdbool_h=no
     16819  m4cfa_cv_check_cflags___Wcast_function_type=yes
     16820else
     16821  m4cfa_cv_check_cflags___Wcast_function_type=no
    1710416822fi
    1710516823rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    17106 fi
    17107 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdbool_h" >&5
    17108 $as_echo "$ac_cv_header_stdbool_h" >&6; }
    17109    ac_fn_c_check_type "$LINENO" "_Bool" "ac_cv_type__Bool" "$ac_includes_default"
    17110 if test "x$ac_cv_type__Bool" = xyes; then :
    17111 
    17112 cat >>confdefs.h <<_ACEOF
    17113 #define HAVE__BOOL 1
    17114 _ACEOF
    17115 
    17116 
    17117 fi
    17118 
    17119 
    17120 if test $ac_cv_header_stdbool_h = yes; then
    17121 
    17122 $as_echo "#define HAVE_STDBOOL_H 1" >>confdefs.h
    17123 
    17124 fi
    17125 
    17126 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for inline" >&5
    17127 $as_echo_n "checking for inline... " >&6; }
    17128 if ${ac_cv_c_inline+:} false; then :
    17129   $as_echo_n "(cached) " >&6
    17130 else
    17131   ac_cv_c_inline=no
    17132 for ac_kw in inline __inline__ __inline; do
    17133   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    17134 /* end confdefs.h.  */
    17135 #ifndef __cplusplus
    17136 typedef int foo_t;
    17137 static $ac_kw foo_t static_foo () {return 0; }
    17138 $ac_kw foo_t foo () {return 0; }
    17139 #endif
    17140 
    17141 _ACEOF
    17142 if ac_fn_c_try_compile "$LINENO"; then :
    17143   ac_cv_c_inline=$ac_kw
    17144 fi
    17145 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    17146   test "$ac_cv_c_inline" != no && break
    17147 done
    17148 
    17149 fi
    17150 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_inline" >&5
    17151 $as_echo "$ac_cv_c_inline" >&6; }
    17152 
    17153 case $ac_cv_c_inline in
    17154   inline | yes) ;;
    17155   *)
    17156     case $ac_cv_c_inline in
    17157       no) ac_val=;;
    17158       *) ac_val=$ac_cv_c_inline;;
    17159     esac
    17160     cat >>confdefs.h <<_ACEOF
    17161 #ifndef __cplusplus
    17162 #define inline $ac_val
    17163 #endif
    17164 _ACEOF
    17165     ;;
    17166 esac
    17167 
    17168 ac_fn_c_find_intX_t "$LINENO" "16" "ac_cv_c_int16_t"
    17169 case $ac_cv_c_int16_t in #(
    17170   no|yes) ;; #(
    17171   *)
    17172 
    17173 cat >>confdefs.h <<_ACEOF
    17174 #define int16_t $ac_cv_c_int16_t
    17175 _ACEOF
    17176 ;;
    17177 esac
    17178 
    17179 ac_fn_c_find_intX_t "$LINENO" "32" "ac_cv_c_int32_t"
    17180 case $ac_cv_c_int32_t in #(
    17181   no|yes) ;; #(
    17182   *)
    17183 
    17184 cat >>confdefs.h <<_ACEOF
    17185 #define int32_t $ac_cv_c_int32_t
    17186 _ACEOF
    17187 ;;
    17188 esac
    17189 
    17190 ac_fn_c_find_intX_t "$LINENO" "8" "ac_cv_c_int8_t"
    17191 case $ac_cv_c_int8_t in #(
    17192   no|yes) ;; #(
    17193   *)
    17194 
    17195 cat >>confdefs.h <<_ACEOF
    17196 #define int8_t $ac_cv_c_int8_t
    17197 _ACEOF
    17198 ;;
    17199 esac
    17200 
    17201 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for C/C++ restrict keyword" >&5
    17202 $as_echo_n "checking for C/C++ restrict keyword... " >&6; }
    17203 if ${ac_cv_c_restrict+:} false; then :
    17204   $as_echo_n "(cached) " >&6
    17205 else
    17206   ac_cv_c_restrict=no
    17207    # The order here caters to the fact that C++ does not require restrict.
    17208    for ac_kw in __restrict __restrict__ _Restrict restrict; do
    17209      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
    17210 /* end confdefs.h.  */
    17211 typedef int * int_ptr;
    17212         int foo (int_ptr $ac_kw ip) {
    17213         return ip[0];
    17214        }
    17215 int
    17216 main ()
    17217 {
    17218 int s[1];
    17219         int * $ac_kw t = s;
    17220         t[0] = 0;
    17221         return foo(t)
    17222   ;
    17223   return 0;
    17224 }
    17225 _ACEOF
    17226 if ac_fn_c_try_compile "$LINENO"; then :
    17227   ac_cv_c_restrict=$ac_kw
    17228 fi
    17229 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    17230      test "$ac_cv_c_restrict" != no && break
    17231    done
    17232 
    17233 fi
    17234 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_restrict" >&5
    17235 $as_echo "$ac_cv_c_restrict" >&6; }
    17236 
    17237  case $ac_cv_c_restrict in
    17238    restrict) ;;
    17239    no) $as_echo "#define restrict /**/" >>confdefs.h
    17240  ;;
    17241    *)  cat >>confdefs.h <<_ACEOF
    17242 #define restrict $ac_cv_c_restrict
    17243 _ACEOF
    17244  ;;
    17245  esac
    17246 
    17247 ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default"
    17248 if test "x$ac_cv_type_size_t" = xyes; then :
    17249 
    17250 else
    17251 
    17252 cat >>confdefs.h <<_ACEOF
    17253 #define size_t unsigned int
    17254 _ACEOF
    17255 
    17256 fi
    17257 
    17258 ac_fn_c_find_uintX_t "$LINENO" "16" "ac_cv_c_uint16_t"
    17259 case $ac_cv_c_uint16_t in #(
    17260   no|yes) ;; #(
    17261   *)
    17262 
    17263 
    17264 cat >>confdefs.h <<_ACEOF
    17265 #define uint16_t $ac_cv_c_uint16_t
    17266 _ACEOF
    17267 ;;
    17268   esac
    17269 
    17270 ac_fn_c_find_uintX_t "$LINENO" "32" "ac_cv_c_uint32_t"
    17271 case $ac_cv_c_uint32_t in #(
    17272   no|yes) ;; #(
    17273   *)
    17274 
    17275 $as_echo "#define _UINT32_T 1" >>confdefs.h
    17276 
    17277 
    17278 cat >>confdefs.h <<_ACEOF
    17279 #define uint32_t $ac_cv_c_uint32_t
    17280 _ACEOF
    17281 ;;
    17282   esac
    17283 
    17284 ac_fn_c_find_uintX_t "$LINENO" "8" "ac_cv_c_uint8_t"
    17285 case $ac_cv_c_uint8_t in #(
    17286   no|yes) ;; #(
    17287   *)
    17288 
    17289 $as_echo "#define _UINT8_T 1" >>confdefs.h
    17290 
    17291 
    17292 cat >>confdefs.h <<_ACEOF
    17293 #define uint8_t $ac_cv_c_uint8_t
    17294 _ACEOF
    17295 ;;
    17296   esac
    17297 
    17298 
    17299 # Checks for library functions.
    17300 for ac_func in memset putenv strchr strtol
    17301 do :
    17302   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
    17303 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
    17304 if eval test \"x\$"$as_ac_var"\" = x"yes"; then :
    17305   cat >>confdefs.h <<_ACEOF
    17306 #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
    17307 _ACEOF
    17308 
    17309 fi
    17310 done
     16824        CFLAGS=$m4cfa_check_save_flags
     16825fi
     16826{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $m4cfa_cv_check_cflags___Wcast_function_type" >&5
     16827$as_echo "$m4cfa_cv_check_cflags___Wcast_function_type" >&6; }
     16828if test "x$m4cfa_cv_check_cflags___Wcast_function_type" = xyes; then :
     16829
     16830$as_echo "#define HAVE_CAST_FUNCTION_TYPE /**/" >>confdefs.h
     16831
     16832else
     16833  :
     16834fi
    1731116835
    1731216836
     
    1732116845
    1732216846#==============================================================================
    17323 ac_config_files="$ac_config_files Makefile driver/Makefile src/Makefile benchmark/Makefile tests/Makefile tests/preempt_longrun/Makefile tools/Makefile tools/prettyprinter/Makefile"
     16847ac_config_files="$ac_config_files Makefile driver/Makefile src/Makefile benchmark/Makefile tests/Makefile longrun_tests/Makefile tools/Makefile tools/prettyprinter/Makefile"
    1732416848
    1732516849
     
    1747216996if test -z "${WITH_LIBFIBRE_TRUE}" && test -z "${WITH_LIBFIBRE_FALSE}"; then
    1747316997  as_fn_error $? "conditional \"WITH_LIBFIBRE\" was never defined.
     16998Usually this means the macro was only invoked conditionally." "$LINENO" 5
     16999fi
     17000if test -z "${WITH_LIBPROFILER_TRUE}" && test -z "${WITH_LIBPROFILER_FALSE}"; then
     17001  as_fn_error $? "conditional \"WITH_LIBPROFILER\" was never defined.
     17002Usually this means the macro was only invoked conditionally." "$LINENO" 5
     17003fi
     17004if test -z "${WITH_LIBTCMALLOC_TRUE}" && test -z "${WITH_LIBTCMALLOC_FALSE}"; then
     17005  as_fn_error $? "conditional \"WITH_LIBTCMALLOC\" was never defined.
    1747417006Usually this means the macro was only invoked conditionally." "$LINENO" 5
    1747517007fi
     
    1845917991    "benchmark/Makefile") CONFIG_FILES="$CONFIG_FILES benchmark/Makefile" ;;
    1846017992    "tests/Makefile") CONFIG_FILES="$CONFIG_FILES tests/Makefile" ;;
    18461     "tests/preempt_longrun/Makefile") CONFIG_FILES="$CONFIG_FILES tests/preempt_longrun/Makefile" ;;
     17993    "longrun_tests/Makefile") CONFIG_FILES="$CONFIG_FILES longrun_tests/Makefile" ;;
    1846217994    "tools/Makefile") CONFIG_FILES="$CONFIG_FILES tools/Makefile" ;;
    1846317995    "tools/prettyprinter/Makefile") CONFIG_FILES="$CONFIG_FILES tools/prettyprinter/Makefile" ;;
  • configure.ac

    r6a9d4b4 r933f32f  
    139139                "debug") ;;
    140140                "nolib") ;;
     141                "profile") ;;
    141142                *)
    142143                        >&2 echo "Configuration must be 'debug', 'nodebug' or 'nolib'"
     
    178179AC_PROG_CC
    179180AM_PROG_AS
    180 AM_PROG_CC_C_O  # deprecated
    181181# These are often not installed and people miss seeing the "no", so stop the configure.
    182182AC_PROG_YACC
     
    186186AC_PROG_LIBTOOL
    187187AC_PROG_INSTALL
    188 AC_PROG_MAKE_SET
    189188
    190189# Checks for libraries.
     
    192191AM_CONDITIONAL([WITH_LIBFIBRE], [test "$HAVE_LIBFIBRE" -eq 1])
    193192
     193AC_CHECK_LIB([profiler], [ProfilingIsEnabledForAllThreads], [HAVE_LIBPROFILER=1], [HAVE_LIBPROFILER=0])
     194AM_CONDITIONAL([WITH_LIBPROFILER], [test "$HAVE_LIBPROFILER" -eq 1])
     195
     196AC_CHECK_LIB([tcmalloc], [malloc], [HAVE_LIBTCMALLOC=1], [HAVE_LIBTCMALLOC=0])
     197AM_CONDITIONAL([WITH_LIBTCMALLOC], [test "$HAVE_LIBTCMALLOC" -eq 1])
     198
    194199# Checks for header files.
    195 AC_FUNC_ALLOCA
    196 AC_CHECK_HEADERS([fenv.h float.h inttypes.h libintl.h limits.h malloc.h stddef.h stdlib.h string.h unistd.h])
     200AC_CHECK_HEADERS([libintl.h malloc.h unistd.h], [], [echo "Error: Missing required header"; exit 1])
    197201
    198202# Checks for typedefs, structures, and compiler characteristics.
    199 AC_HEADER_STDBOOL
    200 AC_C_INLINE
    201 AC_TYPE_INT16_T
    202 AC_TYPE_INT32_T
    203 AC_TYPE_INT8_T
    204 AC_C_RESTRICT
    205 AC_TYPE_SIZE_T
    206 AC_TYPE_UINT16_T
    207 AC_TYPE_UINT32_T
    208 AC_TYPE_UINT8_T
    209 
    210 # Checks for library functions.
    211 AC_CHECK_FUNCS([memset putenv strchr strtol])
     203AC_CHECK_TYPES([_Float32], AC_DEFINE([HAVE_KEYWORDS_FLOATXX], [], [Have keywords _FloatXX.]), [], [[]])
     204
     205# Checks for compiler flags.
     206M4CFA_CHECK_COMPILE_FLAG([-Wcast-function-type], AC_DEFINE([HAVE_CAST_FUNCTION_TYPE], [], [Have compiler warning cast-function-type.]))
    212207
    213208#==============================================================================
     
    223218        benchmark/Makefile
    224219        tests/Makefile
    225         tests/preempt_longrun/Makefile
     220        longrun_tests/Makefile
    226221        tools/Makefile
    227222        tools/prettyprinter/Makefile
  • doc/LaTeXmacros/lstlang.sty

    r6a9d4b4 r933f32f  
    88%% Created On       : Sat May 13 16:34:42 2017
    99%% Last Modified By : Peter A. Buhr
    10 %% Last Modified On : Fri Apr  6 23:44:50 2018
    11 %% Update Count     : 20
     10%% Last Modified On : Tue Jan  8 14:40:33 2019
     11%% Update Count     : 21
    1212%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    1313
     
    114114                _Alignas, _Alignof, __alignof, __alignof__, asm, __asm, __asm__, __attribute, __attribute__,
    115115                auto, _Bool, catch, catchResume, choose, _Complex, __complex, __complex__, __const, __const__,
    116                 coroutine, disable, dtype, enable, __extension__, exception, fallthrough, fallthru, finally,
     116                coroutine, disable, dtype, enable, exception, __extension__, fallthrough, fallthru, finally,
    117117                __float80, float80, __float128, float128, forall, ftype, _Generic, _Imaginary, __imag, __imag__,
    118118                inline, __inline, __inline__, __int128, int128, __label__, monitor, mutex, _Noreturn, one_t, or,
  • doc/bibliography/pl.bib

    r6a9d4b4 r933f32f  
    330330    contributer = {pabuhr@plg},
    331331    author      = {Nissim Francez},
    332     title       = {Another Advantage of Key word Notation for Parameter Communication with Subprograms},
     332    title       = {Another Advantage of Keyword Notation for Parameter Communication with Subprograms},
    333333    journal     = cacm,
    334334    volume      = 20,
     
    831831    year        = 2015,
    832832    howpublished= {\href{http://www.boost.org/doc/libs/1_61_0/libs/coroutine/doc/html/index.html}
    833                   {{http://www.boost.org/\-doc/\-libs/1\_61\_0/\-libs/\-coroutine/\-doc/\-html/\-index.html}}},
    834     optnote     = {Accessed: 2016-09},
     833                  {http://www.boost.org/\-doc/\-libs/1\_61\_0/\-libs/\-coroutine/\-doc/\-html/\-index.html}},
     834}
     835
     836@misc{BoostThreads,
     837    keywords    = {Boost Thread Library},
     838    contributer = {pabuhr@plg},
     839    author      = {Anthony Williams and Vicente J. Botet Escriba},
     840    title       = {Boost Thread Library},
     841    year        = 2015,
     842    howpublished= {\href{https://www.boost.org/doc/libs/1_61_0/doc/html/thread.html}
     843                  {https://\-www.boost.org/\-doc/\-libs/\-1\_61\_0/\-doc/\-html/\-thread.html}},
    835844}
    836845
     
    939948    author      = {{\textsf{C}{$\mathbf{\forall}$} Features}},
    940949    howpublished= {\href{https://plg.uwaterloo.ca/~cforall/features}{https://\-plg.uwaterloo.ca/\-$\sim$cforall/\-features}},
    941     optnote     = {Accessed: 2018-01-01},
    942950}
    943951
     
    959967    year        = 2018,
    960968    howpublished= {\href{https://cforall.uwaterloo.ca/CFAStackEvaluation.zip}{https://cforall.uwaterloo.ca/\-CFAStackEvaluation.zip}},
    961     optnote     = {[Accessed May 2018]},
    962969}
    963970
     
    966973    contributer = {pabuhr@plg},
    967974    author      = {Aaron Moss and Robert Schluntz and Peter A. Buhr},
    968     title       = {\textsf{C}$\mathbf{\forall}$ : Adding Modern Programming Language Features to C},
     975    title       = {\textsf{C}$\mathbf{\forall}$ : Adding Modern Programming Language Features to {C}},
    969976    journal     = spe,
    970977    volume      = 48,
     
    10861093}
    10871094
     1095@techreport{Prokopec11,
     1096  keywords = {ctrie, concurrent map},
     1097  contributer = {a3moss@uwaterloo.ca},
     1098  title={Cache-aware lock-free concurrent hash tries},
     1099  author={Prokopec, Aleksandar and Bagwell, Phil and Odersky, Martin},
     1100  institution={EPFL},
     1101  year={2011}
     1102}
     1103
    10881104@article{Buhr85,
    10891105    keywords    = {goto, multi-exit loop},
     
    11321148    year        = 1998,
    11331149    note        = {{\small\textsf{ftp://\-plg.uwaterloo.ca/\-pub/\-Cforall/\-refrat.ps.gz}}},
     1150}
     1151
     1152@phdthesis{Norrish98,
     1153  title={C formalised in HOL},
     1154  author={Norrish, Michael},
     1155  year={1998},
     1156  school={University of Cambridge}
     1157}
     1158
     1159@inproceedings{Tarditi18,
     1160    keywords    = {Checked C},
     1161    contributer = {a3moss@uwaterloo.ca},
     1162    author      = {Tarditi, David and Elliott, Archibald Samuel and Ruef, Andrew and Hicks, Michael},
     1163    title       = {Checked C: Making C Safe by Extension},
     1164    booktitle   = {2018 IEEE Cybersecurity Development (SecDev)},
     1165    year        = {2018},
     1166    month       = {September},
     1167    pages       = {53-60},
     1168    publisher   = {IEEE},
     1169    url         = {https://www.microsoft.com/en-us/research/publication/checkedc-making-c-safe-by-extension/},
     1170}
     1171
     1172@misc{Clang,
     1173    keywords    = {clang},
     1174    contributer = {a3moss@uwaterloo.ca},
     1175    title       = {Clang: a {C} language family frontend for {LLVM}},
     1176    howpublished= {\href{https://clang.llvm.org/}{https://\-clang.llvm.org/}}
    11341177}
    11351178
     
    12341277}
    12351278
     1279@inproceedings{Odersky01,
     1280 keywords = {Scala},
     1281 contributer = {a3moss@uwaterloo.ca},
     1282 author = {Odersky, Martin and Zenger, Christoph and Zenger, Matthias},
     1283 title = {Colored Local Type Inference},
     1284 booktitle = {Proceedings of the 28th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages},
     1285 series = {POPL '01},
     1286 year = {2001},
     1287 isbn = {1-58113-336-7},
     1288 location = {London, United Kingdom},
     1289 pages = {41--53},
     1290 numpages = {13},
     1291 url = {http://doi.acm.org/10.1145/360204.360207},
     1292 doi = {10.1145/360204.360207},
     1293 acmid = {360207},
     1294 publisher = {ACM},
     1295 address = {New York, NY, USA},
     1296}
     1297
    12361298@book{sml:commentary,
    12371299    author      = {Robin Milner and Mads Tofte},
     
    12941356    journal     = sigplan,
    12951357    year        = 1986,
    1296     month       = oct, volume = 21, number = 10, pages = {19-28},
     1358    month       = oct,
     1359    volume      = 21,
     1360    number      = 10,
     1361    pages       = {19-28},
    12971362    note        = {Object Oriented Programming Workshop}
    12981363}
     
    14591524    title       = {concurrent-locking},
    14601525    howpublished= {\href{https://github.com/pabuhr/concurrent-locking}{https://\-github.com/\-pabuhr/\-concurrent-locking}},
    1461     optnote     = {[Accessed April 2017]},
    14621526}
    14631527
     
    16211685}
    16221686
     1687@inproceedings{Prokopec12,
     1688  keywords={ctrie, hash trie, concurrent map},
     1689  contributer={a3moss@uwaterloo.ca},
     1690  title={Concurrent tries with efficient non-blocking snapshots},
     1691  author={Prokopec, Aleksandar and Bronson, Nathan Grasso and Bagwell, Phil and Odersky, Martin},
     1692  booktitle={ACM SIGPLAN Notices},
     1693  volume={47},
     1694  number={8},
     1695  pages={151--160},
     1696  year={2012},
     1697  organization={ACM}
     1698}
     1699
    16231700@article{Buhr05a,
    16241701    keywords    = {concurrency, myths},
     
    17471824    howpublished= {\href{https://www.airs.com/blog/archives/428}
    17481825                  {https://www.airs.com/\-blog/\-archives/\-428}},
    1749     optnote     = {Accessed: 2018-05},
    17501826}
    17511827
     
    18431919    year        = 1965,
    18441920    note        = {Reprinted in \cite{Genuys68} pp. 43--112.}
     1921}
     1922
     1923@manual{C++20Coroutine19,
     1924    keywords    = {coroutine},
     1925    contributer = {pabuhr@plg},
     1926    title       = {Coroutines (C++20)},
     1927    organization= {cppreference.com},
     1928    month       = apr,
     1929    year        = 2019,
     1930    note        = {\href{https://en.cppreference.com/w/cpp/language/coroutines}{https://\-en.cppreference.com/\-w/\-cpp/\-language/\-coroutines}},
    18451931}
    18461932
     
    22712357}
    22722358
     2359@article{Ritchie93,
     2360    keywords    = {C, history},
     2361    contributer = {pabuhr@plg},
     2362    author      = {Ritchie, Dennis M.},
     2363    title       = {The Development of the {C} Language},
     2364    journal     = sigplan,
     2365    volume      = 28,
     2366    number      = 3,
     2367    month       = mar,
     2368    year        = 1993,
     2369    pages       = {201--208},
     2370    url         = {http://doi.acm.org/10.1145/155360.155580},
     2371    publisher   = {ACM},
     2372    address     = {New York, NY, USA},
     2373}
     2374
    22732375@article{design,
    22742376    keywords    = {Smalltalk, designing classes},
     
    22782380    journal     = joop,
    22792381    year        = 1988,
    2280     volume      = 1, number = 2, pages = {22-35},
     2382    volume      = 1,
     2383    number      = 2,
     2384    pages       = {22-35},
    22812385    comment     = {
    22822386        Abstract classes represent standard protocols.  ``It is better to
     
    23632467    year        = 1990,
    23642468    pages       = {315-323}
     2469}
     2470
     2471@misc{Dotty-github,
     2472    keywords = {dotty,scala},
     2473    contributer = {a3moss@uwaterloo.ca},
     2474    author = {Martin Odersky},
     2475    title = {Dotty},
     2476    howpublished = {\href{https://github.com/lampepfl/dotty}{https://\-github.com/\-lampepfl/\-dotty}},
     2477    note = {Accessed: 2019-02-22}
    23652478}
    23662479
     
    24702583    pages       = {325-361},
    24712584}
     2585
     2586@article{Tarjan75,
     2587 keywords = {union-find},
     2588 contributer = {a3moss@uwaterloo.ca},
     2589 author = {Tarjan, Robert Endre},
     2590 title = {Efficiency of a Good But Not Linear Set Union Algorithm},
     2591 journal = {J. ACM},
     2592 issue_date = {April 1975},
     2593 volume = {22},
     2594 number = {2},
     2595 month = apr,
     2596 year = {1975},
     2597 issn = {0004-5411},
     2598 pages = {215--225},
     2599 numpages = {11},
     2600 url = {http://doi.acm.org/10.1145/321879.321884},
     2601 doi = {10.1145/321879.321884},
     2602 acmid = {321884},
     2603 publisher = {ACM},
     2604 address = {New York, NY, USA},
     2605}
    24722606
    24732607@book{Eiffel,
     
    29063040    year        = 2014,
    29073041    howpublished= {\href{https://gcc.gnu.org/onlinedocs/gcc-4.7.2/gcc/C-Extensions.html}{https://\-gcc.gnu.org/\-onlinedocs/\-gcc-4.7.2/\-gcc/\-C\-Extensions.html}},
    2908     optnote     = {Accessed: 2017-04-02},
    29093042}
    29103043
     
    29903123}
    29913124
     3125@manual{WindowsFibers,
     3126    keywords    = {threads, fibers},
     3127    contributer = {pabuhr@plg},
     3128    author      = {Windows},
     3129    title       = {Fibers},
     3130    organization= {Microsoft, Windows Development Center},
     3131    address     = {\href{https://docs.microsoft.com/en-us/windows/desktop/ProcThread/fibers}{https://\-docs.microsoft.com/\-en-us/\-windows/\-desktop/\-ProcThread/\-fibers}},
     3132    year        = 2018,
     3133}
     3134
    29923135@inproceedings{F-bound,
    29933136    keywords    = {},
     
    30373180}
    30383181
     3182@manual{Folly,
     3183    keywords    = {Folly},
     3184    contributer = {pabuhr@plg},
     3185    author      = {Folly},
     3186    title       = {Facebook Open-source Library},
     3187    organization= {Facebook},
     3188    address     = {\href{https://github.com/facebook/folly}{https://\-github.com/\-facebook/\-folly}},
     3189    year        = 2018,
     3190}
     3191
     3192@article{Leroy09,
     3193 keywords = {C formalization},
     3194 contributer = {a3moss@uwaterloo.ca},
     3195 author = {Leroy, Xavier},
     3196 title = {Formal Verification of a Realistic Compiler},
     3197 journal = {Commun. ACM},
     3198 issue_date = {July 2009},
     3199 volume = {52},
     3200 number = {7},
     3201 month = jul,
     3202 year = {2009},
     3203 issn = {0001-0782},
     3204 pages = {107--115},
     3205 numpages = {9},
     3206 url = {http://doi.acm.org/10.1145/1538788.1538814},
     3207 doi = {10.1145/1538788.1538814},
     3208 acmid = {1538814},
     3209 publisher = {ACM},
     3210 address = {New York, NY, USA},
     3211}
     3212
    30393213@manual{Fortran95,
    30403214    keywords    = {Fortran 95},
     
    30573231    address     = {\href{https://www.iso.org/standard/50459.html}{https://\-www.iso.org/\-standard/\-50459.html}},
    30583232    year        = 2010,
     3233}
     3234
     3235@manual{Fortran18,
     3236    keywords    = {ISO/IEC Fortran 10},
     3237    contributer = {pabuhr@plg},
     3238    author      = {Fortran18},
     3239    title       = {Programming Languages -- {Fortran} Part 1:Base Language ISO/IEC 1539-1:2018},
     3240    edition     = {4th},
     3241    publisher   = {International Standard Organization},
     3242    address     = {\href{https://www.iso.org/standard/72320.html}{https://\-www.iso.org/\-standard/\-72320.html}},
     3243    year        = 2018,
    30593244}
    30603245
     
    33063491    year        = 2014,
    33073492    howpublished= {https://developer.gnome.org/gobject/stable/},
    3308     optnote     = {Accessed: 2017-04},
    33093493}
    33103494
     
    36213805    year        = {1964},
    36223806    publisher   = {ACM}
     3807}
     3808
     3809@phdthesis{Barghi18,
     3810    keywords    = {concurrency, user threads, actors},
     3811    contributer = {pabuhr@plg},
     3812    author      = {Saman Barghi},
     3813    title       = {Improving the Performance of User-level Runtime Systems for Concurrent Applications},
     3814    school      = {School of Computer Science, University of Waterloo},
     3815    year        = 2018,
     3816    month       = sep,
     3817    optaddress  = {Waterloo, Ontario, Canada, N2L 3G1},
     3818    note        = {\href{https://uwspace.uwaterloo.ca/handle/10012/13935}{https://\-uwspace.uwaterloo.ca/\-handle/\-10012/\-13935}},
     3819}
     3820
     3821@article{Swift05,
     3822   contributer  = {pabuhr@plg},
     3823   author       = {Michael M. Swift and Brian N. Bershad and Henry M. Levy},
     3824   title        = {Improving the Reliability of Commodity Operating Systems},
     3825   journal      = tocs,
     3826   volume       = 23,
     3827   number       = 1,
     3828   month        = feb,
     3829   year         = 2005,
     3830   pages        = {77-110},
    36233831}
    36243832
     
    39274135}
    39284136
     4137@article{Morgado13,
     4138  keywords = {expression resolution},
     4139  contributer = {a3moss@uwaterloo.ca},
     4140  title={Iterative and core-guided {MaxSAT} solving: A survey and assessment},
     4141  author={Morgado, Antonio and Heras, Federico and Liffiton, Mark and Planes, Jordi and Marques-Silva, Joao},
     4142  journal={Constraints},
     4143  volume={18},
     4144  number={4},
     4145  pages={478--534},
     4146  year={2013},
     4147  publisher={Springer}
     4148}
     4149
    39294150% J
    39304151                 
     
    39484169    year        = 2015,
    39494170    edition     = {{J}ava {SE} 8},
     4171}
     4172
     4173@manual{Java11,
     4174    keywords    = {Java SE 11},
     4175    contributer = {pabuhr@plg},
     4176    author      = {James Gosling and Bill Joy and Guy Steele and Gilad Bracha and Alex Buckley and Daniel Smith},
     4177    title       = {{Java} Language Specification},
     4178    publisher   = {Oracle},
     4179    month       = sep,
     4180    year        = 2018,
     4181    edition     = {{J}ava {SE} 11},
     4182}
     4183
     4184@manual{JDK1.1,
     4185    keywords    = {JDK 1.1},
     4186    contributer = {pabuhr@plg},
     4187    author      = {{Multithreading Models}},
     4188    title       = {JDK 1.1 for Solaris Developer's Guide},
     4189    publisher   = {Oracle},
     4190    address     = {\href{https://docs.oracle.com/cd/E19455-01/806-3461/6jck06gqk/index.html#ch2mt-41}{https://\-docs.oracle.com/\-cd/\-E19455-01/\-806-3461/\-6jck06gqk/\-index.html\#ch2mt-41}},
     4191    year        = 2010,
    39504192}
    39514193
     
    41294371}
    41304372
     4373@manual{libmill,
     4374    keywords    = {libmill},
     4375    contributer = {pabuhr@plg},
     4376    author      = {libmill},
     4377    title       = {{G}o-style concurrency in {C}, Version 1.18},
     4378    organization= {libmill},
     4379    address     = {\href{http://libmill.org/documentation.html}{http://\-libmill.org/\-documentation.html}},
     4380    month       = jan,
     4381    year        = 2017,
     4382}
     4383
    41314384@book{Weissman67,
    41324385    keywords    = {lisp},
     
    41384391}
    41394392
     4393@article{Pierce00,
     4394 keywords = {Scala},
     4395 contributer = {a3moss@uwaterloo.ca},
     4396 author = {Pierce, Benjamin C. and Turner, David N.},
     4397 title = {Local Type Inference},
     4398 journal = {ACM Trans. Program. Lang. Syst.},
     4399 issue_date = {Jan. 2000},
     4400 volume = {22},
     4401 number = {1},
     4402 month = jan,
     4403 year = {2000},
     4404 issn = {0164-0925},
     4405 pages = {1--44},
     4406 numpages = {44},
     4407 url = {http://doi.acm.org/10.1145/345099.345100},
     4408 doi = {10.1145/345099.345100},
     4409 acmid = {345100},
     4410 publisher = {ACM},
     4411 address = {New York, NY, USA},
     4412 keywords = {polymorphism, subtyping, type inference},
     4413}
     4414
    41404415@article{Sundell08,
    41414416    keywords    = {lock free, deque},
     
    41484423    year        = 2008,
    41494424    pages       = {1008-1020},
     4425}
     4426
     4427@misc{Matsakis17,
     4428    keywords    = {Rust, Chalk, PROLOG},
     4429    contributer = {a3moss@uwaterloo.ca},
     4430    author      = {Nicholas Matsakis},
     4431    title       = {Lowering {Rust} traits to logic},
     4432    month       = jan,
     4433    year        = 2017,
     4434    howpublished= {\href{http://smallcultfollowing.com/babysteps/blog/2017/01/26/lowering-rust-traits-to-logic/}
     4435                  {http://smallcultfollowing.com/\-babysteps/\-blog/\-2017/\-01/\-26/\-lowering-rust-traits-to-logic/}},
     4436    optnote     = {Accessed: 2019-01},
    41504437}
    41514438
     
    41644451}
    41654452
     4453@manual{Lua,
     4454    keywords    = {Lua},
     4455    contributer = {pabuhr@plg},
     4456    author      = {Lua},
     4457    title       = {Lua 5.3 Reference Manual},
     4458    address     = {\href{https://www.lua.org/manual/5.3}{https://\-www.lua.org/\-manual/\-5.3}},
     4459    year        = 2018,
     4460}
     4461
    41664462% M
    41674463
     
    41734469    publisher   = {Motorola},
    41744470    year        = 1992,
     4471}
     4472
     4473@misc{Haberman16,
     4474    keywords    = {C++ template expansion},
     4475    contributer = {a3moss@uwaterloo.ca},
     4476    author      = {Josh Haberman},
     4477    title       = {Making arbitrarily-large binaries from fixed-size {C}{\kern-.1em\hbox{\large\texttt{+\kern-.25em+}}} code},
     4478    year        = 2016,
     4479    howpublished= {\href{http://blog.reverberate.org/2016/01/making-arbitrarily-large-binaries-from.html}
     4480                  {
     4481          {http://blog.reverberate.org/\-2016/\-01/\-making-arbitrarily-large-binaries-from.html}
     4482          }},
     4483    optnote     = {Accessed: 2016-09},
    41754484}
    41764485
     
    44914800}
    44924801%    editor     = {Allen Kent and James G. Williams},
     4802
     4803@incollection{MPC,
     4804    keywords    = {user-level threading},
     4805    contributer = {pabuhr@plg},
     4806    author      = {Marc P\'erache and Herv\'e Jourdren and Raymond Namyst},
     4807    title       = {MPC: A Unified Parallel Runtime for Clusters of {NUMA} Machines},
     4808    booktitle   = {Euro-Par 2008},
     4809    pages       = {329-342},
     4810    publisher   = {Springer},
     4811    address     = {Berlin, Heidelberg},
     4812    year        = 2008,
     4813    volume      = 5168,
     4814    series      = {Lecture Notes in Computer Science},
     4815}
    44934816
    44944817@manual{MPI,
     
    49175240    year        = 2014,
    49185241    howpublished= {\href{https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/ProgrammingWithObjectiveC}{https://\-developer.apple.com/\-library/archive/\-documentation/\-Cocoa/\-Conceptual/\-ProgrammingWithObjectiveC}},
    4919     optnote     = {Accessed: 2018-03}
    49205242}
    49215243
     
    49275249    year        = 2015,
    49285250    howpublished= {\href{https://developer.apple.com/library/content/documentation/Xcode/Conceptual/RN-Xcode-Archive/Chapters/xc7_release_notes.html}{https://\-developer.apple.com/\-library/\-content/\-documentation/\-Xcode/\-Conceptual/\-RN-Xcode-Archive/\-Chapters/\-xc7\_release\_notes.html}},
    4929     optnote     = {Accessed: 2017-04}
    49305251}
    49315252
     
    50625383    note        = {\href{https://www.openmp.org/wp-content/uploads/openmp-4.5.pdf}{https://\-www.openmp.org/\-wp-content/\-uploads/\-openmp-4.5.pdf}},
    50635384}
     5385
     5386@inproceedings{Krebbers14,
     5387 keywords = {c formalization},
     5388 contributer = {a3moss@uwaterloo.ca},
     5389 author = {Krebbers, Robbert},
     5390 title = {An Operational and Axiomatic Semantics for Non-determinism and Sequence Points in C},
     5391 booktitle = {Proceedings of the 41st ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages},
     5392 series = {POPL '14},
     5393 year = {2014},
     5394 isbn = {978-1-4503-2544-8},
     5395 location = {San Diego, California, USA},
     5396 pages = {101--112},
     5397 numpages = {12},
     5398 url = {http://doi.acm.org/10.1145/2535838.2535878},
     5399 doi = {10.1145/2535838.2535878},
     5400 acmid = {2535878},
     5401 publisher = {ACM},
     5402 address = {New York, NY, USA},
     5403}
    50645404
    50655405@book{Deitel04,
     
    54405780    year        = 2012,
    54415781    howpublished= {\href{http://cs.brown.edu/research/pubs/theses/masters/2012/verch.pdf}{http://cs.brown.edu/\-research/\-pubs/\-theses/\-masters/\-2012/\-verch.pdf}},
    5442     optnote     = {Accessed: 2013-10-4}
    54435782}
    54445783
     
    57646103    address     = {\href{https://www.iso.org/standard/64029.html}{https://\-www.iso.org/\-standard/\-64029.html}},
    57656104    year        = 2014,
     6105}
     6106
     6107@manual{C++17,
     6108    keywords    = {ISO/IEC C++ 17},
     6109    contributer = {pabuhr@plg},
     6110    key         = {C++17},
     6111    title       = {{C}{\kern-.1em\hbox{\large\texttt{+\kern-.25em+}}} Programming Language ISO/IEC 14882:2017},
     6112    edition     = {5th},
     6113    publisher   = {International Standard Organization},
     6114    address     = {\href{https://www.iso.org/standard/68564.html}{https://\-www.iso.org/\-standard/\-68564.html}},
     6115    year        = 2017,
    57666116}
    57676117
     
    59176267    institution = {Carnegie Mellon University},
    59186268    year        = 1991,
    5919     month       = feb, number = "CMU-CS-91-106",
     6269    month       = feb,
     6270    number      = {CMU-CS-91-106},
    59206271    annote      = {
    59216272        Discusses a typed lambda calculus with
     
    59746325    journal     = sigplan,
    59756326    year        = 1988,
    5976     month       = jul, volume = 23, number = 7, pages = {260-267},
    5977     note        = {Proceedings of the SIGPLAN '88 Conference on Programming Language
    5978          Design and Implementation},
     6327    month       = jul,
     6328    volume      = 23,
     6329    number      = 7,
     6330    pages       = {260-267},
     6331    note        = {Proceedings of the SIGPLAN '88 Conference on Programming Language Design and Implementation},
    59796332    abstract    = {
    59806333        This paper deals with the integration of an efficient asynchronous
     
    60266379}
    60276380
     6381@misc{Pthreads,
     6382    keywords    = {pthreads, C concurrency},
     6383    contributer = {pabuhr@plg},
     6384    key         = {pthreads},
     6385    title       = {{Pthread}.h, Specifications Issue 7, {IEEE} Std 1003.1-2017},
     6386    author      = {IEEE and {The Open Group}},
     6387    year        = 2018,
     6388    howpublished= {\href{http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/pthread.h.html}
     6389                  {http://\-pubs.opengroup.org/\-onlinepubs/\-9699919799/\-basedefs/\-pthread.h.html}},
     6390}
     6391
    60286392@manual{Python,
    60296393    keywords    = {Python},
    60306394    contributer = {pabuhr@plg},
    6031     title       = {Python Reference Manual, Release 2.5},
    6032     author      = {Guido van Rossum},
     6395    author      = {Python},
     6396    title       = {Python Language Reference, Release 3.7.2},
    60336397    organization= {Python Software Foundation},
    6034     month       = sep,
    6035     year        = 2006,
    6036     note        = {Fred L. Drake, Jr., editor},
     6398    address     = {\href{https://docs.python.org/3/reference/index.html}{https://\-docs.python.org/\-3/\-reference/\-index.html}},
     6399    year        = 2018,
    60376400}
    60386401
    60396402% Q
     6403
     6404@inproceedings{Qthreads,
     6405    keywords    = {user-level threading},
     6406    author      = {Kyle B. Wheeler and Richard C. Murphy and Douglas Thain},
     6407    title       = {Qthreads: An API for Programming with Millions of Lightweight Threads},
     6408    booktitle   = {International Symposium on Parallel and Distributed Processing},
     6409    organization= {IEEE},
     6410    address     = {Miami, FL, USA},
     6411    month       = apr,
     6412    year        = 2008,
     6413}
    60406414
    60416415@article{Grossman06,
     
    60746448}
    60756449
     6450@manual{Quasar,
     6451    keywords    = {Quasar},
     6452    contributer = {pabuhr@plg},
     6453    author      = {Quasar},
     6454    title       = {Quasar Documentation, Release 0.8.0},
     6455    organization= {Parallel Universe},
     6456    address     = {\href{http://docs.paralleluniverse.co/quasar}{http://\-docs.paralleluniverse.co/\-quasar}},
     6457    year        = 2018,
     6458}
     6459
    60766460% R
    60776461
     
    61876571    number      = 10,
    61886572    pages       = {27-32},
     6573}
     6574
     6575@article{Hesselink06,
     6576    author      = {Wim H. Hesselink},
     6577    title       = {Refinement Verification of the Lazy Caching Algorithm},
     6578    journal     = acta,
     6579    year        = 2006,
     6580    month       = oct,
     6581    volume      = 43,
     6582    number      = 3,
     6583    pages       = {195--222},
    61896584}
    61906585
     
    63256720}
    63266721
     6722@manual{Ruby,
     6723    keywords    = {Ruby},
     6724    contributer = {pabuhr@plg},
     6725    author      = {Ruby},
     6726    title       = {Ruby Documentation, Release 2.6.0},
     6727    organization= {Python Software Foundation},
     6728    address     = {\href{https://www.ruby-lang.org/en/documentation}{https://\-www.ruby-lang.org/\-en/\-documentation}},
     6729    year        = 2018,
     6730}
     6731
    63276732% S
    63286733
     
    70617466}
    70627467
     7468@article{SysVABI,
     7469  keywords = {System V ABI},
     7470  contributer = {a3moss@uwaterloo.ca},
     7471  title={System {V} application binary interface},
     7472  author={Matz, Michael and Hubicka, Jan and Jaeger, Andreas and Mitchell, Mark},
     7473  journal={AMD64 Architecture Processor Supplement, Draft v0},
     7474  volume={99},
     7475  year={2013}
     7476}
     7477
    70637478% T
    70647479
     
    71157530    author      = {{TIOBE Index}},
    71167531    howpublished= {\href{http://www.tiobe.com/tiobe_index}{http://\-www.tiobe.com/\-tiobe\_index}},
    7117     optnote     = {Accessed: 2018-09},
     7532}
     7533
     7534@misc{ThreadModel,
     7535    contributer = {pabuhr@plg},
     7536    key         = {ThreadModel},
     7537    title       = {Thread (computing)},
     7538    author      = {{Threading Model}},
     7539    howpublished= {\href{https://en.wikipedia.org/wiki/Thread_(computing)}{https://\-en.wikipedia.org/\-wiki/\-Thread\_(computing)}},
    71187540}
    71197541
     
    72777699}
    72787700
     7701@techreport{Black90,
     7702  title={Typechecking polymorphism in {Emerald}},
     7703  author={Black, Andrew P and Hutchinson, Norman C},
     7704  year={1990},
     7705  institution={Cambridge Research Laboratory, Digital Equipment Corporation}
     7706}
     7707
    72797708@article{Cormack90,
    72807709    keywords    = {polymorphism},
     
    74477876    year        = 2017,
    74487877    howpublished= {\url{https://wiki.gnome.org/Projects/Vala/Manual}},
    7449     optnote     = {Accessed: 2017-04}
    74507878}
    74517879
     
    76218049% Y
    76228050
     8051@article{Boehm12,
     8052    keywords    = {memory model, race condition},
     8053    contributer = {pabuhr@plg},
     8054    author      = {Boehm, Hans-J. and Adve, Sarita V.},
     8055    title       = {You Don't Know Jack About Shared Variables or Memory Models},
     8056    journal     = cacm,
     8057    volume      = 55,
     8058    number      = 2,
     8059    month       = feb,
     8060    year        = 2012,
     8061    pages       = {48--54},
     8062    publisher   = {ACM},
     8063    address     = {New York, NY, USA},
     8064}
     8065
    76238066% Z
    76248067
  • doc/papers/concurrency/Paper.tex

    r6a9d4b4 r933f32f  
    215215{}
    216216\lstnewenvironment{Go}[1][]
    217 {\lstset{#1}}
     217{\lstset{language=go,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
     218{}
     219\lstnewenvironment{python}[1][]
     220{\lstset{language=python,moredelim=**[is][\protect\color{red}]{`}{`},#1}\lstset{#1}}
    218221{}
    219222
     
    228231}
    229232
    230 \title{\texorpdfstring{Concurrency in \protect\CFA}{Concurrency in Cforall}}
     233\title{\texorpdfstring{Advanced Control-flow and Concurrency in \protect\CFA}{Advanced Control-flow and Concurrency in Cforall}}
    231234
    232235\author[1]{Thierry Delisle}
     
    238241\corres{*Peter A. Buhr, Cheriton School of Computer Science, University of Waterloo, 200 University Avenue West, Waterloo, ON, N2L 3G1, Canada. \email{pabuhr{\char`\@}uwaterloo.ca}}
    239242
    240 \fundingInfo{Natural Sciences and Engineering Research Council of Canada}
     243% \fundingInfo{Natural Sciences and Engineering Research Council of Canada}
    241244
    242245\abstract[Summary]{
    243 \CFA is a modern, polymorphic, \emph{non-object-oriented} extension of the C programming language.
    244 This paper discusses the design of the concurrency and parallelism features in \CFA, and its concurrent runtime-system.
    245 These features are created from scratch as ISO C lacks concurrency, relying largely on the pthreads library for concurrency.
    246 Coroutines and lightweight (user) threads are introduced into \CFA;
    247 as well, monitors are added as a high-level mechanism for mutual exclusion and synchronization.
    248 A unique contribution of this work is allowing multiple monitors to be safely acquired \emph{simultaneously}.
    249 All features respect the expectations of C programmers, while being fully integrate with the \CFA polymorphic type-system and other language features.
     246\CFA is a polymorphic, non-object-oriented, concurrent, backwards-compatible extension of the C programming language.
     247This paper discusses the design philosophy and implementation of its advanced control-flow and concurrent/parallel features, along with the supporting runtime.
     248These features are created from scratch as ISO C has only low-level and/or unimplemented concurrency, so C programmers continue to rely on library features like C pthreads.
     249\CFA introduces modern language-level control-flow mechanisms, like coroutines, user-level threading, and monitors for mutual exclusion and synchronization.
     250Library extensions for executors, futures, and actors are built on these basic mechanisms.
     251The runtime provides significant programmer simplification and safety by eliminating spurious wakeup and reducing monitor barging.
     252The runtime also ensures multiple monitors can be safely acquired \emph{simultaneously} (deadlock free), and this feature is fully integrated with all monitor synchronization mechanisms.
     253All language features integrate with the \CFA polymorphic type-system and exception handling, while respecting the expectations and style of C programmers.
    250254Experimental results show comparable performance of the new features with similar mechanisms in other concurrent programming-languages.
    251255}%
    252256
    253 \keywords{concurrency, parallelism, coroutines, threads, monitors, runtime, C, Cforall}
     257\keywords{coroutines, concurrency, parallelism, threads, monitors, runtime, C, \CFA (Cforall)}
    254258
    255259
     
    262266\section{Introduction}
    263267
     268This paper discusses the design philosophy and implementation of advanced language-level control-flow and concurrent/parallel features in \CFA~\cite{Moss18} and its runtime.
     269\CFA is a modern, polymorphic, non-object-oriented\footnote{
     270\CFA has features often associated with object-oriented programming languages, such as constructors, destructors, virtuals and simple inheritance.
     271However, functions \emph{cannot} be nested in structures, so there is no lexical binding between a structure and set of functions (member/method) implemented by an implicit \lstinline@this@ (receiver) parameter.},
     272backwards-compatible extension of the C programming language.
     273Within the \CFA framework, new control-flow features are created from scratch.
     274ISO \Celeven defines only a subset of the \CFA extensions, where the overlapping features are concurrency~\cite[\S~7.26]{C11}.
     275However, \Celeven concurrency is largely wrappers for a subset of the pthreads library~\cite{Butenhof97,Pthreads}.
     276Furthermore, \Celeven and pthreads concurrency is simple, based on thread fork/join in a function and a few locks, which is low-level and error prone;
     277no high-level language concurrency features are defined.
     278Interestingly, almost a decade after publication of the \Celeven standard, neither gcc-8, clang-8 nor msvc-19 (most recent versions) support the \Celeven include @threads.h@, indicating little interest in the \Celeven concurrency approach.
     279Finally, while the \Celeven standard does not state a threading model, the historical association with pthreads suggests implementations would adopt kernel-level threading (1:1)~\cite{ThreadModel}.
     280
     281In contrast, there has been a renewed interest during the past decade in user-level (M:N, green) threading in old and new programming languages.
     282As multi-core hardware became available in the 1980/90s, both user and kernel threading were examined.
     283Kernel threading was chosen, largely because of its simplicity and fit with the simpler operating systems and hardware architectures at the time, which gave it a performance advantage~\cite{Drepper03}.
     284Libraries like pthreads were developed for C, and the Solaris operating-system switched from user (JDK 1.1~\cite{JDK1.1}) to kernel threads.
     285As a result, languages like Java, Scala~\cite{Scala}, Objective-C~\cite{obj-c-book}, \CCeleven~\cite{C11}, and C\#~\cite{Csharp} adopt the 1:1 kernel-threading model, with a variety of presentation mechanisms.
     286From 2000 onwards, languages like Go~\cite{Go}, Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, D~\cite{D}, and \uC~\cite{uC++,uC++book} have championed the M:N user-threading model, and many user-threading libraries have appeared~\cite{Qthreads,MPC,BoostThreads}, including putting green threads back into Java~\cite{Quasar}.
     287The main argument for user-level threading is that user threads are lighter weight than kernel threads (locking and context switching do not cross the kernel boundary), so there is less restriction on programming styles that encourage large numbers of threads performing smaller work-units to facilitate load balancing by the runtime~\cite{Verch12}.
     288As well, user-threading facilitates a simpler concurrency approach using thread objects that leverage sequential patterns versus events with call-backs~\cite{vonBehren03}.
     289Finally, performant user-threading implementations (both time and space) are largely competitive with direct kernel-threading implementations, while achieving the programming advantages of high concurrency levels and safety.
     290
     291A further effort over the past two decades is the development of language memory-models to deal with the conflict between language features and compiler/hardware optimizations, i.e., some language features are unsafe in the presence of aggressive sequential optimizations~\cite{Buhr95a,Boehm05}.
     292The consequence is that a language must provide sufficient tools to program around safety issues, as inline and library code is all sequential to the compiler.
     293One solution is low-level qualifiers and functions (e.g., @volatile@ and atomics) allowing \emph{programmers} to explicitly write safe (race-free~\cite{Boehm12}) programs.
     294A safer solution is high-level language constructs so the \emph{compiler} knows the optimization boundaries, and hence, provides implicit safety.
     295This problem is best known with respect to concurrency, but applies to other complex control-flow, like exceptions\footnote{
     296\CFA exception handling will be presented in a separate paper.
     297The key feature that dovetails with this paper is non-local exceptions allowing exceptions to be raised across stacks, with synchronous exceptions raised among coroutines and asynchronous exceptions raised among threads, similar to that in \uC~\cite[\S~5]{uC++}.
     298} and coroutines.
     299Finally, solutions in the language allow matching constructs with language paradigm, i.e., imperative and functional languages have different presentations of the same concept.
     300
     301Finally, it is important for a language to provide safety over performance \emph{as the default}, allowing careful reduction of safety for performance when necessary.
     302Two concurrency violations of this philosophy are \emph{spurious wakeup} and \emph{barging}, i.e., random wakeup~\cite[\S~8]{Buhr05a} and signalling-as-hints~\cite[\S~8]{Buhr05a}, where one begets the other.
     303If you believe spurious wakeup is a foundational concurrency property, then unblocking (signalling) a thread is always a hint.
     304If you \emph{do not} believe spurious wakeup is foundational, then signalling-as-hints is a performance decision.
     305Most importantly, removing spurious wakeup and signals-as-hints makes concurrent programming significantly safer because it removes local non-determinism.
     306Clawing back performance, where local non-determinism is unimportant, should be an option, not the default.
     307
     308\begin{comment}
     309For example, it is possible to provide exceptions, coroutines, monitors, and tasks as specialized types in an object-oriented language, integrating these constructs to allow leveraging the type-system (static type-checking) and all other object-oriented capabilities~\cite{uC++}.
     310It is also possible to leverage call/return for blocking communication via new control structures, versus switching to alternative communication paradigms, like channels or message passing.
     311As well, user threading is often a complementary feature, allowing light-weight threading to match with low-cost objects, while hiding the application/kernel boundary.
     312User threading also allows layering of implicit concurrency models (no explicit thread creation), such as executors, data-flow, and actors, into a single language, so programmers can choose the model that best fits an algorithm.\footnote{
     313All implicit concurrency models have explicit threading in their implementation, and hence, can be built from explicit threading;
     314however, the reverse is seldom true, i.e., given implicit concurrency, e.g., actors, it is virtually impossible to create explicit concurrency, e.g., blocking thread objects.}
     315Finally, with extended language features and user-level threading it is possible to discretely fold locking and non-blocking I/O multiplexing into the language's I/O libraries, so threading implicitly dovetails with the I/O subsystem.
     316\CFA embraces language extensions and user-level threading to provide advanced control-flow (exception handling\footnote{
     317\CFA exception handling will be presented in a separate paper.
     318The key feature that dovetails with this paper is non-local exceptions allowing exceptions to be raised across stacks, with synchronous exceptions raised among coroutines and asynchronous exceptions raised among threads, similar to that in \uC~\cite[\S~5]{uC++}
     319} and coroutines) and concurrency.
     320
     321Most augmented traditional (Fortran 18~\cite{Fortran18}, Cobol 14~\cite{Cobol14}, Ada 12~\cite{Ada12}, Java 11~\cite{Java11}) and new languages (Go~\cite{Go}, Rust~\cite{Rust}, and D~\cite{D}), except \CC, diverge from C with different syntax and semantics, only interoperate indirectly with C, and are not systems languages, for those with managed memory.
     322As a result, there is a significant learning curve to move to these languages, and C legacy-code must be rewritten.
     323While \CC, like \CFA, takes an evolutionary approach to extend C, \CC's constantly growing complex and interdependent features-set (e.g., objects, inheritance, templates, etc.) mean idiomatic \CC code is difficult to use from C, and C programmers must expend significant effort learning \CC.
     324Hence, rewriting and retraining costs for these languages, even \CC, are prohibitive for companies with a large C software-base.
     325\CFA with its orthogonal feature-set, its high-performance runtime, and direct access to all existing C libraries circumvents these problems.
     326\end{comment}
     327
     328\CFA embraces user-level threading, language extensions for advanced control-flow, and safety as the default.
     329We present comparative examples so the reader can judge if the \CFA control-flow extensions are better and safer than those in or proposed for \Celeven, \CC and other concurrent, imperative programming languages, and perform experiments to show the \CFA runtime is competitive with other similar mechanisms.
     330The main contributions of this work are:
     331\begin{itemize}
     332\item
     333expressive language-level coroutines and user-level threading, which respect the expectations of C programmers.
     334\item
     335monitor synchronization without barging.
     336\item
     337safely acquiring multiple monitors \emph{simultaneously} (deadlock free), while seamlessly integrating this capability with all monitor synchronization mechanisms.
     338\item
     339providing statically type-safe interfaces that integrate with the \CFA polymorphic type-system and other language features.
     340\item
     341library extensions for executors, futures, and actors built on the basic mechanisms.
     342\item
     343a runtime system with no spurious wakeup.
     344\item
     345experimental results showing comparable performance of the new features with similar mechanisms in other concurrent programming-languages.
     346\end{itemize}
     347
     348\begin{comment}
    264349This paper provides a minimal concurrency \newterm{Application Program Interface} (API) that is simple, efficient and can be used to build other concurrency features.
    265350While the simplest concurrency system is a thread and a lock, this low-level approach is hard to master.
     
    281366The proposed concurrency API is implemented in a dialect of C, called \CFA (pronounced C-for-all).
    282367The paper discusses how the language features are added to the \CFA translator with respect to parsing, semantics, and type checking, and the corresponding high-performance runtime-library to implement the concurrent features.
    283 
    284 
     368\end{comment}
     369
     370
     371\begin{comment}
    285372\section{\CFA Overview}
    286373
     
    551638\end{cfa}
    552639where the return type supplies the type/size of the allocation, which is impossible in most type systems.
    553 
    554 
    555 \section{Concurrency}
    556 \label{s:Concurrency}
    557 
    558 At its core, concurrency is based on multiple call-stacks and scheduling threads executing on these stacks.
    559 Multiple call stacks (or contexts) and a single thread of execution, called \newterm{coroutining}~\cite{Conway63,Marlin80}, does \emph{not} imply concurrency~\cite[\S~2]{Buhr05a}.
    560 In coroutining, the single thread is self-scheduling across the stacks, so execution is deterministic, \ie the execution path from input to output is fixed and predictable.
    561 A \newterm{stackless} coroutine executes on the caller's stack~\cite{Python} but this approach is restrictive, \eg preventing modularization and supporting only iterator/generator-style programming;
    562 a \newterm{stackful} coroutine executes on its own stack, allowing full generality.
    563 Only stackful coroutines are a stepping stone to concurrency.
    564 
    565 The transition to concurrency, even for execution with a single thread and multiple stacks, occurs when coroutines also context switch to a \newterm{scheduling oracle}, introducing non-determinism from the coroutine perspective~\cite[\S~3]{Buhr05a}.
    566 Therefore, a minimal concurrency system is possible using coroutines (see Section \ref{coroutine}) in conjunction with a scheduler to decide where to context switch next.
    567 The resulting execution system now follows a cooperative threading-model, called \newterm{non-preemptive scheduling}.
    568 
    569 Because the scheduler is special, it can either be a stackless or stackful coroutine.
    570 For stackless, the scheduler performs scheduling on the stack of the current coroutine and switches directly to the next coroutine, so there is one context switch.
    571 For stackful, the current coroutine switches to the scheduler, which performs scheduling, and it then switches to the next coroutine, so there are two context switches.
    572 A stackful scheduler is often used for simplicity and security.
    573 
    574 Regardless of the approach used, a subset of concurrency related challenges start to appear.
    575 For the complete set of concurrency challenges to occur, the missing feature is \newterm{preemption}, where context switching occurs randomly between any two instructions, often based on a timer interrupt, called \newterm{preemptive scheduling}.
    576 While a scheduler introduces uncertainty in the order of execution, preemption introduces uncertainty about where context switches occur.
    577 Interestingly, uncertainty is necessary for the runtime (operating) system to give the illusion of parallelism on a single processor and increase performance on multiple processors.
    578 The reason is that only the runtime has complete knowledge about resources and how to best utilized them.
    579 However, the introduction of unrestricted non-determinism results in the need for \newterm{mutual exclusion} and \newterm{synchronization} to restrict non-determinism for correctness;
    580 otherwise, it is impossible to write meaningful programs.
    581 Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows.
    582 
    583 An important missing feature in C is threading\footnote{While the C11 standard defines a \protect\lstinline@threads.h@ header, it is minimal and defined as optional.
    584 As such, library support for threading is far from widespread.
    585 At the time of writing the paper, neither \protect\lstinline@gcc@ nor \protect\lstinline@clang@ support \protect\lstinline@threads.h@ in their standard libraries.}.
    586 In modern programming languages, a lack of threading is unacceptable~\cite{Sutter05, Sutter05b}, and therefore existing and new programming languages must have tools for writing efficient concurrent programs to take advantage of parallelism.
    587 As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages.
    588 Furthermore, because C is a system-level language, programmers expect to choose precisely which features they need and which cost they are willing to pay.
    589 Hence, concurrent programs should be written using high-level mechanisms, and only step down to lower-level mechanisms when performance bottlenecks are encountered.
    590 
    591 
    592 \subsection{Coroutines: A Stepping Stone}\label{coroutine}
    593 
    594 While the focus of this discussion is concurrency and parallelism, it is important to address coroutines, which are a significant building block of a concurrency system (but not concurrent among themselves).
     640\end{comment}
     641
     642
     643\section{Coroutines: Stepping Stone}
     644\label{coroutine}
     645
    595646Coroutines are generalized routines allowing execution to be temporarily suspended and later resumed.
    596647Hence, unlike a normal routine, a coroutine may not terminate when it returns to its caller, allowing it to be restarted with the values and execution location present at the point of suspension.
     
    616667\centering
    617668\newbox\myboxA
     669% \begin{lrbox}{\myboxA}
     670% \begin{cfa}[aboveskip=0pt,belowskip=0pt]
     671% `int fn1, fn2, state = 1;`   // single global variables
     672% int fib() {
     673%       int fn;
     674%       `switch ( state )` {  // explicit execution state
     675%         case 1: fn = 0;  fn1 = fn;  state = 2;  break;
     676%         case 2: fn = 1;  fn2 = fn1;  fn1 = fn;  state = 3;  break;
     677%         case 3: fn = fn1 + fn2;  fn2 = fn1;  fn1 = fn;  break;
     678%       }
     679%       return fn;
     680% }
     681% int main() {
     682%
     683%       for ( int i = 0; i < 10; i += 1 ) {
     684%               printf( "%d\n", fib() );
     685%       }
     686% }
     687% \end{cfa}
     688% \end{lrbox}
    618689\begin{lrbox}{\myboxA}
    619690\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    620 `int f1, f2, state = 1;`   // single global variables
    621 int fib() {
    622         int fn;
    623         `switch ( state )` {  // explicit execution state
    624           case 1: fn = 0;  f1 = fn;  state = 2;  break;
    625           case 2: fn = 1;  f2 = f1;  f1 = fn;  state = 3;  break;
    626           case 3: fn = f1 + f2;  f2 = f1;  f1 = fn;  break;
    627         }
    628         return fn;
    629 }
     691#define FIB_INIT { 0, 1 }
     692typedef struct { int fn1, fn; } Fib;
     693int fib( Fib * f ) {
     694
     695        int ret = f->fn1;
     696        f->fn1 = f->fn;
     697        f->fn = ret + f->fn;
     698        return ret;
     699}
     700
     701
     702
    630703int main() {
    631 
     704        Fib f1 = FIB_INIT, f2 = FIB_INIT;
    632705        for ( int i = 0; i < 10; i += 1 ) {
    633                 printf( "%d\n", fib() );
     706                printf( "%d %d\n",
     707                                fib( &f1 ), fib( &f2 ) );
    634708        }
    635709}
     
    640714\begin{lrbox}{\myboxB}
    641715\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    642 #define FIB_INIT `{ 0, 1 }`
    643 typedef struct { int f2, f1; } Fib;
    644 int fib( Fib * f ) {
    645 
    646         int ret = f->f2;
    647         int fn = f->f1 + f->f2;
    648         f->f2 = f->f1; f->f1 = fn;
    649 
    650         return ret;
    651 }
    652 int main() {
    653         Fib f1 = FIB_INIT, f2 = FIB_INIT;
    654         for ( int i = 0; i < 10; i += 1 ) {
    655                 printf( "%d %d\n", fib( &f1 ), fib( &f2 ) );
     716`coroutine` Fib { int fn1; };
     717void main( Fib & fib ) with( fib ) {
     718        int fn;
     719        [fn1, fn] = [0, 1];
     720        for () {
     721                `suspend();`
     722                [fn1, fn] = [fn, fn1 + fn];
    656723        }
    657724}
    658 \end{cfa}
    659 \end{lrbox}
    660 
    661 \subfloat[3 States: global variables]{\label{f:GlobalVariables}\usebox\myboxA}
    662 \qquad
    663 \subfloat[1 State: external variables]{\label{f:ExternalState}\usebox\myboxB}
    664 \caption{C Fibonacci Implementations}
    665 \label{f:C-fibonacci}
    666 
    667 \bigskip
    668 
    669 \newbox\myboxA
    670 \begin{lrbox}{\myboxA}
    671 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
    672 `coroutine` Fib { int fn; };
    673 void main( Fib & fib ) with( fib ) {
    674         int f1, f2;
    675         fn = 0;  f1 = fn;  `suspend()`;
    676         fn = 1;  f2 = f1;  f1 = fn;  `suspend()`;
    677         for ( ;; ) {
    678                 fn = f1 + f2;  f2 = f1;  f1 = fn;  `suspend()`;
    679         }
    680 }
    681 int next( Fib & fib ) with( fib ) {
    682         `resume( fib );`
    683         return fn;
     725int ?()( Fib & fib ) with( fib ) {
     726        `resume( fib );`  return fn1;
    684727}
    685728int main() {
    686729        Fib f1, f2;
    687         for ( int i = 1; i <= 10; i += 1 ) {
    688                 sout | next( f1 ) | next( f2 );
    689         }
    690 }
     730        for ( 10 ) {
     731                sout | f1() | f2();
     732}
     733
     734
    691735\end{cfa}
    692736\end{lrbox}
    693 \newbox\myboxB
    694 \begin{lrbox}{\myboxB}
    695 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
    696 `coroutine` Fib { int ret; };
    697 void main( Fib & f ) with( fib ) {
    698         int fn, f1 = 1, f2 = 0;
    699         for ( ;; ) {
    700                 ret = f2;
    701 
    702                 fn = f1 + f2;  f2 = f1;  f1 = fn; `suspend();`
    703         }
    704 }
    705 int next( Fib & fib ) with( fib ) {
    706         `resume( fib );`
    707         return ret;
    708 }
    709 
    710 
    711 
    712 
    713 
    714 
    715 \end{cfa}
     737
     738\newbox\myboxC
     739\begin{lrbox}{\myboxC}
     740\begin{python}[aboveskip=0pt,belowskip=0pt]
     741
     742def Fib():
     743
     744    fn1, fn = 0, 1
     745    while True:
     746        `yield fn1`
     747        fn1, fn = fn, fn1 + fn
     748
     749
     750// next prewritten
     751
     752
     753f1 = Fib()
     754f2 = Fib()
     755for i in range( 10 ):
     756        print( next( f1 ), next( f2 ) )
     757
     758
     759
     760\end{python}
    716761\end{lrbox}
    717 \subfloat[3 States, internal variables]{\label{f:Coroutine3States}\usebox\myboxA}
    718 \qquad\qquad
    719 \subfloat[1 State, internal variables]{\label{f:Coroutine1State}\usebox\myboxB}
    720 \caption{\CFA Coroutine Fibonacci Implementations}
    721 \label{f:cfa-fibonacci}
     762
     763\subfloat[C]{\label{f:GlobalVariables}\usebox\myboxA}
     764\hspace{3pt}
     765\vrule
     766\hspace{3pt}
     767\subfloat[\CFA]{\label{f:ExternalState}\usebox\myboxB}
     768\hspace{3pt}
     769\vrule
     770\hspace{3pt}
     771\subfloat[Python]{\label{f:PythonGenerator}\usebox\myboxC}
     772\caption{Fibonacci Generator}
     773\label{f:C-fibonacci}
     774
     775% \bigskip
     776%
     777% \newbox\myboxA
     778% \begin{lrbox}{\myboxA}
     779% \begin{cfa}[aboveskip=0pt,belowskip=0pt]
     780% `coroutine` Fib { int fn; };
     781% void main( Fib & fib ) with( fib ) {
     782%       fn = 0;  int fn1 = fn; `suspend()`;
     783%       fn = 1;  int fn2 = fn1;  fn1 = fn; `suspend()`;
     784%       for () {
     785%               fn = fn1 + fn2; fn2 = fn1; fn1 = fn; `suspend()`; }
     786% }
     787% int next( Fib & fib ) with( fib ) { `resume( fib );` return fn; }
     788% int main() {
     789%       Fib f1, f2;
     790%       for ( 10 )
     791%               sout | next( f1 ) | next( f2 );
     792% }
     793% \end{cfa}
     794% \end{lrbox}
     795% \newbox\myboxB
     796% \begin{lrbox}{\myboxB}
     797% \begin{python}[aboveskip=0pt,belowskip=0pt]
     798%
     799% def Fibonacci():
     800%       fn = 0; fn1 = fn; `yield fn`  # suspend
     801%       fn = 1; fn2 = fn1; fn1 = fn; `yield fn`
     802%       while True:
     803%               fn = fn1 + fn2; fn2 = fn1; fn1 = fn; `yield fn`
     804%
     805%
     806% f1 = Fibonacci()
     807% f2 = Fibonacci()
     808% for i in range( 10 ):
     809%       print( `next( f1 )`, `next( f2 )` ) # resume
     810%
     811% \end{python}
     812% \end{lrbox}
     813% \subfloat[\CFA]{\label{f:Coroutine3States}\usebox\myboxA}
     814% \qquad
     815% \subfloat[Python]{\label{f:Coroutine1State}\usebox\myboxB}
     816% \caption{Fibonacci input coroutine, 3 states, internal variables}
     817% \label{f:cfa-fibonacci}
    722818\end{figure}
    723819
     
    759855\begin{lrbox}{\myboxA}
    760856\begin{cfa}[aboveskip=0pt,belowskip=0pt]
    761 `coroutine` Format {
    762         char ch;   // used for communication
    763         int g, b;  // global because used in destructor
     857`coroutine` Fmt {
     858        char ch;   // communication variables
     859        int g, b;   // needed in destructor
    764860};
    765 void main( Format & fmt ) with( fmt ) {
    766         for ( ;; ) {
    767                 for ( g = 0; g < 5; g += 1 ) {      // group
    768                         for ( b = 0; b < 4; b += 1 ) { // block
     861void main( Fmt & fmt ) with( fmt ) {
     862        for () {
     863                for ( g = 0; g < 5; g += 1 ) { // groups
     864                        for ( b = 0; b < 4; b += 1 ) { // blocks
    769865                                `suspend();`
    770                                 sout | ch;              // separator
    771                         }
    772                         sout | "  ";               // separator
    773                 }
    774                 sout | nl;
    775         }
    776 }
    777 void ?{}( Format & fmt ) { `resume( fmt );` }
    778 void ^?{}( Format & fmt ) with( fmt ) {
    779         if ( g != 0 || b != 0 ) sout | nl;
    780 }
    781 void format( Format & fmt ) {
    782         `resume( fmt );`
    783 }
     866                                sout | ch; } // print character
     867                        sout | "  "; } // block separator
     868                sout | nl; }  // group separator
     869}
     870void ?{}( Fmt & fmt ) { `resume( fmt );` } // prime
     871void ^?{}( Fmt & fmt ) with( fmt ) { // destructor
     872        if ( g != 0 || b != 0 ) // special case
     873                sout | nl; }
     874void send( Fmt & fmt, char c ) { fmt.ch = c; `resume( fmt )`; }
    784875int main() {
    785         Format fmt;
    786         eof: for ( ;; ) {
    787                 sin | fmt.ch;
    788           if ( eof( sin ) ) break eof;
    789                 format( fmt );
    790         }
     876        Fmt fmt;
     877        sout | nlOff;   // turn off auto newline
     878        for ( 41 )
     879                send( fmt, 'a' );
    791880}
    792881\end{cfa}
     
    795884\newbox\myboxB
    796885\begin{lrbox}{\myboxB}
    797 \begin{cfa}[aboveskip=0pt,belowskip=0pt]
    798 struct Format {
    799         char ch;
    800         int g, b;
    801 };
    802 void format( struct Format * fmt ) {
    803         if ( fmt->ch != -1 ) {      // not EOF ?
    804                 printf( "%c", fmt->ch );
    805                 fmt->b += 1;
    806                 if ( fmt->b == 4 ) {  // block
    807                         printf( "  " );      // separator
    808                         fmt->b = 0;
    809                         fmt->g += 1;
    810                 }
    811                 if ( fmt->g == 5 ) {  // group
    812                         printf( "\n" );     // separator
    813                         fmt->g = 0;
    814                 }
    815         } else {
    816                 if ( fmt->g != 0 || fmt->b != 0 ) printf( "\n" );
    817         }
    818 }
    819 int main() {
    820         struct Format fmt = { 0, 0, 0 };
    821         for ( ;; ) {
    822                 scanf( "%c", &fmt.ch );
    823           if ( feof( stdin ) ) break;
    824                 format( &fmt );
    825         }
    826         fmt.ch = -1;
    827         format( &fmt );
    828 }
    829 \end{cfa}
     886\begin{python}[aboveskip=0pt,belowskip=0pt]
     887
     888
     889
     890def Fmt():
     891        try:
     892                while True:
     893                        for g in range( 5 ):
     894                                for b in range( 4 ):
     895
     896                                        print( `(yield)`, end='' )
     897                                print( '  ', end='' )
     898                        print()
     899
     900
     901        except GeneratorExit:
     902                if g != 0 | b != 0:
     903                        print()
     904
     905
     906fmt = Fmt()
     907`next( fmt )`                    # prime
     908for i in range( 41 ):
     909        `fmt.send( 'a' );`      # send to yield
     910
     911\end{python}
    830912\end{lrbox}
    831 \subfloat[\CFA Coroutine]{\label{f:CFAFmt}\usebox\myboxA}
     913\subfloat[\CFA]{\label{f:CFAFmt}\usebox\myboxA}
    832914\qquad
    833 \subfloat[C Linearized]{\label{f:CFmt}\usebox\myboxB}
    834 \caption{Formatting text into lines of 5 blocks of 4 characters.}
     915\subfloat[Python]{\label{f:CFmt}\usebox\myboxB}
     916\caption{Output formatting text}
    835917\label{f:fmt-line}
    836918\end{figure}
     
    853935void main( Prod & prod ) with( prod ) {
    854936        // 1st resume starts here
    855         for ( int i = 0; i < N; i += 1 ) {
     937        for ( i; N ) {
    856938                int p1 = random( 100 ), p2 = random( 100 );
    857939                sout | p1 | " " | p2;
     
    869951}
    870952void start( Prod & prod, int N, Cons &c ) {
    871         &prod.c = &c;
     953        &prod.c = &c; // reassignable reference
    872954        prod.[N, receipt] = [N, 0];
    873955        `resume( prod );`
     
    884966        Prod & p;
    885967        int p1, p2, status;
    886         _Bool done;
     968        bool done;
    887969};
    888970void ?{}( Cons & cons, Prod & p ) {
    889         &cons.p = &p;
     971        &cons.p = &p; // reassignable reference
    890972        cons.[status, done ] = [0, false];
    891973}
     
    9451027@start@ returns and the program main terminates.
    9461028
     1029One \emph{killer} application for a coroutine is device drivers, which at one time caused 70\%-85\% of failures in Windows/Linux~\cite{Swift05}.
     1030Many device drivers are a finite state-machine parsing a protocol, e.g.:
     1031\begin{tabbing}
     1032\ldots STX \= \ldots message \ldots \= ESC \= ETX \= \ldots message \ldots  \= ETX \= 2-byte crc \= \ldots      \kill
     1033\ldots STX \> \ldots message \ldots \> ESC \> ETX \> \ldots message \ldots  \> ETX \> 2-byte crc \> \ldots
     1034\end{tabbing}
     1035where a network message begins with the control character STX and ends with an ETX, followed by a 2-byte cyclic-redundancy check.
     1036Control characters may appear in a message if preceded by an ESC.
     1037Because FSMs can be complex and occur frequently in important domains, direct support of the coroutine is crucial in a systems programming language.
     1038
     1039\begin{figure}
     1040\begin{cfa}
     1041enum Status { CONT, MSG, ESTX, ELNTH, ECRC };
     1042`coroutine` Driver {
     1043        Status status;
     1044        char * msg, byte;
     1045};
     1046void ?{}( Driver & d, char * m ) { d.msg = m; }         $\C[3.0in]{// constructor}$
     1047Status next( Driver & d, char b ) with( d ) {           $\C{// 'with' opens scope}$
     1048        byte = b; `resume( d );` return status;
     1049}
     1050void main( Driver & d ) with( d ) {
     1051        enum { STX = '\002', ESC = '\033', ETX = '\003', MaxMsg = 64 };
     1052        unsigned short int crc;                                                 $\C{// error checking}$
     1053  msg: for () {                                                                         $\C{// parse message}$
     1054                status = CONT;
     1055                unsigned int lnth = 0, sum = 0;
     1056                while ( byte != STX ) `suspend();`
     1057          emsg: for () {
     1058                        `suspend();`                                                    $\C{// process byte}$
     1059                        choose ( byte ) {                                               $\C{// switch with default break}$
     1060                          case STX:
     1061                                status = ESTX; `suspend();` continue msg;
     1062                          case ETX:
     1063                                break emsg;
     1064                          case ESC:
     1065                                suspend();
     1066                        } // choose
     1067                        if ( lnth >= MaxMsg ) {                                 $\C{// buffer full ?}$
     1068                                status = ELNTH; `suspend();` continue msg; }
     1069                        msg[lnth++] = byte;
     1070                        sum += byte;
     1071                } // for
     1072                msg[lnth] = '\0';                                                       $\C{// terminate string}\CRT$
     1073                `suspend();`
     1074                crc = (unsigned char)byte << 8; // prevent sign extension for signed char
     1075                `suspend();`
     1076                status = (crc | (unsigned char)byte) == sum ? MSG : ECRC;
     1077                `suspend();`
     1078        } // for
     1079}
     1080\end{cfa}
     1081\caption{Device driver for simple communication protocol}
     1082\end{figure}
     1083
    9471084
    9481085\subsection{Coroutine Implementation}
     
    10601197\end{cquote}
    10611198The combination of these two approaches allows an easy and concise specification to coroutining (and concurrency) for normal users, while more advanced users have tighter control on memory layout and initialization.
     1199
     1200
     1201\section{Concurrency}
     1202\label{s:Concurrency}
     1203
     1204At its core, concurrency is based on multiple call-stacks and scheduling threads executing on these stacks.
     1205Multiple call stacks (or contexts) and a single thread of execution, called \newterm{coroutining}~\cite{Conway63,Marlin80}, does \emph{not} imply concurrency~\cite[\S~2]{Buhr05a}.
     1206In coroutining, the single thread is self-scheduling across the stacks, so execution is deterministic, \ie the execution path from input to output is fixed and predictable.
     1207A \newterm{stackless} coroutine executes on the caller's stack~\cite{Python} but this approach is restrictive, \eg preventing modularization and supporting only iterator/generator-style programming;
     1208a \newterm{stackful} coroutine executes on its own stack, allowing full generality.
     1209Only stackful coroutines are a stepping stone to concurrency.
     1210
     1211The transition to concurrency, even for execution with a single thread and multiple stacks, occurs when coroutines also context switch to a \newterm{scheduling oracle}, introducing non-determinism from the coroutine perspective~\cite[\S~3]{Buhr05a}.
     1212Therefore, a minimal concurrency system is possible using coroutines (see Section \ref{coroutine}) in conjunction with a scheduler to decide where to context switch next.
     1213The resulting execution system now follows a cooperative threading-model, called \newterm{non-preemptive scheduling}.
     1214
     1215Because the scheduler is special, it can either be a stackless or stackful coroutine.
     1216For stackless, the scheduler performs scheduling on the stack of the current coroutine and switches directly to the next coroutine, so there is one context switch.
     1217For stackful, the current coroutine switches to the scheduler, which performs scheduling, and it then switches to the next coroutine, so there are two context switches.
     1218A stackful scheduler is often used for simplicity and security.
     1219
     1220Regardless of the approach used, a subset of concurrency related challenges start to appear.
     1221For the complete set of concurrency challenges to occur, the missing feature is \newterm{preemption}, where context switching occurs randomly between any two instructions, often based on a timer interrupt, called \newterm{preemptive scheduling}.
     1222While a scheduler introduces uncertainty in the order of execution, preemption introduces uncertainty about where context switches occur.
     1223Interestingly, uncertainty is necessary for the runtime (operating) system to give the illusion of parallelism on a single processor and increase performance on multiple processors.
     1224The reason is that only the runtime has complete knowledge about resources and how to best utilize them.
     1225However, the introduction of unrestricted non-determinism results in the need for \newterm{mutual exclusion} and \newterm{synchronization} to restrict non-determinism for correctness;
     1226otherwise, it is impossible to write meaningful programs.
     1227Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows.
     1228
     1229An important missing feature in C is threading\footnote{While the C11 standard defines a \protect\lstinline@threads.h@ header, it is minimal and defined as optional.
     1230As such, library support for threading is far from widespread.
     1231At the time of writing the paper, neither \protect\lstinline@gcc@ nor \protect\lstinline@clang@ support \protect\lstinline@threads.h@ in their standard libraries.}.
     1232In modern programming languages, a lack of threading is unacceptable~\cite{Sutter05, Sutter05b}, and therefore existing and new programming languages must have tools for writing efficient concurrent programs to take advantage of parallelism.
     1233As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages.
     1234Furthermore, because C is a system-level language, programmers expect to choose precisely which features they need and which cost they are willing to pay.
     1235Hence, concurrent programs should be written using high-level mechanisms, and only step down to lower-level mechanisms when performance bottlenecks are encountered.
    10621236
    10631237
  • doc/papers/concurrency/mail

    r6a9d4b4 r933f32f  
    2727
    2828Software: Practice and Experience Editorial Office
     29
     30
     31
     32Date: Wed, 3 Oct 2018 21:25:28 +0000
     33From: Richard Jones <onbehalfof@manuscriptcentral.com>
     34Reply-To: R.E.Jones@kent.ac.uk
     35To: tdelisle@uwaterloo.ca, pabuhr@uwaterloo.ca
     36Subject: Software: Practice and Experience - Decision on Manuscript ID
     37 SPE-18-0205
     38
     3903-Oct-2018
     40
     41Dear Dr Buhr,
     42
     43Many thanks for submitting SPE-18-0205 entitled "Concurrency in C∀" to Software: Practice and Experience.
     44
     45In view of the comments of the referees found at the bottom of this letter, I cannot accept your paper for publication in Software: Practice and Experience. I hope that you find the referees' very detailed comments helpful.
     46
     47Thank you for considering Software: Practice and Experience for the publication of your research.  I hope the outcome of this specific submission will not discourage you from submitting future manuscripts.
     48
     49Yours sincerely,
     50
     51
     52Prof. Richard Jones
     53Editor, Software: Practice and Experience
     54R.E.Jones@kent.ac.uk
     55
     56Referee(s)' Comments to Author:
     57
     58Reviewing: 1
     59
     60Comments to the Author
     61"Concurrency in Cforall" presents a design and implementation of a set of standard concurrency features, including coroutines, user-space and kernel-space threads, mutexes, monitors, and a scheduler, for a polymorphic derivation of C called Cforall.
     62
     63Section 2 is an overview of sequential Cforall that does not materially contribute to the paper. A brief syntax explanation where necessary in examples would be plenty.
     64
     65Section 3 begins with an extensive discussion of concurrency that also does not materially contribute to the paper. A brief mention of whether a particular approach implements cooperative or preemptive scheduling would be sufficient. Section 3 also makes some unfortunate claims, such as C not having threads -- C does in fact define threads, and this is noted as being true in a footnote, immediately after claiming that it does not. The question remains why the C11 parallelism design is insufficient and in what way this paper proposes to augment it. While I am personally a proponent of parallel programming languages, backing the assertion that all modern languages must have threading with citations from 2005 ignores the massive popularity of modern non-parallel languages (Javascript, node.js, Typescript, Python, Ruby, etc.) and parallel languages that are not thread based, although the authors are clearly aware of such approaches.
     66
     67Sections 3.1 and 3.2 discuss asymmetric and symmetric coroutines. This also does not seem to materially contribute to a paper that is ostensibly about concurrency in a modern systems programming language. The area of coroutines, continuations, and generators is already well explored in the context of systems languages, including compilation techniques for these constructs that are more advanced than the stack instantiation model discussed in the paper.
     68
     69Section 3.3 describes threads in Cforall, briefly touching on user-space vs. kernel-space thread implementations without detailing the extensive practical differences. It is unclear how the described interface differs from C++11 threads, as the description seems to center on an RAII style approach to joining in the destructor.
     70
     71Section 4 briefly touches on a collection of well known synchronisation primitives. Again, this discussion does not materially contribute to the paper.
     72
     73Section 5 describes monitors, which are a well known and well researched technique. The Cforall implementation is unsurprising. The "multi-acquire semantics" described are not a contribution of this paper, as establishing a stable order for lock acquisition is a well known technique, one example of which is the C++ std::scoped_lock.
     74
     75Section 6 is a discussion of scheduling that does not appear to be informed by the literature. There is no discussion of work-stealing vs. work-scheduling, static vs. dynamic priorities, priority inversion, or fairness. There is a claim in section 6.1 for a novel technique, partial signalling, that appears to be a form of dynamic priority, but no comparison is made. In section 6.6, a very brief mention of other synchronisation techniques is made, without reference to current techniques such as array-based locks, CLH or MCS queue locks, RCU and other epoch-based mechanisms, etc. Perhaps these are considered out of scope.
     76
     77Section 7 discusses parallelism, but does not materially contribute to the paper. It is claimed that preemption is necessary to implement spinning, which is not correct, since two cores can implement a spinning based approach without preemption. It is claimed that with thread pools "concurrency errors return", but no approach to removing concurrency errors with either preemptive or cooperatively scheduled user threads has been proposed in the paper that would not also apply to thread pools.
     78
     79Section 8 is intended to describe the Cforall runtime structure, but does so in a way that uses terminology in an unfamiliar way. The word cluster is more usually used in distributed systems, but here refers to a process. The term virtual processor is more usually used in hardware virtualisation, but here refers to a kernel thread. The term debug kernel is more usually used in operating systems to refer to kernels that have both debug info and a method for using a debugger in kernel space, but here refers to a debug build of a user-space process. This section does not materially contribute to the paper.
     80
     81Section 9 is intended to describe the Cforall runtime implementation. It makes some unusual claims, such as C libraries migrating to stack chaining (stack chaining was an experimental GCC feature that has been abandoned, much as it has been abandoned in both Go and Rust).
     82
     83The performance measurements in section 10 are difficult to evaluate. While I appreciate that comparable concurrency benchmarks are very difficult to write, and the corpus of existing benchmarks primarily boils down to the parallel programs in the Computer Language Benchmark Game, the lack of detail as to what is being measured in these benchmarks (particularly when implemented in other languages) is unfortunate. For example, in table 3, the benchmark appears to measure uncontended lock access, which is not a useful micro-benchmark.
     84
     85It is not clear what the contributions of this paper are intended to be. A concise listing of the intended contributions would be helpful. Currently, it appears that the paper makes neither PL contributions in terms of novel features in Cforall, nor does it make systems contributions in terms of novel features in the runtime.
     86
     87
     88Reviewing: 2
     89
     90Comments to the Author
     91This article presents the design and rationale behind the concurrency
     92features of C-forall, a new low-level programming language.  After an
     93introduction that defines a selection of standard terminology, section
     942 gives crucial background on the design of the C-forall language.
     95Section 3 then starts the core of the article, discussing the
     96language's support for "concurrency" which in this case means
     97coroutines and threads; a very brief Section 4 builds on section 3
     98with a discussion of lower level synchronizations.  Section 5 then
     99presents the main features of concurrency control in C-forall:
     100monitors and mutexes. Section 6 then extends monitors with condition
     101variables to support scheduling, and a very brief section 7
     102discusses preemption and pooling. Section 8 discusses the runtime
     103conceptual model, section 9 gives implementation detail, and section
     10410 briefly evaluates C-forall's performance via five concurrent
     105micro benchmarks. Finally section 11 concludes the article, and then
     106section 12 presents some future work. 
     107
     108
     109At the start of section 7, the article lays out its rationale: that while
     110"historically, computer performance was about processor speeds" but
     111"Now, high-performance applications must care about parallelism,
     112which requires concurrency". The doomsayers trumpeting the death of
     113Moore's law have been proved correct at last, with CPUs sequential
     114performance increasing much more slowly than the number of cores
     115within each die. This means programmers --- especially low-level,
     116systems programmers --- must somehow manage the essential complexity
     117of writing concurrent programs to run in parallel in multiple threads
     118across multiple cores. Unfortunately, the most venerable widely used
     119systems programming language, C, supports parallelism only via a
     120library, e.g. the threads library.  This article aims to integrate concurrent
     121programming mechanisms more closely into a novel low-level C-based
     122programming language, C-forall. The article gives an outline of much of
     123C-forall, presents a series of concurrency mechanisms, and finally
     124some microbenchmark results.  The article is detailed, comprehensive,
     125and generally well written in understandable English.
     126
     127My main concern about the article is indicated by the fact that the
     128best summary of the problem the design of concurrent C-forall sets
     129out to solve is buried more than halfway through the article in section
     1307, as above, and then the best overview of the proposed solution is
     131given in the 2nd, 4th and 5th sentence of the conclusion:
     132
     133   "The approach provides concurrency based on a preemptive M:N
     134    user-level threading-system, executing in clusters, which
     135    encapsulate scheduling of work on multiple kernel threads
     136    providing parallelism... High-level objects (monitor/task) are the
     137    core mechanism for mutual exclusion and synchronization. A novel
     138    aspect is allowing multiple mutex-objects to be accessed
     139    simultaneously reducing the potential for deadlock for this
     140    complex scenario."
     141
     142That is, in my reading of the article, it proceeds bottom up rather
     143than top down, and so my main recommendation is to essentially reverse
     144the order of the article, proceeding from the problem to be solved,
     145the high level architecture of the proposed solutions, and then going
     146down to the low-level mechanisms.  My biggest problem reading the
     147article was for explanations of why a particular decision was taken,
     148or why a particular mechanism may be used --- often this description
     149is actually later in the article, but at that point it's too late for
     150the reader.  I have tried to point out most of these places in the
     151detailed comments below.
     152
     153My second concern is that the article makes several claims that are
     154not really justified by the design or implementation in the article.
     155These include claims that this approach meets the expectations of C
     156programmers, is minimal, is implemented in itself, etc.  The article
     157doesn't generally offer evidence to support these assertions (for many
     158of them, that would require empirical studies of programmers, or at
     159least corpus studies). The solution here is to talk about motivations
     160for the design choices "we made these decisions hoping that C
     161programmers would be comfortable" rather than claims of fact "C
     162programmers are comfortable".  Again I attempt to point these out below.
     163
     164* abstract: needs to characterize the work top down, and not make
     165  claims "features respect the expectations of C programmers" that
     166  are not supported empirically.
     167
     168* p1 line 14 "integrated"
     169
     170* introduction needs to introduce the big ideas and scope of the
     171  article, not define terms.  Some of the terms / distinctions are
     172  non-standard (e.g. the distinction between "concurrency" and
     173  "parallelism") and can be avoided by using more specific terms
     174  (mutual exclusion, synchronization, parallel execution. etc).
     175
     176* to me this article introduces novel language features, not just an
     177  API.  Similarly, it doesn't talk about any additions "to the
     178  language translator" - i.e compiler changes! - rather about language
     179  features.
     180
     181
     182* section 2 lines 6-9 why buy this fight against object-orientation?
     183  this article doesn't need to make this argument, but needs to do a
     184  better job of it if it does (see other comments below)
     185
     186* sec 2.1 - are these the same as C++? If so, say so, if not, say why
     187  not.
     188
     189* 2.2 calling it a "with statement" was confusing, given that a with
     190  clause can appear in a routine declaration with a shorthand syntax.
     191
     192* 2.3 again compare with C++ and Java (as well as Ada)
     193
     194* line 9 "as we will see in section 3"
     195
     196* 2.4 I really quite like this syntax for operators, destructors not
     197  so much.
     198
     199* 2.5 and many places elsewhere. Always first describe the semantics
     200  of your language constructs, then describe their properties, then
     201  compare with e.g. related languages (mostly C++ & Java?).  E.g in
     202  this case, something like:
     203
     204  "C-forall includes constructors, which are called to initialize
     205  newly allocated objects, and destructors, which are called when
     206  objects are deallocated. Constructors and destructors are written as
     207  functions returning void, under the special names "?{}" for
     208  constructors and "^{}" for destructors: constructors may be
     209  overridden, but destructors may not be.  The semantics of C-forall's
     210  constructors and destructors are essentially those of C++."
     211
     212  this problem repeats many times throughout the article and should be
     213  fixed everywhere.
     214
     215
     216* 2.6 again, first describe then properties then comparison.
     217   in this case, compare e.g. with C++ templates, Java/Ada generics
     218   etc.
     219
     220* why special case forward declarations? It's not 1970 any more.
     221
     222* what are traits?  structural interfaces (like Go interfaces) or
     223  nominal bindings?
     224
     225* section 3 - lines 2-30, also making very specific global definitions
     226  as in the introduction. The article does not need to take on this
     227  fight either, rather make clear that this is the conceptual model in
     228  C-forall. (If the article starts at the top and works down, that may
     229  well follow anyway).
     230
     231* "in modern programming languages... unacceptable"; "in a
     232  system-level language.. concurrent programs should be written with
     233  high-level features" - again, no need to take on these fights.
     234
     235* 3.1 onwards; I found all this "building" up hard to follow.
     236  also it's not clear a "minimal" API must separately support
     237  coroutines, threads, fibres, etc
     238
     239* FIG 2B - where's the output?
     240  syntax "sout | next(f1) | next(f2) | endl" nowhere explained
     241    why not use C++s' << and >>
     242
     243* FIG 3 be clearer, earlier about the coroutine constructor syntax
     244
     245** ensure all figures are placed *after* their first mention in the
     246   text. consider interleaving smaller snippets of text rather than
     247   just referring to large figures
     248
     249* sec 3.1 p7 etc,. need more context / comparison e.g. Python
     250  generators etc.
     251
     252* FIGURE 4 is this right?  should there be a constructor for Cons taking
     253  a Prod?
     254
     255
     256* sec 3.2 order of constructors depends on the language.  more
     257  generally, if the article is going to make arguments against OO
     258  (e.g. section 2) then the article needs to explain, in detail, why
     259  e.g. coroutine, thread, etc *cannot* be classes / objects.
     260
     261* "type coroutine_t must be an abstract handle.. descriptor and is
     262  stack are non-copyable" - too many assumptions in here (and other
     263  similar passages) that are not really spelled out in detail.
     264
     265* p10 line 4 introduces "coroutine" keyword. needs to give its
     266  semantics. also needs to introduce and define properties and compare
     267  before all the examples using coroutines.
     268
     269* p10 again, trait semantics need to be better defined
     270
     271* 3.3 should be an introduction to this section. Note that section
     272  titles are not part of the text of the article.
     273
     274* what's the difference between "coroutines" and "user threads" (and
     275  "fibres?")
     276
     277* what's a "task type" or an "interface routine"  or "underlying
     278  thread"
     279
     280* section 4 - "... meaningless". nope some semantics are possible
     281  e.g. if there's a memory model.
     282
     283* what are "call/return based languages"?
     284
     285* p12 - what if a programmer wants to join e.g. "1st of N" or "1st 3 of N"
     286  threads rather than all threads in order
     287
     288* 4.1 p12 13-25, again it's not clear where this is going.  presenting the model
     289  top down may hopefully resolve this
     290
     291* section 4 should be merged e.g. into sec 3 (or 5)
     292
     293
     294
     295* section 5 p13 what's "routine" scope. "call/return paradigm"
     296
     297* thread/ coroutine declarations, traits etc, all look pretty close to
     298  inheritance. why wouldn't inheritance work?
     299
     300* open/closed locks = free/acquired free locks?
     301
     302* testability?
     303
     304* p14 lines 14-20 I had trouble following this.  e.g. what's the
     305  difference between "a type that is a monitor" and "a type that looks
     306  like a monitor"?  why?
     307
     308* line 39 - what's an "object-oriented monitor"?    Java?
     309    there is no one OO model of such things.
     310
     311* line 47 significant asset - how do you know?
     312
     313* how could this e.g. build a reader/writer lock
     314
     315* *p15 what's the "bank account transfer problem"
     316
     317*p16 lines6-10  why? explain?
     318
     319*p17 semantics of arrays of conditions is unclear
     320     given e.g. previous comments about arrays of mutexes.
     321
     322*p18 define "spurious wakeup"
     323
     324*p18 line 44 - "a number of approaches were examined"?  which
     325 approaches? examined by whom?  if this is a novel contribution, needs
     326 rather more there, and more comparison with related work
     327
     328* FIG 8 consider e.g. sequence diagrams rather than code to show these
     329  cases
     330
     331* 6.2 p19 line 5 "similarly, monitor routines can be added at any
     332  time" really?  I thought C-forall was compiled? there's a big
     333  difference between "static" and "dynamic" inheritance. which is this
     334  closer to?
     335
     336* line 25 "Figure 9 (B) shows the monitor implementation"
     337   I didn't understand this, especially not as an implementation.
     338
     339* section 6.6 - if the article is to make claims about completeness,
     340  about supporting low and high level operations, then this must be
     341  expanded to give enough detail to support that argument
     342
     343* "truest realization" huh?
     344
     345* section 7 should be merged into 6 or 8.
     346  it's not clear if this is exploring rejected alternatives,
     347  or outlining different features offered by C-forall, or what.
     348
     349
     350* sec 7.2 how do the other threads in sections 5 & 6 relate to the
     351  user threads, fibres, etc here;
     352
     353* sec 8.1 I found these sections hard to follow. how is a cluster a
     354  "collection of threads and virtual processors... like a virtual
     355  machine"? Where do the thread pools from 7.3 fit in?
     356
     357*  sec 8.3 is out of place, probably unneeded in the paper
     358
     359* section 9 dives straight into details with no overview.  Section 9
     360  seems very detailed, and depends on assumptions or details that are
     361  not in the article.
     362
     363* section 10 covers only microbenchmarks. are there any moderate sized
     364  macrobenchmarks that can compare across the different systems?
     365  (e.g the Erlang Ring?)
     366
     367* sec 11 claims that "the entire C-forall runtime system are written
     368  in C-forall". The article doesn't
     369
     370
     371* future work should precede conclusion, not follow it
     372
     373* the article should have a related work section (2-3 pages) comparing
     374  the design overall with various competing designs (C++, Java, go,
     375  Rust,...)
     376
     377To encourage accountability, I'm signing my reviews in 2018. For the record, I am James Noble, kjx@ecs.vuw.ac.nz.
     378
     379Reviewing: 3
     380
     381Comments to the Author
     382This paper describes the design and implementation of coroutine- and thread-based concurrency in the C-for-all (I will write "C\/") system, a considerably extended form of the C language with many concurrency features.
     383
     384It first provides an overview of the non-concurrency-related aspects of the host language (references, operator overloading, generics, etc.), then addresses several technical issues around concurrency, including the multi-monitor design, bulk acquiring of locks (including deadlock-avoiding management of acquisition order), solutions to difficult scheduling problems around these, and implementation of monitors in the presence of separate compilation. It also presents empirical data showing the execution times of several microbenchmarks in comparison with other threaded concurrency systems, in support of the claim that the implementation is competitive with them.
     385
     386Overall the impression I gained is that this is a substantial system into which have gone much thought and effort.
     387
     388However, the present paper is not written so as to communicate sufficiently clearly the novel practices or experiences that emerged from that effort. This manifests itself in several ways.
     389
     390The system is described in general, rather than with a focus on novel insights or experiences. It was not until page 18 that I found a statement that hinted at a possible core contribution: "Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in design and implementation of C\/ concurrency." Even then, it is unclear whether such challenges have already been surmounted in prior systems, or what other challenges the paper may also be covering. The most complete list of claims appears to be in the Conclusion (section 11; oddly not the last section), although not everything listed is a novel feature of the work (e.g. N:M threading models are an old idea). This presentation needs to be completely inverted, to focus from the outset on the claimed novel/noteworthy experiences that the work embodies.
     391
     392The text describing the system's motivation is unconvincing on one point: the claim that library support for threading in C is "far from widespread" (p5, footnote A). The pthreads library API is standardised, albeit not in the C language specification but rather in POSIX -- a widespread standard indeed. (With systems languages, even if the language does not define a feature, it of course does not follow that that feature is not available -- since such languages permit extension of their own runtime and/or toolchain.) Of course, the combination of C and pthreads does not provide close to the full complement of C\/-supported features, so it is easy to make a case for C\/'s targeted "gap in the market". But again, a presentation focused on novel aspects would bring this out and enable the reader to learn from the authors' efforts much more readily.
     393
     394Certain sections of the text read like a tutorial on concurrency... which is potentially valuable, but does not seem to belong here. For example, much effort is spent introducing the notions of "synchronization" and "mutual exclusion", including the whole of Section 4.2. Presently it is unclear how this content supports the findings/experiences that the paper is detailing.
     395
     396Similarly, section 8 reads mostly as a basic introduction to user versus kernel threading implementations (including hybrid models such as N:M scheduling), and appears superfluous to this paper. Mixed into this are details of C\/'s specific approach. These could instead be stated directly, with references to handle the unlikely case where the reader is unfamiliar.
     397
     398I also found the definitions of certain terms through the paper a bit non-standard, for unclear reasons. For example, why "condition lock" rather than the standard "condition variable" (if indeed that is what is intended)? To say that "synchronisation" is about "timing" strikes me as potentially confusing, since in truth synchronisation concerns only relative timing, i.e. ordering. (Even ordering is something of a derived concept -- since of course, most commonly, control over ordering is built atop synchronisation primitives, rather than being provided directly by them.)
     399
     400The empirical data presented is a reasonable start at characterising the implementation's performance. However, it currently suffers certain flaws.
     401
     402Firstly, it is not clear what is being claimed. The data cannot really be said to "verify the implementation" (section 10). Presumably the claim is that the system is competitive with other systems offering reasonably high-level concurrency constructs (Java monitors, Go channels, etc.) and/or on low-level facilities (mutexes, coroutines). A claim of this form, emphasising the latter, does eventually appear in the Conclusion, but it needs to be made explicitly during the presentation of the experiments. Shifting the focus towards higher-level features may be a better target, since this appears to be C\/'s main advance over pthreads and similar libraries.
     403
     404It appears some additional or alternative competitor systems might be a better match. For example, many green-thread or N:M libraries for C exist (libdill/libmill, Marcel, even GNU Pth). It would be instructive to compare with these.
     405
     406It would help greatly if the "functionally identical" benchmark code that was run on the competing systems were made available somewhere. Omitting it from the main text of the paper is understandable, since it would take too much space, but its details may still have a critical bearing on the results.
     407
     408In some cases it simply wasn't clear what is being compared. In Table 3, what are "FetchAdd + FetchSub"? I'm guessing this is some open-coded mutex using C++ atomics, but (unless I'm missing something) I cannot see an explanation in the text.
     409
     410The reports of variance (or, rather, standard deviation) are not always plausible. Is there really no observable variation in three of Table 3's cases? At the least, I would appreciate more detail on the measures taken to reduce run-time variance (e.g. disabling CPU throttling perhaps?).
     411
     412The text habitually asserts the benefits of C\/'s design without convincing argument. For example, in 2.1, do C\/'s references really reduce "syntactic noise"? I am sympathetic to the problem here, because many design trade-offs simply cannot be evaluated without very large-scale or long-term studies. However, the authors could easily refrain from extrapolating to a grand claim that cannot be substantiated. For example, instead of saying C\/ is "expressive" or "flexible" or "natural", or (say) that fork/join concurrency is "awkward and unnecessary" (p11), it would be preferable simply to give examples of the cases that are captured well in the C\/ design (ideally together with any less favourable examples that illustrate the design trade-off in question) and let them speak for themselves.
     413
     414One thing I found confusing in the presentation of coroutines is that it elides the distinction between "coroutines" (i.e. their definitions) and activations thereof. It would be helpful to make this clearer, since at present this makes some claims/statements hard to understand. For example, much of 3.2 talks about "adding fields", which implies that a coroutine's activation state exists as fields in a structured object -- as, indeed, it does in C\/. This is non-obvious because in a more classical presentation of coroutines, their state would live not in "fields" but in local variables. Similarly, the text also talks about composition of "coroutines" as fields within other "coroutines", and so on, whereas if I understand correctly, these are also activations. (By later on in the text, the "C\/ style" of such constructs is clear, but not at first.)
     415
     416I was expecting a reference to Adya et al's 2002 Usenix ATC paper, on the topic of "fibers" and cooperative threading generally but also for its illustrative examples of stack ripping (maybe around "linearized code is the bane of device drivers", p7, which seems to be making a similar observation).
     417
     418Minor comments:
     419
     420The writing is rather patchy. It has many typos, and also some cases of "not meaning what is said", unclear allusions, etc.. The following is a non-exhaustive list.
     421
     422- p2 line 7: "C has a notion of objects" -- true, but this is not intended as "object" in anything like the same sense as "object-oriented", so raising it here is somewhere between confusing and meaningless.
     423
     424- lots of extraneous hyphenation e.g "inheritance-relationships", "critical-section", "mutual-exclusion", "shared-state" (as a general rule, only hyphenate noun phrases when making an adjective out of them)
     425
     426- p4 "impossible in most type systems" -- this is not a property of the "type system" as usually understood, merely the wider language design
     427
     428- p17: "release all acquired mutex types in the parameter list" should just say "release all acquired mutexes that are designated in the parameter list" (it is not "types" that are being released or acquired);
     429
     430- p19: "a class includes an exhaustive list of operations" -- except it is definitively *not* exhaustive, for the reasons given immediately afterwards. I do see the problem here, about separate compilation meaning that the space of functions using a particular type is not bounded at compile time, but that needs to be identified clearly as the problem. (Incidentally, one idea is that perhaps this mapping onto a dense space could be solved at link- or load-time, in preference to run-time indirection.)
     431
     432- p22: in 6.5, the significance of this design decision ("threads... are monitors") was still not clear to me.
     433
     434- p22: [user threads are] "the truest realization of concurrency" sounds like unnecessary editorializing (many systems can exist that can also encode all others, without necessarily giving one supremacy... e.g. actors can be used to encode shared-state concurrency).
     435
     436- p24: on line 19, the necessary feature is not "garbage collection" but precise pointer identification (which is distinct; not all GCs have it, and it has other applications besides GC)
     437
     438- p24: lines 32-39 are very dense and of unclear significance; an example, including code, would be much clearer.
     439
     440- p25: "current UNIX systems" seems to mean "Linux", so please say that or give the behaviour of some other modern Unix (I believe Solaris is somewhat different, and possibly the BSDs too). Also, in the explanation of signal dynamics, it would be useful to adopt the quotation's own terminology of "process-directed" signals. Presumably the "internal" thread-directed signals were generated using tgkill()? And presumably the timer expiry signal is left unblocked only on the thread (virtual processor) running the "simulation"? (Calling it a "simulation" is a bit odd, although I realise it is borrowing the concept of a discrete event queue.)
     441
  • doc/proposals/vtable.md

    r6a9d4b4 r933f32f  
    22==================================
    33
    4 This is an adaptation of the earlier virtual proposal, updating it with new
    5 ideas, re-framing it and laying out more design decisions. It should
    6 eventually replace the earlier proposal, but not all features and syntax have
    7 been converted to the new design.
    8 
    94The basic concept of a virtual table (vtable) is the same here as in most
    10 other languages. They will mostly contain function pointers although they
    11 should be able to store anything that goes into a trait.
     5other languages that use them. They will mostly contain function pointers
     6although they should be able to store anything that goes into a trait.
     7
     8I also include notes on a sample implementation, which primarily exists to show
     9there is a reasonable implementation. The code samples for that are in a slight
     10pseudo-code to help avoid name mangling and keeps some CFA features while they
     11would actually be written in C.
    1212
    1313Trait Instances
     
    1515
    1616Currently traits are completely abstract. Data types might implement a trait
    17 but traits are not themselves data types. This will change that and allow
    18 instances of traits to be created from instances of data types that implement
    19 the trait.
     17but traits are not themselves data types. Which is to say you cannot have an
     18instance of a trait. This proposal will change that and allow instances of
     19traits to be created from instances of data types that implement the trait.
     20
     21For example:
    2022
    2123    trait combiner(otype T) {
    22                 void combine(T&, int);
    23         };
     24        void combine(T&, int);
     25    };
    2426
    2527    struct summation {
    26                 int sum;
    27         };
    28 
    29         void ?{}( struct summation & this ) {
    30                 this.sum = 0;
    31         }
     28        int sum;
     29    };
     30
     31    void ?{}( struct summation & this ) {
     32        this.sum = 0;
     33    }
    3234
    3335    void combine( struct summation & this, int num ) {
    34                 this.sum = this.sum + num;
    35         }
    36 
    37         trait combiner obj = struct summation{};
    38         combine(obj, 5);
     36        this.sum = this.sum + num;
     37    }
     38
     39    trait combiner obj = struct summation{};
     40    combine(obj, 5);
    3941
    4042As with `struct` (and `union` and `enum`), `trait` might be optional when
     
    4244before.
    4345
    44 Internally a trait object is a pair of pointers. One to an underlying object
    45 and the other to the vtable. All calls on an trait are implemented by looking
    46 up the matching function pointer and passing the underlying object and the
    47 remaining arguments to it.
    48 
    49 Trait objects can be moved by moving the pointers. Almost all other operations
    50 require some functions to be implemented on the underlying type. Depending on
    51 what is in the virtual table a trait type could be a dtype or otype.
     46For traits to be used this way they should meet two requirements. First they
     47should only have a single polymorphic type and each assertion should use that
     48type once as a parameter. Extensions may later loosen these requirements.
     49
     50Also note this applies to the final expanded list of assertions. Consider:
     51
     52    trait foo(otype T, otype U) {
     53        ... functions that use T once ...
     54    }
     55
     56    trait bar(otype S | foo(S, char)) {
     57        ... functions that use S once ...
     58    }
     59
     60In this example `bar` may be used as a type but `foo` may not.
     61
     62When a trait is used as a type it creates a generic object which combines
     63the base structure (an instance of `summation` in this case) and the vtable,
     64which is currently created and provided by a hidden mechanism.
     65
     66The generic object type for each trait also implements that trait. This is
     67actually the only means by which it can be used. The types of these functions
     68look something like this:
     69
     70    void combine(trait combiner & this, int num);
     71
     72The main use case for trait objects is that they can be stored. They can be
     73passed into functions, but using the trait directly is preferred in this case.
     74
     75    trait drawable(otype T) {
     76        void draw(Surface & to, T & draw);
     77        Rect(int) drawArea(T & draw);
     78    };
     79
     80    struct UpdatingSurface {
     81        Surface * surface;
     82        vector(trait drawable) drawables;
     83    };
     84
     85    void updateSurface(UpdatingSurface & us) {
     86        for (size_t i = 0 ; i < us.drawables.size ; ++i) {
     87            draw(us.surface, us.drawables[i]);
     88        }
     89    }
     90
     91With a more complete widget trait you could, for example, construct a UI tool
     92kit that can declare containers that hold widgets without knowing about the
     93widget types, making it reasonable to extend the tool kit.
     94
     95The trait types can also be used in the types of assertions on traits as well.
     96In this usage they are passed as the underlying object and vtable pair as they
     97are stored. The trait types can also be used in that trait's definition, which
     98means you can pass two instances of a trait to a single function. However the
     99vtable of the second one is not used to look up any functions, until another
     100function that uses that object in the generic/look-up location is called.
     101
     102    trait example(otype T) {
     103        bool test(T & this, trait example & that);
     104    }
     105
     106### Explanation Of Restrictions
     107
     108The two restrictions on traits that can be used as trait objects are:
     109
     1101.  Only one generic parameter may be defined in the trait's header.
     1112.  Each function assertion must have one parameter with the type of the
     112    generic parameter. They may or may not return a value of that type.
     113
     114Elsewhere in this proposal I suggest ways to broaden these requirements.
     115A simple example would be if a trait meets requirement 1 but not 2, then
     116the assertions that do not satisfy the exactly one parameter requirement can
     117be ignored.
     118
     119However I would like to talk about why these two rules are in place in the
     120first place and the problems that any exceptions to these rules must avoid.
     121
     122The problems appear in the dispatcher function which operates on the
     123generic object.
     124
     125    trait combiner(otype T, otype U) {
     126        void combine(T&, U);
     127    }
     128
     129This one is so strange I don't have proper syntax for it but let us say that
     130the concrete dispatcher would be typed as
     131`void combine(combiner(T) &, combiner(U));`. Does the function that combines
     132the two underlying types exist to dispatch to?
     133
     134Maybe not. If `combiner(T)` works with ints and `combiner(U)` is a char then
     135there might not be. We would have to enforce that one exists for all pairs of
     136types that are wrapped in this way. Which would pretty much destroy any chance of
     137separate compilation.
     138
     139Even then it would be more expensive as the wrappers would have to carry ids
     140that you use to look up on an <number of types>+1 dimensional table.
     141
     142The second restriction has a similar issue but makes a bit more sense to
     143write out.
     144
     145    trait Series(otype T) {
     146        ... size, iterators, getters ...
     147        T join(T const &, T const &);
     148    }
     149
     150With the dispatcher typed as:
     151
     152    Series join(Series const &, Series const &);
     153
     154Because these instances are generic and hide the underlying implementation we
     155do not know what that implementation is. Unfortunately this also means the
     156implementation for the two parameters might not be the same. Once we have
     157two different types involved this devolves into the first case.
     158
     159We could check at run-time that they have the same underlying type, but this
     160would likely add time and space overhead and there is no clear recovery path.
     161
     162#### Sample Implementation
     163A simple way to implement trait objects is by a pair of pointers. One to the
     164underlying object and one to the vtable.
     165
     166    struct vtable_drawable {
     167        void (*draw)(Surface &, void *);
     168        Rect(int) (*drawArea)(void *);
     169    };
     170
     171    struct drawable {
     172        void * object;
     173        vtable_drawable * vtable;
     174    };
     175
     176The functions that run on the trait object would generally be generated using
     177the following pattern:
     178
     179    void draw(Surface & surface, drawable & traitObj) {
     180        return traitObj.vtable->draw(surface, traitObj.object);
     181    }
     182
     183There may have to be special cases for things like copy construction, that
     184might require a more significant wrapper. On the other hand moving could be
     185implemented by moving the pointers without any need to refer to the base
     186object.
     187
     188### Extension: Multiple Trait Parameters
     189The base proposal in effect creates another use for the trait syntax that is
     190related to the ones currently in the language but is also separate from them.
     191The current uses are generic functions and generic types; this new use could be
     192described as generic objects.
     193
     194A generic object is of a concrete type and has concrete functions that work on
     195it. It is generic in that it is a wrapper for an unknown type. Traits serve
     196a similar role here as in generic functions as they limit what the function
     197can be generic over.
     198
     199This extension combines the uses, allowing a generic type that is also a generic
     200object. All but one of the trait's parameters is given a concrete type,
     201conceptually currying the trait to create a trait with one generic parameter
     202that fits the original restrictions. The resulting concrete generic object
     203type is different with each set of provided parameters and their values.
     204
     205Then it just becomes a question of where this is done. Again both examples use
     206a basic syntax to show the idea.
     207
     208    trait iterator(virtual otype T, otype Item) {
     209        bool has_next(T const &);
     210        Item get_next(T const *);
     211    }
     212
     213    iterator(int) int_it = begin(container_of_ints);
     214
     215The first option is to do it at the definition of the trait. One parameter
     216is selected (here with the `virtual` keyword, but other rules like "the first"
     217could also be used) and when an instance of the trait is created all the
     218other parameters must be provided.
     219
     220    trait iterator(otype T, otype Item) {
     221        bool has_next(T const &);
     222        Item get_next(T const *);
     223    }
     224
     225    iterator(virtual, int) int_it = begin(container_of_ints);
     226
     227The second option is to skip a parameter as part of the type instance
     228definition. One parameter is explicitly skipped (again with the `virtual`
     229keyword) and the others have concrete types. The skipped one is the one we
     230are generic on.
     231
     232Incidentally in both examples `container_of_ints` may itself be a generic
     233object and `begin` returns a generic iterator with unknown implementation.
     234
     235These options are not exclusive. Defining a default on the trait allows for
     236an object to be created as in the first example. However, whether the
     237default is provided or not, the second syntax can be used to pick a
     238parameter on instantiation.
    52239
    53240Hierarchy
    54241---------
    55242
    56 Virtual tables by them selves are not quite enough to implement the planned
    57 hierarchy system. An addition of type ids, implemented as pointers which
    58 point to your parent's type id, is required to actually create the shape of
    59 the hierarchy. However vtables would allow behaviour to be carried with the
    60 tree.
    61 
    62 The hierarchy would be a tree of types, of traits and structs. Currently we do
    63 not support structural extension, so traits form the internal nodes and
    64 structures the leaf nodes.
    65 
    66 The syntax is undecided but it will include a clause like `virtual (PARENT)`
    67 on trait and struct definitions. It marks out all types in a hierarchy.
    68 PARENT may be omitted, if it is this type is the root of a hierarchy. Otherwise
    69 it is the name of the type that is this type's parent in the hierarchy.
    70 
    71 Traits define a trait instance type that implements all assertions in this
    72 trait and its parents up until the root of the hierarchy. Each trait then
    73 defines a vtable type. Structures will also have a vtable type but it should
    74 be the same as their parent's.
    75 
    76 Trait objects within the tree can be statically cast to a parent type. Casts
    77 from a parent type to a child type are conditional, they check to make sure
    78 the underlying instance is an instance of the child type, or an instance of
    79 one of its children. The type then is recoverable at run-time.
    80 
    81 As with regular trait objects, calling a function on a trait object will cause
    82 a look-up on the the virtual table. The casting rules make sure anything that
    83 can be cast to a trait type will have all the function implementations for
    84 that trait.
    85 
    86 Converting from a concrete type (structures at the edge of the hierarchy) to
    87 an abstract type works the same as with normal trait objects, the underlying
    88 object is packaged with a virtual table pointer. Converting back to an abstract
    89 type requires confirming the underlying type matches, but then simply extracts
    90 the pointer to it.
    91 
    92 ### Inline vtables
     243We would also like to implement hierarchical relations between types.
     244
     245    ast_node
     246    |-expression_node
     247    | |-operator_expression
     248    |
     249    |-statement_node
     250    | |-goto_statement
     251    |
     252    |-declaration_node
     253      |-using_declaration
     254      |-variable_declaration
     255
     256Virtual tables by themselves are not quite enough to implement this system.
     257A vtable is just a list of functions and there is no way to check at run-time
     258what these functions are unless we carry that knowledge with the table.
     259
     260This proposal adds type ids to check for position in the hierarchy and an
     261explicit syntax for establishing a hierarchical relation between traits and
     262their implementing types. The ids should uniquely identify each type and
     263allow retrieval of the type's parent if one exists. By recursion this allows
     264the ancestor relation between any two hierarchical types to be checked.
     265
     266The hierarchy is created with traits as the internal nodes and structures
     267as the leaf nodes. The structures may be used normally and the traits can
     268be used to create generic objects as in the first section (the same
     269restrictions apply). However these type objects store their type id which can
     270be recovered to figure out which type they are or at least check to see if
     271they fall into a given sub-tree at run-time.
     272
     273Here is an example of part of a hierarchy. The `virtual(PARENT)` syntax is
     274just an example. But when used it gives the name of the parent type or if
     275empty it shows that this type is the root of its hierarchy.
     276(Also I'm not sure where I got these casing rules.)
     277
     278    trait ast_node(otype T) virtual() {
     279        void print(T & this, ostream & out);
     280        void visit(T & this, Visitor & visitor);
     281        CodeLocation const & get_code_location(T & this);
     282    }
     283
     284    trait expression_node(otype T) virtual(ast_node) {
     285        Type eval_type(T const & this);
     286    }
     287
     288    struct operator_expression virtual(expression_node) {
     289        enum operator_kind kind;
     290        trait expression_node rands[2];
     291    }
     292
     293    trait statement_node(otype T) virtual(ast_node) {
     294        vector(Label) & get_labels(T & this);
     295    }
     296
     297    struct goto_statement virtual(statement_node) {
     298        vector(Label) labels;
     299        Label target;
     300    }
     301
     302    trait declaration_node(otype T) virtual(ast_node) {
     303        string name_of(T const & this);
     304        Type type_of(T const & this);
     305    }
     306
     307    struct using_declaration virtual(declaration_node) {
     308        string new_type;
     309        Type old_type;
     310    }
     311
     312    struct variable_declaration virtual(declaration_node) {
     313        string name;
     314        Type type;
     315    }
     316
     317This system does not support multiple inheritance. The system could be
     318extended to support it or a limited form (ex. you may have multiple parents
     319but they may not have a common ancestor). However this proposal focuses just
     320on using hierarchy as organization. Other uses for reusable/generic code or
     321shared interfaces are left for other features of the language.
     322
     323### Extension: Structural Inheritance
     324An extension would be to allow structures to be used as internal nodes on the
     325inheritance tree. Its child types would have to implement the same fields.
     326
     327The weaker restriction would be to convert the fields into field assertions
     328(Not implemented yet: `U T.x` means there is a field of type U on the type
     329T. Offset unknown and passed in/stored with function pointers.)
     330A concrete child would have to declare the same set of fields with the same
     331types. This is of a more functional style.
     332
     333The stronger restriction is that the fields of the parent are a prefix of the
     334child's fields. Possibly automatically inserted. This is the imperative view and
     335may also have less overhead.
     336
     337### Extension: Unions and Enumerations
     338Currently there is no reason unions and enumerations, in the cases they
     339do implement the trait, could not be in the hierarchy as leaf nodes.
     340
     341It does not work with structural induction, but that could just be a compile
     342time check that all ancestors are traits or do not add field assertions.
     343
     344#### Sample Implementation
     345The type id may be as little as:
     346
     347    struct typeid {
     348        struct typeid const * const parent;
     349    };
     350
     351Some linker magic would have to be used to ensure exactly one copy of each
     352structure for each type exists in memory. There seem to be special once
     353sections that support this and it should be easier than generating unique
     354ids across compilation units.
     355
     356The structure could be extended to contain any additional type information.
     357
     358There are two general designs for vtables with type ids. The first is to put
     359the type id at the top of the vtable, this is the most compact and efficient
     360solution but only works if we have exactly 1 vtable for each type. The second
     361is to put a pointer to the type id in each vtable. This has more overhead but
     362allows multiple vtables per type.
     363
     364    struct <trait>_vtable {
     365        struct typeid const id;
     366
     367        // Trait dependent list of vtable members.
     368    };
     369
     370    struct <trait>_vtable {
     371        struct typeid const * const id;
     372
     373        // Trait dependent list of vtable members.
     374    };
     375
     376One important restriction is that only one instance of each typeid may exist in memory.
     377There is a ".gnu.linkonce" feature in the linker that might solve the issue.
     378
     379### Virtual Casts
     380The generic objects may be cast up and down the hierarchy.
     381
     382Casting to an ancestor type always succeeds. From one generic type to another
     383is just a reinterpretation and could be implicit. Wrapping and unwrapping
     384a concrete type will probably use the same syntax as in the first section.
     385
     386Casting from an ancestor to a descendent requires a check. The underlying
     387type may or may not belong to the sub-tree headed by that descendent. For this
     388we introduce a new cast operator, which returns the pointer unchanged if the
     389check succeeds and null otherwise.
     390
     391    trait SubType * new_value = (virtual trait SubType *)super_type;
     392
     393For the following example I am using the as-yet-unfinished exception system.
     394
     395    trait exception(otype T) virtual() {
     396        char const * what(T & this);
     397    }
     398
     399    trait io_error(otype T) virtual(exception) {
     400        FILE * which_file(T & this);
     401    }
     402
     403    struct eof_error(otype T) virtual(io_error) {
     404        FILE * file;
     405    }
     406
     407    char const * what(eof_error &) {
     408        return "Tried to read from an empty file.";
     409    }
     410
     411    FILE * which_file(eof_error & this) {
     412        return this.file;
     413    }
     414
     415    bool handleIoError(exception * exc) {
     416        io_error * error = (virtual io_error *)exc;
     417        if (NULL == error) {
     418            return false;
     419        }
     420        ...
     421        return true;
     422    }
     423
     424### Extension: Implicit Virtual Cast Target
     425This is a small extension, even in the example above `io_error *` is repeated
     426in the cast and the variable being assigned to. Using return type inference
     427would allow the second type to be skipped in cases it is clear what type is
     428being checked against.
     429
     430The line then becomes:
     431
     432    io_error * error = (virtual)exc;
     433
     434#### Sample Implementation
     435This cast implementation assumes a type id layout similar to the one given
     436above. Also this code is definitely in the underlying C. Functions that give
     437this functionality could exist in the standard library but these are meant to
     438be produced by code translation of the virtual cast.
     439
     440    bool is_in_subtree(typeid const * root, typeid const * id) {
     441        if (root == id) {
     442            return true;
     443        } else if (NULL == id->parent) {
     444            return false;
     445        } else {
     446            return is_in_subtree(root, id->parent);
     447        }
     448    }
     449
     450    void * virtual_cast(typeid const * target, void * value) {
     451        return is_in_subtree(target, *(typeid const **)value) ? value : NULL;
     452    }
     453
     454The virtual cast function might have to be wrapped with some casts to make it
     455compile without warning.
     456
     457For the implicit target type we may be able to lean on the type resolution
     458system that already exists. If the casting to ancestor type is built into
     459the resolution then the implicit target could be decided by picking an
     460overload, generated for each hierarchical type (here io_error and its root
     461type exception).
     462
     463    io_error * virtual_cast(exception * value) {
     464        return virtual_cast(io_error_typeid, value);
     465    }
     466
     467### Extension: Inline vtables
    93468Since the structures here are usually made to be turned into trait objects
    94 it might be worth it to have fields on them to store the virtual table
    95 pointer. This would have to be declared on the trait as an assertion, but if
    96 it is the trait object could be a single pointer.
    97 
    98 It is trivial to do if the field with the virtual table pointer is fixed.
    99 Otherwise some trickery with pointing to the field and storing the offset in
    100 the virtual table to recover the main object would have to be used.
     469it might be worth it to have fields in them to store the virtual table
     470pointer. This would have to be declared on the trait as an assertion (example:
     471`vtable;` or `T.vtable;`), but if it is the trait object could be a single
     472pointer.
     473
     474There are also three options for where the pointer to the vtable is stored. It could be
     475anywhere, a fixed location for each trait or always at the front. For the per-
     476trait solution an extension to specify what it is (example `vtable[0];`) which
     477could also be used to combine it with others. So these options can be combined
     478to allow access to all three options.
     479
     480The pointer to virtual table field on structures might be implicitly added (the
     481types have to declare they are a child here) or created with a declaration,
     482possibly like the one used to create the assertion.
    101483
    102484### Virtual Tables as Types
    103 Here we consider encoding plus the implementation of functions on it. Which
    104 is to say in the type hierarchy structures aren't concrete types anymore,
    105 instead they are parent types to vtables, which combine the encoding and
    106 implementation.
     485Here we consider encoding plus the implementation of functions on it to be a
     486type. Which is to say in the type hierarchy structures aren't concrete types
     487anymore, instead they are parent types to vtables, which combine the encoding
     488and implementation.
     489
     490### Question: Wrapping Structures
     491One issue is what to do with concrete types at the base of the type tree.
     492When we are working with the concrete type generally we would like them to be
     493regular structures with direct calls. On the other hand for interactions with
     494other types in the hierarchy it is more convenient for the type already to be
     495cast.
     496
     497Which of these two should we use? Should we support both and if so how do we
     498choose which one is being used at any given time.
     499
     500On a related note I have been using pointers to trait types here, as that
     501is how many existing languages handle it. However the generic objects might
     502be only one or two pointers wide, so passing the objects as a whole would not
     503be very expensive and all operations on the generic objects probably have
     504to be defined anyways.
    107505
    108506Resolution Scope
     
    120518the type declaration, including the functions that satisfy the trait, are
    121519all defined. Currently there are many points where this can happen, not all
    122 of them will have the same definitions and no way to select one over the
    123 other.
    124 
    125 Some syntax would have to be added. All resolutions can be found at compile
    126 time and a single vtable created for each type at compilation time.
     520of them have the same definitions and there is no way to select one over the other.
     521
     522Some syntax would have to be added to specify the resolution point. To ensure
     523a single instance there may have to be two variants, one forward declaration
     524and one to create the instance. With some compiler magic the forward
     525declaration may be enough.
     526
     527    extern trait combiner(struct summation) vtable;
     528    trait combiner(struct summation) vtable;
     529
     530Or (with the same variants):
     531
     532    vtable combiner(struct summation);
     533
     534The extern variant promises that the vtable will exist while the normal one
     535is where the resolution actually happens.
    127536
    128537### Explicit Resolution Points:
     
    141550vtable.
    142551
     552    extern trait combiner(struct summation) vtable sum;
     553    trait combiner(struct summation) vtable sum;
     554
     555    extern trait combiner(struct summation) vtable sum default;
     556    trait combiner(struct summation) vtable sum default;
     557
     558The extern difference is the same as before. The name (sum in the samples) is
     559used at the binding site to say which one is picked. The default keyword can
     560be used in only some of the declarations.
     561
     562    trait combiner fee = (summation_instance, sum);
     563    trait combiner foe = summation_instance;
     564
     565(I am not really happy about this syntax, but it kind of works.)
     566The object being bound is required. The name of the vtable is optional if
     567there is exactly one vtable name marked with default.
     568
     569These could also be placed inside functions. In which case both the name and
     570the default keyword might be optional. If the name is omitted in an assignment
     571the closest vtable is chosen (returning to the global default rule if no
     572appropriate local vtable is in scope).
     573
    143574### Site Based Resolution:
    144575Every place in code where the binding of a vtable to an object occurs has
     
    165596Stack allocated functions interact badly with this because they are not
    166597static. There are several ways to try to resolve this, however without a
    167 general solution most can only buy time.
     598general solution most can only keep vtables from making the existing thunk
     599problem worse; they don't do anything to solve it.
    168600
    169601Filling in some fields of a static vtable could cause issues on a recursive
     
    180612shortest lifetime of a function assigned to it. However this still limits the
    181613lifetime "implicitly" and returns to the original problem with thunks.
     614
     615Odds And Ends
     616-------------
     617
     618In addition to the main design there are a few extras that should be
     619considered. They are not part of the core design but make the new uses fully
     620featured.
     621
     622### Extension: Parent-Child Assertion
     623For hierarchy types in regular traits, generic functions or generic structures
     624we may want to be able to check parent-child relationships between two types
     625given. For this we might have to add another primitive assertion. It would
     626have the following form if declared in code:
     627
     628    trait is_parent_child(dtype Parent, dtype Child) { <built-in magic> }
     629
     630This assertion is satisfied if Parent is an ancestor of Child in a hierarchy.
     631In other words Child can be statically cast to Parent. The cast from Parent
     632to Child would be dynamically checked as usual.
     633
     634However in this form there are two concerns. The first is that Parent will
     635usually be consistent for a given use; it will not be a variable. The second is
     636that we may also need the assertion functions to do any casting/conversions
     637anyways.
     638TODO: Talk about when we wrap a concrete type and how that leads to "may".
     639
     640To this end it may be better that the parent trait combines the usual
     641assertions plus this new primitive assertion. There may or may not be use
     642cases for accessing just one half and providing easy access to them may be
     643required depending on how that turns out.
     644
     645    trait Parent(dtype T | interface(T)) virtual(<grand-parent?>) { }
     646
     647### Extension: sizeof Compatibility
     648Trait types are always sized, it may even be a fixed size like how pointers
     649have the same size regardless of what they point at. However their contents
     650may or may not be of a known size (if the `sized(...)` assertion is used).
     651
     652Currently there is no way to access this information. If it is needed a
     653special syntax would have to be added. Here a special case of `sizeof` is
     654used.
     655
     656    struct line aLine;
     657    trait drawable widget = aLine;
     658
     659    size_t x = sizeof(widget);
     660    size_t y = sizeof(trait drawable);
     661
     662As usual `y`, size of the type, is the size of the local storage used to put
     663the value into. The other case `x` checks the saved stored value in the
     664virtual table and returns that.
  • doc/theses/aaron_moss_PhD/phd/Makefile

    r6a9d4b4 r933f32f  
    22BIBDIR = ../../../bibliography
    33EVALDIR = evaluation
     4FIGDIR = figures
    45TEXLIB = .:${BUILD}:${BIBDIR}:
    56
     
    89BIBTEX = BIBINPUTS=${TEXLIB} && export BIBINPUTS && bibtex
    910
    10 VPATH = ${EVALDIR}
     11VPATH = ${EVALDIR} ${FIGDIR}
    1112
    1213BASE = thesis
     
    2223background \
    2324generic-types \
     25resolution-heuristics \
    2426type-environment \
    25 resolution-heuristics \
     27experiments \
    2628conclusion \
     29generic-bench \
     30}
     31
     32FIGURES = ${addsuffix .eps, \
     33safe-conv-graph \
     34resolution-dag \
     35union-find-with-classes \
     36persistent-union-find \
    2737}
    2838
    2939GRAPHS = ${addsuffix .tex, \
    3040generic-timing \
     41tests-completed \
     42per-prob-histo \
     43per-prob-depth \
     44cfa-time \
    3145}
    3246
     
    4761        dvips ${BUILD}/$< -o ${BUILD}/$@
    4862
    49 ${BASE}.dvi : Makefile ${SOURCES} ${GRAPHS} ${BIBFILE} ${BUILD}
     63${BASE}.dvi : Makefile ${SOURCES} ${GRAPHS} ${FIGURES} ${BIBFILE} ${BUILD}
    5064        ${LATEX} ${BASE}
    5165        ${BIBTEX} ${BUILD}/${BASE}
     
    5367        ${LATEX} ${BASE}
    5468
    55 ${GRAPHS} : generic-timing.gp generic-timing.dat ${BUILD}
     69generic-timing.tex : generic-timing.gp generic-timing.dat ${BUILD}
    5670        gnuplot -e BUILD="'${BUILD}/'" ${EVALDIR}/generic-timing.gp
     71       
     72tests-completed.tex : algo-summary.gp algo-summary.dat bu-summary.dat ${BUILD}
     73        gnuplot -e BUILD="'${BUILD}/'" ${EVALDIR}/algo-summary.gp
     74
     75per-prob-histo.tex : per-prob.gp per-prob.tsv ${BUILD}
     76        gnuplot -e BUILD="'${BUILD}/'" ${EVALDIR}/per-prob.gp
     77
     78per-prob-depth.tex : per-prob-scatter.gp ${BUILD}
     79        gnuplot -e BUILD="'${BUILD}/'" ${EVALDIR}/per-prob-scatter.gp
     80
     81cfa-time.tex : cfa-plots.gp cfa-time.tsv cfa-mem.tsv ${BUILD}
     82        gnuplot -e BUILD="'${BUILD}/'" ${EVALDIR}/cfa-plots.gp
    5783
    5884${BUILD}:
  • doc/theses/aaron_moss_PhD/phd/background.tex

    r6a9d4b4 r933f32f  
    11\chapter{\CFA{}}
     2\label{cfa-chap}
    23
    34\CFA{} adds a number of features to C, some of them providing significant increases to the expressive power of the language, but all designed to maintain the existing procedural programming paradigm of C and to be as orthogonal as possible to each other.
    45To provide background for the contributions in subsequent chapters, this chapter provides a summary of the features of \CFA{} at the time this work was conducted.
    56
    6 The core design of \CFA{} is laid out in Glen Ditchfield's 1992 PhD thesis, \emph{Contextual Polymorphism}\cite{Ditchfield92}; in that thesis, Ditchfield presents the theoretical underpinnings of the \CFA{} polymorphism model.
    7 Building on Ditchfield's design for contextual polymorphism as well as KW-C\cite{Buhr94a}, an earlier set of (largely syntactic) extensions to C, Richard Bilson\cite{Bilson03} built the first version of the \CFA{} compiler, \CFACC{}, in the early 2000's.
    8 This early \CFACC{} provided basic functionality, but incorporated a number of poor algorithmic choices due to a rushed implementation time frame, and as such lacked the runtime performance required for practical use; this thesis is substantially concerned with rectifying those deficits.
    9 
    10 The \CFA{} project was revived in 2015 with the intention of building a production-ready language and compiler; at the time of this writing, both \CFA{} and \CFACC{} have been under active development continuously since.
    11 As this development has been proceeding concurrently with the work described in this thesis, the state of \CFA{} has been somewhat of a moving target; however, Moss~\etal\cite{Moss18} provides a reasonable summary of the current design.
    12 Notable features added during this period include generic types (Chapter~\ref{generic-chap}), constructors and destructors\cite{Schluntz17}, improved support for tuples\cite{Schluntz17}, reference types\cite{Moss18}, first-class concurrent and parallel programming support\cite{Delisle18}, as well as numerous pieces of syntactic sugar and the start of an idiomatic standard library\cite{Moss18}.
    13 
    14 \section{\CFA{} Features}
    15 
    16 The selection of features presented in this chapter are chosen to elucidate the design constraints of the work presented in this thesis.
    17 In some cases the interactions of multiple features make this design a significantly more complex problem than any individual feature would; in other cases a feature that does not by itself add any complexity to expression resolution triggers previously rare edge cases more frequently.
    18 
    19 \subsection{Procedural Paradigm}
     7Glen Ditchfield laid out the core design of \CFA{} in his 1992 PhD thesis, \emph{Contextual Polymorphism} \cite{Ditchfield92}; in that thesis, Ditchfield presents the theoretical underpinnings of the \CFA{} polymorphism model.
     8Building on Ditchfield's design for contextual polymorphism as well as KW-C \cite{Buhr94a}, an earlier set of (largely syntactic) extensions to C, Richard Bilson \cite{Bilson03} built the first version of the \CFA{} compiler, \CFACC{}, in the early 2000's.
     9This early \CFACC{} provided basic functionality, but incorporated a number of algorithmic choices that have failed to scale as \CFA{} has developed, lacking the runtime performance for practical use; this thesis is substantially concerned with rectifying those deficits.
     10
     11The \CFA{} project was revived in 2015 with the intention of building a production-ready language and compiler; at the time of this writing, both \CFA{} and \CFACC{} remain under active development.
     12As this development has been proceeding concurrently with the work described in this thesis, the state of \CFA{} has been somewhat of a moving target; however, Moss~\etal~\cite{Moss18} provides a reasonable summary of the current design.
     13Notable features added during this period include generic types (Chapter~\ref{generic-chap}), constructors and destructors \cite{Schluntz17}, improved support for tuples \cite{Schluntz17}, reference types \cite{Moss18}, first-class concurrent and parallel programming support \cite{Delisle18}, as well as numerous pieces of syntactic sugar and the start of an idiomatic standard library \cite{Moss18}.
     14
     15This thesis is primarily concerned with the \emph{expression resolution} portion of \CFA{} type-checking; resolution is discussed in more detail in Chapter~\ref{resolution-chap}, but is essentially determining which declarations the identifiers in each expression correspond to.
     16In C, no simultaneously-visible declarations share identifiers, hence expression resolution in C is not difficult.
     17In \CFA{}, multiple added features make the resolution process significantly more complex.
     18Due to this complexity, the expression-resolution pass in \CFACC{} requires 95\% of compiler runtime on some source files, making a new, more efficient procedure for expression resolution a requirement for a performant \CFA{} compiler.
     19
     20The features presented in this chapter are chosen to elucidate the design constraints of the work presented in this thesis.
     21In some cases the interactions of multiple features make this design a significantly more complex problem than any individual feature; in other cases a feature that does not by itself add any complexity to expression resolution triggers previously rare edge cases more frequently.
     22
     23\section{Procedural Paradigm}
    2024
    2125It is important to note that \CFA{} is not an object-oriented language.
    22 This is a deliberate choice intended to maintain the applicability of the mental model and language idioms already possessed by C programmers.
    23 This choice is in marked contrast to \CC{}, which, though it has backward-compatibility with C on the source code level, is a much larger and more complex language, and requires extensive developer re-training to write idiomatic, efficient code in \CC{}'s object-oriented paradigm.
    24 
    25 \CFA{} does have a system of implicit type conversions derived from C's ``usual arithmetic conversions''; while these conversions may be thought of as something like an inheritance hierarchy, the underlying semantics are significantly different and such an analogy is loose at best.
     26This is a deliberate choice intended to maintain the applicability of the programming model and language idioms already possessed by C programmers.
     27This choice is in marked contrast to \CC{}, which is a much larger and more complex language, and requires extensive developer re-training to write idiomatic, efficient code in \CC{}'s object-oriented paradigm.
     28
    2629Particularly, \CFA{} has no concept of \emph{subclass}, and thus no need to integrate an inheritance-based form of polymorphism with its parametric and overloading-based polymorphism.
    27 The graph structure of the \CFA{} type conversions is also markedly different than an inheritance hierarchy; it has neither a top nor a bottom type, and does not satisfy the lattice properties typical of inheritance hierarchies.
     30While \CFA{} does have a system of implicit type conversions derived from C's ``usual arithmetic conversions'' \cite[\S{}6.3.1.8]{C11} and these conversions may be thought of as something like an inheritance hierarchy, the underlying semantics are significantly different and such an analogy is loose at best.
     31The graph structure of the \CFA{} type conversions (discussed in Section~\ref{conv-cost-sec}) is also markedly different than an inheritance hierarchy; it has neither a top nor a bottom type, and does not satisfy the lattice properties typical of inheritance hierarchies.
    2832
    2933Another aspect of \CFA{}'s procedural paradigm is that it retains C's translation-unit-based encapsulation model, rather than class-based encapsulation such as in \CC{}.
    30 This choice implies that that separate compilation must be maintained to allow headers to act as an encapsulation boundary, rather than the header-only libraries used by \CC{} templates.
    31 
    32 \subsection{Name Overloading} \label{overloading-sec}
    33 
    34 In C, no more than one variable or function in the same scope may share the same name\footnote{Technically, C has multiple separated namespaces, one holding \lstinline{struct}, \lstinline{union}, and \lstinline{enum} tags, one holding labels, one holding \lstinline{typedef} names, variable, function, and enumerator identifiers, and one for each \lstinline{struct} and \lstinline{union} type holding the field names\cit{}.}, and variable or function declarations in inner scopes with the same name as a declaration in an outer scope hide the outer declaration.
    35 This restriction makes finding the proper declaration to match to a variable expression or function application a simple matter of symbol-table lookup, which can be easily and efficiently implemented.
     34As such, any language feature that requires code to be exposed in header files (\eg{} \CC{} templates) also eliminates encapsulation in \CFA{}.
     35Given this constraint, \CFA{} is carefully designed to allow separate compilation for its added language features under the existing C usage patterns.
     36
     37\section{Name Overloading} \label{overloading-sec}
     38
     39In C, no more than one variable or function in the same scope may share the same name\footnote{Technically, C has multiple separated namespaces, one holding \lstinline{struct}, \lstinline{union}, and \lstinline{enum} tags, one holding labels, one holding \lstinline{typedef} names, variable, function, and enumerator identifiers, and one for each \lstinline{struct} and \lstinline{union} type holding the field names \cite[\S{}6.2.3]{C11}.}, and variable or function declarations in inner scopes with the same name as a declaration in an outer scope hide the outer declaration.
     40This restriction makes finding the proper declaration to match to a variable expression or function application a simple matter of lexically-scoped name lookup, which can be easily and efficiently implemented.
    3641\CFA{}, on the other hand, allows overloading of variable and function names so long as the overloaded declarations do not have the same type, avoiding the multiplication of variable and function names for different types common in the C standard library, as in the following example:
    3742
     
    5055\end{cfa}
    5156
    52 While the wisdom of giving both the maximum value of a type and the function to take the maximum of two values the same name is debatable, \eg{} some developers may prefer !MAX! for the former, the guiding philosophy of \CFA{} is ``describe, don't prescribe'' --- we prefer to trust programmers with powerful tools, and there is no technical reason to restrict overloading between variables and functions.
    53 However, the expressivity of \CFA{}'s name overloading has the consequence that simple table lookup is insufficient to match identifiers to declarations, and a type-matching algorithm must be part of expression resolution.
    54 
    55 \subsubsection{Operator Overloading}
     57The final expression in the preceding example includes a feature of \CFA{} name overloading not shared by \CC{}, the ability to disambiguate expressions based on their return type. This provides greater flexibility and power than the parameter-based overload selection of \CC{}, though at the cost of greater complexity in the resolution algorithm.
     58
     59While the wisdom of giving both the maximum value of a type and the function to take the maximum of two values the same name in the example above is debatable, \eg{} some developers may prefer !MAX! for the former, the guiding philosophy of \CFA{} is ``describe, don't prescribe'' --- we prefer to trust programmers with powerful tools, and there is no technical reason to restrict overloading between variables and functions.
     60However, the expressivity of \CFA{}'s name overloading does have the consequence that simple table lookup is insufficient to match identifiers to declarations, and a type-matching algorithm must be part of expression resolution.
     61
     62\subsection{Operator Overloading}
    5663
    5764C does allow name overloading in one context: operator overloading.
    5865From the perspective of the type system, there is nothing special about operators as opposed to other functions, nor is it desirable to restrict the clear and readable syntax of operators to only the built-in types.
    59 For these reasons, \CFA{} also allows overloading of operators by writing specially-named functions where !?! stands in for the operands\footnote{This example uses \CFA{}'s reference types, described in Section~\ref{type-features-sec}}:
     66For these reasons, \CFA{}, like \CC{} and many other programming languages, allows overloading of operators by writing specially-named functions where !?! stands in for the operands.
     67This syntax is more natural than the operator overloading syntax of \CC{}, which requires ``dummy'' parameters to disambiguate overloads of similarly-named pre- and postfix operators\footnote{This example uses \CFA{}'s reference types, described in Section~\ref{type-features-sec}}:
    6068
    6169\begin{cfa}
     
    7179\end{cfa}
    7280
    73 Together, \CFA{}'s backward-compatibility with C and the inclusion of this operator overloading feature imply that \CFA{} must select among function overloads using a method compatible with C's ``usual arithmetic conversions''\cit{}, so as to present user programmers with only a single set of overloading rules.
    74 
    75 \subsubsection{Special Literal Types}
     81Together, \CFA{}'s backward-compatibility with C and the inclusion of this operator overloading feature imply that \CFA{} must select among function overloads using a method compatible with C's ``usual arithmetic conversions'' \cite[\S{}6.3.1.8]{C11}, so as to present user programmers with only a single set of overloading rules.
     82
     83\subsection{Special Literal Types}
    7684
    7785Literal !0! is also used polymorphically in C; it may be either integer zero or the null value of any pointer type.
    78 \CFA{} provides a special type for the !0! literal, !zero_t!, so that users can define a zero value for their own types without being forced to create a conversion from an integer or pointer type (though \CFA{} also includes implicit conversions from !zero_t! to the integer and pointer types for backward compatibility).
    79 
    80