Changes in / [f5c3b6c:0fe4e62]


Ignore:
Files:
8 added
1 deleted
71 edited

Legend:

Unmodified
Added
Removed
  • Jenkinsfile

    rf5c3b6c r0fe4e62  
    1515        arch_name               = ''
    1616        architecture    = ''
    17        
     17
    1818        do_alltests             = false
    1919        do_benchmark    = false
     
    183183                sh 'make clean > /dev/null'
    184184                sh 'make > /dev/null 2>&1'
    185         } 
     185        }
    186186        catch (Exception caughtError) {
    187187                err = caughtError //rethrow error later
     
    257257def build() {
    258258        build_stage('Build') {
    259        
     259
    260260                def install_dir = pwd tmp: true
    261                
     261
    262262                //Configure the conpilation (Output is not relevant)
    263263                //Use the current directory as the installation target so nothing
     
    290290                if( !do_benchmark ) return
    291291
    292                 //Write the commit id to Benchmark
    293                 writeFile  file: 'bench.csv', text:'data=' + gitRefNewValue + ',' + arch_name + ','
    294  
    295292                //Append bench results
    296                 sh 'make -C src/benchmark --no-print-directory csv-data >> bench.csv'
     293                sh 'make -C src/benchmark --no-print-directory jenkins githash=' + gitRefNewValue + ' arch=' + arch_name + ' | tee bench.json'
    297294        }
    298295}
     
    327324
    328325                //Then publish the results
    329                 sh 'curl --silent --data @bench.csv http://plg2:8082/jenkins/publish > /dev/null || true'
     326                sh 'curl -H "Content-Type: application/json" --silent --data @bench.json http://plg2:8082/jenkins/publish > /dev/null || true'
    330327        }
    331328}
  • doc/proposals/concurrency/.gitignore

    rf5c3b6c r0fe4e62  
    1616build/*.out
    1717build/*.ps
     18build/*.pstex
     19build/*.pstex_t
    1820build/*.tex
    1921build/*.toc
  • doc/proposals/concurrency/Makefile

    rf5c3b6c r0fe4e62  
    1313annex/glossary \
    1414text/intro \
     15text/basics \
    1516text/cforall \
    16 text/basics \
    1717text/concurrency \
    1818text/internals \
    1919text/parallelism \
     20text/results \
    2021text/together \
    2122text/future \
     
    2930}}
    3031
    31 PICTURES = ${addsuffix .pstex, \
    32 }
     32PICTURES = ${addprefix build/, ${addsuffix .pstex, \
     33        system \
     34        monitor_structs \
     35}}
    3336
    3437PROGRAMS = ${addsuffix .tex, \
     
    6770        build/*.out     \
    6871        build/*.ps      \
     72        build/*.pstex   \
    6973        build/*.pstex_t \
    7074        build/*.tex     \
     
    8084        dvips $< -o $@
    8185
    82 build/${basename ${DOCUMENT}}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} ${basename ${DOCUMENT}}.tex ../../LaTeXmacros/common.tex ../../LaTeXmacros/indexstyle
     86build/${basename ${DOCUMENT}}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} ${basename ${DOCUMENT}}.tex ../../LaTeXmacros/common.tex ../../LaTeXmacros/indexstyle annex/local.bib
    8387
    8488        @ if [ ! -r ${basename $@}.ind ] ; then touch ${basename $@}.ind ; fi                           # Conditionally create an empty *.ind (index) file for inclusion until makeindex is run.
     
    9195        @ -${BibTeX} ${basename $@}
    9296        @ echo "Glossary"
    93         makeglossaries -q -s ${basename $@}.ist ${basename $@}                                          # Make index from *.aux entries and input index at end of document
     97        @ makeglossaries -q -s ${basename $@}.ist ${basename $@}                                                # Make index from *.aux entries and input index at end of document
    9498        @ echo ".dvi generation"
    9599        @ -build/bump_ver.sh
  • doc/proposals/concurrency/annex/local.bib

    rf5c3b6c r0fe4e62  
    5252        year            = 2017
    5353}
     54
     55@manual{Cpp-Transactions,
     56        keywords        = {C++, Transactional Memory},
     57        title           = {Technical Specification for C++ Extensions for Transactional Memory},
     58        organization= {International Standard ISO/IEC TS 19841:2015 },
     59        publisher   = {American National Standards Institute},
     60        address = {http://www.iso.org},
     61        year            = 2015,
     62}
     63
     64@article{BankTransfer,
     65        keywords        = {Bank Transfer},
     66        title   = {Bank Account Transfer Problem},
     67        publisher       = {Wiki Wiki Web},
     68        address = {http://wiki.c2.com},
     69        year            = 2010
     70}
     71
     72@misc{2FTwoHardThings,
     73        keywords        = {Hard Problem},
     74        title   = {TwoHardThings},
     75        author  = {Martin Fowler},
     76        address = {https://martinfowler.com/bliki/TwoHardThings.html},
     77        year            = 2009
     78}
     79
     80@article{IntrusiveData,
     81        title           = {Intrusive Data Structures},
     82        author  = {Jiri Soukup},
     83        journal = {CppReport},
     84        year            = 1998,
     85        month           = May,
     86        volume  = {10/No5.},
     87        page            = 22
     88}
     89
     90@misc{affinityLinux,
     91        title           = "{Linux man page - sched\_setaffinity(2)}"
     92}
     93
     94@misc{affinityWindows,
     95        title           = "{Windows (vs.85) - SetThreadAffinityMask function}"
     96}
     97
     98@misc{affinityFreebsd,
     99        title           = "{FreeBSD General Commands Manual - CPUSET(1)}"
     100}
     101
     102@misc{affinityNetbsd,
     103        title           = "{NetBSD Library Functions Manual - AFFINITY(3)}"
     104}
     105
     106@misc{affinityMacosx,
     107        title           = "{Affinity API Release Notes for OS X v10.5}"
     108}
  • doc/proposals/concurrency/figures/int_monitor.fig

    rf5c3b6c r0fe4e62  
    88-2
    991200 2
    10 5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 600.000 2625.000 600 2325 300 2625 600 2925
    11 6 3225 4500 7425 4800
    12 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3375 4650 80 80 3375 4650 3455 4730
    13 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4725 4650 105 105 4725 4650 4830 4755
    14 1 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 6225 4650 105 105 6225 4650 6330 4755
    15 4 0 -1 0 0 0 12 0.0000 2 135 1035 4950 4725 blocked task\001
    16 4 0 -1 0 0 0 12 0.0000 2 135 870 3525 4725 active task\001
    17 4 0 -1 0 0 0 12 0.0000 2 180 930 6450 4725 routine ptrs\001
     105 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 675.000 2700.000 675 2400 375 2700 675 3000
     116 4533 2866 4655 3129
     125 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 4657.017 2997.000 4655 2873 4533 2997 4655 3121
     132 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     14         4655 2866 4655 3129
    1815-6
    19 6 8445 1695 8655 1905
    20 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 8550 1800 105 105 8550 1800 8655 1905
    21 4 1 -1 0 0 0 10 0.0000 2 75 75 8550 1860 a\001
     166 4725 2866 4847 3129
     175 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 4849.017 2997.000 4847 2873 4725 2997 4847 3121
     182 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     19         4847 2866 4847 3129
    2220-6
    23 6 8445 1395 8655 1605
    24 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 8550 1500 105 105 8550 1500 8655 1605
    25 4 1 -1 0 0 0 10 0.0000 2 105 90 8550 1560 b\001
     216 4911 2866 5033 3129
     225 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 5035.017 2997.000 5033 2873 4911 2997 5033 3121
     232 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     24         5033 2866 5033 3129
    2625-6
    27 6 3945 1695 4155 1905
    28 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4050 1800 105 105 4050 1800 4155 1905
    29 4 1 -1 0 0 0 10 0.0000 2 75 75 4050 1860 a\001
     266 9027 2866 9149 3129
     275 1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9024.983 2997.000 9027 2873 9149 2997 9027 3121
     282 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     29         9027 2866 9027 3129
    3030-6
    31 6 3945 1395 4155 1605
    32 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4050 1500 105 105 4050 1500 4155 1605
    33 4 1 -1 0 0 0 10 0.0000 2 105 90 4050 1560 b\001
     316 9253 2866 9375 3129
     325 1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9250.983 2997.000 9253 2873 9375 2997 9253 3121
     332 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     34         9253 2866 9253 3129
     35-6
     366 9478 2866 9600 3129
     375 1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9475.983 2997.000 9478 2873 9600 2997 9478 3121
     382 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     39         9478 2866 9478 3129
    3440-6
    35411 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 7650 3675 80 80 7650 3675 7730 3755
    36421 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3150 3675 80 80 3150 3675 3230 3755
     431 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4047 1793 125 125 4047 1793 3929 1752
     441 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4050 1500 125 125 4050 1500 3932 1459
     451 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 8550 1500 125 125 8550 1500 8432 1459
     461 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 8550 1800 125 125 8550 1800 8432 1759
     471 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 1200 2850 125 125 1200 2850 1082 2809
     481 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 900 2850 125 125 900 2850 782 2809
     491 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 6225 4650 105 105 6225 4650 6330 4755
     501 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3150 4650 80 80 3150 4650 3230 4730
     511 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4575 4650 105 105 4575 4650 4680 4755
    37522 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    3853         3900 1950 4200 2100
     
    6277         3000 4050 3300 4200
    63782 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    64          600 2925 1350 2925
     79         675 3000 1425 3000
    65802 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    66          600 2325 1350 2325
     81         675 2400 1425 2400
    67822 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    68          1350 2625 1425 2850
     83         1425 2700 1500 2925
    69842 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    70          1350 2325 1275 2550
     85         1425 2400 1350 2625
    71862 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    72          600 2625 1350 2625
    73 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    74          1350 2775 1275 2645 1125 2645 1050 2775 1125 2905 1275 2905
    75          1350 2775
    76 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    77          975 2775 900 2645 750 2645 675 2775 750 2905 900 2905
    78          975 2775
    79 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    80          4800 3000 4725 2870 4575 2870 4500 3000 4575 3130 4725 3130
    81          4800 3000
    82 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    83          5100 3000 5025 2870 4875 2870 4800 3000 4875 3130 5025 3130
    84          5100 3000
    85 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    86          9300 3000 9225 2870 9075 2870 9000 3000 9075 3130 9225 3130
    87          9300 3000
    88 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    89          9600 3000 9525 2870 9375 2870 9300 3000 9375 3130 9525 3130
    90          9600 3000
    91 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    92          675 2775 975 2775
    93 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    94          1050 2775 1350 2775
    95 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    96          4875 4950 4800 4820 4650 4820 4575 4950 4650 5080 4800 5080
    97          4875 4950
    98 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    99          4575 4950 4875 4950
    100 2 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
    101          3525 4970 3450 4840 3300 4840 3225 4970 3300 5100 3450 5100
    102          3525 4970
     87         675 2700 1425 2700
    103884 1 -1 0 0 0 12 0.0000 2 135 315 2850 4275 exit\001
    104894 1 -1 0 0 0 12 0.0000 2 135 315 7350 4275 exit\001
     
    1211064 1 -1 0 0 0 12 0.0000 2 135 495 4050 1275 queue\001
    1221074 1 -1 0 0 0 12 0.0000 2 165 420 4050 1050 entry\001
    123 4 0 0 50 -1 0 11 0.0000 2 120 705 450 2250 Condition\001
    124 4 0 0 50 -1 0 11 0.0000 2 165 630 3600 5025 signalled\001
    125 4 0 0 50 -1 0 11 0.0000 2 165 525 4950 5025 waiting\001
     1084 0 0 50 -1 0 11 0.0000 2 120 705 600 2325 Condition\001
     1094 0 -1 0 0 0 12 0.0000 2 180 930 6450 4725 routine ptrs\001
     1104 0 -1 0 0 0 12 0.0000 2 135 1050 3300 4725 active thread\001
     1114 0 -1 0 0 0 12 0.0000 2 135 1215 4725 4725 blocked thread\001
  • doc/proposals/concurrency/style/cfa-format.tex

    rf5c3b6c r0fe4e62  
    254254}{}
    255255
     256\lstnewenvironment{gocode}[1][]{
     257  \lstset{
     258    language = Golang,
     259    style=defaultStyle,
     260    #1
     261  }
     262}{}
     263
    256264\newcommand{\zero}{\lstinline{zero_t}\xspace}
    257265\newcommand{\one}{\lstinline{one_t}\xspace}
  • doc/proposals/concurrency/text/basics.tex

    rf5c3b6c r0fe4e62  
    99At its core, concurrency is based on having multiple call-stacks and scheduling among threads of execution executing on these stacks. Concurrency without parallelism only requires having multiple call stacks (or contexts) for a single thread of execution.
    1010
    11 Indeed, while execution with a single thread and multiple stacks where the thread is self-scheduling deterministically across the stacks is called coroutining, execution with a single and multiple stacks but where the thread is scheduled by an oracle (non-deterministic from the thread perspective) across the stacks is called concurrency.
    12 
    13 Therefore, a minimal concurrency system can be achieved by creating coroutines, which instead of context switching among each other, always ask an oracle where to context switch next. While coroutines can execute on the caller's stack-frame, stackfull coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model \cit. The oracle/scheduler can either be a stackless or stackfull entity and correspondingly require one or two context switches to run a different coroutine. In any case, a subset of concurrency related challenges start to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption. Indeed, concurrency challenges appear with non-determinism. Using mutual-exclusion or synchronisation are ways of limiting the lack of determinism in a system. A scheduler introduces order of execution uncertainty, while preemption introduces uncertainty about where context-switches occur. Now it is important to understand that uncertainty is not undesireable; uncertainty can often be used by systems to significantly increase performance and is often the basis of giving a user the illusion that tasks are running in parallel. Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows\cit.
     11Execution with a single thread and multiple stacks where the thread is self-scheduling deterministically across the stacks is called coroutining. Execution with a single and multiple stacks but where the thread is scheduled by an oracle (non-deterministic from the thread perspective) across the stacks is called concurrency.
     12
     13Therefore, a minimal concurrency system can be achieved by creating coroutines, which instead of context switching among each other, always ask an oracle where to context switch next. While coroutines can execute on the caller's stack-frame, stackfull coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model (a.k.a non-preemptive scheduling). The oracle/scheduler can either be a stackless or stackfull entity and correspondingly require one or two context switches to run a different coroutine. In any case, a subset of concurrency related challenges start to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption.
     14
     15A scheduler introduces order of execution uncertainty, while preemption introduces uncertainty about where context-switches occur. Mutual-exclusion and synchronisation are ways of limiting non-determinism in a concurrent system. Now it is important to understand that uncertainty is desireable; uncertainty can be used by runtime systems to significantly increase performance and is often the basis of giving a user the illusion that tasks are running in parallel. Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows.
    1416
    1517\section{\protect\CFA 's Thread Building Blocks}
    16 One of the important features that is missing in C is threading. On modern architectures, a lack of threading is unacceptable\cite{Sutter05, Sutter05b}, and therefore modern programming languages must have the proper tools to allow users to write performant concurrent and/or parallel programs. As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. And being a system-level language means programmers expect to choose precisely which features they need and which cost they are willing to pay.
     18One of the important features that is missing in C is threading. On modern architectures, a lack of threading is unacceptable\cite{Sutter05, Sutter05b}, and therefore modern programming languages must have the proper tools to allow users to write performant concurrent programs to take advantage of parallelism. As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. And being a system-level language means programmers expect to choose precisely which features they need and which cost they are willing to pay.
    1719
    1820\section{Coroutines: A stepping stone}\label{coroutine}
    19 While the main focus of this proposal is concurrency and parallelism, it is important to address coroutines, which are actually a significant building block of a concurrency system. Coroutines need to deal with context-switchs and other context-management operations. Therefore, this proposal includes coroutines both as an intermediate step for the implementation of threads, and a first class feature of \CFA. Furthermore, many design challenges of threads are at least partially present in designing coroutines, which makes the design effort that much more relevant. The core \acrshort{api} of coroutines revolve around two features: independent call stacks and \code{suspend}/\code{resume}.
    20 
    21 A good example of a problem made easier with coroutines is genereting the fibonacci sequence. This problem comes with the challenge of decoupling how a sequence is generated and how it is used. Figure \ref{fig:fibonacci-c} shows conventional approaches to writing generators in C. All three of these approach suffer from strong coupling. The left and center approaches require that the generator have knowledge of how the sequence will be used, while the rightmost approach requires to user to hold internal state between calls on behalf of th sequence generator and makes it much harder to handle corner cases like the Fibonacci seed.
     21While the main focus of this proposal is concurrency and parallelism, it is important to address coroutines, which are actually a significant building block of a concurrency system. Coroutines need to deal with context-switches and other context-management operations. Therefore, this proposal includes coroutines both as an intermediate step for the implementation of threads, and a first class feature of \CFA. Furthermore, many design challenges of threads are at least partially present in designing coroutines, which makes the design effort that much more relevant. The core \acrshort{api} of coroutines revolve around two features: independent call stacks and \code{suspend}/\code{resume}.
     22
    2223\begin{figure}
    23 \label{fig:fibonacci-c}
    2424\begin{center}
    2525\begin{tabular}{c @{\hskip 0.025in}|@{\hskip 0.025in} c @{\hskip 0.025in}|@{\hskip 0.025in} c}
     
    4545        }
    4646}
     47
     48int main() {
     49        void print_fib(int n) {
     50                printf("%d\n", n);
     51        }
     52
     53        fibonacci_func(
     54                10, print_fib
     55        );
     56
     57
     58
     59}
    4760\end{ccode}&\begin{ccode}[tabsize=2]
    4861//Using output array
     
    6275                        f2 = next;
    6376                }
    64                 *array = next;
    65                 array++;
    66         }
     77                array[i] = next;
     78        }
     79}
     80
     81
     82int main() {
     83        int a[10];
     84
     85        fibonacci_func(
     86                10, a
     87        );
     88
     89        for(int i=0;i<10;i++){
     90                printf("%d\n", a[i]);
     91        }
     92
    6793}
    6894\end{ccode}&\begin{ccode}[tabsize=2]
     
    7096typedef struct {
    7197        int f1, f2;
    72 } iterator_t;
     98} Iterator_t;
    7399
    74100int fibonacci_state(
    75         iterator_t * it
     101        Iterator_t * it
    76102) {
    77103        int f;
    78104        f = it->f1 + it->f2;
    79105        it->f2 = it->f1;
    80         it->f1 = f;
     106        it->f1 = max(f,1);
    81107        return f;
    82108}
     
    87113
    88114
     115
     116int main() {
     117        Iterator_t it={0,0};
     118
     119        for(int i=0;i<10;i++){
     120                printf("%d\n",
     121                        fibonacci_state(
     122                                &it
     123                        );
     124                );
     125        }
     126
     127}
    89128\end{ccode}
    90129\end{tabular}
    91130\end{center}
    92131\caption{Different implementations of a fibonacci sequence generator in C.}
     132\label{lst:fibonacci-c}
    93133\end{figure}
    94134
    95 
    96 Figure \ref{fig:fibonacci-cfa} is an example of a solution to the fibonnaci problem using \CFA coroutines, using the coroutine stack to hold sufficient state for the generation. This solution has the advantage of having very strong decoupling between how the sequence is generated and how it is used. Indeed, this version is a easy to use as the \code{fibonacci_state} solution, while the imlpementation is very similar to the \code{fibonacci_func} example.
     135A good example of a problem made easier with coroutines is generators, like the fibonacci sequence. This problem comes with the challenge of decoupling how a sequence is generated and how it is used. Figure \ref{lst:fibonacci-c} shows conventional approaches to writing generators in C. All three of these approach suffer from strong coupling. The left and center approaches require that the generator have knowledge of how the sequence is used, while the rightmost approach requires holding internal state between calls on behalf of the generator and makes it much harder to handle corner cases like the Fibonacci seed.
     136
     137Figure \ref{lst:fibonacci-cfa} is an example of a solution to the fibonnaci problem using \CFA coroutines, where the coroutine stack holds sufficient state for the generation. This solution has the advantage of having very strong decoupling between how the sequence is generated and how it is used. Indeed, this version is as easy to use as the \code{fibonacci_state} solution, while the imlpementation is very similar to the \code{fibonacci_func} example.
    97138
    98139\begin{figure}
    99 \label{fig:fibonacci-cfa}
    100140\begin{cfacode}
    101141coroutine Fibonacci {
     
    108148
    109149//main automacically called on first resume
    110 void main(Fibonacci & this) {
     150void main(Fibonacci & this) with (this) {
    111151        int fn1, fn2;           //retained between resumes
    112         this.fn = 0;
    113         fn1 = this.fn;
     152        fn = 0;
     153        fn1 = fn;
    114154        suspend(this);          //return to last resume
    115155
    116         this.fn = 1;
     156        fn = 1;
    117157        fn2 = fn1;
    118         fn1 = this.fn;
     158        fn1 = fn;
    119159        suspend(this);          //return to last resume
    120160
    121161        for ( ;; ) {
    122                 this.fn = fn1 + fn2;
     162                fn = fn1 + fn2;
    123163                fn2 = fn1;
    124                 fn1 = this.fn;
     164                fn1 = fn;
    125165                suspend(this);  //return to last resume
    126166        }
     
    140180\end{cfacode}
    141181\caption{Implementation of fibonacci using coroutines}
     182\label{lst:fibonacci-cfa}
    142183\end{figure}
    143184
    144 \subsection{Construction}
    145 One important design challenge for coroutines and threads (shown in section \ref{threads}) is that the runtime system needs to run code after the user-constructor runs to connect the object into the system. In the case of coroutines, this challenge is simpler since there is no non-determinism from preemption or scheduling. However, the underlying challenge remains the same for coroutines and threads.
    146 
    147 The runtime system needs to create the coroutine's stack and more importantly prepare it for the first resumption. The timing of the creation is non-trivial since users both expect to have fully constructed objects once execution enters the coroutine main and to be able to resume the coroutine from the constructor. As regular objects, constructors can leak coroutines before they are ready. There are several solutions to this problem but the chosen options effectively forces the design of the coroutine.
    148 
    149 Furthermore, \CFA faces an extra challenge as polymorphic routines create invisible thunks when casted to non-polymorphic routines and these thunks have function scope. For example, the following code, while looking benign, can run into undefined behaviour because of thunks:
    150 
    151 \begin{cfacode}
    152 //async: Runs function asynchronously on another thread
    153 forall(otype T)
    154 extern void async(void (*func)(T*), T* obj);
    155 
    156 forall(otype T)
    157 void noop(T *) {}
    158 
    159 void bar() {
    160         int a;
    161         async(noop, &a);
    162 }
    163 \end{cfacode}
    164 
    165 The generated C code\footnote{Code trimmed down for brevity} creates a local thunk to hold type information:
    166 
    167 \begin{ccode}
    168 extern void async(/* omitted */, void (*func)(void *), void *obj);
    169 
    170 void noop(/* omitted */, void *obj){}
    171 
    172 void bar(){
    173         int a;
    174         void _thunk0(int *_p0){
    175                 /* omitted */
    176                 noop(/* omitted */, _p0);
    177         }
    178         /* omitted */
    179         async(/* omitted */, ((void (*)(void *))(&_thunk0)), (&a));
    180 }
    181 \end{ccode}
    182 The problem in this example is a storage management issue, the function pointer \code{_thunk0} is only valid until the end of the block. This extra challenge limits which solutions are viable because storing the function pointer for too long causes undefined behavior; i.e. the stack based thunk being destroyed before it was used. This challenge is an extension of challenges that come with second-class routines. Indeed, GCC nested routines also have the limitation that the routines cannot be passed outside of the scope of the functions these were declared in. The case of coroutines and threads is simply an extension of this problem to multiple call-stacks.
    183 
    184 \subsection{Alternative: Composition}
    185 One solution to this challenge is to use composition/containement, where uses add insert a coroutine field which contains the necessary information to manage the coroutine.
    186 
    187 \begin{cfacode}
    188 struct Fibonacci {
    189         int fn; //used for communication
    190         coroutine c; //composition
    191 };
    192 
    193 void ?{}(Fibonacci & this) {
    194         this.fn = 0;
    195         (this.c){}; //Call constructor to initialize coroutine
    196 }
    197 \end{cfacode}
    198 There are two downsides to this approach. The first, which is relatively minor, made aware of the main routine pointer. This information must either be store in the coroutine runtime data or in its static type structure. When using composition, all coroutine handles have the same static type structure which means the pointer to the main needs to be part of the runtime data. This requirement means the coroutine data must be made larger to store a value that is actually a compile time constant (address of the main routine). The second problem, which is both subtle and significant, is that now users can get the initialisation order of coroutines wrong. Indeed, every field of a \CFA struct is constructed but in declaration order, unless users explicitly write otherwise. This semantics means that users who forget to initialize the coroutine handle may resume the coroutine with an uninitilized object. For coroutines, this is unlikely to be a problem, for threads however, this is a significant problem. Figure \ref{fig:fmt-line} shows the \code{Format} coroutine which rearranges text in order to group characters into blocks of fixed size. This is a good example where the control flow is made much simpler from being able to resume the coroutine from the constructor and highlights the idea that interesting control flow can occor in the constructor.
     185Figure \ref{lst:fmt-line} shows the \code{Format} coroutine which rearranges text in order to group characters into blocks of fixed size. The example takes advantage of resuming coroutines in the constructor to simplify the code and highlights the idea that interesting control flow can occur in the constructor.
     186
    199187\begin{figure}
    200 \label{fig:fmt-line}
    201188\begin{cfacode}[tabsize=3]
    202189//format characters into blocks of 4 and groups of 5 blocks per line
     
    244231\end{cfacode}
    245232\caption{Formatting text into lines of 5 blocks of 4 characters.}
     233\label{lst:fmt-line}
    246234\end{figure}
    247235
     236\subsection{Construction}
     237One important design challenge for coroutines and threads (shown in section \ref{threads}) is that the runtime system needs to run code after the user-constructor runs to connect the fully constructed object into the system. In the case of coroutines, this challenge is simpler since there is no non-determinism from preemption or scheduling. However, the underlying challenge remains the same for coroutines and threads.
     238
     239The runtime system needs to create the coroutine's stack and more importantly prepare it for the first resumption. The timing of the creation is non-trivial since users both expect to have fully constructed objects once execution enters the coroutine main and to be able to resume the coroutine from the constructor. As regular objects, constructors can leak coroutines before they are ready. There are several solutions to this problem but the chosen options effectively forces the design of the coroutine.
     240
     241Furthermore, \CFA faces an extra challenge as polymorphic routines create invisible thunks when casted to non-polymorphic routines and these thunks have function scope. For example, the following code, while looking benign, can run into undefined behaviour because of thunks:
     242
     243\begin{cfacode}
     244//async: Runs function asynchronously on another thread
     245forall(otype T)
     246extern void async(void (*func)(T*), T* obj);
     247
     248forall(otype T)
     249void noop(T*) {}
     250
     251void bar() {
     252        int a;
     253        async(noop, &a); //start thread running noop with argument a
     254}
     255\end{cfacode}
     256
     257The generated C code\footnote{Code trimmed down for brevity} creates a local thunk to hold type information:
     258
     259\begin{ccode}
     260extern void async(/* omitted */, void (*func)(void *), void *obj);
     261
     262void noop(/* omitted */, void *obj){}
     263
     264void bar(){
     265        int a;
     266        void _thunk0(int *_p0){
     267                /* omitted */
     268                noop(/* omitted */, _p0);
     269        }
     270        /* omitted */
     271        async(/* omitted */, ((void (*)(void *))(&_thunk0)), (&a));
     272}
     273\end{ccode}
     274The problem in this example is a storage management issue, the function pointer \code{_thunk0} is only valid until the end of the block, which limits the viable solutions because storing the function pointer for too long causes undefined behavior; i.e., the stack-based thunk being destroyed before it can be used. This challenge is an extension of challenges that come with second-class routines. Indeed, GCC nested routines also have the limitation that nested routine cannot be passed outside of the declaration scope. The case of coroutines and threads is simply an extension of this problem to multiple call-stacks.
     275
     276\subsection{Alternative: Composition}
      277One solution to this challenge is to use composition/containment, where coroutine fields are added to manage the coroutine.
     278
     279\begin{cfacode}
     280struct Fibonacci {
     281        int fn; //used for communication
     282        coroutine c; //composition
     283};
     284
     285void FibMain(void *) {
     286        //...
     287}
     288
     289void ?{}(Fibonacci & this) {
     290        this.fn = 0;
     291        //Call constructor to initialize coroutine
     292        (this.c){myMain};
     293}
     294\end{cfacode}
      295The downside of this approach is that users need to correctly construct the coroutine handle before using it. Like any other object, doing so requires users to carefully choose construction order to prevent usage of unconstructed objects. However, in the case of coroutines, users must also pass to the coroutine information about the coroutine main, like in the previous example. This opens the door for user errors and requires extra runtime storage to pass at runtime information that can be known statically.
    248296
    249297\subsection{Alternative: Reserved keyword}
     
    255303};
    256304\end{cfacode}
    257 This mean the compiler can solve problems by injecting code where needed. The downside of this approach is that it makes coroutine a special case in the language. Users who would want to extend coroutines or build their own for various reasons can only do so in ways offered by the language. Furthermore, implementing coroutines without language supports also displays the power of the programming language used. While this is ultimately the option used for idiomatic \CFA code, coroutines and threads can both be constructed by users without using the language support. The reserved keywords are only present to improve ease of use for the common cases.
      305The \code{coroutine} keyword means the compiler can find and inject code where needed. The downside of this approach is that it makes coroutine a special case in the language. Users wanting to extend coroutines or build their own for various reasons can only do so in ways offered by the language. Furthermore, implementing coroutines without language support also displays the power of the programming language used. While this is ultimately the option used for idiomatic \CFA code, coroutines and threads can still be constructed by users without using the language support. The reserved keywords are only present to improve ease of use for the common cases.
    258306
     259307\subsection{Alternative: Lambda Objects}
    260308
    261 For coroutines as for threads, many implementations are based on routine pointers or function objects\cit. For example, Boost implements coroutines in terms of four functor object types:
     309For coroutines as for threads, many implementations are based on routine pointers or function objects\cite{Butenhof97, ANSI14:C++, MS:VisualC++, BoostCoroutines15}. For example, Boost implements coroutines in terms of four functor object types:
    262310\begin{cfacode}
    263311asymmetric_coroutine<>::pull_type
     
    268316Often, the canonical threading paradigm in languages is based on function pointers, pthread being one of the most well known examples. The main problem of this approach is that the thread usage is limited to a generic handle that must otherwise be wrapped in a custom type. Since the custom type is simple to write in \CFA and solves several issues, added support for routine/lambda based coroutines adds very little.
    269317
    270 A variation of this would be to use an simple function pointer in the same way pthread does for threads :
      318A variation of this would be to use a simple function pointer in the same way pthread does for threads:
    271319\begin{cfacode}
    272320void foo( coroutine_t cid, void * arg ) {
     
    281329}
    282330\end{cfacode}
    283 This semantic is more common for thread interfaces than coroutines but would work equally well. As discussed in section \ref{threads}, this approach is superseeded by static approaches in terms of expressivity.
      331This semantics is more common for thread interfaces than for coroutines but works equally well. As discussed in section \ref{threads}, this approach is superseded by static approaches in terms of expressivity.
    284332
    285333\subsection{Alternative: Trait-based coroutines}
     
    350398\end{cfacode}
    351399
    352 In this example, threads of type \code{foo} start execution in the \code{void main(foo &)} routine, which prints \code{"Hello World!"}. While this thesis encourages this approach to enforce strongly-typed programming, users may prefer to use the routine-based thread semantics for the sake of simplicity. With these semantics it is trivial to write a thread type that takes a function pointer as a parameter and executes it on its stack asynchronously
     400In this example, threads of type \code{foo} start execution in the \code{void main(foo &)} routine, which prints \code{"Hello World!"}. While this thesis encourages this approach to enforce strongly-typed programming, users may prefer to use the routine-based thread semantics for the sake of simplicity. With the static semantics it is trivial to write a thread type that takes a function pointer as a parameter and executes it on its stack asynchronously.
    353401\begin{cfacode}
    354402typedef void (*voidFunc)(int);
     
    361409void ?{}(FuncRunner & this, voidFunc inFunc, int arg) {
    362410        this.func = inFunc;
     411        this.arg  = arg;
    363412}
    364413
    365414void main(FuncRunner & this) {
     415        //thread starts here and runs the function
    366416        this.func( this.arg );
    367417}
    368418\end{cfacode}
    369419
    370 An consequence of the strongly typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \acrshort{api}.
     420A consequence of the strongly-typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \acrshort{api}.
    371421
    372422Of course for threads to be useful, it must be possible to start and stop threads and wait for them to complete execution. While using an \acrshort{api} such as \code{fork} and \code{join} is relatively common in the literature, such an interface is unnecessary. Indeed, the simplest approach is to use \acrshort{raii} principles and have threads \code{fork} after the constructor has completed and \code{join} before the destructor runs.
     
    389439\end{cfacode}
    390440
    391 This semantic has several advantages over explicit semantics: a thread is always started and stopped exaclty once and users cannot make any progamming errors and it naturally scales to multiple threads meaning basic synchronisation is very simple
      441This semantic has several advantages over explicit semantics: a thread is always started and stopped exactly once, users cannot make any programming errors, and it naturally scales to multiple threads meaning basic synchronisation is very simple.
    392442
    393443\begin{cfacode}
     
    411461\end{cfacode}
    412462
    413 However, one of the drawbacks of this approach is that threads now always form a lattice, that is they are always destroyed in opposite order of construction because of block structure. This restriction is relaxed by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created
     463However, one of the drawbacks of this approach is that threads now always form a lattice, that is they are always destroyed in the opposite order of construction because of block structure. This restriction is relaxed by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created.
    414464
    415465\begin{cfacode}
  • doc/proposals/concurrency/text/cforall.tex

    rf5c3b6c r0fe4e62  
    11% ======================================================================
    22% ======================================================================
    3 \chapter{Cforall crash course}
     3\chapter{Cforall Overview}
    44% ======================================================================
    55% ======================================================================
    66
    7 This thesis presents the design for a set of concurrency features in \CFA. Since it is a new dialect of C, the following is a quick introduction to the language, specifically tailored to the features needed to support concurrency.
     7The following is a quick introduction to the \CFA language, specifically tailored to the features needed to support concurrency.
    88
    9 \CFA is a extension of ISO-C and therefore supports all of the same paradigms as C. It is a non-object oriented system language, meaning most of the major abstractions have either no runtime overhead or can be opt-out easily. Like C, the basics of \CFA revolve around structures and routines, which are thin abstractions over machine code. The vast majority of the code produced by the \CFA translator respects memory-layouts and calling-conventions laid out by C. Interestingly, while \CFA is not an object-oriented language, lacking the concept of a received (e.g.: this), it does have some notion of objects\footnote{C defines the term objects as : [Where to I get the C11 reference manual?]}, most importantly construction and destruction of objects. Most of the following pieces of code can be found on the \CFA website \cite{www-cfa}
     9\CFA is a extension of ISO-C and therefore supports all of the same paradigms as C. It is a non-object oriented system language, meaning most of the major abstractions have either no runtime overhead or can be opt-out easily. Like C, the basics of \CFA revolve around structures and routines, which are thin abstractions over machine code. The vast majority of the code produced by the \CFA translator respects memory-layouts and calling-conventions laid out by C. Interestingly, while \CFA is not an object-oriented language, lacking the concept of a receiver (e.g., this), it does have some notion of objects\footnote{C defines the term objects as : ``region of data storage in the execution environment, the contents of which can represent
     10values''\cite[3.15]{C11}}, most importantly construction and destruction of objects. Most of the following code examples can be found on the \CFA website \cite{www-cfa}
    1011
    1112\section{References}
    1213
    13 Like \CC, \CFA introduces references as an alternative to pointers. In regards to concurrency, the semantics difference between pointers and references are not particularly relevant but since this document uses mostly references here is a quick overview of the semantics :
      14Like \CC, \CFA introduces rebindable references providing multiple dereferencing as an alternative to pointers. In regards to concurrency, the semantic difference between pointers and references is not particularly relevant, but since this document uses mostly references, here is a quick overview of the semantics:
    1415\begin{cfacode}
    1516int x, *p1 = &x, **p2 = &p1, ***p3 = &p2,
    16 &r1 = x,    &&r2 = r1,   &&&r3 = r2;
     17        &r1 = x,    &&r2 = r1,   &&&r3 = r2;
    1718***p3 = 3;                                                      //change x
    1819r3    = 3;                                                      //change x, ***r3
     
    2526sizeof(&ar[1]) == sizeof(int *);        //is true, i.e., the size of a reference
    2627\end{cfacode}
    27 The important thing to take away from this code snippet is that references offer a handle to an object much like pointers but which is automatically derefferenced when convinient.
      28The important take away from this code example is that references offer a handle to an object, much like pointers, but which is automatically dereferenced for convenience.
    2829
    2930\section{Overloading}
    3031
    31 Another important feature of \CFA is function overloading as in Java and \CC, where routine with the same name are selected based on the numbers and type of the arguments. As well, \CFA uses the return type as part of the selection criteria, as in Ada\cite{Ada}. For routines with multiple parameters and returns, the selection is complex.
     32Another important feature of \CFA is function overloading as in Java and \CC, where routines with the same name are selected based on the number and type of the arguments. As well, \CFA uses the return type as part of the selection criteria, as in Ada\cite{Ada}. For routines with multiple parameters and returns, the selection is complex.
    3233\begin{cfacode}
    3334//selection based on type and number of parameters
     
    4546double d = f(4);                //select (2)
    4647\end{cfacode}
    47 This feature is particularly important for concurrency since the runtime system relies on creating different types to represent concurrency objects. Therefore, overloading is necessary to prevent the need for long prefixes and other naming conventions that prevent name clashes. As seen in chapter \ref{basics}, routines main is an example that benefits from overloading.
     48This feature is particularly important for concurrency since the runtime system relies on creating different types to represent concurrency objects. Therefore, overloading is necessary to prevent the need for long prefixes and other naming conventions that prevent name clashes. As seen in chapter \ref{basics}, routine \code{main} is an example that benefits from overloading.
    4849
    4950\section{Operators}
    50 Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation would be, like so :
     51Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation occur, e.g.:
    5152\begin{cfacode}
    5253int ++? (int op);                       //unary prefix increment
     
    101102
    102103\section{Parametric Polymorphism}
    103 Routines in \CFA can also be reused for multiple types. This is done using the \code{forall} clause which gives \CFA it's name. \code{forall} clauses allow seperatly compiled routines to support generic usage over multiple types. For example, the following sum function will work for any type which support construction from 0 and addition :
     104Routines in \CFA can also be reused for multiple types. This capability is done using the \code{forall} clause which gives \CFA its name. \code{forall} clauses allow separately compiled routines to support generic usage over multiple types. For example, the following sum function works for any type that supports construction from 0 and addition :
    104105\begin{cfacode}
    105106//constraint type, 0 and +
     
    116117\end{cfacode}
    117118
    118 Since writing constraints on types can become cumbersome for more constrained functions, \CFA also has the concept of traits. Traits are named collection of constraints which can be used both instead and in addition to regular constraints:
     119Since writing constraints on types can become cumbersome for more constrained functions, \CFA also has the concept of traits. Traits are named collection of constraints that can be used both instead and in addition to regular constraints:
    119120\begin{cfacode}
    120121trait sumable( otype T ) {
     
    130131
    131132\section{with Clause/Statement}
    132 Since \CFA lacks the concept of a receiver, certain functions end-up needing to repeat variable names often, to solve this \CFA offers the \code{with} statement which opens an aggregate scope making its fields directly accessible (like Pascal).
     133Since \CFA lacks the concept of a receiver, certain functions end-up needing to repeat variable names often. To remove this inconvenience, \CFA provides the \code{with} statement, which opens an aggregate scope making its fields directly accessible (like Pascal).
    133134\begin{cfacode}
    134135struct S { int i, j; };
    135 int mem(S & this) with this             //with clause
     136int mem(S & this) with (this)           //with clause
    136137        i = 1;                                          //this->i
    137138        j = 2;                                          //this->j
     
    140141        struct S1 { ... } s1;
    141142        struct S2 { ... } s2;
    142         with s1                                         //with statement
     143        with (s1)                                       //with statement
    143144        {
    144145                //access fields of s1
    145146                //without qualification
    146                 with s2                                 //nesting
     147                with (s2)                                       //nesting
    147148                {
    148149                        //access fields of s1 and s2
     
    150151                }
    151152        }
    152         with s1, s2                             //scopes open in parallel
     153        with (s1, s2)                           //scopes open in parallel
    153154        {
    154155                //access fields of s1 and s2
  • doc/proposals/concurrency/text/concurrency.tex

    rf5c3b6c r0fe4e62  
    44% ======================================================================
    55% ======================================================================
    6 Several tool can be used to solve concurrency challenges. Since many of these challenges appear with the use of mutable shared-state, some languages and libraries simply disallow mutable shared-state (Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka (Scala)~\cite{Akka}). In these paradigms, interaction among concurrent objects relies on message passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely relate to networking concepts (channels\cit for example). However, in languages that use routine calls as their core abstraction-mechanism, these approaches force a clear distinction between concurrent and non-concurrent paradigms (i.e., message passing versus routine call). This distinction in turn means that, in order to be effective, programmers need to learn two sets of designs patterns. While this distinction can be hidden away in library code, effective use of the librairy still has to take both paradigms into account.
      6Several tools can be used to solve concurrency challenges. Since many of these challenges appear with the use of mutable shared-state, some languages and libraries simply disallow mutable shared-state (Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka (Scala)~\cite{Akka}). In these paradigms, interaction among concurrent objects relies on message passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely related to networking concepts (channels\cite{CSP,Go} for example). However, in languages that use routine calls as their core abstraction-mechanism, these approaches force a clear distinction between concurrent and non-concurrent paradigms (i.e., message passing versus routine call). This distinction in turn means that, in order to be effective, programmers need to learn two sets of design patterns. While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.
    77
    88Approaches based on shared memory are more closely related to non-concurrent paradigms since they often rely on basic constructs like routine calls and shared objects. At the lowest level, concurrent paradigms are implemented as atomic operations and locks. Many such mechanisms have been proposed, including semaphores~\cite{Dijkstra68b} and path expressions~\cite{Campbell74}. However, for productivity reasons it is desireable to have a higher-level construct be the core concurrency paradigm~\cite{HPP:Study}.
    99
    10 An approach that is worth mentionning because it is gaining in popularity is transactionnal memory~\cite{Dice10}[Check citation]. While this approach is even pursued by system languages like \CC\cit, the performance and feature set is currently too restrictive to be the main concurrency paradigm for systems language, which is why it was rejected as the core paradigm for concurrency in \CFA.
      10An approach that is worth mentioning because it is gaining in popularity is transactional memory~\cite{Dice10}[Check citation]. While this approach is even pursued by system languages like \CC\cite{Cpp-Transactions}, the performance and feature set is currently too restrictive to be the main concurrency paradigm for systems languages, which is why it was rejected as the core paradigm for concurrency in \CFA.
    1111
    1212One of the most natural, elegant, and efficient mechanisms for synchronization and communication, especially for shared-memory systems, is the \emph{monitor}. Monitors were first proposed by Brinch Hansen~\cite{Hansen73} and later described and extended by C.A.R.~Hoare~\cite{Hoare74}. Many programming languages---e.g., Concurrent Pascal~\cite{ConcurrentPascal}, Mesa~\cite{Mesa}, Modula~\cite{Modula-2}, Turing~\cite{Turing:old}, Modula-3~\cite{Modula-3}, NeWS~\cite{NeWS}, Emerald~\cite{Emerald}, \uC~\cite{Buhr92a} and Java~\cite{Java}---provide monitors as explicit language constructs. In addition, operating-system kernels and device drivers have a monitor-like structure, although they often use lower-level primitives such as semaphores or locks to simulate monitors. For these reasons, this project proposes monitors as the core concurrency-construct.
     
    1919
    2020\subsection{Synchronization}
    21 As for mutual-exclusion, low-level synchronisation primitives often offer good performance and good flexibility at the cost of ease of use. Again, higher-level mechanism often simplify usage by adding better coupling between synchronization and data, e.g.: message passing, or offering simple solution to otherwise involved challenges. An example is barging. As mentioned above, synchronization can be expressed as guaranteeing that event \textit{X} always happens before \textit{Y}. Most of the time, synchronisation happens around a critical section, where threads must acquire critical sections in a certain order. However, it may also be desirable to guarantee that event \textit{Z} does not occur between \textit{X} and \textit{Y}. Not satisfying this property called barging. For example, where event \textit{X} tries to effect event \textit{Y} but another thread acquires the critical section and emits \textit{Z} before \textit{Y}. Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs. This challenge is often split into two different methods, barging avoidance and barging prevention. Algorithms that use status flags and other flag variables to detect barging threads are said to be using barging avoidance while algorithms that baton-passing locks between threads instead of releasing the locks are said to be using barging prevention.
      21As for mutual-exclusion, low-level synchronisation primitives often offer good performance and good flexibility at the cost of ease of use. Again, higher-level mechanisms often simplify usage by adding better coupling between synchronization and data, e.g.: message passing, or offering simpler solutions to otherwise involved challenges. As mentioned above, synchronization can be expressed as guaranteeing that event \textit{X} always happens before \textit{Y}. Most of the time, synchronisation happens within a critical section, where threads must acquire mutual-exclusion in a certain order. However, it may also be desirable to guarantee that event \textit{Z} does not occur between \textit{X} and \textit{Y}. Not satisfying this property is called barging. For example, where event \textit{X} tries to effect event \textit{Y} but another thread acquires the critical section and emits \textit{Z} before \textit{Y}. The classic example is the thread that finishes using a resource and unblocks a thread waiting to use the resource, but the unblocked thread must compete again to acquire the resource. Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs. This challenge is often split into two different methods, barging avoidance and barging prevention. Algorithms that use status flags and other flag variables to detect barging threads are said to be using barging avoidance while algorithms that baton-pass locks between threads instead of releasing the locks are said to be using barging prevention.
    2222
    2323% ======================================================================
     
    7171\end{tabular}
    7272\end{center}
    73 Notice how the counter is used without any explicit synchronisation and yet supports thread-safe semantics for both reading and writting.
    74 
    75 Here, the constructor(\code{?\{\}}) uses the \code{nomutex} keyword to signify that it does not acquire the monitor mutual-exclusion when constructing. This semantics is because an object not yet constructed should never be shared and therefore does not require mutual exclusion. The prefix increment operator uses \code{mutex} to protect the incrementing process from race conditions. Finally, there is a conversion operator from \code{counter_t} to \code{size_t}. This conversion may or may not require the \code{mutex} keyword depending on whether or not reading a \code{size_t} is an atomic operation.
    76 
    77 For maximum usability, monitors use \gls{multi-acq} semantics, which means a single thread can acquire multiple times the same monitor without deadlock. For example, figure \ref{fig:search} uses recursion and \gls{multi-acq} to print values inside a binary tree.
      73Notice how the counter is used without any explicit synchronisation and yet supports thread-safe semantics for both reading and writing, which is similar in usage to the \CC \code{atomic} template.
     74
     75Here, the constructor(\code{?\{\}}) uses the \code{nomutex} keyword to signify that it does not acquire the monitor mutual-exclusion when constructing. This semantics is because an object not yet con\-structed should never be shared and therefore does not require mutual exclusion. The prefix increment operator uses \code{mutex} to protect the incrementing process from race conditions. Finally, there is a conversion operator from \code{counter_t} to \code{size_t}. This conversion may or may not require the \code{mutex} keyword depending on whether or not reading a \code{size_t} is an atomic operation.
     76
     77For maximum usability, monitors use \gls{multi-acq} semantics, which means a single thread can acquire the same monitor multiple times without deadlock. For example, figure \ref{fig:search} uses recursion and \gls{multi-acq} to print values inside a binary tree.
    7878\begin{figure}
    7979\label{fig:search}
     
    9595\end{figure}
    9696
    97 Having both \code{mutex} and \code{nomutex} keywords is redundant based on the meaning of a routine having neither of these keywords. For example, given a routine without qualifiers \code{void foo(counter_t & this)}, then it is reasonable that it should default to the safest option \code{mutex}, whereas assuming \code{nomutex} is unsafe and may cause subtle errors. In fact, \code{nomutex} is the "normal" parameter behaviour, with the \code{nomutex} keyword effectively stating explicitly that "this routine is not special". Another alternative is making exactly one of these keywords mandatory, which would provide the same semantics but without the ambiguity of supporting routines with neither keyword. Mandatory keywords would also have the added benefit of being self-documented but at the cost of extra typing. While there are several benefits to mandatory keywords, they do bring a few challenges. Mandatory keywords in \CFA would imply that the compiler must know without doubt whether or not a parameter is a monitor or not. Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred. For this reason, \CFA only has the \code{mutex} keyword and uses no keyword to mean \code{nomutex}.
     97Having both \code{mutex} and \code{nomutex} keywords is redundant based on the meaning of a routine having neither of these keywords. For example, given a routine without qualifiers \code{void foo(counter_t & this)}, then it is reasonable that it should default to the safest option \code{mutex}, whereas assuming \code{nomutex} is unsafe and may cause subtle errors. In fact, \code{nomutex} is the ``normal'' parameter behaviour, with the \code{nomutex} keyword effectively stating explicitly that ``this routine is not special''. Another alternative is making exactly one of these keywords mandatory, which provides the same semantics but without the ambiguity of supporting routines with neither keyword. Mandatory keywords would also have the added benefit of being self-documented but at the cost of extra typing. While there are several benefits to mandatory keywords, they do bring a few challenges. Mandatory keywords in \CFA would imply that the compiler must know without doubt whether or not a parameter is a monitor or not. Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred. For this reason, \CFA only has the \code{mutex} keyword and uses no keyword to mean \code{nomutex}.
    9898
    9999The next semantic decision is to establish when \code{mutex} may be used as a type qualifier. Consider the following declarations:
     
    113113int f5(monitor * mutex m []); //Not Okay : Array of unkown length
    114114\end{cfacode}
    115 Note that not all array functions are actually distinct in the type system sense. However, even the code generation could tell the difference, the extra information is still not sufficient to extend meaningfully the monitor call semantic.
    116 
    117 Unlike object-oriented monitors, where calling a mutex member \emph{implicitly} acquires mutual-exclusion often receives an object, \CFA uses an explicit mechanism to acquire mutual-exclusion. A consequence of this approach is that it extends naturally to multi-monitor calls.
     115Note that not all array functions are actually distinct in the type system. However, even if the code generation could tell the difference, the extra information is still not sufficient to extend meaningfully the monitor call semantic.
     116
     117Unlike object-oriented monitors, where calling a mutex member \emph{implicitly} acquires mutual-exclusion of the receiver object, \CFA uses an explicit mechanism to acquire mutual-exclusion. A consequence of this approach is that it extends naturally to multi-monitor calls.
    118118\begin{cfacode}
    119119int f(MonitorA & mutex a, MonitorB & mutex b);
     
    123123f(a,b);
    124124\end{cfacode}
    125 The capacity to acquire multiple locks before entering a critical section is called \emph{\gls{bulk-acq}}. In practice, writing multi-locking routines that do not lead to deadlocks is tricky. Having language support for such a feature is therefore a significant asset for \CFA. In the case presented above, \CFA guarantees that the order of aquisition is consistent across calls to routines using the same monitors as arguments. However, since \CFA monitors use \gls{multi-acq} locks, users can effectively force the acquiring order. For example, notice which routines use \code{mutex}/\code{nomutex} and how this affects aquiring order:
     125While OO monitors could be extended with a mutex qualifier for multiple-monitor calls, no example of this feature could be found. The capacity to acquire multiple locks before entering a critical section is called \emph{\gls{bulk-acq}}. In practice, writing multi-locking routines that do not lead to deadlocks is tricky. Having language support for such a feature is therefore a significant asset for \CFA. In the case presented above, \CFA guarantees that the order of acquisition is consistent across calls to different routines using the same monitors as arguments. This consistent ordering means acquiring multiple monitors in this way is safe from deadlock. However, users can still force the acquiring order. For example, notice which routines use \code{mutex}/\code{nomutex} and how this affects acquiring order:
    126126\begin{cfacode}
    127127void foo(A & mutex a, B & mutex b) { //acquire a & b
     
    139139The \gls{multi-acq} monitor lock allows a monitor lock to be acquired by both \code{bar} or \code{baz} and acquired again in \code{foo}. In the calls to \code{bar} and \code{baz} the monitors are acquired in opposite order.
    140140
    141 However, such use leads to the lock acquiring order problem. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle mistake means that calling these routines concurrently may lead to deadlock and is therefore undefined behavior. As shown on several occasion\cit, solving this problem requires:
     141However, such use leads to the lock acquiring order problem. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle mistake means that calling these routines concurrently may lead to deadlock and is therefore undefined behavior. As shown\cite{Lister77}, solving this problem requires:
    142142\begin{enumerate}
    143143        \item Dynamically tracking of the monitor-call order.
    144144        \item Implement rollback semantics.
    145145\end{enumerate}
    146 While the first requirement is already a significant constraint on the system, implementing a general rollback semantics in a C-like language is prohibitively complex \cit. In \CFA, users simply need to be carefull when acquiring multiple monitors at the same time or only use \gls{bulk-acq} of all the monitors.
    147 
    148 \Gls{multi-acq} and \gls{bulk-acq} can be used together in interesting ways, for example:
     146While the first requirement is already a significant constraint on the system, implementing a general rollback semantics in a C-like language is still prohibitively complex \cite{Dice10}. In \CFA, users simply need to be careful when acquiring multiple monitors at the same time or only use \gls{bulk-acq} of all the monitors. While \CFA provides only a partial solution, many systems provide no solution and the \CFA partial solution handles many useful cases.
     147
     148For example, \gls{multi-acq} and \gls{bulk-acq} can be used together in interesting ways:
    149149\begin{cfacode}
    150150monitor bank { ... };
     
    157157}
    158158\end{cfacode}
    159 This example shows a trivial solution to the bank account transfer problem\cit. Without \gls{multi-acq} and \gls{bulk-acq}, the solution to this problem is much more involved and requires carefull engineering.
    160 
    161 \subsubsection{\code{mutex} statement} \label{mutex-stmt}
    162 
    163 The call semantics discussed aboved have one software engineering issue, only a named routine can acquire the mutual-exclusion of a set of monitor. \CFA offers the \code{mutex} statement to workaround the need for unnecessary names, avoiding a major software engineering problem\cit. Listing \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitor is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.
     159This example shows a trivial solution to the bank-account transfer-problem\cite{BankTransfer}. Without \gls{multi-acq} and \gls{bulk-acq}, the solution to this problem is much more involved and requires careful engineering.
     160
     161\subsection{\code{mutex} statement} \label{mutex-stmt}
     162
     163The call semantics discussed above have one software engineering issue: only a named routine can acquire the mutual-exclusion of a set of monitors. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem\cite{2FTwoHardThings}. Listing \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitors is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.
    164164
    165165\begin{figure}
     
    218218\end{cfacode}
    219219
    220 
    221 % ======================================================================
    222 % ======================================================================
    223 \section{Internal scheduling} \label{insched}
    224 % ======================================================================
    225 % ======================================================================
    226 In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronisation. With monitors, this capability is generally achieved with internal or external scheduling as in\cit. Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side.
     220Like threads and coroutines, monitors are defined in terms of traits with some additional language support in the form of the \code{monitor} keyword. The monitor trait is:
     221\begin{cfacode}
     222trait is_monitor(dtype T) {
     223        monitor_desc * get_monitor( T & );
     224        void ^?{}( T & mutex );
     225};
     226\end{cfacode}
     227Note that the destructor of a monitor must be a \code{mutex} routine. This requirement ensures that the destructor has mutual-exclusion. As with any object, any call to a monitor, using \code{mutex} or otherwise, is Undefined Behaviour after the destructor has run.
     228
     229% ======================================================================
     230% ======================================================================
     231\section{Internal scheduling} \label{intsched}
     232% ======================================================================
     233% ======================================================================
     234In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronisation. With monitors, this capability is generally achieved with internal or external scheduling as in \cite{Hoare74}. Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side.
    227235
    228236First, here is a simple example of such a technique:
     
    248256\end{cfacode}
    249257
    250 There are two details to note here. First, the \code{signal} is a delayed operation, it only unblocks the waiting thread when it reaches the end of the critical section. This semantic is needed to respect mutual-exclusion. Second, in \CFA, a \code{condition} variable can be stored/created independently of a monitor. Here routine \code{foo} waits for the \code{signal} from \code{bar} before making further progress, effectively ensuring a basic ordering.
    251 
    252 An important aspect of the implementation is that \CFA does not allow barging, which means that once function \code{bar} releases the monitor, foo is guaranteed to resume immediately after (unless some other thread waited on the same condition). This guarantees offers the benefit of not having to loop arount waits in order to guarantee that a condition is still met. The main reason \CFA offers this guarantee is that users can easily introduce barging if it becomes a necessity but adding barging prevention or barging avoidance is more involved without language support. Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design of \CFA concurrency.
     258There are two details to note here. First, the \code{signal} is a delayed operation, it only unblocks the waiting thread when it reaches the end of the critical section. This semantic is needed to respect mutual-exclusion. The alternative is to return immediately after the call to \code{signal}, which is significantly more restrictive. Second, in \CFA, while it is common to store a \code{condition} as a field of the monitor, a \code{condition} variable can be stored/created independently of a monitor. Here routine \code{foo} waits for the \code{signal} from \code{bar} before making further progress, effectively ensuring a basic ordering.
     259
     260An important aspect of the implementation is that \CFA does not allow barging, which means that once function \code{bar} releases the monitor, \code{foo} is guaranteed to resume immediately after (unless some other thread waited on the same condition). This guarantee offers the benefit of not having to loop around waits in order to guarantee that a condition is still met. The main reason \CFA offers this guarantee is that users can easily introduce barging if it becomes a necessity but adding barging prevention or barging avoidance is more involved without language support. Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design of \CFA concurrency.
    253261
    254262% ======================================================================
     
    257265% ======================================================================
    258266% ======================================================================
    259 It is easier to understand the problem of multi-monitor scheduling using a series of pseudo-code. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling is done using an implicit condition variable, like Java built-in monitors. Indeed, \code{wait} statements always use a single condition as paremeter and waits on the monitors associated with the condition.
     267It is easier to understand the problem of multi-monitor scheduling using a series of pseudo-code examples. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling is done using an implicit condition variable, like Java built-in monitors. Indeed, \code{wait} statements always use the implicit condition as parameter and explicitly name the monitors (A and B) associated with the condition. Note that in \CFA, condition variables are tied to a set of monitors on first use (called branding) which means that using internal scheduling with distinct sets of monitors requires one condition variable per set of monitors.
    260268
    261269\begin{multicols}{2}
     
    295303\end{pseudo}
    296304\end{multicols}
    297 This version uses \gls{bulk-acq} (denoted using the \& symbol), but the presence of multiple monitors does not add a particularly new meaning. Synchronization happens between the two threads in exactly the same way and order. The only difference is that mutual exclusion covers more monitors. On the implementation side, handling multiple monitors does add a degree of complexity as the next few examples demonstrate.
    298 
    299 While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well known deadlock problem is the Nested Monitor Problem\cit, which occurs when a \code{wait} is made on a thread that holds more than one monitor. For example, the following pseudo-code will run into the nested monitor problem :
     305This version uses \gls{bulk-acq} (denoted using the {\sf\&} symbol), but the presence of multiple monitors does not add a particularly new meaning. Synchronization happens between the two threads in exactly the same way and order. The only difference is that mutual exclusion covers more monitors. On the implementation side, handling multiple monitors does add a degree of complexity as the next few examples demonstrate.
     306
     307While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well known deadlock problem is the Nested Monitor Problem \cite{Lister77}, which occurs when a \code{wait} is made by a thread that holds more than one monitor. For example, the following pseudo-code runs into the nested-monitor problem:
    300308\begin{multicols}{2}
    301309\begin{pseudo}
     
    317325\end{pseudo}
    318326\end{multicols}
     327
     328The \code{wait} only releases monitor \code{B} so the signalling thread cannot acquire monitor \code{A} to get to the \code{signal}. Attempting release of all acquired monitors at the \code{wait} results in another set of problems such as releasing monitor \code{C}, which has nothing to do with the \code{signal}.
     329
    319330However, for monitors as for locks, it is possible to write a program using nesting without encountering any problems if nesting is done correctly. For example, the next pseudo-code snippet acquires monitors {\sf A} then {\sf B} before waiting, while only acquiring {\sf B} when signalling, effectively avoiding the nested monitor problem.
    320331
     
    339350\end{multicols}
    340351
    341 Listing \ref{lst:int-bulk-pseudo} shows an example where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics. Listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code which implements the pseudo-code in listing \ref{lst:int-bulk-pseudo}. Note that listing \ref{lst:int-bulk-cfa} uses non-\code{mutex} parameter to introduce monitor \code{b} into context. However, for the purpose of translating the given pseudo-code into \CFA-code any method of introducing new monitors into context, other than a \code{mutex} parameter, is acceptable, e.g. global variables, pointer parameters or using locals with the \code{mutex}-statement.
     352% ======================================================================
     353% ======================================================================
     354\subsection{Internal Scheduling - in depth}
     355% ======================================================================
     356% ======================================================================
     357
     358A larger example is presented to show complex issues for \gls{bulk-acq} and all the implementation options are analyzed. Listing \ref{lst:int-bulk-pseudo} shows an example where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics, and listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code which implements the pseudo-code in listing \ref{lst:int-bulk-pseudo}. For the purpose of translating the given pseudo-code into \CFA-code any method of introducing a monitor into context, other than a \code{mutex} parameter, is acceptable, e.g., global variables, pointer parameters or using locals with the \code{mutex}-statement.
    342359
    343360\begin{figure}[!b]
     
    376393
    377394\begin{figure}[!b]
     395\begin{center}
     396\begin{cfacode}[xleftmargin=.4\textwidth]
     397monitor A a;
     398monitor B b;
     399condition c;
     400\end{cfacode}
     401\end{center}
    378402\begin{multicols}{2}
    379403Waiting thread
    380404\begin{cfacode}
    381 monitor A;
    382 monitor B;
    383 extern condition c;
    384 void foo(A & mutex a, B & b) {
     405mutex(a) {
    385406        //Code Section 1
    386407        mutex(a, b) {
     
    397418Signalling thread
    398419\begin{cfacode}
    399 monitor A;
    400 monitor B;
    401 extern condition c;
    402 void foo(A & mutex a, B & b) {
     420mutex(a) {
    403421        //Code Section 5
    404422        mutex(a, b) {
     
    415433\end{figure}
    416434
    417 It is particularly important to pay attention to code sections 4 and 8, which are where the existing semantics of internal scheduling need to be extended for multiple monitors. The root of the problem is that \gls{bulk-acq} is used in a context where one of the monitors is already acquired and is why it is important to define the behaviour of the previous pseudo-code. When the signaller thread reaches the location where it should "release A \& B" (line 16), it must actually transfer ownership of monitor B to the waiting thread. This ownership trasnfer is required in order to prevent barging. Since the signalling thread still needs monitor A, simply waking up the waiting thread is not an option because it would violate mutual exclusion. There are three options.
     435The complexity begins at code sections 4 and 8, which are where the existing semantics of internal scheduling need to be extended for multiple monitors. The root of the problem is that \gls{bulk-acq} is used in a context where one of the monitors is already acquired, which is why it is important to define the behaviour of the previous pseudo-code. When the signaller thread reaches the location where it should ``release \code{A & B}'' (line 16), it must actually transfer ownership of monitor \code{B} to the waiting thread. This ownership transfer is required in order to prevent barging. Since the signalling thread still needs monitor \code{A}, simply waking up the waiting thread is not an option because it violates mutual exclusion. There are three options.
    418436
    419437\subsubsection{Delaying signals}
    420 The first more obvious solution to solve the problem of multi-monitor scheduling is to keep ownership of all locks until the last lock is ready to be transferred. It can be argued that that moment is the correct time to transfer ownership when the last lock is no longer needed because this semantics fits most closely to the behaviour of single monitor scheduling. This solution has the main benefit of transferring ownership of groups of monitors, which simplifies the semantics from mutiple objects to a single group of objects, effectively making the existing single monitor semantic viable by simply changing monitors to monitor groups.
     438The obvious solution to solve the problem of multi-monitor scheduling is to keep ownership of all locks until the last lock is ready to be transferred. It can be argued that that moment is when the last lock is no longer needed because this semantics fits most closely to the behaviour of single-monitor scheduling. This solution has the main benefit of transferring ownership of groups of monitors, which simplifies the semantics from multiple objects to a single group of objects, effectively making the existing single-monitor semantic viable by simply changing monitors to monitor groups.
    421439\begin{multicols}{2}
    422440Waiter
     
    443461\end{multicols}
    444462However, this solution can become much more complicated depending on what is executed while secretly holding B (at line 10). Indeed, nothing prevents signalling monitor A on a different condition variable:
    445 \begin{multicols}{2}
    446 Thread 1
     463\begin{figure}
     464\begin{multicols}{3}
     465Thread $\alpha$
    447466\begin{pseudo}[numbers=left, firstnumber=1]
    448467acquire A
     
    453472\end{pseudo}
    454473
    455 Thread 2
    456 \begin{pseudo}[numbers=left, firstnumber=6]
    457 acquire A
    458         wait A
    459 release A
    460 \end{pseudo}
    461 
    462474\columnbreak
    463475
    464 Thread 3
    465 \begin{pseudo}[numbers=left, firstnumber=9]
     476Thread $\gamma$
     477\begin{pseudo}[numbers=left, firstnumber=1]
    466478acquire A
    467479        acquire A & B
    468480                signal A & B
    469481        release A & B
    470         //Secretly keep B here
    471482        signal A
    472483release A
    473 //Wakeup thread 1 or 2?
    474 //Who wakes up the other thread?
    475 \end{pseudo}
     484\end{pseudo}
     485
     486\columnbreak
     487
     488Thread $\beta$
     489\begin{pseudo}[numbers=left, firstnumber=1]
     490acquire A
     491        wait A
     492release A
     493\end{pseudo}
     494
    476495\end{multicols}
     496\caption{Dependency graph}
     497\label{lst:dependency}
     498\end{figure}
    477499
    478500The goal in this solution is to avoid the need to transfer ownership of a subset of the condition monitors. However, this goal is unreachable in the previous example. Depending on the order of signals (lines 12 and 15) two cases can happen.
     
    484506Note that ordering is not determined by a race condition but by whether signalled threads are enqueued in FIFO or FILO order. However, regardless of the answer, users can move line 15 before line 11 and get the reverse effect.
    485507
    486 In both cases, the threads need to be able to distinguish, on a per monitor basis, which ones need to be released and which ones need to be transferred, which means monitors cannot be handled as a single homogenous group and therefore invalidates the main benefit of this approach.
     508In both cases, the threads need to be able to distinguish, on a per monitor basis, which ones need to be released and which ones need to be transferred, which means monitors cannot be handled as a single homogeneous group and therefore effectively precludes this approach.
    487509
    488510\subsubsection{Dependency graphs}
    489 In the Listing 1 pseudo-code, there is a solution which statisfies both barging prevention and mutual exclusion. If ownership of both monitors is transferred to the waiter when the signaller releases A and then the waiter transfers back ownership of A when it releases it, then the problem is solved. Dynamically finding the correct order is therefore the second possible solution. The problem it encounters is that it effectively boils down to resolving a dependency graph of ownership requirements. Here even the simplest of code snippets requires two transfers and it seems to increase in a manner closer to polynomial. For example, the following code, which is just a direct extension to three monitors, requires at least three ownership transfer and has multiple solutions:
     511In the listing \ref{lst:int-bulk-pseudo} pseudo-code, there is a solution which satisfies both barging prevention and mutual exclusion. If ownership of both monitors is transferred to the waiter when the signaller releases \code{A & B} and then the waiter transfers back ownership of \code{A} when it releases it, then the problem is solved (\code{B} is no longer in use at this point). Dynamically finding the correct order is therefore the second possible solution. The problem it encounters is that it effectively boils down to resolving a dependency graph of ownership requirements. Here even the simplest of code snippets requires two transfers and it seems to increase in a manner closer to polynomial. For example, the following code, which is just a direct extension to three monitors, requires at least three ownership transfers and has multiple solutions:
    490512
    491513\begin{multicols}{2}
     
    514536
    515537\begin{figure}
    516 \begin{multicols}{3}
    517 Thread $\alpha$
    518 \begin{pseudo}[numbers=left, firstnumber=1]
    519 acquire A
    520         acquire A & B
    521                 wait A & B
    522         release A & B
    523 release A
    524 \end{pseudo}
    525 
    526 \columnbreak
    527 
    528 Thread $\gamma$
    529 \begin{pseudo}[numbers=left, firstnumber=1]
    530 acquire A
    531         acquire A & B
    532                 signal A & B
    533         release A & B
    534         signal A
    535 release A
    536 \end{pseudo}
    537 
    538 \columnbreak
    539 
    540 Thread $\beta$
    541 \begin{pseudo}[numbers=left, firstnumber=1]
    542 acquire A
    543         wait A
    544 release A
    545 \end{pseudo}
    546 
    547 \end{multicols}
    548 \caption{Dependency graph}
    549 \label{lst:dependency}
    550 \end{figure}
    551 
    552 \begin{figure}
    553538\begin{center}
    554539\input{dependency}
    555540\end{center}
     541\caption{Dependency graph of the statements in listing \ref{lst:dependency}}
    556542\label{fig:dependency}
    557 \caption{Dependency graph of the statements in listing \ref{lst:dependency}}
    558 \end{figure}
    559 
    560 Listing \ref{lst:dependency} is the three thread example rewritten for dependency graphs as well as the corresponding dependency graph. Figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows the dependency of that statement. The extra challenge is that this dependency graph is effectively post-mortem, but the run time system needs to be able to build and solve these graphs as the dependency unfolds. Resolving dependency graph being a complex and expensive endeavour, this solution is not the preffered one.
     543\end{figure}
     544
     545Listing \ref{lst:dependency} is the three-thread example rewritten for dependency graphs. Figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows show the dependency of that statement (e.g., $\alpha1$ must happen before $\alpha2$). The extra challenge is that this dependency graph is effectively post-mortem, but the runtime system needs to be able to build and solve these graphs as the dependency unfolds. Resolving a dependency graph being a complex and expensive endeavour, this solution is not the preferred one.
    561546
    562547\subsubsection{Partial signalling} \label{partial-sig}
    563 Finally, the solution that is chosen for \CFA is to use partial signalling. Consider the following case:
    564 
    565 \begin{multicols}{2}
    566 \begin{pseudo}[numbers=left]
    567 acquire A
    568         acquire A & B
    569                 wait A & B
    570         release A & B
    571 release A
    572 \end{pseudo}
    573 
    574 \columnbreak
    575 
    576 \begin{pseudo}[numbers=left, firstnumber=6]
    577 acquire A
    578         acquire A & B
    579                 signal A & B
    580         release A & B
    581         //... More code
    582 release A
    583 \end{pseudo}
    584 \end{multicols}
    585 The partial signalling solution transfers ownership of monitor B at lines 10 but does not wake the waiting thread since it is still using monitor A. Only when it reaches line 11 does it actually wakeup the waiting thread. This solution has the benefit that complexity is encapsulated into only two actions, passing monitors to the next owner when they should be release and conditionally waking threads if all conditions are met. This solution has a much simpler implementation than a dependency graph solving algorithm which is why it was chosen.
     548Finally, the solution that is chosen for \CFA is to use partial signalling. Again using listing \ref{lst:int-bulk-pseudo}, the partial signalling solution transfers ownership of monitor B at line 10 but does not wake the waiting thread since it is still using monitor A. Only when it reaches line 11 does it actually wake up the waiting thread. This solution has the benefit that complexity is encapsulated into only two actions, passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met. This solution has a much simpler implementation than a dependency graph solving algorithm which is why it was chosen. Furthermore, after being fully implemented, this solution does not appear to have any downsides worth mentioning.
    586549
    587550% ======================================================================
     
    590553% ======================================================================
    591554% ======================================================================
    592 An important note is that, until now, signalling a monitor was a delayed operation. The ownership of the monitor is transferred only when the monitor would have otherwise been released, not at the point of the \code{signal} statement. However, in some cases, it may be more convenient for users to immediately transfer ownership to the thread that is waiting for cooperation, which is achieved using the \code{signal_block} routine\footnote{name to be discussed}.
    593 
    594 The example in listing \ref{lst:datingservice} highlights the difference in behaviour. As mentioned, \code{signal} only transfers ownership once the current critical section exits, this behaviour cause the need for additional synchronisation when a two-way handshake is needed. To avoid this extraneous synchronisation, the \code{condition} type offers the \code{signal_block} routine which handle two-way handshakes as shown in the example. This removes the need for a second condition variables and simplifies programming. Like every other monitor semantic, \code{signal_block} uses barging prevention which means mutual-exclusion is baton-passed both on the frond-end and the back-end of the call to \code{signal_block}, meaning no other thread can acquire the monitor neither before nor after the call.
    595555\begin{figure}
    596556\begin{tabular}{|c|c|}
     
    622582                girlPhoneNo = phoneNo;
    623583
    624                 //wake boy fron chair
     584                //wake boy from chair
    625585                signal(exchange);
    626586        }
     
    669629                girlPhoneNo = phoneNo;
    670630
    671                 //wake boy fron chair
     631                //wake boy from chair
    672632                signal(exchange);
    673633        }
     
    696656\label{lst:datingservice}
    697657\end{figure}
     658An important note is that, until now, signalling a monitor was a delayed operation. The ownership of the monitor is transferred only when the monitor would have otherwise been released, not at the point of the \code{signal} statement. However, in some cases, it may be more convenient for users to immediately transfer ownership to the thread that is waiting for cooperation, which is achieved using the \code{signal_block} routine\footnote{name to be discussed}.
     659
     660The example in listing \ref{lst:datingservice} highlights the difference in behaviour. As mentioned, \code{signal} only transfers ownership once the current critical section exits; this behaviour requires additional synchronisation when a two-way handshake is needed. To avoid this extraneous synchronisation, the \code{condition} type offers the \code{signal_block} routine, which handles the two-way handshake as shown in the example. This removes the need for a second condition variable and simplifies programming. Like every other monitor semantic, \code{signal_block} uses barging prevention, which means mutual-exclusion is baton-passed both on the front-end and the back-end of the call to \code{signal_block}, meaning no other thread can acquire the monitor either before or after the call.
    698661
    699662% ======================================================================
     
    702665% ======================================================================
    703666% ======================================================================
    704 An alternative to internal scheduling is to use external scheduling.
     667An alternative to internal scheduling is external scheduling, e.g., in \uC.
    705668\begin{center}
    706 \begin{tabular}{|c|c|}
    707 Internal Scheduling & External Scheduling \\
     669\begin{tabular}{|c|c|c|}
     670Internal Scheduling & External Scheduling & Go\\
    708671\hline
    709 \begin{ucppcode}
     672\begin{ucppcode}[tabsize=3]
    710673_Monitor Semaphore {
    711674        condition c;
     
    713676public:
    714677        void P() {
    715                 if(inUse) wait(c);
     678                if(inUse)
     679                        wait(c);
    716680                inUse = true;
    717681        }
     
    721685        }
    722686}
    723 \end{ucppcode}&\begin{ucppcode}
     687\end{ucppcode}&\begin{ucppcode}[tabsize=3]
    724688_Monitor Semaphore {
    725689
     
    727691public:
    728692        void P() {
    729                 if(inUse) _Accept(V);
     693                if(inUse)
     694                        _Accept(V);
    730695                inUse = true;
    731696        }
     
    735700        }
    736701}
    737 \end{ucppcode}
     702\end{ucppcode}&\begin{gocode}[tabsize=3]
     703type MySem struct {
     704        inUse bool
     705        c     chan bool
     706}
     707
     708// acquire
     709func (s MySem) P() {
     710        if s.inUse {
     711                select {
     712                case <-s.c:
     713                }
     714        }
     715        s.inUse = true
     716}
     717
     718// release
     719func (s MySem) V() {
     720        s.inUse = false
     721
     722        //This actually deadlocks
     723        //when single thread
     724        s.c <- false
     725}
     726\end{gocode}
    738727\end{tabular}
    739728\end{center}
    740 This method is more constrained and explicit, which helps users tone down the undeterministic nature of concurrency. Indeed, as the following examples demonstrates, external scheduling allows users to wait for events from other threads without the concern of unrelated events occuring. External scheduling can generally be done either in terms of control flow (e.g., \uC with \code{_Accept}) or in terms of data (e.g. Go with channels). Of course, both of these paradigms have their own strenghts and weaknesses but for this project control-flow semantics were chosen to stay consistent with the rest of the languages semantics. Two challenges specific to \CFA arise when trying to add external scheduling with loose object definitions and multi-monitor routines. The previous example shows a simple use \code{_Accept} versus \code{wait}/\code{signal} and its advantages. Note that while other languages often use \code{accept}/\code{select} as the core external scheduling keyword, \CFA uses \code{waitfor} to prevent name collisions with existing socket \acrshort{api}s.
    741 
    742 In the case of internal scheduling, the call to \code{wait} only guarantees that \code{V} is the last routine to access the monitor. This entails that a third routine, say \code{isInUse()}, may have acquired mutual exclusion several times while routine \code{P} was waiting. On the other hand, external scheduling guarantees that while routine \code{P} was waiting, no routine other than \code{V} could acquire the monitor.
     729This method is more constrained and explicit, which helps users tone down the nondeterministic nature of concurrency. Indeed, as the following examples demonstrate, external scheduling allows users to wait for events from other threads without the concern of unrelated events occurring. External scheduling can generally be done either in terms of control flow (e.g., \uC with \code{_Accept}) or in terms of data (e.g., Go with channels). Of course, both of these paradigms have their own strengths and weaknesses but for this project control-flow semantics were chosen to stay consistent with the rest of the language's semantics. Two challenges specific to \CFA arise when trying to add external scheduling with loose object definitions and multi-monitor routines. The previous example shows a simple use of \code{_Accept} versus \code{wait}/\code{signal} and its advantages. Note that while other languages often use \code{accept}/\code{select} as the core external scheduling keyword, \CFA uses \code{waitfor} to prevent name collisions with existing socket \acrshort{api}s.
     730
     731For the \code{P} member above using internal scheduling, the call to \code{wait} only guarantees that \code{V} is the last routine to access the monitor, allowing a third routine, say \code{isInUse()}, to acquire mutual exclusion several times while routine \code{P} is waiting. On the other hand, external scheduling guarantees that while routine \code{P} is waiting, no routine other than \code{V} can acquire the monitor.
    743732
    744733% ======================================================================
     
    747736% ======================================================================
    748737% ======================================================================
    749 In \uC, monitor declarations include an exhaustive list of monitor operations. Since \CFA is not object oriented it becomes both more difficult to implement but also less clear for the user:
     738In \uC, monitor declarations include an exhaustive list of monitor operations. Since \CFA is not object oriented, monitors become both more difficult to implement and less clear for a user:
    750739
    751740\begin{cfacode}
     
    782771For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instruction. However, a fast check for \pscode{monitor accepts me} is much harder to implement depending on the constraints put on the monitors. Indeed, monitors are often expressed as an entry queue and some acceptor queue as in the following figure:
    783772
     773\begin{figure}[H]
    784774\begin{center}
    785775{\resizebox{0.4\textwidth}{!}{\input{monitor}}}
    786776\end{center}
    787 
    788 There are other alternatives to these pictures, but in the case of this picture, implementing a fast accept check is relatively easy. Indeed simply updating a bitmask when the acceptor queue changes is enough to have a check that executes in a single instruction, even with a fairly large number (e.g. 128) of mutex members. This technique cannot be used in \CFA because it relies on the fact that the monitor type declares all the acceptable routines. For OO languages this does not compromise much since monitors already have an exhaustive list of member routines. However, for \CFA this is not the case; routines can be added to a type anywhere after its declaration. Its important to note that the bitmask approach does not actually require an exhaustive list of routines, but it requires a dense unique ordering of routines with an upper-bound and that ordering must be consistent across translation units.
    789 The alternative is to have a picture like this one:
     777\label{fig:monitor}
     778\end{figure}
     779
     780There are other alternatives to these pictures, but in the case of this picture, implementing a fast accept check is relatively easy. Restricted to a fixed number of mutex members, N, the accept check reduces to updating a bitmask when the acceptor queue changes, a check that executes in a single instruction even with a fairly large number (e.g., 128) of mutex members. This technique cannot be used in \CFA because it relies on the fact that the monitor type enumerates (declares) all the acceptable routines. For OO languages this does not compromise much since monitors already have an exhaustive list of member routines. However, for \CFA this is not the case; routines can be added to a type anywhere after its declaration. It is important to note that the bitmask approach does not actually require an exhaustive list of routines, but it requires a dense unique ordering of routines with an upper-bound and that ordering must be consistent across translation units.
     781The alternative is to alter the implementation like this:
    790782
    791783\begin{center}
     
    793785\end{center}
    794786
    795 Not storing the mask inside the monitor means that the storage for the mask information can vary between calls to \code{waitfor}, allowing for more flexibility and extensions. Storing an array of function-pointers would solve the issue of uniquely identifying acceptable routines. However, the single instruction bitmask compare has been replaced by dereferencing a pointer followed by a linear search. Furthermore, supporting nested external scheduling may now require additionnal searches on calls to waitfor to check if a routine is already queued in.
    796 
    797 Note that in the second picture, tasks need to always keep track of through which routine they are attempting to acquire the monitor and the routine mask needs to have both a function pointer and a set of monitors, as will be discussed in the next section. These details where omitted from the picture for the sake of simplifying the representation.
    798 
    799 At this point we must make a decision between flexibility and performance. Many design decisions in \CFA achieve both flexibility and performance, for example polymorphic routines add significant flexibility but inlining them means the optimizer can easily remove any runtime cost. Here however, the cost of flexibility cannot be trivially removed. In the end, the most flexible approach has been chosen since it allows users to write programs that would otherwise be prohibitively hard to write. This decision is based on the assumption that writing fast but inflexible locks is closer to a solved problems than writing locks that are as flexible as external scheduling in \CFA.
     787Generating a mask dynamically means that the storage for the mask information can vary between calls to \code{waitfor}, allowing for more flexibility and extensions. Storing an array of accepted function-pointers replaces the single instruction bitmask compare with dereferencing a pointer followed by a linear search. Furthermore, supporting nested external scheduling (e.g., listing \ref{lst:nest-ext}) may now require additional searches on calls to the \code{waitfor} statement to check if a routine is already queued in.
     788
     789\begin{figure}
     790\begin{cfacode}
     791monitor M {};
     792void foo( M & mutex a ) {}
     793void bar( M & mutex b ) {
     794        //Nested in the waitfor(bar, c) call
     795        waitfor(foo, b);
     796}
     797void baz( M & mutex c ) {
     798        waitfor(bar, c);
     799}
     800
     801\end{cfacode}
     802\caption{Example of nested external scheduling}
     803\label{lst:nest-ext}
     804\end{figure}
     805
     806Note that in the second picture, tasks need to always keep track of the routine through which they are attempting to acquire the monitor, and the routine mask needs to have both a function pointer and a set of monitors, as will be discussed in the next section. These details were omitted from the picture for the sake of simplifying the representation.
     807
     808At this point, a decision must be made between flexibility and performance. Many design decisions in \CFA achieve both flexibility and performance, for example polymorphic routines add significant flexibility but inlining them means the optimizer can easily remove any runtime cost. Here however, the cost of flexibility cannot be trivially removed. In the end, the most flexible approach has been chosen since it allows users to write programs that would otherwise be prohibitively hard to write. This decision is based on the assumption that writing fast but inflexible locks is closer to a solved problem than writing locks that are as flexible as external scheduling in \CFA.
    800809
    801810% ======================================================================
     
    811820void f(M & mutex a);
    812821
    813 void g(M & mutex a, M & mutex b) {
    814         waitfor(f); //ambiguous, keep a pass b or other way around?
     822void g(M & mutex b, M & mutex c) {
     823        waitfor(f); //two monitors M => unknown which to pass to f(M & mutex)
    815824}
    816825\end{cfacode}
     
    828837\end{cfacode}
    829838
    830 This syntax is unambiguous. Both locks are acquired and kept. When routine \code{f} is called, the lock for monitor \code{b} is temporarily transferred from \code{g} to \code{f} (while \code{g} still holds lock \code{a}). This behavior can be extended to multi-monitor waitfor statement as follows.
     839This syntax is unambiguous. Both locks are acquired and kept by \code{g}. When routine \code{f} is called, the lock for monitor \code{b} is temporarily transferred from \code{g} to \code{f} (while \code{g} still holds lock \code{a}). This behavior can be extended to multi-monitor \code{waitfor} statement as follows.
    831840
    832841\begin{cfacode}
     
    842851Note that the set of monitors passed to the \code{waitfor} statement must be entirely contained in the set of monitors already acquired in the routine. \code{waitfor} used in any other context is Undefined Behaviour.
    843852
    844 An important behavior to note is that what happens when a set of monitors only match partially :
     853An important behavior to note is when a set of monitors only matches partially:
    845854
    846855\begin{cfacode}
     
    865874\end{cfacode}
    866875
    867 While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables. In both cases, partially matching monitor sets does not wake-up the waiting thread. It is also important to note that in the case of external scheduling, as for routine calls, the order of parameters is important; \code{waitfor(f,a,b)} and \code{waitfor(f,b,a)} are to distinct waiting condition.
     876While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables. In both cases, partially matching monitor sets does not wake-up the waiting thread. It is also important to note that in the case of external scheduling, as for routine calls, the order of parameters is irrelevant; \code{waitfor(f,a,b)} and \code{waitfor(f,b,a)} are indistinguishable waiting conditions.
    868877
    869878% ======================================================================
     
    873882% ======================================================================
    874883
    875 Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. While the set of monitors can be any list of expression, the function name is more restricted. This is because the compiler validates at compile time the validity of the waitfor statement. It checks that the set of monitor passed in matches the requirements for a function call. Listing \ref{lst:waitfor} shows various usage of the waitfor statement and which are acceptable. The choice of the function type is made ignoring any non-\code{mutex} parameter. One limitation of the current implementation is that it does not handle overloading.
     884Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. While the set of monitors can be any list of expressions, the function name is more restricted because the compiler validates at compile time the validity of the function type and the parameters used with the \code{waitfor} statement. It checks that the set of monitors passed in matches the requirements for a function call. Listing \ref{lst:waitfor} shows various usages of the \code{waitfor} statement and which are acceptable. The choice of the function type is made ignoring any non-\code{mutex} parameter. One limitation of the current implementation is that it does not handle overloading.
    876885\begin{figure}
    877886\begin{cfacode}
     
    898907        waitfor(f2, a1, a2); //Incorrect : Mutex arguments don't match
    899908        waitfor(f1, 1);      //Incorrect : 1 not a mutex argument
    900         waitfor(f4, a1);     //Incorrect : f9 not a function
    901         waitfor(*fp, a1 );   //Incorrect : fp not a identifier
     909        waitfor(f9, a1);     //Incorrect : f9 function does not exist
     910        waitfor(*fp, a1 );   //Incorrect : fp not an identifier
    902911        waitfor(f4, a1);     //Incorrect : f4 ambiguous
    903912
     
    909918\end{figure}
    910919
    911 Finally, for added flexibility, \CFA supports constructing complex waitfor mask using the \code{or}, \code{timeout} and \code{else}. Indeed, multiple \code{waitfor} can be chained together using \code{or}; this chain will form a single statement which will baton-pass to any one function that fits one of the function+monitor set which was passed in. To eanble users to tell which was the accepted function, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement. When multiple \code{waitfor} are chained together, only the statement corresponding to the accepted function is executed. A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, that is only check of a matching function already arrived and return immediately otherwise. Any and all of these clauses can be preceded by a \code{when} condition to dynamically construct the mask based on some current state. Listing \ref{lst:waitfor2}, demonstrates several complex masks and some incorrect ones.
     920Finally, for added flexibility, \CFA supports constructing complex \code{waitfor} mask using the \code{or}, \code{timeout} and \code{else}. Indeed, multiple \code{waitfor} can be chained together using \code{or}; this chain forms a single statement that uses baton-pass to any one function that fits one of the function+monitor set passed in. To enable users to tell which function is accepted, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement. When multiple \code{waitfor} are chained together, only the statement corresponding to the accepted function is executed. A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, that is, only check if a matching function call has already arrived and return immediately otherwise. Any and all of these clauses can be preceded by a \code{when} condition to dynamically construct the mask based on some current state. Listing \ref{lst:waitfor2} demonstrates several complex masks and some incorrect ones.
    912921
    913922\begin{figure}
     
    973982\label{lst:waitfor2}
    974983\end{figure}
     984
     985% ======================================================================
     986% ======================================================================
     987\subsection{Waiting for the destructor}
     988% ======================================================================
     989% ======================================================================
     990An interesting use for the \code{waitfor} statement is destructor semantics. Indeed, the \code{waitfor} statement can accept any \code{mutex} routine, which includes the destructor (see section \ref{data}). However, with the semantics discussed until now, waiting for the destructor does not make any sense since using an object after its destructor is called is undefined behaviour. The simplest approach is to disallow \code{waitfor} on a destructor. However, a more expressive approach is to flip execution ordering when waiting for the destructor, meaning that waiting for the destructor allows the destructor to run after the current \code{mutex} routine, similarly to how a condition is signalled.
     991\begin{figure}
     992\begin{cfacode}
     993monitor Executer {};
     994struct  Action;
     995
     996void ^?{}   (Executer & mutex this);
     997void execute(Executer & mutex this, const Action & );
     998void run    (Executer & mutex this) {
     999        while(true) {
     1000                   waitfor(execute, this);
     1001                or waitfor(^?{}   , this) {
     1002                        break;
     1003                }
     1004        }
     1005}
     1006\end{cfacode}
     1007\caption{Example of an executor which executes actions in series until the destructor is called.}
     1008\label{lst:dtor-order}
     1009\end{figure}
     1010For example, listing \ref{lst:dtor-order} shows an example of an executor with an infinite loop, which waits for the destructor to break out of this loop. Switching the semantic meaning introduces an idiomatic way to terminate a task and/or wait for its termination via destruction.
  • doc/proposals/concurrency/text/future.tex

    rf5c3b6c r0fe4e62  
    55% ======================================================================
    66
    7 Concurrency and parallelism is still a very active field that strongly benefits from hardware advances. As such certain features that aren't necessarily mature enough in their current state could become relevant in the lifetime of \CFA.
    8 \section{Non-Blocking IO}
     7\section{Flexible Scheduling} \label{futur:sched}
     8An important part of concurrency is scheduling. Different scheduling algorithms can affect performance (both in terms of average and variation). However, no single scheduler is optimal for all workloads and therefore there is value in being able to change the scheduler for given programs. One solution is to offer various tweaking options to users, allowing the scheduler to be adjusted to the requirements of the workload. However, in order to be truly flexible, it would be interesting to allow users to add arbitrary data and arbitrary scheduling algorithms to the scheduler. For example, a web server could attach Type-of-Service information to threads and have a ``ToS aware'' scheduling algorithm tailored to this specific web server. This path of flexible schedulers will be explored for \CFA.
     9
     10\section{Non-Blocking IO} \label{futur:nbio}
     11While most of the parallelism tools
     12However, many modern workloads are not bound on computation but on IO operations, a common case being webservers and XaaS (anything as a service). These types of workloads often require significant engineering around amortising costs of blocking IO operations. While improving throughput of these operations is outside what \CFA can do as a language, it can help users to make better use of the CPU time otherwise spent waiting on IO operations. The current trend is to use asynchronous programming using tools like callbacks and/or futures and promises\cite. However, while these are valid solutions, they lead to code that is harder to read and maintain because it is much less linear.
     13
     14\section{Other concurrency tools} \label{futur:tools}
     15While monitors offer a flexible and powerful concurrent core for \CFA, other concurrency tools are also necessary for a complete multi-paradigm concurrency package. Examples of such tools can include simple locks and condition variables, futures and promises\cite{promises}, and executors. These additional features are useful when monitors offer a level of abstraction which is inadequate for certain tasks.
     16
     17\section{Implicit threading} \label{futur:implcit}
     18Simpler applications can benefit greatly from having implicit parallelism. That is, parallelism that does not rely on the user to write concurrency. This type of parallelism can be achieved both at the language level and at the library level. The canonical example of implicit parallelism is parallel for loops, which are the simplest example of a divide and conquer algorithm\cite{uC++book}. Listing \ref{lst:parfor} shows three different code examples that accomplish pointwise sums of large arrays. Note that none of these examples explicitly declare any concurrency or parallelism objects.
     19
     20\begin{figure}
     21\begin{center}
     22\begin{tabular}[t]{|c|c|c|}
     23Sequential & Library Parallel & Language Parallel \\
     24\begin{cfacode}[tabsize=3]
     25void big_sum(
     26        int* a, int* b,
     27        int* o,
     28        size_t len)
     29{
     30        for(
     31                int i = 0;
     32                i < len;
     33                ++i )
     34        {
     35                o[i]=a[i]+b[i];
     36        }
     37}
    938
    1039
    11 \section{Other concurrency tools}
    1240
    1341
    14 \section{Implicit threading}
    15 % Finally, simpler applications can benefit greatly from having implicit parallelism. That is, parallelism that does not rely on the user to write concurrency. This type of parallelism can be achieved both at the language level and at the system level.
    16 %
    17 % \begin{center}
    18 % \begin{tabular}[t]{|c|c|c|}
    19 % Sequential & System Parallel & Language Parallel \\
    20 % \begin{lstlisting}
    21 % void big_sum(int* a, int* b,
    22 %                int* out,
    23 %                size_t length)
    24 % {
    25 %       for(int i = 0; i < length; ++i ) {
    26 %               out[i] = a[i] + b[i];
    27 %       }
    28 % }
    29 %
    30 %
    31 %
    32 %
    33 %
    34 % int* a[10000];
    35 % int* b[10000];
    36 % int* c[10000];
    37 % //... fill in a and b ...
    38 % big_sum(a, b, c, 10000);
    39 % \end{lstlisting} &\begin{lstlisting}
    40 % void big_sum(int* a, int* b,
    41 %                int* out,
    42 %                size_t length)
    43 % {
    44 %       range ar(a, a + length);
    45 %       range br(b, b + length);
    46 %       range or(out, out + length);
    47 %       parfor( ai, bi, oi,
    48 %       [](int* ai, int* bi, int* oi) {
    49 %               oi = ai + bi;
    50 %       });
    51 % }
    52 %
    53 % int* a[10000];
    54 % int* b[10000];
    55 % int* c[10000];
    56 % //... fill in a and b ...
    57 % big_sum(a, b, c, 10000);
    58 % \end{lstlisting}&\begin{lstlisting}
    59 % void big_sum(int* a, int* b,
    60 %                int* out,
    61 %                size_t length)
    62 % {
    63 %       for (ai, bi, oi) in (a, b, out) {
    64 %               oi = ai + bi;
    65 %       }
    66 % }
    67 %
    68 %
    69 %
    70 %
    71 %
    72 % int* a[10000];
    73 % int* b[10000];
    74 % int* c[10000];
    75 % //... fill in a and b ...
    76 % big_sum(a, b, c, 10000);
    77 % \end{lstlisting}
    78 % \end{tabular}
    79 % \end{center}
    80 %
     42
     43int* a[10000];
     44int* b[10000];
     45int* c[10000];
     46//... fill in a & b
     47big_sum(a,b,c,10000);
     48\end{cfacode} &\begin{cfacode}[tabsize=3]
     49void big_sum(
     50        int* a, int* b,
     51        int* o,
     52        size_t len)
     53{
     54        range ar(a, a+len);
     55        range br(b, b+len);
     56        range or(o, o+len);
     57        parfor( ai, bi, oi,
     58        [](     int* ai,
     59                int* bi,
     60                int* oi)
     61        {
     62                oi=ai+bi;
     63        });
     64}
    8165
    8266
    83 \section{Multiple Paradigms}
     67int* a[10000];
     68int* b[10000];
     69int* c[10000];
     70//... fill in a & b
     71big_sum(a,b,c,10000);
     72\end{cfacode}&\begin{cfacode}[tabsize=3]
     73void big_sum(
     74        int* a, int* b,
     75        int* o,
     76        size_t len)
     77{
     78        parfor (ai,bi,oi)
     79            in (a, b, o )
     80        {
     81                oi = ai + bi;
     82        }
     83}
    8484
    8585
    86 \section{Transactions}
     86
     87
     88
     89
     90
     91int* a[10000];
     92int* b[10000];
     93int* c[10000];
     94//... fill in a & b
     95big_sum(a,b,c,10000);
     96\end{cfacode}
     97\end{tabular}
     98\end{center}
     99\caption{For loop to sum numbers: Sequential, using library parallelism and language parallelism.}
     100\label{lst:parfor}
     101\end{figure}
     102
     103Implicit parallelism is a general solution and therefore has its limitations. However, it is a quick and simple approach to parallelism which may very well be sufficient for smaller applications and reduces the amount of boiler-plate that is needed to start benefiting from parallelism in modern CPUs.
     104
     105
  • doc/proposals/concurrency/text/internals.tex

    rf5c3b6c r0fe4e62  
    11
    22\chapter{Behind the scene}
    3 
    4 
    5 % ======================================================================
    6 % ======================================================================
    7 \section{Implementation Details: Interaction with polymorphism}
    8 % ======================================================================
    9 % ======================================================================
    10 Depending on the choice of semantics for when monitor locks are acquired, interaction between monitors and \CFA's concept of polymorphism can be complex to support. However, it is shown that entry-point locking solves most of the issues.
    11 
    12 First of all, interaction between \code{otype} polymorphism and monitors is impossible since monitors do not support copying. Therefore, the main question is how to support \code{dtype} polymorphism. Since a monitor's main purpose is to ensure mutual exclusion when accessing shared data, this implies that mutual exclusion is only required for routines that do in fact access shared data. However, since \code{dtype} polymorphism always handles incomplete types (by definition), no \code{dtype} polymorphic routine can access shared data since the data requires knowledge about the type. Therefore, the only concern when combining \code{dtype} polymorphism and monitors is to protect access to routines.
    13 
    14 Before looking into complex control-flow, it is important to present the difference between the two acquiring options : callsite and entry-point locking, i.e. acquiring the monitors before making a mutex routine call or as the first operation of the mutex routine-call. For example:
      3There are several challenges specific to \CFA when implementing concurrency. These challenges are a direct result of \gls{bulk-acq} and loose object-definitions. These two constraints are the root cause of most design decisions in the implementation. Furthermore, to avoid contention from dynamically allocating memory in a concurrent environment, the internal-scheduling design is (almost) entirely free of mallocs. This is to avoid the chicken and egg problem \cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal means that memory management is a constant concern in the design of the system.
     4
     5The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues. The queue design needs to be intrusive\cite{IntrusiveData} to avoid the need for memory allocation, which entails that all the nodes need specific fields to keep track of all needed information. Since many concurrency operations can use an unbound amount of memory (depending on \gls{bulk-acq}), statically defining information in the intrusive fields of threads is insufficient. The only variable sized container that does not require memory allocation is the callstack, which is heavily used in the implementation of internal scheduling. Particularly variable length arrays, which are used extensively.
     6
     7Since stack allocation is based around scope, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable length. The threads and the condition both allow a fixed amount of memory to be stored, while mutex-routines and the actual blocking call allow for an unbound amount (though the later is preferable in terms of performance).
     8
      9Note that since the major contributions of this thesis are extending monitor semantics to \gls{bulk-acq} and loose object definitions, any challenges that do not result from these characteristics of \CFA are considered solved problems and therefore not discussed further.
     10
     11% ======================================================================
     12% ======================================================================
     13\section{Mutex routines}
     14% ======================================================================
     15% ======================================================================
     16
      17The first step towards the monitor implementation is simple mutex-routines using monitors. In the single monitor case, this is done using the entry/exit procedure highlighted in listing \ref{lst:entry1}. This entry/exit procedure does not actually have to be extended to support multiple monitors, indeed it is sufficient to enter/leave monitors one-by-one as long as the order is correct to prevent deadlocks\cite{Havender68}. In \CFA, ordering of monitors relies on memory ordering; this is sufficient because all objects are guaranteed to have distinct non-overlapping memory layouts and mutual-exclusion for a monitor is only defined for its lifetime, meaning that destroying a monitor while it is acquired is undefined behavior. When a mutex call is made, the concerned monitors are aggregated into a variable-length pointer array and sorted based on pointer values. This array persists for the entire duration of the mutual-exclusion and its ordering is reused extensively.
    1518\begin{figure}
    16 \label{fig:locking-site}
    17 \begin{center}
    18 \setlength\tabcolsep{1.5pt}
     19\begin{multicols}{2}
     20Entry
     21\begin{pseudo}
     22if monitor is free
     23        enter
     24elif already own the monitor
     25        continue
     26else
     27        block
     28increment recursions
     29\end{pseudo}
     30\columnbreak
     31Exit
     32\begin{pseudo}
     33decrement recursion
     34if recursion == 0
     35        if entry queue not empty
     36                wake-up thread
     37\end{pseudo}
     38\end{multicols}
     39\caption{Initial entry and exit routine for monitors}
     40\label{lst:entry1}
     41\end{figure}
     42
     43\subsection{ Details: Interaction with polymorphism}
     44Depending on the choice of semantics for when monitor locks are acquired, interaction between monitors and \CFA's concept of polymorphism can be more complex to support. However, it is shown that entry-point locking solves most of the issues.
     45
     46First of all, interaction between \code{otype} polymorphism and monitors is impossible since monitors do not support copying. Therefore, the main question is how to support \code{dtype} polymorphism. It is important to present the difference between the two acquiring options : callsite and entry-point locking, i.e. acquiring the monitors before making a mutex routine call or as the first operation of the mutex routine-call. For example:
     47\begin{figure}[H]
     48\begin{center}
    1949\begin{tabular}{|c|c|c|}
    2050Mutex & \gls{callsite-locking} & \gls{entry-point-locking} \\
     
    6696\end{tabular}
    6797\end{center}
    68 \caption{Callsite vs entry-point locking for mutex calls}
    69 \end{figure}
    70 
    71 
    72 Note the \code{mutex} keyword relies on the type system, which means that in cases where a generic monitor routine is actually desired, writing a mutex routine is possible with the proper trait, which is possible because monitors are designed in terms a trait. For example:
     98\caption{Call-site vs entry-point locking for mutex calls}
     99\label{fig:locking-site}
     100\end{figure}
     101
     102Note the \code{mutex} keyword relies on the type system, which means that in cases where a generic monitor routine is desired, writing the mutex routine is possible with the proper trait, for example:
    73103\begin{cfacode}
    74 //Incorrect: T is not a monitor
     104//Incorrect: T may not be monitor
    75105forall(dtype T)
    76106void foo(T * mutex t);
     
    81111\end{cfacode}
    82112
    83 
    84 % ======================================================================
    85 % ======================================================================
    86 \section{Internal scheduling: Implementation} \label{inschedimpl}
    87 % ======================================================================
    88 % ======================================================================
    89 There are several challenges specific to \CFA when implementing internal scheduling. These challenges are direct results of \gls{bulk-acq} and loose object definitions. These two constraints are to root cause of most design decisions in the implementation of internal scheduling. Furthermore, to avoid the head-aches of dynamically allocating memory in a concurrent environment, the internal-scheduling design is entirely free of mallocs and other dynamic memory allocation scheme. This is to avoid the chicken and egg problem \cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal, means that memory management is a constant concern in the design of the system.
    90 
    91 The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues. These queues need to be intrinsic\cit to avoid the need memory allocation. This entails that all the fields needed to keep track of all needed information. Since internal scheduling can use an unbound amount of memory (depending on \gls{bulk-acq}) statically defining information information in the intrusive fields of threads is insufficient. The only variable sized container that does not require memory allocation is the callstack, which is heavily used in the implementation of internal scheduling. Particularly the GCC extension variable length arrays which is used extensively.
    92 
    93 Since stack allocation is based around scope, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable length. In the case of external scheduling, the threads and the condition both allow a fixed amount of memory to be stored, while mutex-routines and the actual blocking call allow for an unbound amount (though adding too much to the mutex routine stack size can become expansive faster).
    94 
    95 The following figure is the traditionnal illustration of a monitor :
    96 
      113Both entry-point and callsite locking are feasible implementations. The current \CFA implementation uses entry-point locking because it requires less work when using \gls{raii}, effectively transferring the burden of implementation to object construction/destruction. The same could be said of callsite locking, the difference being that the latter does not necessarily have an existing scope that matches exactly the scope of the mutual exclusion, i.e.: the function body. Furthermore, entry-point locking requires less code generation since any useful routine is called at least as often as it is defined: there can be only one entry-point but many callsites.
     114
     115% ======================================================================
     116% ======================================================================
     117\section{Threading} \label{impl:thread}
     118% ======================================================================
     119% ======================================================================
     120
      121Figure \ref{fig:system1} shows a high-level picture of the \CFA runtime system with regard to concurrency. Each component of the picture is explained in detail in the following sections.
     122
     123\begin{figure}
     124\begin{center}
     125{\resizebox{\textwidth}{!}{\input{system.pstex_t}}}
     126\end{center}
     127\caption{Overview of the entire system}
     128\label{fig:system1}
     129\end{figure}
     130
     131\subsection{Context Switching}
      132As mentioned in section \ref{coroutine}, coroutines are a stepping stone for implementing threading. This is because they share the same mechanism for context-switching between different stacks. To improve performance and simplicity, context-switching is implemented using the following assumption: all context-switches happen inside a specific function call. This assumption means that the context-switch only has to copy the callee-saved registers onto the stack and then switch the stack registers with the ones of the target coroutine/thread. Note that the instruction pointer can be left untouched since the context-switch is always inside the same function. Threads however do not context-switch between each other directly. They context-switch to the scheduler. This method is called a 2-step context-switch and has the advantage of having a clear distinction between user code and the kernel where scheduling and other system operations happen. Obviously, this has the cost of doubling the context-switch cost because threads must context-switch to an intermediate stack. However, the performance of the 2-step context-switch is still superior to a \code{pthread_yield} (see section \ref{results}). Additionally, for users in need of optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch to use manually (or as part of monitors). This option is not currently present in \CFA but the changes required to add it are strictly additive.
     133
     134\subsection{Processors}
      135Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically pthreads in the current implementation of \CFA. Indeed, any parallelism must go through operating-system libraries. However, \glspl{uthread} are still the main source of concurrency, processors are simply the underlying source of parallelism. Indeed, processor \glspl{kthread} simply fetch a \gls{uthread} from the scheduler and run it; they are effectively executors for user-threads. The main benefit of this approach is that it offers a well-defined boundary between kernel code and user code, for example, kernel thread quiescing, scheduling and interrupt handling. Processors internally use coroutines to take advantage of the existing context-switching semantics.
     136
     137\subsection{Stack management}
      138One of the challenges of this system is to reduce the footprint as much as possible. Specifically, all pthreads created also have a stack created with them, which should be used as much as possible. Normally, coroutines also create their own stack to run on, however, in the case of the coroutines used for processors, these coroutines run directly on the kernel thread stack, effectively stealing the processor stack. The exception to this rule is the Main Processor, i.e. the initial kernel thread that is given to any program. In order to respect user expectations, the stack of the initial kernel thread, the main stack of the program, is used by the main user thread rather than the main processor.
     139
     140\subsection{Preemption} \label{preemption}
      141Finally, an important aspect for any complete threading system is preemption. As mentioned in chapter \ref{basics}, preemption introduces an extra degree of uncertainty, which enables users to have multiple threads interleave transparently, rather than having to cooperate among threads for proper scheduling and CPU distribution. Indeed, preemption is desirable because it adds a degree of isolation among threads. In a fully cooperative system, any thread that runs into a long loop can starve other threads, while in a preemptive system starvation can still occur but it does not rely on every thread having to yield or block on a regular basis, which significantly reduces the programmer's burden. Obviously, preemption is not optimal for every workload, however any preemptive system can become a cooperative system by making the time-slices extremely large, which is why \CFA uses a preemptive threading system.
     142
      143Preemption in \CFA is based on kernel timers, which are used to run a discrete-event simulation. Every processor keeps track of the current time and registers an expiration time with the preemption system. When the preemption system receives a change in preemption, it sorts these expiration times in a list and sets a kernel timer for the closest one, effectively stepping between preemption events on each signal sent by the timer. These timers use the Linux signal {\tt SIGALRM}, which is delivered to the process rather than the kernel-thread. This results in an implementation problem, because when delivering signals to a process, the kernel documentation states that the signal can be delivered to any kernel thread for which the signal is not blocked i.e. :
     144\begin{quote}
     145A process-directed signal may be delivered to any one of the threads that does not currently have the signal blocked. If more than one of the threads has the signal unblocked, then the kernel chooses an arbitrary thread to which to deliver the signal.
     146SIGNAL(7) - Linux Programmer's Manual
     147\end{quote}
      148For the sake of simplicity and in order to prevent the case of having two threads receiving alarms simultaneously, \CFA programs block the {\tt SIGALRM} signal on every thread except one. Now because of how involuntary context-switches are handled, the kernel thread handling {\tt SIGALRM} cannot also be a processor thread.
     149
      150Involuntary context-switching is done by sending signal {\tt SIGUSR1} to the corresponding processor and having the thread yield from inside the signal handler. Effectively context-switching away from the signal-handler back to the kernel and the signal-handler frame is eventually unwound when the thread is scheduled again. This approach means that a signal-handler can start on one kernel thread and terminate on a second kernel thread (but the same user thread). It is important to note that signal-handlers save and restore signal masks because user-thread migration can cause a signal mask to migrate from one kernel thread to another. This behaviour is only a problem if all kernel threads among which a user thread can migrate differ in terms of signal masks\footnote{Sadly, official POSIX documentation is silent on what distinguishes ``async-signal-safe'' functions from other functions}. However, since the kernel thread handling preemption requires a different signal mask, executing user threads on the kernel alarm thread can cause deadlocks. For this reason, the alarm thread is on a tight loop around a system call to \code{sigwaitinfo}, requiring very little CPU time for preemption. One final detail about the alarm thread is how to wake it when additional communication is required (e.g., on thread termination). This unblocking is also done using {\tt SIGALRM}, but sent through \code{pthread_sigqueue}. Indeed, \code{sigwait} can differentiate signals sent from \code{pthread_sigqueue} from signals sent from alarms or the kernel.
     151
     152\subsection{Scheduler}
      153Finally, an aspect that was not mentioned yet is the scheduling algorithm. Currently, the \CFA scheduler uses a single ready queue for all processors, which is the simplest approach to scheduling. Further discussion on scheduling is present in section \ref{futur:sched}.
     154
     155% ======================================================================
     156% ======================================================================
     157\section{Internal scheduling} \label{impl:intsched}
     158% ======================================================================
     159% ======================================================================
     160The following figure is the traditional illustration of a monitor (repeated from page~\pageref{fig:monitor} for convenience) :
     161
     162\begin{figure}[H]
    97163\begin{center}
    98164{\resizebox{0.4\textwidth}{!}{\input{monitor}}}
    99165\end{center}
    100 
    101 For \CFA, the previous picture does not have support for blocking multiple monitors on a single condition. To support \gls{bulk-acq} two changes to this picture are required. First, it doesn't make sense to tie the condition to a single monitor since blocking two monitors as one would require arbitrarily picking a monitor to hold the condition. Secondly, the object waiting on the conditions and AS-stack cannot simply contain the waiting thread since a single thread can potentially wait on multiple monitors. As mentionned in section \ref{inschedimpl}, the handling in multiple monitors is done by partially passing, which entails that each concerned monitor needs to have a node object. However, for waiting on the condition, since all threads need to wait together, a single object needs to be queued in the condition. Moving out the condition and updating the node types yields :
    102 
     166\caption{Traditional illustration of a monitor}
     167\label{fig:monitor}
     168\end{figure}
     169
     170This picture has several components, the two most important being the entry-queue and the AS-stack. The entry-queue is an (almost) FIFO list where threads waiting to enter are parked, while the acceptor-signalor (AS) stack is a FILO list used for threads that have been signalled or otherwise marked as running next.
     171
      172For \CFA, this picture does not have support for blocking multiple monitors on a single condition. To support \gls{bulk-acq} two changes to this picture are required. First, it is no longer helpful to attach the condition to a single monitor. Secondly, the thread waiting on the conditions has to be separated across multiple monitors, which yields :
     173
     174\begin{figure}[H]
    103175\begin{center}
    104176{\resizebox{0.8\textwidth}{!}{\input{int_monitor}}}
    105177\end{center}
    106 
    107 \newpage
    108 
    109 This picture and the proper entry and leave algorithms is the fundamental implementation of internal scheduling.
    110 
     178\caption{Illustration of \CFA monitor}
     179\label{fig:monitor_cfa}
     180\end{figure}
     181
      182This picture and the proper entry and leave algorithms are the fundamental implementation of internal scheduling (see listing \ref{lst:entry2}). Note that when threads are moved from the condition to the AS-stack, it splits the thread into two pieces. The thread is woken up when all the pieces have moved from the AS-stacks to the active thread seat. In this picture, the threads are split into halves but this is only because there are two monitors in this picture. For a specific signaling operation every monitor needs a piece of thread on its AS-stack.
     183
     184\begin{figure}[b]
    111185\begin{multicols}{2}
    112186Entry
    113 \begin{pseudo}[numbers=left]
     187\begin{pseudo}
    114188if monitor is free
    115189        enter
    116 elif I already own the monitor
     190elif already own the monitor
    117191        continue
    118192else
     
    123197\columnbreak
    124198Exit
    125 \begin{pseudo}[numbers=left, firstnumber=8]
     199\begin{pseudo}
    126200decrement recursion
    127201if recursion == 0
     
    135209\end{pseudo}
    136210\end{multicols}
    137 
    138 Some important things to notice about the exit routine. The solution discussed in \ref{inschedimpl} can be seen on line 11 of the previous pseudo code. Basically, the solution boils down to having a seperate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has trasnferred ownership. This solution is safe as well as preventing any potential barging.
    139 
    140 % ======================================================================
    141 % ======================================================================
    142 \section{Implementation Details: External scheduling queues}
    143 % ======================================================================
    144 % ======================================================================
    145 To support multi-monitor external scheduling means that some kind of entry-queues must be used that is aware of both monitors. However, acceptable routines must be aware of the entry queues which means they must be stored inside at least one of the monitors that will be acquired. This in turn adds the requirement a systematic algorithm of disambiguating which queue is relavant regardless of user ordering. The proposed algorithm is to fall back on monitors lock ordering and specify that the monitor that is acquired first is the lock with the relevant entry queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint. This algorithm choice has two consequences, the entry queue of the highest priority monitor is no longer a true FIFO queue and the queue of the lowest priority monitor is both required and probably unused. The queue can no longer be a FIFO queue because instead of simply containing the waiting threads in order arrival, they also contain the second mutex. Therefore, another thread with the same highest priority monitor but a different lowest priority monitor may arrive first but enter the critical section after a thread with the correct pairing. Secondly, since it may not be known at compile time which monitor will be the lowest priority monitor, every monitor needs to have the correct queues even though it is probable that half the multi-monitor queues will go unused for the entire duration of the program.
    146 
    147 
    148 \section{Internals}
    149 The complete mask can be pushed to any one, we are in a context where we already have full ownership of (at least) every concerned monitor and therefore monitors will refuse all calls no matter what.
     211\caption{Entry and exit routine for monitors with internal scheduling}
     212\label{lst:entry2}
     213\end{figure}
     214
      215Some important things to notice about the exit routine. The solution discussed in \ref{intsched} can be seen in the exit routine of listing \ref{lst:entry2}. Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. This solution is deadlock safe as well as preventing any potential barging. The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the callstack of the \code{wait} and \code{signal_block} routines.
     216
     217\begin{figure}[H]
     218\begin{center}
     219{\resizebox{0.8\textwidth}{!}{\input{monitor_structs.pstex_t}}}
     220\end{center}
     221\caption{Data structures involved in internal/external scheduling}
     222\label{fig:structs}
     223\end{figure}
     224
      225Figure \ref{fig:structs} shows a high-level representation of these data-structures. The main idea behind them is that, while figure \ref{fig:monitor_cfa} is a nice illustration in theory, in practice breaking a thread into multiple pieces to put onto intrusive stacks does not make sense. The \code{condition node} is the data structure that is queued into a condition variable and, when signaled, the condition queue is popped and each \code{condition criterion} is moved to the AS-stack. Once all the criteria have been popped from their respective AS-stacks, the thread is woken-up, which is what is shown in listing \ref{lst:entry2}.
     226
     227% ======================================================================
     228% ======================================================================
     229\section{External scheduling}
     230% ======================================================================
     231% ======================================================================
      232Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that waiting-thread queues are no longer specific to a single monitor, as mentioned in section \ref{extsched}. For internal scheduling, these queues are part of condition variables which are still unique for a given scheduling operation (e.g., no single statement uses multiple conditions). However, in the case of external scheduling, there is no equivalent object which is associated with \code{waitfor} statements. This absence means the queues holding the waiting threads must be stored inside at least one of the monitors that is acquired, the monitors being the only objects that have sufficient lifetime and are available on both sides of the \code{waitfor} statement. This requires an algorithm to choose which monitor holds the relevant queue. It is also important that said algorithm be independent of the order in which users list parameters. The proposed algorithm is to fall back on monitor lock ordering and specify that the monitor that is acquired first is the one with the relevant waiting queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint.
     233
     234This algorithm choice has two consequences :
     235\begin{itemize}
     236        \item The queue of the highest priority monitor is no longer a true FIFO queue because threads can be moved to the front of the queue. These queues need to contain a set of monitors for each of the waiting threads. Therefore, another thread whose set contains the same highest priority monitor but different lower priority monitors may arrive first but enter the critical section after a thread with the correct pairing.
     237        \item The queue of the lowest priority monitor is both required and potentially unused. Indeed, since it is not known at compile time which monitor will be the lowest priority monitor, every monitor needs to have the correct queues even though it is possible that some queues will go unused for the entire duration of the program, for example if a monitor is only used in a specific pair.
     238\end{itemize}
     239
     240Therefore, the following modifications need to be made to support external scheduling :
     241\begin{itemize}
     242        \item The threads waiting on the entry-queue need to keep track of which routine is trying to enter, and using which set of monitors. The \code{mutex} routine already has all the required information on its stack so the thread only needs to keep a pointer to that information.
     243        \item The monitors need to keep a mask of acceptable routines. This mask contains for each acceptable routine, a routine pointer and an array of monitors to go with it. It also needs storage to keep track of which routine was accepted. Since this information is not specific to any monitor, the monitors actually contain a pointer to an integer on the stack of the waiting thread. Note that the complete mask can be pushed to any owned monitors, regardless of \code{when} statements, the \code{waitfor} statement is used in a context where the thread already has full ownership of (at least) every concerned monitor and therefore monitors will refuse all calls no matter what.
     244        \item The entry/exit routine need to be updated as shown in listing \ref{lst:entry3}.
     245\end{itemize}
     246
     247\subsection{External scheduling - destructors}
     248Finally, to support the ordering inversion of destructors, the code generation needs to be modified to use a special entry routine. This routine is needed because of the storage requirements of the call order inversion. Indeed, when waiting for the destructors, storage is need for the waiting context and the lifetime of said storage needs to outlive the waiting operation it is needed for. For regular \code{waitfor} statements, the callstack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed on to the AS-stack for later. The waitfor semantics can then be adjusted correspondingly, as seen in listing \ref{lst:entry-dtor}
     249
     250\begin{figure}
     251\begin{multicols}{2}
     252Entry
     253\begin{pseudo}
     254if monitor is free
     255        enter
     256elif already own the monitor
     257        continue
     258elif matches waitfor mask
     259        push criterions to AS-stack
     260        continue
     261else
     262        block
     263increment recursion
     264\end{pseudo}
     265\columnbreak
     266Exit
     267\begin{pseudo}
     268decrement recursion
     269if recursion == 0
     270        if signal_stack not empty
     271                set_owner to thread
     272                if all monitors ready
     273                        wake-up thread
     274                endif
     275        endif
     276
     277        if entry queue not empty
     278                wake-up thread
     279        endif
     280\end{pseudo}
     281\end{multicols}
     282\caption{Entry and exit routine for monitors with internal scheduling and external scheduling}
     283\label{lst:entry3}
     284\end{figure}
     285
     286\begin{figure}
     287\begin{multicols}{2}
     288Destructor Entry
     289\begin{pseudo}
     290if monitor is free
     291        enter
     292elif already own the monitor
     293        increment recursion
     294        return
     295create wait context
     296if matches waitfor mask
     297        reset mask
     298        push self to AS-stack
     299        baton pass
     300else
     301        wait
     302increment recursion
     303\end{pseudo}
     304\columnbreak
     305Waitfor
     306\begin{pseudo}
     307if matching thread is already there
     308        if found destructor
     309                push destructor to AS-stack
     310                unlock all monitors
     311        else
     312                push self to AS-stack
     313                baton pass
     314        endif
     315        return
     316endif
     317if non-blocking
     318        Unlock all monitors
     319        Return
     320endif
     321
     322push self to AS-stack
     323set waitfor mask
     324block
     325return
     326\end{pseudo}
     327\end{multicols}
     328\caption{Pseudo code for the \code{waitfor} routine and the \code{mutex} entry routine for destructors}
     329\label{lst:entry-dtor}
     330\end{figure}
  • doc/proposals/concurrency/text/intro.tex

    rf5c3b6c r0fe4e62  
    33% ======================================================================
    44
    5 This thesis provides a minimal concurrency \acrshort{api} that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of concurrency. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. The high-level approach and its minimal \acrshort{api} are tested in a dialect of C, call \CFA. [Is there value to say that this thesis is also an early definition of the \CFA language and library in regards to concurrency?]
      5This thesis provides a minimal concurrency \acrshort{api} that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of concurrency. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. The high-level approach and its minimal \acrshort{api} are tested in a dialect of C, called \CFA. Furthermore, the proposed \acrshort{api} doubles as an early definition of the \CFA language and library. This thesis also comes with an implementation of the concurrency library for \CFA as well as all the required language features added to the source-to-source translator.
    66
    77There are actually two problems that need to be solved in the design of concurrency for a programming language: which concurrency and which parallelism tools are available to the programmer. While these two concepts are often combined, they are in fact distinct, requiring different tools~\cite{Buhr05a}. Concurrency tools need to handle mutual exclusion and synchronization, while parallelism tools are about performance, cost and resource utilization.
  • doc/proposals/concurrency/text/parallelism.tex

    rf5c3b6c r0fe4e62  
    1515Examples of languages that support \glspl{uthread} are Erlang~\cite{Erlang} and \uC~\cite{uC++book}.
    1616
    17 \subsection{Fibers : user-level threads without preemption}
    18 A popular varient of \glspl{uthread} is what is often refered to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantical differences with \glspl{uthread}. Advocates of \glspl{fiber} list their high performance and ease of implementation as majors strenghts of \glspl{fiber} but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignore fibers.
      17\subsection{Fibers : user-level threads without preemption} \label{fibers}
      18A popular variant of \glspl{uthread} is what is often referred to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantic differences with \glspl{uthread}. The significant difference between \glspl{uthread} and \glspl{fiber} is the lack of \gls{preemption} in the latter. Advocates of \glspl{fiber} list their high performance and ease of implementation as major strengths of \glspl{fiber} but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignores fibers.
    1919
    2020An example of a language that uses fibers is Go~\cite{Go}
     
    2626
    2727\subsection{Paradigm performance}
    28 While the choice between the three paradigms listed above may have significant performance implication, it is difficult to pindown the performance implications of chosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantess that the \gls{pool} based system has the best performance thanks to the lower memory overhead (i.e., not thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilisation, but a context switch is more expensive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are large enough the paradigm choice is largely amortised by the actual work done.
    29 
    30 \TODO
      28While the choice between the three paradigms listed above may have significant performance implications, it is difficult to pin down the performance implications of choosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantees that the \gls{pool} based system has the best performance thanks to the lower memory overhead (i.e., no thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilisation, but a context switch is more expensive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are large enough the paradigm choice is largely amortised by the actual work done.
    3129
    3230\section{The \protect\CFA\ Kernel : Processors, Clusters and Threads}\label{kernel}
    3331
      32\Glspl{cfacluster} have not been fully implemented in the context of this thesis; currently \CFA only supports one \gls{cfacluster}, the initial one. The objective of \gls{cfacluster} is to group \gls{kthread} with identical settings together. \Glspl{uthread} can be scheduled on a \glspl{kthread} of a given \gls{cfacluster}, allowing organization between \glspl{kthread} and \glspl{uthread}. It is important that \glspl{kthread} belonging to the same \glspl{cfacluster} have homogeneous settings, otherwise migrating a \gls{uthread} from one \gls{kthread} to the other can cause issues.
    3433
    3534\subsection{Future Work: Machine setup}\label{machine}
    36 While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heteregenous setups. For example, system using \acrshort{numa} configurations may benefit from users being able to tie clusters and/or kernel threads to certains CPU cores. OS support for CPU affinity is now common \cit, which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
      35While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups. For example, systems using \acrshort{numa} configurations may benefit from users being able to tie clusters and\/or kernel threads to certain CPU cores. OS support for CPU affinity is now common \cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx}, which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
    3736
    38 \subsection{Paradigms}\label{cfaparadigms}
    39 Given these building blocks, it is possible to reproduce all three of the popular paradigms. Indeed, \glspl{uthread} is the default paradigm in \CFA. However, disabling \glspl{preemption} on the \gls{cfacluster} means \glspl{cfathread} effectively become \glspl{fiber}. Since several \glspl{cfacluster} with different scheduling policy can coexist in the same application, this allows \glspl{fiber} and \glspl{uthread} to coexist in the runtime of an application. Finally, it is possible to build executors for thread pools from \glspl{uthread} or \glspl{fiber}.
     37% \subsection{Paradigms}\label{cfaparadigms}
     38% Given these building blocks, it is possible to reproduce all three of the popular paradigms. Indeed, \glspl{uthread} is the default paradigm in \CFA. However, disabling \gls{preemption} on the \gls{cfacluster} means \glspl{cfathread} effectively become \glspl{fiber}. Since several \glspl{cfacluster} with different scheduling policy can coexist in the same application, this allows \glspl{fiber} and \glspl{uthread} to coexist in the runtime of an application. Finally, it is possible to build executors for thread pools from \glspl{uthread} or \glspl{fiber}.
  • doc/proposals/concurrency/text/together.tex

    rf5c3b6c r0fe4e62  
    77
    88\section{Threads as monitors}
    9 As it was subtely alluded in section \ref{threads}, \code{threads} in \CFA are in fact monitors. This means that all the monitors features are available when using threads. For example, here is a very simple two thread pipeline that could be used for a simulator of a game engine :
      9As it was subtly alluded to in section \ref{threads}, \code{threads} in \CFA are in fact monitors, which means that all monitor features are available when using threads. For example, here is a very simple two thread pipeline that could be used for a simulator of a game engine :
    1010\begin{cfacode}
    1111// Visualization declaration
     
    3636}
    3737\end{cfacode}
    38 One of the obvious complaints of the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on for ever. Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner :
     38One of the obvious complaints of the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on forever. Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner :
    3939\begin{cfacode}
    4040// Visualization declaration
     
    7272        }
    7373}
     74
     75// Call destructor for simulator once simulator finishes
     76// Call destructor for renderer to signify shutdown
    7477\end{cfacode}
    7578
    7679\section{Fibers \& Threads}
      80As mentioned in section \ref{preemption}, \CFA uses preemptive threads by default but can use fibers on demand. Currently, using fibers is done by adding the following line of code to the program~:
     81\begin{cfacode}
     82unsigned int default_preemption() {
     83        return 0;
     84}
     85\end{cfacode}
      86This function is called by the kernel to fetch the default preemption rate, where 0 signifies an infinite time-slice, i.e., no preemption. However, once clusters are fully implemented, it will be possible to create fibers and uthreads on the same system :
     87\begin{figure}
     88\begin{cfacode}
     89//Cluster forward declaration
     90struct cluster;
     91
     92//Processor forward declaration
     93struct processor;
     94
     95//Construct clusters with a preemption rate
     96void ?{}(cluster& this, unsigned int rate);
     97//Construct processor and add it to cluster
     98void ?{}(processor& this, cluster& cluster);
     99//Construct thread and schedule it on cluster
     100void ?{}(thread& this, cluster& cluster);
     101
     102//Declare two clusters
     103cluster thread_cluster = { 10`ms };                     //Preempt every 10 ms
     104cluster fibers_cluster = { 0 };                         //Never preempt
     105
     106//Construct 4 processors
     107processor processors[4] = {
     108        //2 for the thread cluster
     109        thread_cluster;
     110        thread_cluster;
     111        //2 for the fibers cluster
     112        fibers_cluster;
     113        fibers_cluster;
     114};
     115
     116//Declares thread
     117thread UThread {};
     118void ?{}(UThread& this) {
     119        //Construct underlying thread to automatically
     120        //be scheduled on the thread cluster
     121        (this){ thread_cluster }
     122}
     123
     124void main(UThread & this);
     125
     126//Declares fibers
     127thread Fiber {};
     128void ?{}(Fiber& this) {
     129        //Construct underlying thread to automatically
     130        //be scheduled on the fiber cluster
     131        (this.__thread){ fibers_cluster }
     132}
     133
     134void main(Fiber & this);
     135\end{cfacode}
     136\end{figure}
  • doc/proposals/concurrency/thesis.tex

    rf5c3b6c r0fe4e62  
    3535\usepackage[pagewise]{lineno}
    3636\usepackage{fancyhdr}
     37\usepackage{float}
    3738\renewcommand{\linenumberfont}{\scriptsize\sffamily}
     39\usepackage{siunitx}
     40\sisetup{ binary-units=true }
    3841\input{style}                                                   % bespoke macros used in the document
    3942\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
     
    107110\input{together}
    108111
     112\input{results}
     113
    109114\input{future}
    110115
  • doc/proposals/concurrency/version

    rf5c3b6c r0fe4e62  
    1 0.10.212
     10.11.129
  • src/Common/Debug.h

    rf5c3b6c r0fe4e62  
    2424#include "SynTree/Declaration.h"
    2525
    26 /// debug codegen a translation unit
    27 static inline void debugCodeGen( const std::list< Declaration * > & translationUnit, const std::string & label ) {
    28         std::list< Declaration * > decls;
     26#define DEBUG
    2927
    30         filter( translationUnit.begin(), translationUnit.end(), back_inserter( decls ), []( Declaration * decl ) {
    31                 return ! LinkageSpec::isBuiltin( decl->get_linkage() );
    32         });
     28namespace Debug {
     29        /// debug codegen a translation unit
     30        static inline void codeGen( __attribute__((unused)) const std::list< Declaration * > & translationUnit, __attribute__((unused)) const std::string & label ) {
     31        #ifdef DEBUG
     32                std::list< Declaration * > decls;
    3333
    34         std::cerr << "======" << label << "======" << std::endl;
    35         CodeGen::generate( decls, std::cerr, false, true );
    36 } // dump
     34                filter( translationUnit.begin(), translationUnit.end(), back_inserter( decls ), []( Declaration * decl ) {
     35                        return ! LinkageSpec::isBuiltin( decl->get_linkage() );
     36                });
     37
     38                std::cerr << "======" << label << "======" << std::endl;
     39                CodeGen::generate( decls, std::cerr, false, true );
     40        #endif
     41        } // dump
     42
     43        static inline void treeDump( __attribute__((unused)) const std::list< Declaration * > & translationUnit, __attribute__((unused)) const std::string & label ) {
     44        #ifdef DEBUG
     45                std::list< Declaration * > decls;
     46
     47                filter( translationUnit.begin(), translationUnit.end(), back_inserter( decls ), []( Declaration * decl ) {
     48                        return ! LinkageSpec::isBuiltin( decl->get_linkage() );
     49                });
     50
     51                std::cerr << "======" << label << "======" << std::endl;
     52                printAll( decls, std::cerr );
     53        #endif
     54        } // dump
     55}
    3756
    3857// Local Variables: //
  • src/Concurrency/Keywords.cc

    rf5c3b6c r0fe4e62  
    553553                        ),
    554554                        new ListInit(
    555                                 map_range < std::list<Initializer*> > ( args, [this](DeclarationWithType * var ){
     555                                map_range < std::list<Initializer*> > ( args, [](DeclarationWithType * var ){
    556556                                        Type * type = var->get_type()->clone();
    557557                                        type->set_mutex( false );
  • src/InitTweak/GenInit.cc

    rf5c3b6c r0fe4e62  
    214214                }
    215215                // a type is managed if it appears in the map of known managed types, or if it contains any polymorphism (is a type variable or generic type containing a type variable)
    216                 return managedTypes.find( SymTab::Mangler::mangle( type ) ) != managedTypes.end() || GenPoly::isPolyType( type );
     216                return managedTypes.find( SymTab::Mangler::mangleConcrete( type ) ) != managedTypes.end() || GenPoly::isPolyType( type );
    217217        }
    218218
     
    232232                        Type * type = InitTweak::getPointerBase( params.front()->get_type() );
    233233                        assert( type );
    234                         managedTypes.insert( SymTab::Mangler::mangle( type ) );
     234                        managedTypes.insert( SymTab::Mangler::mangleConcrete( type ) );
    235235                }
    236236        }
     
    242242                        if ( ObjectDecl * field = dynamic_cast< ObjectDecl * >( member ) ) {
    243243                                if ( isManaged( field ) ) {
     244                                        // generic parameters should not play a role in determining whether a generic type is constructed - construct all generic types, so that
     245                                        // polymorphic constructors make generic types managed types
    244246                                        StructInstType inst( Type::Qualifiers(), aggregateDecl );
    245                                         managedTypes.insert( SymTab::Mangler::mangle( &inst ) );
     247                                        managedTypes.insert( SymTab::Mangler::mangleConcrete( &inst ) );
    246248                                        break;
    247249                                }
  • src/InitTweak/InitTweak.cc

    rf5c3b6c r0fe4e62  
    9999        class InitExpander::ExpanderImpl {
    100100        public:
     101                virtual ~ExpanderImpl() = default;
    101102                virtual std::list< Expression * > next( std::list< Expression * > & indices ) = 0;
    102103                virtual Statement * buildListInit( UntypedExpr * callExpr, std::list< Expression * > & indices ) = 0;
     
    106107        public:
    107108                InitImpl( Initializer * init ) : init( init ) {}
     109                virtual ~InitImpl() = default;
    108110
    109111                virtual std::list< Expression * > next( __attribute((unused)) std::list< Expression * > & indices ) {
     
    122124        public:
    123125                ExprImpl( Expression * expr ) : arg( expr ) {}
    124 
    125                 ~ExprImpl() { delete arg; }
     126                virtual ~ExprImpl() { delete arg; }
    126127
    127128                virtual std::list< Expression * > next( std::list< Expression * > & indices ) {
  • src/ResolvExpr/AlternativeFinder.cc

    rf5c3b6c r0fe4e62  
    2222#include <memory>                  // for allocator_traits<>::value_type
    2323#include <utility>                 // for pair
     24#include <vector>                  // for vector
    2425
    2526#include "Alternative.h"           // for AltList, Alternative
     
    333334                tmpCost.incPoly( -tmpCost.get_polyCost() );
    334335                if ( tmpCost != Cost::zero ) {
    335                 // if ( convCost != Cost::zero ) {
    336336                        Type *newType = formalType->clone();
    337337                        env.apply( newType );
     
    405405///     needAssertions.insert( needAssertions.end(), (*tyvar)->get_assertions().begin(), (*tyvar)->get_assertions().end() );
    406406                }
    407         }
    408 
    409         /// instantiate a single argument by matching actuals from [actualIt, actualEnd) against formalType,
    410         /// producing expression(s) in out and their total cost in cost.
    411         template< typename AltIterator, typename OutputIterator >
    412         bool instantiateArgument( Type * formalType, Initializer * defaultValue, AltIterator & actualIt, AltIterator actualEnd, OpenVarSet & openVars, TypeEnvironment & resultEnv, AssertionSet & resultNeed, AssertionSet & resultHave, const SymTab::Indexer & indexer, Cost & cost, OutputIterator out ) {
    413                 if ( TupleType * tupleType = dynamic_cast< TupleType * >( formalType ) ) {
    414                         // formalType is a TupleType - group actuals into a TupleExpr whose type unifies with the TupleType
    415                         std::list< Expression * > exprs;
    416                         for ( Type * type : *tupleType ) {
    417                                 if ( ! instantiateArgument( type, defaultValue, actualIt, actualEnd, openVars, resultEnv, resultNeed, resultHave, indexer, cost, back_inserter( exprs ) ) ) {
    418                                         deleteAll( exprs );
    419                                         return false;
    420                                 }
    421                         }
    422                         *out++ = new TupleExpr( exprs );
    423                 } else if ( TypeInstType * ttype = Tuples::isTtype( formalType ) ) {
    424                         // xxx - mixing default arguments with variadic??
    425                         std::list< Expression * > exprs;
    426                         for ( ; actualIt != actualEnd; ++actualIt ) {
    427                                 exprs.push_back( actualIt->expr->clone() );
    428                                 cost += actualIt->cost;
    429                         }
    430                         Expression * arg = nullptr;
    431                         if ( exprs.size() == 1 && Tuples::isTtype( exprs.front()->get_result() ) ) {
    432                                 // the case where a ttype value is passed directly is special, e.g. for argument forwarding purposes
    433                                 // xxx - what if passing multiple arguments, last of which is ttype?
    434                                 // xxx - what would happen if unify was changed so that unifying tuple types flattened both before unifying lists? then pass in TupleType(ttype) below.
    435                                 arg = exprs.front();
    436                         } else {
    437                                 arg = new TupleExpr( exprs );
    438                         }
    439                         assert( arg && arg->get_result() );
    440                         if ( ! unify( ttype, arg->get_result(), resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
    441                                 return false;
    442                         }
    443                         *out++ = arg;
    444                 } else if ( actualIt != actualEnd ) {
    445                         // both actualType and formalType are atomic (non-tuple) types - if they unify
    446                         // then accept actual as an argument, otherwise return false (fail to instantiate argument)
    447                         Expression * actual = actualIt->expr;
    448                         Type * actualType = actual->get_result();
    449 
    450                         PRINT(
    451                                 std::cerr << "formal type is ";
    452                                 formalType->print( std::cerr );
    453                                 std::cerr << std::endl << "actual type is ";
    454                                 actualType->print( std::cerr );
    455                                 std::cerr << std::endl;
    456                         )
    457                         if ( ! unify( formalType, actualType, resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
    458                                 // std::cerr << "unify failed" << std::endl;
    459                                 return false;
    460                         }
    461                         // move the expression from the alternative to the output iterator
    462                         *out++ = actual;
    463                         actualIt->expr = nullptr;
    464                         cost += actualIt->cost;
    465                         ++actualIt;
    466                 } else {
    467                         // End of actuals - Handle default values
    468                         if ( SingleInit *si = dynamic_cast<SingleInit *>( defaultValue )) {
    469                                 if ( CastExpr * castExpr = dynamic_cast< CastExpr * >( si->get_value() ) ) {
    470                                         // so far, only constant expressions are accepted as default values
    471                                         if ( ConstantExpr *cnstexpr = dynamic_cast<ConstantExpr *>( castExpr->get_arg() ) ) {
    472                                                 if ( Constant *cnst = dynamic_cast<Constant *>( cnstexpr->get_constant() ) ) {
    473                                                         if ( unify( formalType, cnst->get_type(), resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
    474                                                                 *out++ = cnstexpr->clone();
    475                                                                 return true;
    476                                                         } // if
    477                                                 } // if
    478                                         } // if
    479                                 }
    480                         } // if
    481                         return false;
    482                 } // if
    483                 return true;
    484         }
    485 
    486         bool AlternativeFinder::instantiateFunction( std::list< DeclarationWithType* >& formals, const AltList &actuals, bool isVarArgs, OpenVarSet& openVars, TypeEnvironment &resultEnv, AssertionSet &resultNeed, AssertionSet &resultHave, AltList & out ) {
    487                 simpleCombineEnvironments( actuals.begin(), actuals.end(), resultEnv );
    488                 // make sure we don't widen any existing bindings
    489                 for ( TypeEnvironment::iterator i = resultEnv.begin(); i != resultEnv.end(); ++i ) {
    490                         i->allowWidening = false;
    491                 }
    492                 resultEnv.extractOpenVars( openVars );
    493 
    494                 // flatten actuals so that each actual has an atomic (non-tuple) type
    495                 AltList exploded;
    496                 Tuples::explode( actuals, indexer, back_inserter( exploded ) );
    497 
    498                 AltList::iterator actualExpr = exploded.begin();
    499                 AltList::iterator actualEnd = exploded.end();
    500                 for ( DeclarationWithType * formal : formals ) {
    501                         // match flattened actuals with formal parameters - actuals will be grouped to match
    502                         // with formals as appropriate
    503                         Cost cost = Cost::zero;
    504                         std::list< Expression * > newExprs;
    505                         ObjectDecl * obj = strict_dynamic_cast< ObjectDecl * >( formal );
    506                         if ( ! instantiateArgument( obj->get_type(), obj->get_init(), actualExpr, actualEnd, openVars, resultEnv, resultNeed, resultHave, indexer, cost, back_inserter( newExprs ) ) ) {
    507                                 deleteAll( newExprs );
    508                                 return false;
    509                         }
    510                         // success - produce argument as a new alternative
    511                         assert( newExprs.size() == 1 );
    512                         out.push_back( Alternative( newExprs.front(), resultEnv, cost ) );
    513                 }
    514                 if ( actualExpr != actualEnd ) {
    515                         // there are still actuals remaining, but we've run out of formal parameters to match against
    516                         // this is okay only if the function is variadic
    517                         if ( ! isVarArgs ) {
    518                                 return false;
    519                         }
    520                         out.splice( out.end(), exploded, actualExpr, actualEnd );
    521                 }
    522                 return true;
    523407        }
    524408
     
    675559        }
    676560
    677         template< typename OutputIterator >
    678         void AlternativeFinder::makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const AltList &actualAlt, OutputIterator out ) {
    679                 OpenVarSet openVars;
    680                 AssertionSet resultNeed, resultHave;
    681                 TypeEnvironment resultEnv( func.env );
    682                 makeUnifiableVars( funcType, openVars, resultNeed );
    683                 resultEnv.add( funcType->get_forall() ); // add all type variables as open variables now so that those not used in the parameter list are still considered open
    684                 AltList instantiatedActuals; // filled by instantiate function
     561        /// Gets a default value from an initializer, nullptr if not present
     562        ConstantExpr* getDefaultValue( Initializer* init ) {
     563                if ( SingleInit* si = dynamic_cast<SingleInit*>( init ) ) {
     564                        if ( CastExpr* ce = dynamic_cast<CastExpr*>( si->get_value() ) ) {
     565                                return dynamic_cast<ConstantExpr*>( ce->get_arg() );
     566                        }
     567                }
     568                return nullptr;
     569        }
     570
     571        /// State to iteratively build a match of parameter expressions to arguments
     572        struct ArgPack {
     573                AltList actuals;                 ///< Arguments included in this pack
     574                TypeEnvironment env;             ///< Environment for this pack
     575                AssertionSet need;               ///< Assertions outstanding for this pack
     576                AssertionSet have;               ///< Assertions found for this pack
     577                OpenVarSet openVars;             ///< Open variables for this pack
     578                unsigned nextArg;                ///< Index of next argument in arguments list
     579                std::vector<Alternative> expls;  ///< Exploded actuals left over from last match
     580                unsigned nextExpl;               ///< Index of next exploded alternative to use
     581                std::vector<unsigned> tupleEls;  /// Number of elements in current tuple element(s)
     582
     583                ArgPack(const TypeEnvironment& env, const AssertionSet& need, const AssertionSet& have,
     584                                const OpenVarSet& openVars)
     585                        : actuals(), env(env), need(need), have(have), openVars(openVars), nextArg(0),
     586                          expls(), nextExpl(0), tupleEls() {}
     587
     588                /// Starts a new tuple expression
     589                void beginTuple() {
     590                        if ( ! tupleEls.empty() ) ++tupleEls.back();
     591                        tupleEls.push_back(0);
     592                }
     593
     594                /// Ends a tuple expression, consolidating the appropriate actuals
     595                void endTuple() {
     596                        // set up new Tuple alternative
     597                        std::list<Expression*> exprs;
     598                        Cost cost = Cost::zero;
     599
     600                        // transfer elements into alternative
     601                        for (unsigned i = 0; i < tupleEls.back(); ++i) {
     602                                exprs.push_front( actuals.back().expr );
     603                                actuals.back().expr = nullptr;
     604                                cost += actuals.back().cost;
     605                                actuals.pop_back();
     606                        }
     607                        tupleEls.pop_back();
     608
     609                        // build new alternative
     610                        actuals.emplace_back( new TupleExpr( exprs ), this->env, cost );
     611                }
     612
     613                /// Clones and adds an actual, returns this
     614                ArgPack& withArg( Expression* expr, Cost cost = Cost::zero ) {
     615                        actuals.emplace_back( expr->clone(), this->env, cost );
     616                        if ( ! tupleEls.empty() ) ++tupleEls.back();
     617                        return *this;
     618                }
     619        };
     620
     621        /// Instantiates an argument to match a formal, returns false if no results left
     622        bool instantiateArgument( Type* formalType, Initializer* initializer,
     623                        const std::vector< AlternativeFinder >& args,
     624                        std::vector<ArgPack>& results, std::vector<ArgPack>& nextResults,
     625                        const SymTab::Indexer& indexer ) {
     626                if ( TupleType* tupleType = dynamic_cast<TupleType*>( formalType ) ) {
     627                        // formalType is a TupleType - group actuals into a TupleExpr
     628                        for ( ArgPack& result : results ) { result.beginTuple(); }
     629                        for ( Type* type : *tupleType ) {
     630                                // xxx - dropping initializer changes behaviour from previous, but seems correct
     631                                if ( ! instantiateArgument( type, nullptr, args, results, nextResults, indexer ) )
     632                                        return false;
     633                        }
     634                        for ( ArgPack& result : results ) { result.endTuple(); }
     635                        return true;
     636                } else if ( TypeInstType* ttype = Tuples::isTtype( formalType ) ) {
     637                        // formalType is a ttype, consumes all remaining arguments
     638                        // xxx - mixing default arguments with variadic??
     639                        std::vector<ArgPack> finalResults{};  /// list of completed tuples
     640                        // start tuples
     641                        for ( ArgPack& result : results ) {
     642                                result.beginTuple();
     643
     644                                // use rest of exploded tuple if present
     645                                while ( result.nextExpl < result.expls.size() ) {
     646                                        const Alternative& actual = result.expls[result.nextExpl];
     647                                        result.env.addActual( actual.env, result.openVars );
     648                                        result.withArg( actual.expr );
     649                                        ++result.nextExpl;
     650                                }
     651                        }
     652                        // iterate until all results completed
     653                        while ( ! results.empty() ) {
     654                                // add another argument to results
     655                                for ( ArgPack& result : results ) {
     656                                        // finish result when out of arguments
     657                                        if ( result.nextArg >= args.size() ) {
     658                                                Type* argType = result.actuals.back().expr->get_result();
     659                                                if ( result.tupleEls.back() == 1 && Tuples::isTtype( argType ) ) {
     660                                                        // the case where a ttype value is passed directly is special, e.g. for
     661                                                        // argument forwarding purposes
     662                                                        // xxx - what if passing multiple arguments, last of which is ttype?
     663                                                        // xxx - what would happen if unify was changed so that unifying tuple
     664                                                        // types flattened both before unifying lists? then pass in TupleType
     665                                                        // (ttype) below.
     666                                                        result.tupleEls.pop_back();
     667                                                } else {
     668                                                        // collapse leftover arguments into tuple
     669                                                        result.endTuple();
     670                                                        argType = result.actuals.back().expr->get_result();
     671                                                }
     672                                                // check unification for ttype before adding to final
     673                                                if ( unify( ttype, argType, result.env, result.need, result.have,
     674                                                                result.openVars, indexer ) ) {
     675                                                        finalResults.push_back( std::move(result) );
     676                                                }
     677                                                continue;
     678                                        }
     679
     680                                        // add each possible next argument
     681                                        for ( const Alternative& actual : args[result.nextArg] ) {
     682                                                ArgPack aResult = result;  // copy to clone everything
     683                                                // add details of actual to result
     684                                                aResult.env.addActual( actual.env, aResult.openVars );
     685                                                Cost cost = actual.cost;
     686
     687                                                // explode argument
     688                                                std::vector<Alternative> exploded;
     689                                                Tuples::explode( actual, indexer, back_inserter( exploded ) );
     690
     691                                                // add exploded argument to tuple
     692                                                for ( Alternative& aActual : exploded ) {
     693                                                        aResult.withArg( aActual.expr, cost );
     694                                                        cost = Cost::zero;
     695                                                }
     696                                                ++aResult.nextArg;
     697                                                nextResults.push_back( std::move(aResult) );
     698                                        }
     699                                }
     700
     701                                // reset for next round
     702                                results.swap( nextResults );
     703                                nextResults.clear();
     704                        }
     705                        results.swap( finalResults );
     706                        return ! results.empty();
     707                }
     708
     709                // iterate each current subresult
     710                for ( unsigned iResult = 0; iResult < results.size(); ++iResult ) {
     711                        ArgPack& result = results[iResult];
     712
     713                        if ( result.nextExpl < result.expls.size() ) {
     714                                // use remainder of exploded tuple if present
     715                                const Alternative& actual = result.expls[result.nextExpl];
     716                                result.env.addActual( actual.env, result.openVars );
     717                                Type* actualType = actual.expr->get_result();
     718
     719                                PRINT(
     720                                        std::cerr << "formal type is ";
     721                                        formalType->print( std::cerr );
     722                                        std::cerr << std::endl << "actual type is ";
     723                                        actualType->print( std::cerr );
     724                                        std::cerr << std::endl;
     725                                )
     726
     727                                if ( unify( formalType, actualType, result.env, result.need, result.have,
     728                                                result.openVars, indexer ) ) {
     729                                        ++result.nextExpl;
     730                                        nextResults.push_back( std::move(result.withArg( actual.expr )) );
     731                                }
     732
     733                                continue;
     734                        } else if ( result.nextArg >= args.size() ) {
     735                                // use default initializers if out of arguments
     736                                if ( ConstantExpr* cnstExpr = getDefaultValue( initializer ) ) {
     737                                        if ( Constant* cnst = dynamic_cast<Constant*>( cnstExpr->get_constant() ) ) {
     738                                                if ( unify( formalType, cnst->get_type(), result.env, result.need,
     739                                                                result.have, result.openVars, indexer ) ) {
     740                                                        nextResults.push_back( std::move(result.withArg( cnstExpr )) );
     741                                                }
     742                                        }
     743                                }
     744                                continue;
     745                        }
     746
     747                        // Check each possible next argument
     748                        for ( const Alternative& actual : args[result.nextArg] ) {
     749                                ArgPack aResult = result;  // copy to clone everything
     750                                // add details of actual to result
     751                                aResult.env.addActual( actual.env, aResult.openVars );
     752
     753                                // explode argument
     754                                std::vector<Alternative> exploded;
     755                                Tuples::explode( actual, indexer, back_inserter( exploded ) );
     756                                if ( exploded.empty() ) {
     757                                        // skip empty tuple arguments
     758                                        ++aResult.nextArg;
     759                                        results.push_back( std::move(aResult) );
     760                                        continue;
     761                                }
     762
     763                                // consider only first exploded actual
     764                                const Alternative& aActual = exploded.front();
     765                                Type* actualType = aActual.expr->get_result()->clone();
     766
     767                                PRINT(
     768                                        std::cerr << "formal type is ";
     769                                        formalType->print( std::cerr );
     770                                        std::cerr << std::endl << "actual type is ";
     771                                        actualType->print( std::cerr );
     772                                        std::cerr << std::endl;
     773                                )
     774
     775                                // attempt to unify types
     776                                if ( unify( formalType, actualType, aResult.env, aResult.need, aResult.have, aResult.openVars, indexer ) ) {
     777                                        // add argument
     778                                        aResult.withArg( aActual.expr, actual.cost );
     779                                        ++aResult.nextArg;
     780                                        if ( exploded.size() > 1 ) {
     781                                                // other parts of tuple left over
     782                                                aResult.expls = std::move( exploded );
     783                                                aResult.nextExpl = 1;
     784                                        }
     785                                        nextResults.push_back( std::move(aResult) );
     786                                }
     787                        }
     788                }
     789
     790                // reset for next parameter
     791                results.swap( nextResults );
     792                nextResults.clear();
     793
     794                return ! results.empty();
     795        }
     796
     797        template<typename OutputIterator>
     798        void AlternativeFinder::makeFunctionAlternatives( const Alternative &func,
     799                        FunctionType *funcType, const std::vector< AlternativeFinder > &args,
     800                        OutputIterator out ) {
     801                OpenVarSet funcOpenVars;
     802                AssertionSet funcNeed, funcHave;
     803                TypeEnvironment funcEnv( func.env );
     804                makeUnifiableVars( funcType, funcOpenVars, funcNeed );
     805                // add all type variables as open variables now so that those not used in the parameter
     806                // list are still considered open.
     807                funcEnv.add( funcType->get_forall() );
     808
    685809                if ( targetType && ! targetType->isVoid() && ! funcType->get_returnVals().empty() ) {
    686810                        // attempt to narrow based on expected target type
    687811                        Type * returnType = funcType->get_returnVals().front()->get_type();
    688                         if ( ! unify( returnType, targetType, resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
    689                                 // unification failed, don't pursue this alternative
     812                        if ( ! unify( returnType, targetType, funcEnv, funcNeed, funcHave, funcOpenVars,
     813                                        indexer ) ) {
     814                                // unification failed, don't pursue this function alternative
    690815                                return;
    691816                        }
    692817                }
    693818
    694                 if ( instantiateFunction( funcType->get_parameters(), actualAlt, funcType->get_isVarArgs(), openVars, resultEnv, resultNeed, resultHave, instantiatedActuals ) ) {
     819                // iteratively build matches, one parameter at a time
     820                std::vector<ArgPack> results{ ArgPack{ funcEnv, funcNeed, funcHave, funcOpenVars } };
     821                std::vector<ArgPack> nextResults{};
     822                for ( DeclarationWithType* formal : funcType->get_parameters() ) {
     823                        ObjectDecl* obj = strict_dynamic_cast< ObjectDecl* >( formal );
     824                        if ( ! instantiateArgument(
     825                                        obj->get_type(), obj->get_init(), args, results, nextResults, indexer ) )
     826                                return;
     827                }
     828
     829                // filter out results that don't use all the arguments, and aren't variadic
     830                std::vector<ArgPack> finalResults{};
     831                if ( funcType->get_isVarArgs() ) {
     832                        for ( ArgPack& result : results ) {
     833                                // use rest of exploded tuple if present
     834                                while ( result.nextExpl < result.expls.size() ) {
     835                                        const Alternative& actual = result.expls[result.nextExpl];
     836                                        result.env.addActual( actual.env, result.openVars );
     837                                        result.withArg( actual.expr );
     838                                        ++result.nextExpl;
     839                                }
     840                        }
     841
     842                        while ( ! results.empty() ) {
     843                                // build combinations for all remaining arguments
     844                                for ( ArgPack& result : results ) {
     845                                        // keep if used all arguments
     846                                        if ( result.nextArg >= args.size() ) {
     847                                                finalResults.push_back( std::move(result) );
     848                                                continue;
     849                                        }
     850
     851                                        // add each possible next argument
     852                                        for ( const Alternative& actual : args[result.nextArg] ) {
     853                                                ArgPack aResult = result; // copy to clone everything
     854                                                // add details of actual to result
     855                                                aResult.env.addActual( actual.env, aResult.openVars );
     856                                                Cost cost = actual.cost;
     857
     858                                                // explode argument
     859                                                std::vector<Alternative> exploded;
     860                                                Tuples::explode( actual, indexer, back_inserter( exploded ) );
     861
     862                                                // add exploded argument to arg list
     863                                                for ( Alternative& aActual : exploded ) {
     864                                                        aResult.withArg( aActual.expr, cost );
     865                                                        cost = Cost::zero;
     866                                                }
     867                                                ++aResult.nextArg;
     868                                                nextResults.push_back( std::move(aResult) );
     869                                        }
     870                                }
     871
     872                                // reset for next round
     873                                results.swap( nextResults );
     874                                nextResults.clear();
     875                        }
     876                } else {
     877                        // filter out results that don't use all the arguments
     878                        for ( ArgPack& result : results ) {
     879                                if ( result.nextExpl >= result.expls.size() && result.nextArg >= args.size() ) {
     880                                        finalResults.push_back( std::move(result) );
     881                                }
     882                        }
     883                }
     884
     885                // validate matching combos, add to final result list
     886                for ( ArgPack& result : finalResults ) {
    695887                        ApplicationExpr *appExpr = new ApplicationExpr( func.expr->clone() );
    696                         Alternative newAlt( appExpr, resultEnv, sumCost( instantiatedActuals ) );
    697                         makeExprList( instantiatedActuals, appExpr->get_args() );
     888                        Alternative newAlt( appExpr, result.env, sumCost( result.actuals ) );
     889                        makeExprList( result.actuals, appExpr->get_args() );
    698890                        PRINT(
    699891                                std::cerr << "instantiate function success: " << appExpr << std::endl;
    700892                                std::cerr << "need assertions:" << std::endl;
    701                                 printAssertionSet( resultNeed, std::cerr, 8 );
     893                                printAssertionSet( result.need, std::cerr, 8 );
    702894                        )
    703                         inferParameters( resultNeed, resultHave, newAlt, openVars, out );
     895                        inferParameters( result.need, result.have, newAlt, result.openVars, out );
    704896                }
    705897        }
     
    711903                if ( funcFinder.alternatives.empty() ) return;
    712904
    713                 std::list< AlternativeFinder > argAlternatives;
    714                 findSubExprs( untypedExpr->begin_args(), untypedExpr->end_args(), back_inserter( argAlternatives ) );
    715 
    716                 std::list< AltList > possibilities;
    717                 combos( argAlternatives.begin(), argAlternatives.end(), back_inserter( possibilities ) );
     905                std::vector< AlternativeFinder > argAlternatives;
     906                findSubExprs( untypedExpr->begin_args(), untypedExpr->end_args(),
     907                        back_inserter( argAlternatives ) );
    718908
    719909                // take care of possible tuple assignments
    720910                // if not tuple assignment, assignment is taken care of as a normal function call
    721                 Tuples::handleTupleAssignment( *this, untypedExpr, possibilities );
     911                Tuples::handleTupleAssignment( *this, untypedExpr, argAlternatives );
    722912
    723913                // find function operators
     
    744934                                                Alternative newFunc( *func );
    745935                                                referenceToRvalueConversion( newFunc.expr );
    746                                                 for ( std::list< AltList >::iterator actualAlt = possibilities.begin(); actualAlt != possibilities.end(); ++actualAlt ) {
    747                                                         // XXX
    748                                                         //Designators::check_alternative( function, *actualAlt );
    749                                                         makeFunctionAlternatives( newFunc, function, *actualAlt, std::back_inserter( candidates ) );
    750                                                 }
     936                                                makeFunctionAlternatives( newFunc, function, argAlternatives,
     937                                                        std::back_inserter( candidates ) );
    751938                                        }
    752939                                } else if ( TypeInstType *typeInst = dynamic_cast< TypeInstType* >( func->expr->get_result()->stripReferences() ) ) { // handle ftype (e.g. *? on function pointer)
     
    756943                                                        Alternative newFunc( *func );
    757944                                                        referenceToRvalueConversion( newFunc.expr );
    758                                                         for ( std::list< AltList >::iterator actualAlt = possibilities.begin(); actualAlt != possibilities.end(); ++actualAlt ) {
    759                                                                 makeFunctionAlternatives( newFunc, function, *actualAlt, std::back_inserter( candidates ) );
    760                                                         } // for
     945                                                        makeFunctionAlternatives( newFunc, function, argAlternatives,
     946                                                                std::back_inserter( candidates ) );
    761947                                                } // if
    762948                                        } // if
    763949                                }
    764 
    765                                 // try each function operator ?() with the current function alternative and each of the argument combinations
    766                                 for ( AltList::iterator funcOp = funcOpFinder.alternatives.begin(); funcOp != funcOpFinder.alternatives.end(); ++funcOp ) {
    767                                         // check if the type is pointer to function
    768                                         if ( PointerType *pointer = dynamic_cast< PointerType* >( funcOp->expr->get_result()->stripReferences() ) ) {
    769                                                 if ( FunctionType *function = dynamic_cast< FunctionType* >( pointer->get_base() ) ) {
     950                        } catch ( SemanticError &e ) {
     951                                errors.append( e );
     952                        }
     953                } // for
     954
     955                // try each function operator ?() with each function alternative
     956                if ( ! funcOpFinder.alternatives.empty() ) {
     957                        // add function alternatives to front of argument list
     958                        argAlternatives.insert( argAlternatives.begin(), std::move(funcFinder) );
     959
     960                        for ( AltList::iterator funcOp = funcOpFinder.alternatives.begin();
     961                                        funcOp != funcOpFinder.alternatives.end(); ++funcOp ) {
     962                                try {
     963                                        // check if type is a pointer to function
     964                                        if ( PointerType* pointer = dynamic_cast<PointerType*>(
     965                                                        funcOp->expr->get_result()->stripReferences() ) ) {
     966                                                if ( FunctionType* function =
     967                                                                dynamic_cast<FunctionType*>( pointer->get_base() ) ) {
    770968                                                        Alternative newFunc( *funcOp );
    771969                                                        referenceToRvalueConversion( newFunc.expr );
    772                                                         for ( std::list< AltList >::iterator actualAlt = possibilities.begin(); actualAlt != possibilities.end(); ++actualAlt ) {
    773                                                                 AltList currentAlt;
    774                                                                 currentAlt.push_back( *func );
    775                                                                 currentAlt.insert( currentAlt.end(), actualAlt->begin(), actualAlt->end() );
    776                                                                 makeFunctionAlternatives( newFunc, function, currentAlt, std::back_inserter( candidates ) );
    777                                                         } // for
    778                                                 } // if
    779                                         } // if
    780                                 } // for
    781                         } catch ( SemanticError &e ) {
    782                                 errors.append( e );
    783                         }
    784                 } // for
     970                                                        makeFunctionAlternatives( newFunc, function, argAlternatives,
     971                                                                std::back_inserter( candidates ) );
     972                                                }
     973                                        }
     974                                } catch ( SemanticError &e ) {
     975                                        errors.append( e );
     976                                }
     977                        }
     978                }
    785979
    786980                // Implement SFINAE; resolution errors are only errors if there aren't any non-erroneous resolutions
     
    8131007                candidates.splice( candidates.end(), alternatives );
    8141008
    815                 findMinCost( candidates.begin(), candidates.end(), std::back_inserter( alternatives ) );
     1009                // use a new list so that alternatives are not examined by addAnonConversions twice.
     1010                AltList winners;
     1011                findMinCost( candidates.begin(), candidates.end(), std::back_inserter( winners ) );
    8161012
    8171013                // function may return struct or union value, in which case we need to add alternatives for implicit
    8181014                // conversions to each of the anonymous members, must happen after findMinCost since anon conversions
    8191015                // are never the cheapest expression
    820                 for ( const Alternative & alt : alternatives ) {
     1016                for ( const Alternative & alt : winners ) {
    8211017                        addAnonConversions( alt );
    8221018                }
     1019                alternatives.splice( alternatives.begin(), winners );
    8231020
    8241021                if ( alternatives.empty() && targetType && ! targetType->isVoid() ) {
  • src/ResolvExpr/AlternativeFinder.h

    rf5c3b6c r0fe4e62  
    3434          public:
    3535                AlternativeFinder( const SymTab::Indexer &indexer, const TypeEnvironment &env );
     36
     37                AlternativeFinder( const AlternativeFinder& o )
     38                        : indexer(o.indexer), alternatives(o.alternatives), env(o.env),
     39                          targetType(o.targetType) {}
     40               
     41                AlternativeFinder( AlternativeFinder&& o )
     42                        : indexer(o.indexer), alternatives(std::move(o.alternatives)), env(o.env),
     43                          targetType(o.targetType) {}
     44               
     45                AlternativeFinder& operator= ( const AlternativeFinder& o ) {
     46                        if (&o == this) return *this;
     47                       
     48                        // horrific nasty hack to rebind references...
     49                        alternatives.~AltList();
     50                        new(this) AlternativeFinder(o);
     51                        return *this;
     52                }
     53
     54                AlternativeFinder& operator= ( AlternativeFinder&& o ) {
     55                        if (&o == this) return *this;
     56                       
     57                        // horrific nasty hack to rebind references...
     58                        alternatives.~AltList();
     59                        new(this) AlternativeFinder(std::move(o));
     60                        return *this;
     61                }
     62
    3663                void find( Expression *expr, bool adjust = false, bool prune = true, bool failFast = true );
    3764                /// Calls find with the adjust flag set; adjustment turns array and function types into equivalent pointer types
     
    99126                /// Adds alternatives for offsetof expressions, given the base type and name of the member
    100127                template< typename StructOrUnionType > void addOffsetof( StructOrUnionType *aggInst, const std::string &name );
    101                 bool instantiateFunction( std::list< DeclarationWithType* >& formals, const AltList &actuals, bool isVarArgs, OpenVarSet& openVars, TypeEnvironment &resultEnv, AssertionSet &resultNeed, AssertionSet &resultHave, AltList & out );
    102                 template< typename OutputIterator >
    103                 void makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const AltList &actualAlt, OutputIterator out );
     128                template<typename OutputIterator>
     129                void makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const std::vector< AlternativeFinder >& args, OutputIterator out );
    104130                template< typename OutputIterator >
    105131                void inferParameters( const AssertionSet &need, AssertionSet &have, const Alternative &newAlt, OpenVarSet &openVars, OutputIterator out );
  • src/ResolvExpr/CurrentObject.cc

    rf5c3b6c r0fe4e62  
    260260
    261261                AggregateIterator( const std::string & kind, const std::string & name, Type * inst, const MemberList & members ) : kind( kind ), name( name ), inst( inst ), members( members ), curMember( members.begin() ), sub( makeGenericSubstitution( inst ) ) {
     262                        PRINT( std::cerr << "Creating " << kind << "(" << name << ")"; )
    262263                        init();
    263264                }
  • src/ResolvExpr/RenameVars.cc

    rf5c3b6c r0fe4e62  
    2929        RenameVars global_renamer;
    3030
    31         RenameVars::RenameVars() : level( 0 ) {
     31        RenameVars::RenameVars() : level( 0 ), resetCount( 0 ) {
    3232                mapStack.push_front( std::map< std::string, std::string >() );
    3333        }
     
    3535        void RenameVars::reset() {
    3636                level = 0;
     37                resetCount++;
    3738        }
    3839
     
    130131                        for ( Type::ForallList::iterator i = type->get_forall().begin(); i != type->get_forall().end(); ++i ) {
    131132                                std::ostringstream output;
    132                                 output << "_" << level << "_" << (*i)->get_name();
     133                                output << "_" << resetCount << "_" << level << "_" << (*i)->get_name();
    133134                                std::string newname( output.str() );
    134135                                mapStack.front()[ (*i)->get_name() ] = newname;
  • src/ResolvExpr/RenameVars.h

    rf5c3b6c r0fe4e62  
    4848                void typeBefore( Type *type );
    4949                void typeAfter( Type *type );
    50                 int level;
     50                int level, resetCount;
    5151                std::list< std::map< std::string, std::string > > mapStack;
    5252        };
  • src/ResolvExpr/TypeEnvironment.cc

    rf5c3b6c r0fe4e62  
    201201        }
    202202
     203        void TypeEnvironment::addActual( const TypeEnvironment& actualEnv, OpenVarSet& openVars ) {
     204                for ( const EqvClass& c : actualEnv ) {
     205                        EqvClass c2 = c;
     206                        c2.allowWidening = false;
     207                        for ( const std::string& var : c2.vars ) {
     208                                openVars[ var ] = c2.data;
     209                        }
     210                        env.push_back( std::move(c2) );
     211                }
     212        }
     213
    203214} // namespace ResolvExpr
    204215
  • src/ResolvExpr/TypeEnvironment.h

    rf5c3b6c r0fe4e62  
    8686                TypeEnvironment *clone() const { return new TypeEnvironment( *this ); }
    8787
     88                /// Iteratively adds the environment of a new actual (with allowWidening = false),
     89                /// and extracts open variables.
     90                void addActual( const TypeEnvironment& actualEnv, OpenVarSet& openVars );
     91
    8892                typedef std::list< EqvClass >::iterator iterator;
    8993                iterator begin() { return env.begin(); }
  • src/SymTab/Mangler.cc

    rf5c3b6c r0fe4e62  
    3232namespace SymTab {
    3333        std::string Mangler::mangleType( Type * ty ) {
    34                 Mangler mangler( false, true );
     34                Mangler mangler( false, true, true );
    3535                maybeAccept( ty, mangler );
    3636                return mangler.get_mangleName();
    3737        }
    3838
    39         Mangler::Mangler( bool mangleOverridable, bool typeMode )
    40                 : nextVarNum( 0 ), isTopLevel( true ), mangleOverridable( mangleOverridable ), typeMode( typeMode ) {}
     39        std::string Mangler::mangleConcrete( Type* ty ) {
     40                Mangler mangler( false, false, false );
     41                maybeAccept( ty, mangler );
     42                return mangler.get_mangleName();
     43        }
     44
     45        Mangler::Mangler( bool mangleOverridable, bool typeMode, bool mangleGenericParams )
     46                : nextVarNum( 0 ), isTopLevel( true ), mangleOverridable( mangleOverridable ), typeMode( typeMode ), mangleGenericParams( mangleGenericParams ) {}
    4147
    4248        Mangler::Mangler( const Mangler &rhs ) : mangleName() {
     
    166172
    167173                mangleName << ( refType->get_name().length() + prefix.length() ) << prefix << refType->get_name();
    168         }
    169 
    170         void Mangler::mangleGenericRef( ReferenceToType * refType, std::string prefix ) {
    171                 printQualifiers( refType );
    172 
    173                 std::ostringstream oldName( mangleName.str() );
    174                 mangleName.clear();
    175 
    176                 mangleName << prefix << refType->get_name();
    177 
    178                 std::list< Expression* >& params = refType->get_parameters();
    179                 if ( ! params.empty() ) {
    180                         mangleName << "_";
    181                         for ( std::list< Expression* >::const_iterator param = params.begin(); param != params.end(); ++param ) {
    182                                 TypeExpr *paramType = dynamic_cast< TypeExpr* >( *param );
    183                                 assertf(paramType, "Aggregate parameters should be type expressions: %s", toString(*param).c_str());
    184                                 maybeAccept( paramType->get_type(), *this );
     174
     175                if ( mangleGenericParams ) {
     176                        std::list< Expression* >& params = refType->get_parameters();
     177                        if ( ! params.empty() ) {
     178                                mangleName << "_";
     179                                for ( std::list< Expression* >::const_iterator param = params.begin(); param != params.end(); ++param ) {
     180                                        TypeExpr *paramType = dynamic_cast< TypeExpr* >( *param );
     181                                        assertf(paramType, "Aggregate parameters should be type expressions: %s", toString(*param).c_str());
     182                                        maybeAccept( paramType->get_type(), *this );
     183                                }
     184                                mangleName << "_";
    185185                        }
    186                         mangleName << "_";
    187186                }
    188 
    189                 oldName << mangleName.str().length() << mangleName.str();
    190                 mangleName.str( oldName.str() );
    191187        }
    192188
    193189        void Mangler::visit( StructInstType * aggregateUseType ) {
    194                 if ( typeMode ) mangleGenericRef( aggregateUseType, "s" );
    195                 else mangleRef( aggregateUseType, "s" );
     190                mangleRef( aggregateUseType, "s" );
    196191        }
    197192
    198193        void Mangler::visit( UnionInstType * aggregateUseType ) {
    199                 if ( typeMode ) mangleGenericRef( aggregateUseType, "u" );
    200                 else mangleRef( aggregateUseType, "u" );
     194                mangleRef( aggregateUseType, "u" );
    201195        }
    202196
     
    285279                                varNums[ (*i)->name ] = std::pair< int, int >( nextVarNum++, (int)(*i)->get_kind() );
    286280                                for ( std::list< DeclarationWithType* >::iterator assert = (*i)->assertions.begin(); assert != (*i)->assertions.end(); ++assert ) {
    287                                         Mangler sub_mangler( mangleOverridable, typeMode );
     281                                        Mangler sub_mangler( mangleOverridable, typeMode, mangleGenericParams );
    288282                                        sub_mangler.nextVarNum = nextVarNum;
    289283                                        sub_mangler.isTopLevel = false;
  • src/SymTab/Mangler.h

    rf5c3b6c r0fe4e62  
    3030                /// Mangle syntax tree object; primary interface to clients
    3131                template< typename SynTreeClass >
    32             static std::string mangle( SynTreeClass *decl, bool mangleOverridable = true, bool typeMode = false );
     32            static std::string mangle( SynTreeClass *decl, bool mangleOverridable = true, bool typeMode = false, bool mangleGenericParams = true );
    3333                /// Mangle a type name; secondary interface
    3434                static std::string mangleType( Type* ty );
     35                /// Mangle ignoring generic type parameters
     36                static std::string mangleConcrete( Type* ty );
     37
    3538
    3639                virtual void visit( ObjectDecl *declaration );
     
    6265                bool mangleOverridable;         ///< Specially mangle overridable built-in methods
    6366                bool typeMode;                  ///< Produce a unique mangled name for a type
     67                bool mangleGenericParams;       ///< Include generic parameters in name mangling if true
    6468
    65                 Mangler( bool mangleOverridable, bool typeMode );
     69                Mangler( bool mangleOverridable, bool typeMode, bool mangleGenericParams );
    6670                Mangler( const Mangler & );
    6771
    6872                void mangleDecl( DeclarationWithType *declaration );
    6973                void mangleRef( ReferenceToType *refType, std::string prefix );
    70                 void mangleGenericRef( ReferenceToType *refType, std::string prefix );
    7174
    7275                void printQualifiers( Type *type );
     
    7477
    7578        template< typename SynTreeClass >
    76         std::string Mangler::mangle( SynTreeClass *decl, bool mangleOverridable, bool typeMode ) {
    77                 Mangler mangler( mangleOverridable, typeMode );
     79        std::string Mangler::mangle( SynTreeClass *decl, bool mangleOverridable, bool typeMode, bool mangleGenericParams ) {
     80                Mangler mangler( mangleOverridable, typeMode, mangleGenericParams );
    7881                maybeAccept( decl, mangler );
    7982                return mangler.get_mangleName();
  • src/SymTab/Validate.cc

    rf5c3b6c r0fe4e62  
    268268                HoistStruct::hoistStruct( translationUnit ); // must happen after EliminateTypedef, so that aggregate typedefs occur in the correct order
    269269                ReturnTypeFixer::fix( translationUnit ); // must happen before autogen
     270                acceptAll( translationUnit, epc ); // must happen before VerifyCtorDtorAssign, because void return objects should not exist; before LinkReferenceToTypes because it is an indexer and needs correct types for mangling
    270271                acceptAll( translationUnit, lrt ); // must happen before autogen, because sized flag needs to propagate to generated functions
    271272                acceptAll( translationUnit, genericParams );  // check as early as possible - can't happen before LinkReferenceToTypes
    272                 acceptAll( translationUnit, epc ); // must happen before VerifyCtorDtorAssign, because void return objects should not exist
    273273                VerifyCtorDtorAssign::verify( translationUnit );  // must happen before autogen, because autogen examines existing ctor/dtors
    274274                ReturnChecker::checkFunctionReturns( translationUnit );
  • src/Tuples/TupleAssignment.cc

    rf5c3b6c r0fe4e62  
    2020#include <memory>                          // for unique_ptr, allocator_trai...
    2121#include <string>                          // for string
     22#include <vector>
    2223
    2324#include "CodeGen/OperatorTable.h"
     
    3334#include "ResolvExpr/Resolver.h"           // for resolveCtorInit
    3435#include "ResolvExpr/TypeEnvironment.h"    // for TypeEnvironment
     36#include "ResolvExpr/typeops.h"            // for combos
    3537#include "SynTree/Declaration.h"           // for ObjectDecl
    3638#include "SynTree/Expression.h"            // for Expression, CastExpr, Name...
     
    5254                // dispatcher for Tuple (multiple and mass) assignment operations
    5355                TupleAssignSpotter( ResolvExpr::AlternativeFinder & );
    54                 void spot( UntypedExpr * expr, const std::list<ResolvExpr::AltList> &possibilities );
     56                void spot( UntypedExpr * expr, std::vector<ResolvExpr::AlternativeFinder> &args );
    5557
    5658          private:
     
    5961                struct Matcher {
    6062                  public:
    61                         Matcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts );
     63                        Matcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList& lhs, const
     64                                ResolvExpr::AltList& rhs );
    6265                        virtual ~Matcher() {}
    6366                        virtual void match( std::list< Expression * > &out ) = 0;
     
    7275                struct MassAssignMatcher : public Matcher {
    7376                  public:
    74                         MassAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts );
     77                        MassAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList& lhs,
     78                                const ResolvExpr::AltList& rhs ) : Matcher(spotter, lhs, rhs) {}
    7579                        virtual void match( std::list< Expression * > &out );
    7680                };
     
    7882                struct MultipleAssignMatcher : public Matcher {
    7983                  public:
    80                         MultipleAssignMatcher( TupleAssignSpotter &spot, const ResolvExpr::AltList & alts );
     84                        MultipleAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList& lhs,
     85                                const ResolvExpr::AltList& rhs ) : Matcher(spotter, lhs, rhs) {}
    8186                        virtual void match( std::list< Expression * > &out );
    8287                };
     
    114119        }
    115120
    116         void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * expr, const std::list<ResolvExpr::AltList> &possibilities ) {
     121        void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * expr,
     122                                std::vector<ResolvExpr::AlternativeFinder> &args ) {
    117123                TupleAssignSpotter spotter( currentFinder );
    118                 spotter.spot( expr, possibilities );
     124                spotter.spot( expr, args );
    119125        }
    120126
     
    122128                : currentFinder(f) {}
    123129
    124         void TupleAssignSpotter::spot( UntypedExpr * expr, const std::list<ResolvExpr::AltList> &possibilities ) {
     130        void TupleAssignSpotter::spot( UntypedExpr * expr,
     131                        std::vector<ResolvExpr::AlternativeFinder> &args ) {
    125132                if (  NameExpr *op = dynamic_cast< NameExpr * >(expr->get_function()) ) {
    126133                        if ( CodeGen::isCtorDtorAssign( op->get_name() ) ) {
    127                                fname = op->get_name();
    128                                 PRINT( std::cerr << "TupleAssignment: " << fname << std::endl; )
    129                                 for ( std::list<ResolvExpr::AltList>::const_iterator ali = possibilities.begin(); ali != possibilities.end(); ++ali ) {
    130                                         if ( ali->size() == 0 ) continue; // AlternativeFinder will natrually handle this case, if it's legal
    131                                         if ( ali->size() <= 1 && CodeGen::isAssignment( op->get_name() ) ) {
    132                                                 // what does it mean if an assignment takes 1 argument? maybe someone defined such a function, in which case AlternativeFinder will naturally handle it
    133                                                 continue;
     134                                fname = op->get_name();
     135
     136                                // AlternativeFinder will naturally handle this case case, if it's legal
     137                                if ( args.size() == 0 ) return;
     138
     139                                // if an assignment only takes 1 argument, that's odd, but maybe someone wrote
     140                                // the function, in which case AlternativeFinder will handle it normally
     141                                if ( args.size() == 1 && CodeGen::isAssignment( fname ) ) return;
     142
     143                                // look over all possible left-hand-sides
     144                                for ( ResolvExpr::Alternative& lhsAlt : args[0] ) {
     145                                        // skip non-tuple LHS
     146                                        if ( ! refToTuple(lhsAlt.expr) ) continue;
     147
     148                                        // explode is aware of casts - ensure every LHS expression is sent into explode
     149                                        // with a reference cast
     150                                        // xxx - this seems to change the alternatives before the normal
     151                                        //  AlternativeFinder flow; maybe this is desired?
     152                                        if ( ! dynamic_cast<CastExpr*>( lhsAlt.expr ) ) {
     153                                                lhsAlt.expr = new CastExpr( lhsAlt.expr,
     154                                                                new ReferenceType( Type::Qualifiers(),
     155                                                                        lhsAlt.expr->get_result()->clone() ) );
    134156                                        }
    135157
    136                                         assert( ! ali->empty() );
    137                                         // grab args 2-N and group into a TupleExpr
    138                                         const ResolvExpr::Alternative & alt1 = ali->front();
    139                                         auto begin = std::next(ali->begin(), 1), end = ali->end();
    140                                         PRINT( std::cerr << "alt1 is " << alt1.expr << std::endl; )
    141                                         if ( refToTuple(alt1.expr) ) {
    142                                                 PRINT( std::cerr << "and is reference to tuple" << std::endl; )
    143                                                 if ( isMultAssign( begin, end ) ) {
    144                                                         PRINT( std::cerr << "possible multiple assignment" << std::endl; )
    145                                                         matcher.reset( new MultipleAssignMatcher( *this, *ali ) );
    146                                                 } else {
    147                                                         // mass assignment
    148                                                         PRINT( std::cerr << "possible mass assignment" << std::endl; )
    149                                                         matcher.reset( new MassAssignMatcher( *this,  *ali ) );
     158                                        // explode the LHS so that each field of a tuple-valued-expr is assigned
     159                                        ResolvExpr::AltList lhs;
     160                                        explode( lhsAlt, currentFinder.get_indexer(), back_inserter(lhs), true );
     161                                        for ( ResolvExpr::Alternative& alt : lhs ) {
     162                                                // each LHS value must be a reference - some come in with a cast expression,
     163                                                // if not just cast to reference here
     164                                                if ( ! dynamic_cast<ReferenceType*>( alt.expr->get_result() ) ) {
     165                                                        alt.expr = new CastExpr( alt.expr,
     166                                                                new ReferenceType( Type::Qualifiers(),
     167                                                                        alt.expr->get_result()->clone() ) );
    150168                                                }
     169                                        }
     170
     171                                        if ( args.size() == 1 ) {
     172                                                // mass default-initialization/destruction
     173                                                ResolvExpr::AltList rhs{};
     174                                                matcher.reset( new MassAssignMatcher( *this, lhs, rhs ) );
    151175                                                match();
     176                                        } else if ( args.size() > 2 ) {
     177                                                // expand all possible RHS possibilities
     178                                                // TODO build iterative version of this instead of using combos
     179                                                std::vector< ResolvExpr::AltList > rhsAlts;
     180                                                combos( std::next(args.begin(), 1), args.end(),
     181                                                        std::back_inserter( rhsAlts ) );
     182                                                for ( const ResolvExpr::AltList& rhsAlt : rhsAlts ) {
     183                                                        // multiple assignment
     184                                                        ResolvExpr::AltList rhs;
     185                                                        explode( rhsAlt, currentFinder.get_indexer(),
     186                                                                std::back_inserter(rhs), true );
     187                                                        matcher.reset( new MultipleAssignMatcher( *this, lhs, rhs ) );
     188                                                        match();
     189                                                }
     190                                        } else {
     191                                                for ( const ResolvExpr::Alternative& rhsAlt : args[1] ) {
     192                                                        ResolvExpr::AltList rhs;
     193                                                        if ( isTuple(rhsAlt.expr) ) {
     194                                                                // multiple assignment
     195                                                                explode( rhsAlt, currentFinder.get_indexer(), 
     196                                                                        std::back_inserter(rhs), true );
     197                                                                matcher.reset( new MultipleAssignMatcher( *this, lhs, rhs ) );
     198                                                        } else {
     199                                                                // mass assignment
     200                                                                rhs.push_back( rhsAlt );
     201                                                                matcher.reset( new MassAssignMatcher( *this, lhs, rhs ) );
     202                                                        }
     203                                                        match();
     204                                                }
    152205                                        }
    153206                                }
     
    169222                ResolvExpr::AltList current;
    170223                // now resolve new assignments
    171                 for ( std::list< Expression * >::iterator i = new_assigns.begin(); i != new_assigns.end(); ++i ) {
     224                for ( std::list< Expression * >::iterator i = new_assigns.begin();
     225                                i != new_assigns.end(); ++i ) {
    172226                        PRINT(
    173227                                std::cerr << "== resolving tuple assign ==" << std::endl;
     
    175229                        )
    176230
    177                         ResolvExpr::AlternativeFinder finder( currentFinder.get_indexer(), currentFinder.get_environ() );
     231                        ResolvExpr::AlternativeFinder finder{ currentFinder.get_indexer(),
     232                                currentFinder.get_environ() };
    178233                        try {
    179234                                finder.findWithAdjustment(*i);
     
    196251                // combine assignment environments into combined expression environment
    197252                simpleCombineEnvironments( current.begin(), current.end(), matcher->compositeEnv );
    198                 currentFinder.get_alternatives().push_front( ResolvExpr::Alternative(new TupleAssignExpr(solved_assigns, matcher->tmpDecls), matcher->compositeEnv, ResolvExpr::sumCost( current  ) + matcher->baseCost ) );
    199         }
    200 
    201         TupleAssignSpotter::Matcher::Matcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList &alts ) : spotter(spotter), baseCost( ResolvExpr::sumCost( alts ) ) {
    202                 assert( ! alts.empty() );
    203                 // combine argument environments into combined expression environment
    204                 simpleCombineEnvironments( alts.begin(), alts.end(), compositeEnv );
    205 
    206                 ResolvExpr::Alternative lhsAlt = alts.front();
    207                 // explode is aware of casts - ensure every LHS expression is sent into explode with a reference cast
    208                 if ( ! dynamic_cast< CastExpr * >( lhsAlt.expr ) ) {
    209                         lhsAlt.expr = new CastExpr( lhsAlt.expr, new ReferenceType( Type::Qualifiers(), lhsAlt.expr->get_result()->clone() ) );
    210                 }
    211 
    212                 // explode the lhs so that each field of the tuple-valued-expr is assigned.
    213                 explode( lhsAlt, spotter.currentFinder.get_indexer(), back_inserter(lhs), true );
    214 
    215                 for ( ResolvExpr::Alternative & alt : lhs ) {
    216                         // every LHS value must be a reference - some come in with a cast expression, if it doesn't just cast to reference here.
    217                         if ( ! dynamic_cast< ReferenceType * >( alt.expr->get_result() ) ) {
    218                                 alt.expr = new CastExpr( alt.expr, new ReferenceType( Type::Qualifiers(), alt.expr->get_result()->clone() ) );
    219                         }
    220                 }
    221         }
    222 
    223         TupleAssignSpotter::MassAssignMatcher::MassAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts ) : Matcher( spotter, alts ) {
    224                 assert( alts.size() == 1 || alts.size() == 2 );
    225                 if ( alts.size() == 2 ) {
    226                         rhs.push_back( alts.back() );
    227                 }
    228         }
    229 
    230         TupleAssignSpotter::MultipleAssignMatcher::MultipleAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts ) : Matcher( spotter, alts ) {
    231                 // explode the rhs so that each field of the tuple-valued-expr is assigned.
    232                 explode( std::next(alts.begin(), 1), alts.end(), spotter.currentFinder.get_indexer(), back_inserter(rhs), true );
     253                currentFinder.get_alternatives().push_front( ResolvExpr::Alternative(
     254                        new TupleAssignExpr(solved_assigns, matcher->tmpDecls), matcher->compositeEnv,
     255                        ResolvExpr::sumCost( current ) + matcher->baseCost ) );
     256        }
     257
     258        TupleAssignSpotter::Matcher::Matcher( TupleAssignSpotter &spotter,
     259                const ResolvExpr::AltList &lhs, const ResolvExpr::AltList &rhs )
     260        : lhs(lhs), rhs(rhs), spotter(spotter),
     261          baseCost( ResolvExpr::sumCost( lhs ) + ResolvExpr::sumCost( rhs ) ) {
     262                simpleCombineEnvironments( lhs.begin(), lhs.end(), compositeEnv );
     263                simpleCombineEnvironments( rhs.begin(), rhs.end(), compositeEnv );
    233264        }
    234265
  • src/Tuples/Tuples.h

    rf5c3b6c r0fe4e62  
    1717
    1818#include <string>
     19#include <vector>
    1920
    2021#include "SynTree/Expression.h"
     
    2627namespace Tuples {
    2728        // TupleAssignment.cc
    28         void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * assign, const std::list<ResolvExpr::AltList> & possibilities );
    29 
     29        void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * assign,
     30                std::vector< ResolvExpr::AlternativeFinder >& args );
     31       
    3032        // TupleExpansion.cc
    3133        /// expands z.[a, b.[x, y], c] into [z.a, z.b.x, z.b.y, z.c], inserting UniqueExprs as appropriate
  • src/benchmark/Makefile.am

    rf5c3b6c r0fe4e62  
    2323STATS    = ${TOOLSDIR}stat.py
    2424repeats  = 30
     25TIME_FORMAT = "%E"
     26PRINT_FORMAT = '%20s\t'
    2527
    2628.NOTPARALLEL:
     
    2830noinst_PROGRAMS =
    2931
    30 bench$(EXEEXT) :
    31         @for ccflags in "-debug" "-nodebug"; do \
    32                 echo ${CC} ${AM_CFLAGS} ${CFLAGS} ${ccflags} @CFA_FLAGS@ -lrt bench.c;\
    33                 ${CC} ${AM_CFLAGS} ${CFLAGS} $${ccflags} -lrt bench.c;\
    34                 ./a.out ; \
    35         done ; \
    36         rm -f ./a.out ;
    37 
    38 csv-data$(EXEEXT):
    39         @${CC} ${AM_CFLAGS} ${CFLAGS} ${ccflags} @CFA_FLAGS@ -nodebug -lrt -quiet -DN=50000000 csv-data.c
    40         @./a.out
    41         @rm -f ./a.out
    42 
    43 ## =========================================================================================================
    44 ctxswitch$(EXEEXT): \
    45         ctxswitch-pthread.run           \
    46         ctxswitch-cfa_coroutine.run     \
    47         ctxswitch-cfa_thread.run        \
    48         ctxswitch-upp_coroutine.run     \
    49         ctxswitch-upp_thread.run
    50 
    51 ctxswitch-cfa_coroutine$(EXEEXT):
    52         ${CC}        ctxswitch/cfa_cor.c   -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    53 
    54 ctxswitch-cfa_thread$(EXEEXT):
    55         ${CC}        ctxswitch/cfa_thrd.c  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    56 
    57 ctxswitch-upp_coroutine$(EXEEXT):
    58         u++          ctxswitch/upp_cor.cc  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    59 
    60 ctxswitch-upp_thread$(EXEEXT):
    61         u++          ctxswitch/upp_thrd.cc -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    62 
    63 ctxswitch-pthread$(EXEEXT):
    64         @BACKEND_CC@ ctxswitch/pthreads.c  -DBENCH_N=50000000  -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    65 
    66 ## =========================================================================================================
    67 creation$(EXEEXT) :\
    68         creation-pthread.run            \
    69         creation-cfa_coroutine.run      \
    70         creation-cfa_thread.run         \
    71         creation-upp_coroutine.run      \
    72         creation-upp_thread.run
    73 
    74 creation-cfa_coroutine$(EXEEXT):
    75         ${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    76 
    77 creation-cfa_thread$(EXEEXT):
    78         ${CC}        creation/cfa_thrd.c  -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    79 
    80 creation-upp_coroutine$(EXEEXT):
    81         u++          creation/upp_cor.cc  -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    82 
    83 creation-upp_thread$(EXEEXT):
    84         u++          creation/upp_thrd.cc -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    85 
    86 creation-pthread$(EXEEXT):
    87         @BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    88 
    89 ## =========================================================================================================
    90 mutex$(EXEEXT) :\
    91         mutex-function.run      \
    92         mutex-pthread_lock.run  \
    93         mutex-upp.run           \
    94         mutex-cfa1.run          \
    95         mutex-cfa2.run          \
    96         mutex-cfa4.run
    97 
    98 mutex-function$(EXEEXT):
    99         @BACKEND_CC@ mutex/function.c    -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    100 
    101 mutex-pthread_lock$(EXEEXT):
    102         @BACKEND_CC@ mutex/pthreads.c    -DBENCH_N=50000000    -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    103 
    104 mutex-upp$(EXEEXT):
    105         u++          mutex/upp.cc        -DBENCH_N=50000000    -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    106 
    107 mutex-cfa1$(EXEEXT):
    108         ${CC}        mutex/cfa1.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    109 
    110 mutex-cfa2$(EXEEXT):
    111         ${CC}        mutex/cfa2.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    112 
    113 mutex-cfa4$(EXEEXT):
    114         ${CC}        mutex/cfa4.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    115 
    116 ## =========================================================================================================
    117 signal$(EXEEXT) :\
    118         signal-upp.run          \
    119         signal-cfa1.run         \
    120         signal-cfa2.run         \
    121         signal-cfa4.run
    122 
    123 signal-upp$(EXEEXT):
    124         u++          schedint/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    125 
    126 signal-cfa1$(EXEEXT):
    127         ${CC}        schedint/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    128 
    129 signal-cfa2$(EXEEXT):
    130         ${CC}        schedint/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    131 
    132 signal-cfa4$(EXEEXT):
    133         ${CC}        schedint/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    134 
    135 ## =========================================================================================================
    136 waitfor$(EXEEXT) :\
    137         waitfor-upp.run         \
    138         waitfor-cfa1.run                \
    139         waitfor-cfa2.run                \
    140         waitfor-cfa4.run
    141 
    142 waitfor-upp$(EXEEXT):
    143         u++          schedext/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    144 
    145 waitfor-cfa1$(EXEEXT):
    146         ${CC}        schedext/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    147 
    148 waitfor-cfa2$(EXEEXT):
    149         ${CC}        schedext/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    150 
    151 waitfor-cfa4$(EXEEXT):
    152         ${CC}        schedext/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    153 
    154 ## =========================================================================================================
     32all : ctxswitch$(EXEEXT) mutex$(EXEEXT) signal$(EXEEXT) waitfor$(EXEEXT) creation$(EXEEXT)
    15533
    15634%.run : %$(EXEEXT) ${REPEAT}
     
    16341        @rm -f a.out .result.log
    16442
     43%.runquiet :
     44        @+make $(basename $@)
     45        @./a.out
     46        @rm -f a.out
     47
     48%.make :
     49        @printf "${PRINT_FORMAT}" $(basename $(subst compile-,,$@))
     50        @+/usr/bin/time -f ${TIME_FORMAT} make $(basename $@) 2>&1
     51
    16552${REPEAT} :
    16653        @+make -C ${TOOLSDIR} repeat
     54
     55## =========================================================================================================
     56
     57jenkins$(EXEEXT):
     58        @echo "{"
     59        @echo -e '\t"githash": "'${githash}'",'
     60        @echo -e '\t"arch": "'   ${arch}   '",'
     61        @echo -e '\t"compile": {'
     62        @+make compile TIME_FORMAT='%e,' PRINT_FORMAT='\t\t\"%s\" :'
     63        @echo -e '\t\t"dummy" : {}'
     64        @echo -e '\t},'
     65        @echo -e '\t"ctxswitch": {'
     66        @echo -en '\t\t"coroutine":'
     67        @+make ctxswitch-cfa_coroutine.runquiet
     68        @echo -en '\t\t,"thread":'
     69        @+make ctxswitch-cfa_thread.runquiet
     70        @echo -e '\t},'
     71        @echo -e '\t"mutex": ['
     72        @echo -en '\t\t'
     73        @+make mutex-cfa1.runquiet
     74        @echo -en '\t\t,'
     75        @+make mutex-cfa2.runquiet
     76        @echo -e '\t],'
     77        @echo -e '\t"scheduling": ['
     78        @echo -en '\t\t'
     79        @+make signal-cfa1.runquiet
     80        @echo -en '\t\t,'
     81        @+make signal-cfa2.runquiet
     82        @echo -en '\t\t,'
     83        @+make waitfor-cfa1.runquiet
     84        @echo -en '\t\t,'
     85        @+make waitfor-cfa2.runquiet
     86        @echo -e '\n\t],'
     87        @echo -e '\t"epoch": ' $(shell date +%s)
     88        @echo "}"
     89
     90## =========================================================================================================
     91ctxswitch$(EXEEXT): \
     92        ctxswitch-pthread.run           \
     93        ctxswitch-cfa_coroutine.run     \
     94        ctxswitch-cfa_thread.run        \
     95        ctxswitch-upp_coroutine.run     \
     96        ctxswitch-upp_thread.run
     97
     98ctxswitch-cfa_coroutine$(EXEEXT):
     99        @${CC}        ctxswitch/cfa_cor.c   -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     100
     101ctxswitch-cfa_thread$(EXEEXT):
     102        @${CC}        ctxswitch/cfa_thrd.c  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     103
     104ctxswitch-upp_coroutine$(EXEEXT):
     105        @u++          ctxswitch/upp_cor.cc  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     106
     107ctxswitch-upp_thread$(EXEEXT):
     108        @u++          ctxswitch/upp_thrd.cc -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     109
     110ctxswitch-pthread$(EXEEXT):
     111        @@BACKEND_CC@ ctxswitch/pthreads.c  -DBENCH_N=50000000  -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     112
     113## =========================================================================================================
     114mutex$(EXEEXT) :\
     115        mutex-function.run      \
     116        mutex-pthread_lock.run  \
     117        mutex-upp.run           \
     118        mutex-cfa1.run          \
     119        mutex-cfa2.run          \
     120        mutex-cfa4.run
     121
     122mutex-function$(EXEEXT):
     123        @@BACKEND_CC@ mutex/function.c    -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     124
     125mutex-pthread_lock$(EXEEXT):
     126        @@BACKEND_CC@ mutex/pthreads.c    -DBENCH_N=50000000    -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     127
     128mutex-upp$(EXEEXT):
     129        @u++          mutex/upp.cc        -DBENCH_N=50000000    -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     130
     131mutex-cfa1$(EXEEXT):
     132        @${CC}        mutex/cfa1.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     133
     134mutex-cfa2$(EXEEXT):
     135        @${CC}        mutex/cfa2.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     136
     137mutex-cfa4$(EXEEXT):
     138        @${CC}        mutex/cfa4.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     139
     140## =========================================================================================================
     141signal$(EXEEXT) :\
     142        signal-upp.run          \
     143        signal-cfa1.run         \
     144        signal-cfa2.run         \
     145        signal-cfa4.run
     146
     147signal-upp$(EXEEXT):
     148        @u++          schedint/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     149
     150signal-cfa1$(EXEEXT):
     151        @${CC}        schedint/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     152
     153signal-cfa2$(EXEEXT):
     154        @${CC}        schedint/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     155
     156signal-cfa4$(EXEEXT):
     157        @${CC}        schedint/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     158
     159## =========================================================================================================
     160waitfor$(EXEEXT) :\
     161        waitfor-upp.run         \
     162        waitfor-cfa1.run                \
     163        waitfor-cfa2.run                \
     164        waitfor-cfa4.run
     165
     166waitfor-upp$(EXEEXT):
     167        @u++          schedext/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     168
     169waitfor-cfa1$(EXEEXT):
     170        @${CC}        schedext/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     171
     172waitfor-cfa2$(EXEEXT):
     173        @${CC}        schedext/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     174
     175waitfor-cfa4$(EXEEXT):
     176        @${CC}        schedext/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     177
     178## =========================================================================================================
     179creation$(EXEEXT) :\
     180        creation-pthread.run                    \
     181        creation-cfa_coroutine.run              \
     182        creation-cfa_coroutine_eager.run        \
     183        creation-cfa_thread.run                 \
     184        creation-upp_coroutine.run              \
     185        creation-upp_thread.run
     186
     187creation-cfa_coroutine$(EXEEXT):
     188        @${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     189
     190creation-cfa_coroutine_eager$(EXEEXT):
     191        @${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags} -DEAGER
     192
     193creation-cfa_thread$(EXEEXT):
     194        @${CC}        creation/cfa_thrd.c  -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     195
     196creation-upp_coroutine$(EXEEXT):
     197        @u++          creation/upp_cor.cc  -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     198
     199creation-upp_thread$(EXEEXT):
     200        @u++          creation/upp_thrd.cc -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     201
     202creation-pthread$(EXEEXT):
     203        @@BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     204
     205## =========================================================================================================
     206
     207compile$(EXEEXT) :\
     208        compile-array.make      \
     209        compile-attributes.make \
     210        compile-empty.make      \
     211        compile-expression.make \
     212        compile-io.make         \
     213        compile-monitor.make    \
     214        compile-operators.make  \
     215        compile-typeof.make
     216
     217
     218compile-array$(EXEEXT):
     219        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/array.c
     220
     221compile-attributes$(EXEEXT):
     222        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/attributes.c
     223
     224compile-empty$(EXEEXT):
     225        @${CC} -nodebug -quiet -fsyntax-only -w compile/empty.c
     226
     227compile-expression$(EXEEXT):
     228        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/expression.c
     229
     230compile-io$(EXEEXT):
     231        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/io.c
     232
     233compile-monitor$(EXEEXT):
     234        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/monitor.c
     235
     236compile-operators$(EXEEXT):
     237        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/operators.c
     238
     239compile-thread$(EXEEXT):
     240        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/thread.c
     241
     242compile-typeof$(EXEEXT):
     243        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/typeof.c
     244
  • src/benchmark/Makefile.in

    rf5c3b6c r0fe4e62  
    124124  esac
    125125am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
    126 am__DIST_COMMON = $(srcdir)/Makefile.in
     126am__DIST_COMMON = $(srcdir)/Makefile.in compile
    127127DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
    128128ACLOCAL = @ACLOCAL@
     
    253253STATS = ${TOOLSDIR}stat.py
    254254repeats = 30
     255TIME_FORMAT = "%E"
     256PRINT_FORMAT = '%20s\t'
    255257all: all-am
    256258
     
    444446.NOTPARALLEL:
    445447
    446 bench$(EXEEXT) :
    447         @for ccflags in "-debug" "-nodebug"; do \
    448                 echo ${CC} ${AM_CFLAGS} ${CFLAGS} ${ccflags} @CFA_FLAGS@ -lrt bench.c;\
    449                 ${CC} ${AM_CFLAGS} ${CFLAGS} $${ccflags} -lrt bench.c;\
    450                 ./a.out ; \
    451         done ; \
    452         rm -f ./a.out ;
    453 
    454 csv-data$(EXEEXT):
    455         @${CC} ${AM_CFLAGS} ${CFLAGS} ${ccflags} @CFA_FLAGS@ -nodebug -lrt -quiet -DN=50000000 csv-data.c
    456         @./a.out
    457         @rm -f ./a.out
    458 
    459 ctxswitch$(EXEEXT): \
    460         ctxswitch-pthread.run           \
    461         ctxswitch-cfa_coroutine.run     \
    462         ctxswitch-cfa_thread.run        \
    463         ctxswitch-upp_coroutine.run     \
    464         ctxswitch-upp_thread.run
    465 
    466 ctxswitch-cfa_coroutine$(EXEEXT):
    467         ${CC}        ctxswitch/cfa_cor.c   -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    468 
    469 ctxswitch-cfa_thread$(EXEEXT):
    470         ${CC}        ctxswitch/cfa_thrd.c  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    471 
    472 ctxswitch-upp_coroutine$(EXEEXT):
    473         u++          ctxswitch/upp_cor.cc  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    474 
    475 ctxswitch-upp_thread$(EXEEXT):
    476         u++          ctxswitch/upp_thrd.cc -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    477 
    478 ctxswitch-pthread$(EXEEXT):
    479         @BACKEND_CC@ ctxswitch/pthreads.c  -DBENCH_N=50000000  -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    480 
    481 creation$(EXEEXT) :\
    482         creation-pthread.run            \
    483         creation-cfa_coroutine.run      \
    484         creation-cfa_thread.run         \
    485         creation-upp_coroutine.run      \
    486         creation-upp_thread.run
    487 
    488 creation-cfa_coroutine$(EXEEXT):
    489         ${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    490 
    491 creation-cfa_thread$(EXEEXT):
    492         ${CC}        creation/cfa_thrd.c  -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    493 
    494 creation-upp_coroutine$(EXEEXT):
    495         u++          creation/upp_cor.cc  -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    496 
    497 creation-upp_thread$(EXEEXT):
    498         u++          creation/upp_thrd.cc -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    499 
    500 creation-pthread$(EXEEXT):
    501         @BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    502 
    503 mutex$(EXEEXT) :\
    504         mutex-function.run      \
    505         mutex-pthread_lock.run  \
    506         mutex-upp.run           \
    507         mutex-cfa1.run          \
    508         mutex-cfa2.run          \
    509         mutex-cfa4.run
    510 
    511 mutex-function$(EXEEXT):
    512         @BACKEND_CC@ mutex/function.c    -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    513 
    514 mutex-pthread_lock$(EXEEXT):
    515         @BACKEND_CC@ mutex/pthreads.c    -DBENCH_N=50000000    -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    516 
    517 mutex-upp$(EXEEXT):
    518         u++          mutex/upp.cc        -DBENCH_N=50000000    -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    519 
    520 mutex-cfa1$(EXEEXT):
    521         ${CC}        mutex/cfa1.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    522 
    523 mutex-cfa2$(EXEEXT):
    524         ${CC}        mutex/cfa2.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    525 
    526 mutex-cfa4$(EXEEXT):
    527         ${CC}        mutex/cfa4.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    528 
    529 signal$(EXEEXT) :\
    530         signal-upp.run          \
    531         signal-cfa1.run         \
    532         signal-cfa2.run         \
    533         signal-cfa4.run
    534 
    535 signal-upp$(EXEEXT):
    536         u++          schedint/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    537 
    538 signal-cfa1$(EXEEXT):
    539         ${CC}        schedint/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    540 
    541 signal-cfa2$(EXEEXT):
    542         ${CC}        schedint/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    543 
    544 signal-cfa4$(EXEEXT):
    545         ${CC}        schedint/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    546 
    547 waitfor$(EXEEXT) :\
    548         waitfor-upp.run         \
    549         waitfor-cfa1.run                \
    550         waitfor-cfa2.run                \
    551         waitfor-cfa4.run
    552 
    553 waitfor-upp$(EXEEXT):
    554         u++          schedext/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    555 
    556 waitfor-cfa1$(EXEEXT):
    557         ${CC}        schedext/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    558 
    559 waitfor-cfa2$(EXEEXT):
    560         ${CC}        schedext/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    561 
    562 waitfor-cfa4$(EXEEXT):
    563         ${CC}        schedext/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     448all : ctxswitch$(EXEEXT) mutex$(EXEEXT) signal$(EXEEXT) waitfor$(EXEEXT) creation$(EXEEXT)
    564449
    565450%.run : %$(EXEEXT) ${REPEAT}
     
    572457        @rm -f a.out .result.log
    573458
     459%.runquiet :
     460        @+make $(basename $@)
     461        @./a.out
     462        @rm -f a.out
     463
     464%.make :
     465        @printf "${PRINT_FORMAT}" $(basename $(subst compile-,,$@))
     466        @+/usr/bin/time -f ${TIME_FORMAT} make $(basename $@) 2>&1
     467
    574468${REPEAT} :
    575469        @+make -C ${TOOLSDIR} repeat
     470
     471jenkins$(EXEEXT):
     472        @echo "{"
     473        @echo -e '\t"githash": "'${githash}'",'
     474        @echo -e '\t"arch": "'   ${arch}   '",'
     475        @echo -e '\t"compile": {'
     476        @+make compile TIME_FORMAT='%e,' PRINT_FORMAT='\t\t\"%s\" :'
     477        @echo -e '\t\t"dummy" : {}'
     478        @echo -e '\t},'
     479        @echo -e '\t"ctxswitch": {'
     480        @echo -en '\t\t"coroutine":'
     481        @+make ctxswitch-cfa_coroutine.runquiet
     482        @echo -en '\t\t,"thread":'
     483        @+make ctxswitch-cfa_thread.runquiet
     484        @echo -e '\t},'
     485        @echo -e '\t"mutex": ['
     486        @echo -en '\t\t'
     487        @+make mutex-cfa1.runquiet
     488        @echo -en '\t\t,'
     489        @+make mutex-cfa2.runquiet
     490        @echo -e '\t],'
     491        @echo -e '\t"scheduling": ['
     492        @echo -en '\t\t'
     493        @+make signal-cfa1.runquiet
     494        @echo -en '\t\t,'
     495        @+make signal-cfa2.runquiet
     496        @echo -en '\t\t,'
     497        @+make waitfor-cfa1.runquiet
     498        @echo -en '\t\t,'
     499        @+make waitfor-cfa2.runquiet
     500        @echo -e '\n\t],'
     501        @echo -e '\t"epoch": ' $(shell date +%s)
     502        @echo "}"
     503
     504ctxswitch$(EXEEXT): \
     505        ctxswitch-pthread.run           \
     506        ctxswitch-cfa_coroutine.run     \
     507        ctxswitch-cfa_thread.run        \
     508        ctxswitch-upp_coroutine.run     \
     509        ctxswitch-upp_thread.run
     510
     511ctxswitch-cfa_coroutine$(EXEEXT):
     512        @${CC}        ctxswitch/cfa_cor.c   -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     513
     514ctxswitch-cfa_thread$(EXEEXT):
     515        @${CC}        ctxswitch/cfa_thrd.c  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     516
     517ctxswitch-upp_coroutine$(EXEEXT):
     518        @u++          ctxswitch/upp_cor.cc  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     519
     520ctxswitch-upp_thread$(EXEEXT):
     521        @u++          ctxswitch/upp_thrd.cc -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     522
     523ctxswitch-pthread$(EXEEXT):
     524        @@BACKEND_CC@ ctxswitch/pthreads.c  -DBENCH_N=50000000  -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     525
     526mutex$(EXEEXT) :\
     527        mutex-function.run      \
     528        mutex-pthread_lock.run  \
     529        mutex-upp.run           \
     530        mutex-cfa1.run          \
     531        mutex-cfa2.run          \
     532        mutex-cfa4.run
     533
     534mutex-function$(EXEEXT):
     535        @@BACKEND_CC@ mutex/function.c    -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     536
     537mutex-pthread_lock$(EXEEXT):
     538        @@BACKEND_CC@ mutex/pthreads.c    -DBENCH_N=50000000    -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     539
     540mutex-upp$(EXEEXT):
     541        @u++          mutex/upp.cc        -DBENCH_N=50000000    -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     542
     543mutex-cfa1$(EXEEXT):
     544        @${CC}        mutex/cfa1.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     545
     546mutex-cfa2$(EXEEXT):
     547        @${CC}        mutex/cfa2.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     548
     549mutex-cfa4$(EXEEXT):
     550        @${CC}        mutex/cfa4.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     551
     552signal$(EXEEXT) :\
     553        signal-upp.run          \
     554        signal-cfa1.run         \
     555        signal-cfa2.run         \
     556        signal-cfa4.run
     557
     558signal-upp$(EXEEXT):
     559        @u++          schedint/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     560
     561signal-cfa1$(EXEEXT):
     562        @${CC}        schedint/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     563
     564signal-cfa2$(EXEEXT):
     565        @${CC}        schedint/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     566
     567signal-cfa4$(EXEEXT):
     568        @${CC}        schedint/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     569
     570waitfor$(EXEEXT) :\
     571        waitfor-upp.run         \
     572        waitfor-cfa1.run                \
     573        waitfor-cfa2.run                \
     574        waitfor-cfa4.run
     575
     576waitfor-upp$(EXEEXT):
     577        @u++          schedext/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     578
     579waitfor-cfa1$(EXEEXT):
     580        @${CC}        schedext/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     581
     582waitfor-cfa2$(EXEEXT):
     583        @${CC}        schedext/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     584
     585waitfor-cfa4$(EXEEXT):
     586        @${CC}        schedext/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     587
     588creation$(EXEEXT) :\
     589        creation-pthread.run                    \
     590        creation-cfa_coroutine.run              \
     591        creation-cfa_coroutine_eager.run        \
     592        creation-cfa_thread.run                 \
     593        creation-upp_coroutine.run              \
     594        creation-upp_thread.run
     595
     596creation-cfa_coroutine$(EXEEXT):
     597        @${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     598
     599creation-cfa_coroutine_eager$(EXEEXT):
     600        @${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags} -DEAGER
     601
     602creation-cfa_thread$(EXEEXT):
     603        @${CC}        creation/cfa_thrd.c  -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     604
     605creation-upp_coroutine$(EXEEXT):
     606        @u++          creation/upp_cor.cc  -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     607
     608creation-upp_thread$(EXEEXT):
     609        @u++          creation/upp_thrd.cc -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     610
     611creation-pthread$(EXEEXT):
     612        @@BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     613
     614compile$(EXEEXT) :\
     615        compile-array.make      \
     616        compile-attributes.make \
     617        compile-empty.make      \
     618        compile-expression.make \
     619        compile-io.make         \
     620        compile-monitor.make    \
     621        compile-operators.make  \
     622        compile-typeof.make
     623
     624compile-array$(EXEEXT):
     625        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/array.c
     626
     627compile-attributes$(EXEEXT):
     628        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/attributes.c
     629
     630compile-empty$(EXEEXT):
     631        @${CC} -nodebug -quiet -fsyntax-only -w compile/empty.c
     632
     633compile-expression$(EXEEXT):
     634        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/expression.c
     635
     636compile-io$(EXEEXT):
     637        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/io.c
     638
     639compile-monitor$(EXEEXT):
     640        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/monitor.c
     641
     642compile-operators$(EXEEXT):
     643        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/operators.c
     644
     645compile-thread$(EXEEXT):
     646        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/thread.c
     647
     648compile-typeof$(EXEEXT):
     649        @${CC} -nodebug -quiet -fsyntax-only -w ../tests/typeof.c
    576650
    577651# Tell versions [3.59,3.63) of GNU make to not export all variables.
  • src/benchmark/creation/cfa_cor.c

    rf5c3b6c r0fe4e62  
    55
    66coroutine MyCoroutine {};
    7 void ?{} (MyCoroutine & this) { prime(this); }
     7void ?{} (MyCoroutine & this) {
     8#ifdef EAGER
     9        prime(this);
     10#endif
     11}
    812void main(MyCoroutine & this) {}
    913
  • src/benchmark/csv-data.c

    rf5c3b6c r0fe4e62  
    2828// coroutine context switch
    2929long long int measure_coroutine() {
    30         const unsigned int NoOfTimes = N;
     30        const unsigned int NoOfTimes = 50000000;
    3131        long long int StartTime, EndTime;
    3232
     
    4343// thread context switch
    4444long long int measure_thread() {
    45         const unsigned int NoOfTimes = N;
     45        const unsigned int NoOfTimes = 50000000;
    4646        long long int StartTime, EndTime;
    4747
     
    6161
    6262long long int measure_1_monitor_entry() {
    63         const unsigned int NoOfTimes = N;
     63        const unsigned int NoOfTimes = 5000000;
    6464        long long int StartTime, EndTime;
    6565        mon_t mon;
     
    7979
    8080long long int measure_2_monitor_entry() {
    81         const unsigned int NoOfTimes = N;
     81        const unsigned int NoOfTimes = 5000000;
    8282        long long int StartTime, EndTime;
    8383        mon_t mon1, mon2;
     
    9494//-----------------------------------------------------------------------------
    9595// single internal sched entry
     96const unsigned int NoOfTimes = 500000;
     97
    9698mon_t mon1;
    9799
     
    107109
    108110void side1A( mon_t & mutex a, long long int * out ) {
    109         long long int StartTime, EndTime;
    110 
    111         StartTime = Time();
    112         for( int i = 0;; i++ ) {
    113                 signal(&cond1a);
     111        const unsigned int NoOfTimes = 500000;
     112        long long int StartTime, EndTime;
     113
     114        StartTime = Time();
     115        for( int i = 0;; i++ ) {
     116                signal(cond1a);
     117                if( i > NoOfTimes ) break;
     118                wait(cond1b);
     119        }
     120        EndTime = Time();
     121
     122        *out = ( EndTime - StartTime ) / NoOfTimes;
     123}
     124
     125void side1B( mon_t & mutex a ) {
     126        for( int i = 0;; i++ ) {
     127                signal(cond1b);
    114128                if( i > N ) break;
    115                 wait(&cond1b);
    116         }
    117         EndTime = Time();
    118 
    119         *out = ( EndTime - StartTime ) / N;
    120 }
    121 
    122 void side1B( mon_t & mutex a ) {
    123         for( int i = 0;; i++ ) {
    124                 signal(&cond1b);
    125                 if( i > N ) break;
    126                 wait(&cond1a);
     129                wait(cond1a);
    127130        }
    128131}
     
    141144
    142145//-----------------------------------------------------------------------------
    143 // multi internal sched entry
     146// multi internal sched
    144147mon_t mon2;
    145148
     
    155158
    156159void side2A( mon_t & mutex a, mon_t & mutex b, long long int * out ) {
    157         long long int StartTime, EndTime;
    158 
    159         StartTime = Time();
    160         for( int i = 0;; i++ ) {
    161                 signal(&cond2a);
     160        const unsigned int NoOfTimes = 500000;
     161        long long int StartTime, EndTime;
     162
     163        StartTime = Time();
     164        for( int i = 0;; i++ ) {
     165                signal(cond2a);
     166                if( i > NoOfTimes ) break;
     167                wait(cond2b);
     168        }
     169        EndTime = Time();
     170
     171        *out = ( EndTime - StartTime ) / NoOfTimes;
     172}
     173
     174void side2B( mon_t & mutex a, mon_t & mutex b ) {
     175        for( int i = 0;; i++ ) {
     176                signal(cond2b);
    162177                if( i > N ) break;
    163                 wait(&cond2b);
    164         }
    165         EndTime = Time();
    166 
    167         *out = ( EndTime - StartTime ) / N;
    168 }
    169 
    170 void side2B( mon_t & mutex a, mon_t & mutex b ) {
    171         for( int i = 0;; i++ ) {
    172                 signal(&cond2b);
    173                 if( i > N ) break;
    174                 wait(&cond2a);
     178                wait(cond2a);
    175179        }
    176180}
     
    189193
    190194//-----------------------------------------------------------------------------
     195// single external sched
     196
     197volatile int go = 0;
     198
     199void __attribute__((noinline)) call( mon_t & mutex m1 ) {}
     200
     201long long int  __attribute__((noinline)) wait( mon_t & mutex m1 ) {
     202        go = 1;
     203        const unsigned int NoOfTimes = 5000000;
     204        long long int StartTime, EndTime;
     205
     206        StartTime = Time();
     207        for (size_t i = 0; i < NoOfTimes; i++) {
     208                waitfor(call, m1);
     209        }
     210
     211        EndTime = Time();
     212        go = 0;
     213        return ( EndTime - StartTime ) / NoOfTimes;
     214}
     215
     216thread thrd3 {};
     217void ^?{}( thrd3 & mutex this ) {}
     218void main( thrd3 & this ) {
     219        while(go == 0) { yield(); }
     220        while(go == 1) { call(mon1); }
     221
     222}
     223
     224long long int measure_1_sched_ext() {
     225        go = 0;
     226        thrd3 t;
     227        return wait(mon1);
     228}
     229
     230//-----------------------------------------------------------------------------
     231// multi external sched
     232
     233void __attribute__((noinline)) call( mon_t & mutex m1, mon_t & mutex m2 ) {}
     234
     235long long int  __attribute__((noinline)) wait( mon_t & mutex m1, mon_t & mutex m2 ) {
     236        go = 1;
     237        const unsigned int NoOfTimes = 5000000;
     238        long long int StartTime, EndTime;
     239
     240        StartTime = Time();
     241        for (size_t i = 0; i < NoOfTimes; i++) {
     242                waitfor(call, m1, m2);
     243        }
     244
     245        EndTime = Time();
     246        go = 0;
     247        return ( EndTime - StartTime ) / NoOfTimes;
     248}
     249
     250thread thrd4 {};
     251void ^?{}( thrd4 & mutex this ) {}
     252void main( thrd4 & this ) {
     253        while(go == 0) { yield(); }
     254        while(go == 1) { call(mon1, mon2); }
     255
     256}
     257
     258long long int measure_2_sched_ext() {
     259        go = 0;
     260        thrd3 t;
     261        return wait(mon1, mon2);
     262}
     263
     264//-----------------------------------------------------------------------------
    191265// main loop
    192266int main()
    193267{
    194         sout | time(NULL) | ',';
    195         sout | measure_coroutine() | ',';
    196         sout | measure_thread() | ',';
    197         sout | measure_1_monitor_entry() | ',';
    198         sout | measure_2_monitor_entry() | ',';
    199         sout | measure_1_sched_int() | ',';
    200         sout | measure_2_sched_int() | endl;
    201 }
     268        sout | "\tepoch:" | time(NULL) | ',' | endl;
     269        sout | "\tctxswitch: {" | endl;
     270        sout | "\t\tcoroutine: "| measure_coroutine() | ',' | endl;
     271        sout | "\t\tthread:" | measure_thread() | ',' | endl;
     272        sout | "\t}," | endl;
     273        sout | "\tmutex: ["     | measure_1_monitor_entry()     | ',' | measure_2_monitor_entry()       | "]," | endl;
     274        sout | "\tscheduling: ["| measure_1_sched_int()         | ',' | measure_2_sched_int()   | ','  |
     275                                          measure_1_sched_ext()         | ',' | measure_2_sched_ext()   | "]," | endl;
     276}
  • src/benchmark/schedint/cfa1.c

    rf5c3b6c r0fe4e62  
    1515
    1616void __attribute__((noinline)) call( M & mutex a1 ) {
    17         signal(&c);
     17        signal(c);
    1818}
    1919
     
    2222        BENCH(
    2323                for (size_t i = 0; i < n; i++) {
    24                         wait(&c);
     24                        wait(c);
    2525                },
    2626                result
  • src/benchmark/schedint/cfa2.c

    rf5c3b6c r0fe4e62  
    1515
    1616void __attribute__((noinline)) call( M & mutex a1, M & mutex a2 ) {
    17         signal(&c);
     17        signal(c);
    1818}
    1919
     
    2222        BENCH(
    2323                for (size_t i = 0; i < n; i++) {
    24                         wait(&c);
     24                        wait(c);
    2525                },
    2626                result
  • src/benchmark/schedint/cfa4.c

    rf5c3b6c r0fe4e62  
    1515
    1616void __attribute__((noinline)) call( M & mutex a1, M & mutex a2, M & mutex a3, M & mutex a4 ) {
    17         signal(&c);
     17        signal(c);
    1818}
    1919
     
    2222        BENCH(
    2323                for (size_t i = 0; i < n; i++) {
    24                         wait(&c);
     24                        wait(c);
    2525                },
    2626                result
  • src/libcfa/Makefile.am

    rf5c3b6c r0fe4e62  
    9595
    9696cfa_includedir = $(CFA_INCDIR)
    97 nobase_cfa_include_HEADERS = ${headers} ${stdhdr} math gmp concurrency/invoke.h
     97nobase_cfa_include_HEADERS =    \
     98        ${headers}                      \
     99        ${stdhdr}                       \
     100        math                            \
     101        gmp                             \
     102        bits/defs.h             \
     103        bits/locks.h            \
     104        concurrency/invoke.h    \
     105        libhdr.h                        \
     106        libhdr/libalign.h       \
     107        libhdr/libdebug.h       \
     108        libhdr/libtools.h
    98109
    99110CLEANFILES = libcfa-prelude.c
  • src/libcfa/Makefile.in

    rf5c3b6c r0fe4e62  
    264264        containers/result containers/vector concurrency/coroutine \
    265265        concurrency/thread concurrency/kernel concurrency/monitor \
    266         ${shell echo stdhdr/*} math gmp concurrency/invoke.h
     266        ${shell echo stdhdr/*} math gmp bits/defs.h bits/locks.h \
     267        concurrency/invoke.h libhdr.h libhdr/libalign.h \
     268        libhdr/libdebug.h libhdr/libtools.h
    267269HEADERS = $(nobase_cfa_include_HEADERS)
    268270am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
     
    430432stdhdr = ${shell echo stdhdr/*}
    431433cfa_includedir = $(CFA_INCDIR)
    432 nobase_cfa_include_HEADERS = ${headers} ${stdhdr} math gmp concurrency/invoke.h
     434nobase_cfa_include_HEADERS = \
     435        ${headers}                      \
     436        ${stdhdr}                       \
     437        math                            \
     438        gmp                             \
     439        bits/defs.h             \
     440        bits/locks.h            \
     441        concurrency/invoke.h    \
     442        libhdr.h                        \
     443        libhdr/libalign.h       \
     444        libhdr/libdebug.h       \
     445        libhdr/libtools.h
     446
    433447CLEANFILES = libcfa-prelude.c
    434448all: all-am
  • src/libcfa/concurrency/alarm.c

    rf5c3b6c r0fe4e62  
    186186
    187187        disable_interrupts();
    188         lock( &event_kernel->lock DEBUG_CTX2 );
     188        lock( event_kernel->lock DEBUG_CTX2 );
    189189        {
    190190                verify( validate( alarms ) );
     
    196196                }
    197197        }
    198         unlock( &event_kernel->lock );
     198        unlock( event_kernel->lock );
    199199        this->set = true;
    200200        enable_interrupts( DEBUG_CTX );
     
    203203void unregister_self( alarm_node_t * this ) {
    204204        disable_interrupts();
    205         lock( &event_kernel->lock DEBUG_CTX2 );
     205        lock( event_kernel->lock DEBUG_CTX2 );
    206206        {
    207207                verify( validate( &event_kernel->alarms ) );
    208208                remove( &event_kernel->alarms, this );
    209209        }
    210         unlock( &event_kernel->lock );
     210        unlock( event_kernel->lock );
    211211        enable_interrupts( DEBUG_CTX );
    212212        this->set = false;
  • src/libcfa/concurrency/coroutine.c

    rf5c3b6c r0fe4e62  
    156156                this->limit = (char *)libCeiling( (unsigned long)this->storage, 16 ); // minimum alignment
    157157        } // if
    158         assertf( this->size >= MinStackSize, "Stack size %d provides less than minimum of %d bytes for a stack.", this->size, MinStackSize );
     158        assertf( this->size >= MinStackSize, "Stack size %zd provides less than minimum of %d bytes for a stack.", this->size, MinStackSize );
    159159
    160160        this->base = (char *)this->limit + this->size;
  • src/libcfa/concurrency/invoke.h

    rf5c3b6c r0fe4e62  
    1414//
    1515
    16 #include <stdbool.h>
    17 #include <stdint.h>
     16#include "bits/defs.h"
     17#include "bits/locks.h"
    1818
    1919#ifdef __CFORALL__
     
    2525#define _INVOKE_H_
    2626
    27       #define unlikely(x)    __builtin_expect(!!(x), 0)
    28       #define thread_local _Thread_local
    29 
    30       typedef void (*fptr_t)();
    31 
    32       struct spinlock {
    33             volatile int lock;
    34             #ifdef __CFA_DEBUG__
    35                   const char * prev_name;
    36                   void* prev_thrd;
    37             #endif
    38       };
    39 
    40       struct __thread_queue_t {
    41             struct thread_desc * head;
    42             struct thread_desc ** tail;
    43       };
    44 
    45       struct __condition_stack_t {
    46             struct __condition_criterion_t * top;
    47       };
    48 
    49       #ifdef __CFORALL__
    50       extern "Cforall" {
    51             void ?{}( struct __thread_queue_t & );
    52             void append( struct __thread_queue_t *, struct thread_desc * );
    53             struct thread_desc * pop_head( struct __thread_queue_t * );
    54             struct thread_desc * remove( struct __thread_queue_t *, struct thread_desc ** );
    55 
    56             void ?{}( struct __condition_stack_t & );
    57             void push( struct __condition_stack_t *, struct __condition_criterion_t * );
    58             struct __condition_criterion_t * pop( struct __condition_stack_t * );
    59 
    60             void ?{}(spinlock & this);
    61             void ^?{}(spinlock & this);
    62       }
    63       #endif
    64 
    65       struct coStack_t {
    66             unsigned int size;                        // size of stack
    67             void *storage;                            // pointer to stack
    68             void *limit;                              // stack grows towards stack limit
    69             void *base;                               // base of stack
    70             void *context;                            // address of cfa_context_t
    71             void *top;                                // address of top of storage
    72             bool userStack;                           // whether or not the user allocated the stack
    73       };
    74 
    75       enum coroutine_state { Halted, Start, Inactive, Active, Primed };
    76 
    77       struct coroutine_desc {
    78             struct coStack_t stack;                   // stack information of the coroutine
    79             const char *name;                         // textual name for coroutine/task, initialized by uC++ generated code
    80             int errno_;                               // copy of global UNIX variable errno
    81             enum coroutine_state state;               // current execution status for coroutine
    82             struct coroutine_desc * starter;          // first coroutine to resume this one
    83             struct coroutine_desc * last;             // last coroutine to resume this one
    84       };
    85 
    86       struct __waitfor_mask_t {
    87             short * accepted;                         // the index of the accepted function, -1 if none
    88             struct __acceptable_t * clauses;          // list of acceptable functions, null if any
    89             short size;                               // number of acceptable functions
    90       };
    91 
    92       struct monitor_desc {
    93             struct spinlock lock;                     // spinlock to protect internal data
    94             struct thread_desc * owner;               // current owner of the monitor
    95             struct __thread_queue_t entry_queue;      // queue of threads that are blocked waiting for the monitor
    96             struct __condition_stack_t signal_stack;  // stack of conditions to run next once we exit the monitor
    97             unsigned int recursion;                   // monitor routines can be called recursively, we need to keep track of that
    98             struct __waitfor_mask_t mask;             // mask used to know if some thread is waiting for something while holding the monitor
    99             struct __condition_node_t * dtor_node;    // node used to signal the dtor in a waitfor dtor
    100       };
    101 
    102       struct __monitor_group_t {
    103             struct monitor_desc ** list;              // currently held monitors
    104             short                  size;              // number of currently held monitors
    105             fptr_t                 func;              // last function that acquired monitors
    106       };
    107 
    108       struct thread_desc {
    109             // Core threading fields
    110             struct coroutine_desc  self_cor;          // coroutine body used to store context
    111             struct monitor_desc    self_mon;          // monitor body used for mutual exclusion
    112             struct monitor_desc *  self_mon_p;        // pointer to monitor with sufficient lifetime for current monitors
    113             struct __monitor_group_t monitors;        // monitors currently held by this thread
    114 
    115             // Link lists fields
    116             struct thread_desc * next;                // instrusive link field for threads
    117 
    118 
     27        typedef void (*fptr_t)();
     28        typedef int_fast16_t __lock_size_t;
     29
     30        struct __thread_queue_t {
     31                struct thread_desc * head;
     32                struct thread_desc ** tail;
     33        };
     34
     35        struct __condition_stack_t {
     36                struct __condition_criterion_t * top;
     37        };
     38
     39        #ifdef __CFORALL__
     40        extern "Cforall" {
     41                void ?{}( struct __thread_queue_t & );
     42                void append( struct __thread_queue_t &, struct thread_desc * );
     43                struct thread_desc * pop_head( struct __thread_queue_t & );
     44                struct thread_desc * remove( struct __thread_queue_t &, struct thread_desc ** );
     45
     46                void ?{}( struct __condition_stack_t & );
     47                void push( struct __condition_stack_t &, struct __condition_criterion_t * );
     48                struct __condition_criterion_t * pop( struct __condition_stack_t & );
     49        }
     50        #endif
     51
     52        struct coStack_t {
     53                // size of stack
     54                size_t size;
     55
     56                // pointer to stack
     57                void *storage;
     58
     59                // stack grows towards stack limit
     60                void *limit;
     61
     62                // base of stack
     63                void *base;
     64
     65                // address of cfa_context_t
     66                void *context;
     67
     68                // address of top of storage
     69