Changes in / [0fe4e62:f5c3b6c]


Ignore:
Files:
1 added
8 deleted
71 edited

Legend:

Unmodified
Added
Removed
  • Jenkinsfile

    r0fe4e62 rf5c3b6c  
    1515        arch_name               = ''
    1616        architecture    = ''
    17 
     17       
    1818        do_alltests             = false
    1919        do_benchmark    = false
     
    183183                sh 'make clean > /dev/null'
    184184                sh 'make > /dev/null 2>&1'
    185         }
     185        } 
    186186        catch (Exception caughtError) {
    187187                err = caughtError //rethrow error later
     
    257257def build() {
    258258        build_stage('Build') {
    259 
     259       
    260260                def install_dir = pwd tmp: true
    261 
     261               
    262262                //Configure the compilation (Output is not relevant)
    263263                //Use the current directory as the installation target so nothing
     
    290290                if( !do_benchmark ) return
    291291
     292                //Write the commit id to Benchmark
     293                writeFile  file: 'bench.csv', text:'data=' + gitRefNewValue + ',' + arch_name + ','
     294 
    292295                //Append bench results
    293                 sh 'make -C src/benchmark --no-print-directory jenkins githash=' + gitRefNewValue + ' arch=' + arch_name + ' | tee bench.json'
     296                sh 'make -C src/benchmark --no-print-directory csv-data >> bench.csv'
    294297        }
    295298}
     
    324327
    325328                //Then publish the results
    326                 sh 'curl -H "Content-Type: application/json" --silent --data @bench.json http://plg2:8082/jenkins/publish > /dev/null || true'
     329                sh 'curl --silent --data @bench.csv http://plg2:8082/jenkins/publish > /dev/null || true'
    327330        }
    328331}
  • doc/proposals/concurrency/.gitignore

    r0fe4e62 rf5c3b6c  
    1616build/*.out
    1717build/*.ps
    18 build/*.pstex
    19 build/*.pstex_t
    2018build/*.tex
    2119build/*.toc
  • doc/proposals/concurrency/Makefile

    r0fe4e62 rf5c3b6c  
    1313annex/glossary \
    1414text/intro \
     15text/cforall \
    1516text/basics \
    16 text/cforall \
    1717text/concurrency \
    1818text/internals \
    1919text/parallelism \
    20 text/results \
    2120text/together \
    2221text/future \
     
    3029}}
    3130
    32 PICTURES = ${addprefix build/, ${addsuffix .pstex, \
    33         system \
    34         monitor_structs \
    35 }}
     31PICTURES = ${addsuffix .pstex, \
     32}
    3633
    3734PROGRAMS = ${addsuffix .tex, \
     
    7067        build/*.out     \
    7168        build/*.ps      \
    72         build/*.pstex   \
    7369        build/*.pstex_t \
    7470        build/*.tex     \
     
    8480        dvips $< -o $@
    8581
    86 build/${basename ${DOCUMENT}}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} ${basename ${DOCUMENT}}.tex ../../LaTeXmacros/common.tex ../../LaTeXmacros/indexstyle annex/local.bib
     82build/${basename ${DOCUMENT}}.dvi : Makefile ${GRAPHS} ${PROGRAMS} ${PICTURES} ${FIGURES} ${SOURCES} ${basename ${DOCUMENT}}.tex ../../LaTeXmacros/common.tex ../../LaTeXmacros/indexstyle
    8783
    8884        @ if [ ! -r ${basename $@}.ind ] ; then touch ${basename $@}.ind ; fi                           # Conditionally create an empty *.ind (index) file for inclusion until makeindex is run.
     
    9591        @ -${BibTeX} ${basename $@}
    9692        @ echo "Glossary"
    97         @ makeglossaries -q -s ${basename $@}.ist ${basename $@}                                                # Make index from *.aux entries and input index at end of document
     93        makeglossaries -q -s ${basename $@}.ist ${basename $@}                                          # Make index from *.aux entries and input index at end of document
    9894        @ echo ".dvi generation"
    9995        @ -build/bump_ver.sh
  • doc/proposals/concurrency/annex/local.bib

    r0fe4e62 rf5c3b6c  
    5252        year            = 2017
    5353}
    54 
    55 @manual{Cpp-Transactions,
    56         keywords        = {C++, Transactional Memory},
    57         title           = {Technical Specification for C++ Extensions for Transactional Memory},
    58         organization= {International Standard ISO/IEC TS 19841:2015 },
    59         publisher   = {American National Standards Institute},
    60         address = {http://www.iso.org},
    61         year            = 2015,
    62 }
    63 
    64 @article{BankTransfer,
    65         keywords        = {Bank Transfer},
    66         title   = {Bank Account Transfer Problem},
    67         publisher       = {Wiki Wiki Web},
    68         address = {http://wiki.c2.com},
    69         year            = 2010
    70 }
    71 
    72 @misc{2FTwoHardThings,
    73         keywords        = {Hard Problem},
    74         title   = {TwoHardThings},
    75         author  = {Martin Fowler},
    76         address = {https://martinfowler.com/bliki/TwoHardThings.html},
    77         year            = 2009
    78 }
    79 
    80 @article{IntrusiveData,
    81         title           = {Intrusive Data Structures},
    82         author  = {Jiri Soukup},
    83         journal = {CppReport},
    84         year            = 1998,
    85         month           = May,
    86         volume  = {10/No5.},
    87         page            = 22
    88 }
    89 
    90 @misc{affinityLinux,
    91         title           = "{Linux man page - sched\_setaffinity(2)}"
    92 }
    93 
    94 @misc{affinityWindows,
    95         title           = "{Windows (vs.85) - SetThreadAffinityMask function}"
    96 }
    97 
    98 @misc{affinityFreebsd,
    99         title           = "{FreeBSD General Commands Manual - CPUSET(1)}"
    100 }
    101 
    102 @misc{affinityNetbsd,
    103         title           = "{NetBSD Library Functions Manual - AFFINITY(3)}"
    104 }
    105 
    106 @misc{affinityMacosx,
    107         title           = "{Affinity API Release Notes for OS X v10.5}"
    108 }
  • doc/proposals/concurrency/figures/int_monitor.fig

    r0fe4e62 rf5c3b6c  
    88-2
    991200 2
    10 5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 675.000 2700.000 675 2400 375 2700 675 3000
    11 6 4533 2866 4655 3129
    12 5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 4657.017 2997.000 4655 2873 4533 2997 4655 3121
    13 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    14          4655 2866 4655 3129
     105 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 600.000 2625.000 600 2325 300 2625 600 2925
     116 3225 4500 7425 4800
     121 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3375 4650 80 80 3375 4650 3455 4730
     131 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4725 4650 105 105 4725 4650 4830 4755
     141 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 6225 4650 105 105 6225 4650 6330 4755
     154 0 -1 0 0 0 12 0.0000 2 135 1035 4950 4725 blocked task\001
     164 0 -1 0 0 0 12 0.0000 2 135 870 3525 4725 active task\001
     174 0 -1 0 0 0 12 0.0000 2 180 930 6450 4725 routine ptrs\001
    1518-6
    16 6 4725 2866 4847 3129
    17 5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 4849.017 2997.000 4847 2873 4725 2997 4847 3121
    18 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    19          4847 2866 4847 3129
     196 8445 1695 8655 1905
     201 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 8550 1800 105 105 8550 1800 8655 1905
     214 1 -1 0 0 0 10 0.0000 2 75 75 8550 1860 a\001
    2022-6
    21 6 4911 2866 5033 3129
    22 5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 0 5035.017 2997.000 5033 2873 4911 2997 5033 3121
    23 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    24          5033 2866 5033 3129
     236 8445 1395 8655 1605
     241 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 8550 1500 105 105 8550 1500 8655 1605
     254 1 -1 0 0 0 10 0.0000 2 105 90 8550 1560 b\001
    2526-6
    26 6 9027 2866 9149 3129
    27 5 1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9024.983 2997.000 9027 2873 9149 2997 9027 3121
    28 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    29          9027 2866 9027 3129
     276 3945 1695 4155 1905
     281 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4050 1800 105 105 4050 1800 4155 1905
     294 1 -1 0 0 0 10 0.0000 2 75 75 4050 1860 a\001
    3030-6
    31 6 9253 2866 9375 3129
    32 5 1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9250.983 2997.000 9253 2873 9375 2997 9253 3121
    33 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    34          9253 2866 9253 3129
    35 -6
    36 6 9478 2866 9600 3129
    37 5 1 0 1 0 7 50 -1 -1 0.000 0 0 0 0 9475.983 2997.000 9478 2873 9600 2997 9478 3121
    38 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    39          9478 2866 9478 3129
     316 3945 1395 4155 1605
     321 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4050 1500 105 105 4050 1500 4155 1605
     334 1 -1 0 0 0 10 0.0000 2 105 90 4050 1560 b\001
    4034-6
    41351 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 7650 3675 80 80 7650 3675 7730 3755
    42361 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3150 3675 80 80 3150 3675 3230 3755
    43 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4047 1793 125 125 4047 1793 3929 1752
    44 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4050 1500 125 125 4050 1500 3932 1459
    45 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 8550 1500 125 125 8550 1500 8432 1459
    46 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 8550 1800 125 125 8550 1800 8432 1759
    47 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 1200 2850 125 125 1200 2850 1082 2809
    48 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 900 2850 125 125 900 2850 782 2809
    49 1 3 0 1 -1 -1 0 0 4 0.000 1 0.0000 6225 4650 105 105 6225 4650 6330 4755
    50 1 3 0 1 -1 -1 0 0 20 0.000 1 0.0000 3150 4650 80 80 3150 4650 3230 4730
    51 1 3 0 1 -1 -1 0 0 -1 0.000 1 0.0000 4575 4650 105 105 4575 4650 4680 4755
    52372 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    5338         3900 1950 4200 2100
     
    7762         3000 4050 3300 4200
    78632 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    79          675 3000 1425 3000
     64         600 2925 1350 2925
    80652 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    81          675 2400 1425 2400
     66         600 2325 1350 2325
    82672 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    83          1425 2700 1500 2925
     68         1350 2625 1425 2850
    84692 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    85          1425 2400 1350 2625
     70         1350 2325 1275 2550
    86712 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
    87          675 2700 1425 2700
     72         600 2625 1350 2625
     732 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
     74         1350 2775 1275 2645 1125 2645 1050 2775 1125 2905 1275 2905
     75         1350 2775
     762 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
     77         975 2775 900 2645 750 2645 675 2775 750 2905 900 2905
     78         975 2775
     792 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
     80         4800 3000 4725 2870 4575 2870 4500 3000 4575 3130 4725 3130
     81         4800 3000
     822 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
     83         5100 3000 5025 2870 4875 2870 4800 3000 4875 3130 5025 3130
     84         5100 3000
     852 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
     86         9300 3000 9225 2870 9075 2870 9000 3000 9075 3130 9225 3130
     87         9300 3000
     882 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
     89         9600 3000 9525 2870 9375 2870 9300 3000 9375 3130 9525 3130
     90         9600 3000
     912 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     92         675 2775 975 2775
     932 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     94         1050 2775 1350 2775
     952 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
     96         4875 4950 4800 4820 4650 4820 4575 4950 4650 5080 4800 5080
     97         4875 4950
     982 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
     99         4575 4950 4875 4950
     1002 3 0 1 0 7 50 -1 -1 0.000 0 0 0 0 0 7
     101         3525 4970 3450 4840 3300 4840 3225 4970 3300 5100 3450 5100
     102         3525 4970
    881034 1 -1 0 0 0 12 0.0000 2 135 315 2850 4275 exit\001
    891044 1 -1 0 0 0 12 0.0000 2 135 315 7350 4275 exit\001
     
    1061214 1 -1 0 0 0 12 0.0000 2 135 495 4050 1275 queue\001
    1071224 1 -1 0 0 0 12 0.0000 2 165 420 4050 1050 entry\001
    108 4 0 0 50 -1 0 11 0.0000 2 120 705 600 2325 Condition\001
    109 4 0 -1 0 0 0 12 0.0000 2 180 930 6450 4725 routine ptrs\001
    110 4 0 -1 0 0 0 12 0.0000 2 135 1050 3300 4725 active thread\001
    111 4 0 -1 0 0 0 12 0.0000 2 135 1215 4725 4725 blocked thread\001
     1234 0 0 50 -1 0 11 0.0000 2 120 705 450 2250 Condition\001
     1244 0 0 50 -1 0 11 0.0000 2 165 630 3600 5025 signalled\001
     1254 0 0 50 -1 0 11 0.0000 2 165 525 4950 5025 waiting\001
  • doc/proposals/concurrency/style/cfa-format.tex

    r0fe4e62 rf5c3b6c  
    254254}{}
    255255
    256 \lstnewenvironment{gocode}[1][]{
    257   \lstset{
    258     language = Golang,
    259     style=defaultStyle,
    260     #1
    261   }
    262 }{}
    263 
    264256\newcommand{\zero}{\lstinline{zero_t}\xspace}
    265257\newcommand{\one}{\lstinline{one_t}\xspace}
  • doc/proposals/concurrency/text/basics.tex

    r0fe4e62 rf5c3b6c  
    99At its core, concurrency is based on having multiple call-stacks and scheduling among threads of execution executing on these stacks. Concurrency without parallelism only requires having multiple call stacks (or contexts) for a single thread of execution.
    1010
    11 Execution with a single thread and multiple stacks where the thread is self-scheduling deterministically across the stacks is called coroutining. Execution with a single and multiple stacks but where the thread is scheduled by an oracle (non-deterministic from the thread perspective) across the stacks is called concurrency.
    12 
    13 Therefore, a minimal concurrency system can be achieved by creating coroutines, which instead of context switching among each other, always ask an oracle where to context switch next. While coroutines can execute on the caller's stack-frame, stackfull coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model (a.k.a non-preemptive scheduling). The oracle/scheduler can either be a stackless or stackfull entity and correspondingly require one or two context switches to run a different coroutine. In any case, a subset of concurrency related challenges start to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption.
    14 
    15 A scheduler introduces order of execution uncertainty, while preemption introduces uncertainty about where context-switches occur. Mutual-exclusion and synchronisation are ways of limiting non-determinism in a concurrent system. Now it is important to understand that uncertainty is desireable; uncertainty can be used by runtime systems to significantly increase performance and is often the basis of giving a user the illusion that tasks are running in parallel. Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows.
     11Indeed, while execution with a single thread and multiple stacks where the thread is self-scheduling deterministically across the stacks is called coroutining, execution with a single thread and multiple stacks but where the thread is scheduled by an oracle (non-deterministic from the thread perspective) across the stacks is called concurrency.
     12
     13Therefore, a minimal concurrency system can be achieved by creating coroutines, which instead of context switching among each other, always ask an oracle where to context switch next. While coroutines can execute on the caller's stack-frame, stackfull coroutines allow full generality and are sufficient as the basis for concurrency. The aforementioned oracle is a scheduler and the whole system now follows a cooperative threading-model \cit. The oracle/scheduler can either be a stackless or stackfull entity and correspondingly require one or two context switches to run a different coroutine. In any case, a subset of concurrency related challenges start to appear. For the complete set of concurrency challenges to occur, the only feature missing is preemption. Indeed, concurrency challenges appear with non-determinism. Mutual-exclusion and synchronisation are ways of limiting the lack of determinism in a system. A scheduler introduces order of execution uncertainty, while preemption introduces uncertainty about where context-switches occur. Now it is important to understand that uncertainty is not undesirable; uncertainty can often be used by systems to significantly increase performance and is often the basis of giving a user the illusion that tasks are running in parallel. Optimal performance in concurrent applications is often obtained by having as much non-determinism as correctness allows\cit.
    1614
    1715\section{\protect\CFA 's Thread Building Blocks}
    18 One of the important features that is missing in C is threading. On modern architectures, a lack of threading is unacceptable\cite{Sutter05, Sutter05b}, and therefore modern programming languages must have the proper tools to allow users to write performant concurrent programs to take advantage of parallelism. As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. And being a system-level language means programmers expect to choose precisely which features they need and which cost they are willing to pay.
     16One of the important features that is missing in C is threading. On modern architectures, a lack of threading is unacceptable\cite{Sutter05, Sutter05b}, and therefore modern programming languages must have the proper tools to allow users to write performant concurrent and/or parallel programs. As an extension of C, \CFA needs to express these concepts in a way that is as natural as possible to programmers familiar with imperative languages. And being a system-level language means programmers expect to choose precisely which features they need and which cost they are willing to pay.
    1917
    2018\section{Coroutines: A stepping stone}\label{coroutine}
    21 While the main focus of this proposal is concurrency and parallelism, it is important to address coroutines, which are actually a significant building block of a concurrency system. Coroutines need to deal with context-switches and other context-management operations. Therefore, this proposal includes coroutines both as an intermediate step for the implementation of threads, and a first class feature of \CFA. Furthermore, many design challenges of threads are at least partially present in designing coroutines, which makes the design effort that much more relevant. The core \acrshort{api} of coroutines revolve around two features: independent call stacks and \code{suspend}/\code{resume}.
    22 
     19While the main focus of this proposal is concurrency and parallelism, it is important to address coroutines, which are actually a significant building block of a concurrency system. Coroutines need to deal with context-switches and other context-management operations. Therefore, this proposal includes coroutines both as an intermediate step for the implementation of threads, and a first class feature of \CFA. Furthermore, many design challenges of threads are at least partially present in designing coroutines, which makes the design effort that much more relevant. The core \acrshort{api} of coroutines revolves around two features: independent call stacks and \code{suspend}/\code{resume}.
     20
     21A good example of a problem made easier with coroutines is generating the Fibonacci sequence. This problem comes with the challenge of decoupling how a sequence is generated and how it is used. Figure \ref{fig:fibonacci-c} shows conventional approaches to writing generators in C. All three of these approaches suffer from strong coupling. The left and center approaches require that the generator have knowledge of how the sequence will be used, while the rightmost approach requires the user to hold internal state between calls on behalf of the sequence generator and makes it much harder to handle corner cases like the Fibonacci seed.
    2322\begin{figure}
     23\label{fig:fibonacci-c}
    2424\begin{center}
    2525\begin{tabular}{c @{\hskip 0.025in}|@{\hskip 0.025in} c @{\hskip 0.025in}|@{\hskip 0.025in} c}
     
    4545        }
    4646}
    47 
    48 int main() {
    49         void print_fib(int n) {
    50                 printf("%d\n", n);
    51         }
    52 
    53         fibonacci_func(
    54                 10, print_fib
    55         );
    56 
    57 
    58 
    59 }
    6047\end{ccode}&\begin{ccode}[tabsize=2]
    6148//Using output array
     
    7562                        f2 = next;
    7663                }
    77                 array[i] = next;
    78         }
    79 }
    80 
    81 
    82 int main() {
    83         int a[10];
    84 
    85         fibonacci_func(
    86                 10, a
    87         );
    88 
    89         for(int i=0;i<10;i++){
    90                 printf("%d\n", a[i]);
    91         }
    92 
     64                *array = next;
     65                array++;
     66        }
    9367}
    9468\end{ccode}&\begin{ccode}[tabsize=2]
     
    9670typedef struct {
    9771        int f1, f2;
    98 } Iterator_t;
     72} iterator_t;
    9973
    10074int fibonacci_state(
    101         Iterator_t * it
     75        iterator_t * it
    10276) {
    10377        int f;
    10478        f = it->f1 + it->f2;
    10579        it->f2 = it->f1;
    106         it->f1 = max(f,1);
     80        it->f1 = f;
    10781        return f;
    10882}
     
    11387
    11488
    115 
    116 int main() {
    117         Iterator_t it={0,0};
    118 
    119         for(int i=0;i<10;i++){
    120                 printf("%d\n",
    121                         fibonacci_state(
    122                                 &it
    123                         );
    124                 );
    125         }
    126 
    127 }
    12889\end{ccode}
    12990\end{tabular}
    13091\end{center}
    13192\caption{Different implementations of a fibonacci sequence generator in C.}
    132 \label{lst:fibonacci-c}
    13393\end{figure}
    13494
    135 A good example of a problem made easier with coroutines is generators, like the fibonacci sequence. This problem comes with the challenge of decoupling how a sequence is generated and how it is used. Figure \ref{lst:fibonacci-c} shows conventional approaches to writing generators in C. All three of these approach suffer from strong coupling. The left and center approaches require that the generator have knowledge of how the sequence is used, while the rightmost approach requires holding internal state between calls on behalf of the generator and makes it much harder to handle corner cases like the Fibonacci seed.
    136 
    137 Figure \ref{lst:fibonacci-cfa} is an example of a solution to the fibonnaci problem using \CFA coroutines, where the coroutine stack holds sufficient state for the generation. This solution has the advantage of having very strong decoupling between how the sequence is generated and how it is used. Indeed, this version is as easy to use as the \code{fibonacci_state} solution, while the imlpementation is very similar to the \code{fibonacci_func} example.
     95
     96Figure \ref{fig:fibonacci-cfa} is an example of a solution to the Fibonacci problem using \CFA coroutines, using the coroutine stack to hold sufficient state for the generation. This solution has the advantage of having very strong decoupling between how the sequence is generated and how it is used. Indeed, this version is as easy to use as the \code{fibonacci_state} solution, while the implementation is very similar to the \code{fibonacci_func} example.
    13897
    13998\begin{figure}
     99\label{fig:fibonacci-cfa}
    140100\begin{cfacode}
    141101coroutine Fibonacci {
     
    148108
    149109//main automatically called on first resume
    150 void main(Fibonacci & this) with (this) {
     110void main(Fibonacci & this) {
    151111        int fn1, fn2;           //retained between resumes
    152         fn = 0;
    153         fn1 = fn;
     112        this.fn = 0;
     113        fn1 = this.fn;
    154114        suspend(this);          //return to last resume
    155115
    156         fn = 1;
     116        this.fn = 1;
    157117        fn2 = fn1;
    158         fn1 = fn;
     118        fn1 = this.fn;
    159119        suspend(this);          //return to last resume
    160120
    161121        for ( ;; ) {
    162                 fn = fn1 + fn2;
     122                this.fn = fn1 + fn2;
    163123                fn2 = fn1;
    164                 fn1 = fn;
     124                fn1 = this.fn;
    165125                suspend(this);  //return to last resume
    166126        }
     
    180140\end{cfacode}
    181141\caption{Implementation of fibonacci using coroutines}
    182 \label{lst:fibonacci-cfa}
    183142\end{figure}
    184143
    185 Figure \ref{lst:fmt-line} shows the \code{Format} coroutine which rearranges text in order to group characters into blocks of fixed size. The example takes advantage of resuming coroutines in the constructor to simplify the code and highlights the idea that interesting control flow can occur in the constructor.
    186 
     144\subsection{Construction}
     145One important design challenge for coroutines and threads (shown in section \ref{threads}) is that the runtime system needs to run code after the user-constructor runs to connect the object into the system. In the case of coroutines, this challenge is simpler since there is no non-determinism from preemption or scheduling. However, the underlying challenge remains the same for coroutines and threads.
     146
      147The runtime system needs to create the coroutine's stack and more importantly prepare it for the first resumption. The timing of the creation is non-trivial since users both expect to have fully constructed objects once execution enters the coroutine main and to be able to resume the coroutine from the constructor. As regular objects, constructors can leak coroutines before they are ready. There are several solutions to this problem but the chosen option effectively forces the design of the coroutine.
     148
     149Furthermore, \CFA faces an extra challenge as polymorphic routines create invisible thunks when casted to non-polymorphic routines and these thunks have function scope. For example, the following code, while looking benign, can run into undefined behaviour because of thunks:
     150
     151\begin{cfacode}
     152//async: Runs function asynchronously on another thread
     153forall(otype T)
     154extern void async(void (*func)(T*), T* obj);
     155
     156forall(otype T)
     157void noop(T *) {}
     158
     159void bar() {
     160        int a;
     161        async(noop, &a);
     162}
     163\end{cfacode}
     164
     165The generated C code\footnote{Code trimmed down for brevity} creates a local thunk to hold type information:
     166
     167\begin{ccode}
     168extern void async(/* omitted */, void (*func)(void *), void *obj);
     169
     170void noop(/* omitted */, void *obj){}
     171
     172void bar(){
     173        int a;
     174        void _thunk0(int *_p0){
     175                /* omitted */
     176                noop(/* omitted */, _p0);
     177        }
     178        /* omitted */
     179        async(/* omitted */, ((void (*)(void *))(&_thunk0)), (&a));
     180}
     181\end{ccode}
     182The problem in this example is a storage management issue, the function pointer \code{_thunk0} is only valid until the end of the block. This extra challenge limits which solutions are viable because storing the function pointer for too long causes undefined behavior; i.e. the stack based thunk being destroyed before it was used. This challenge is an extension of challenges that come with second-class routines. Indeed, GCC nested routines also have the limitation that the routines cannot be passed outside of the scope of the functions these were declared in. The case of coroutines and threads is simply an extension of this problem to multiple call-stacks.
     183
     184\subsection{Alternative: Composition}
      185One solution to this challenge is to use composition/containment, where users insert a coroutine field which contains the necessary information to manage the coroutine.
     186
     187\begin{cfacode}
     188struct Fibonacci {
     189        int fn; //used for communication
     190        coroutine c; //composition
     191};
     192
     193void ?{}(Fibonacci & this) {
     194        this.fn = 0;
     195        (this.c){}; //Call constructor to initialize coroutine
     196}
     197\end{cfacode}
      198There are two downsides to this approach. The first, which is relatively minor, is that the coroutine handle must be made aware of the main routine pointer. This information must either be stored in the coroutine runtime data or in its static type structure. When using composition, all coroutine handles have the same static type structure which means the pointer to the main needs to be part of the runtime data. This requirement means the coroutine data must be made larger to store a value that is actually a compile time constant (address of the main routine). The second problem, which is both subtle and significant, is that now users can get the initialisation order of coroutines wrong. Indeed, every field of a \CFA struct is constructed but in declaration order, unless users explicitly write otherwise. This semantics means that users who forget to initialize the coroutine handle may resume the coroutine with an uninitialized object. For coroutines, this is unlikely to be a problem; for threads, however, this is a significant problem. Figure \ref{fig:fmt-line} shows the \code{Format} coroutine which rearranges text in order to group characters into blocks of fixed size. This is a good example where the control flow is made much simpler from being able to resume the coroutine from the constructor and highlights the idea that interesting control flow can occur in the constructor.
    187199\begin{figure}
     200\label{fig:fmt-line}
    188201\begin{cfacode}[tabsize=3]
    189202//format characters into blocks of 4 and groups of 5 blocks per line
     
    231244\end{cfacode}
    232245\caption{Formatting text into lines of 5 blocks of 4 characters.}
    233 \label{lst:fmt-line}
    234246\end{figure}
    235247
    236 \subsection{Construction}
    237 One important design challenge for coroutines and threads (shown in section \ref{threads}) is that the runtime system needs to run code after the user-constructor runs to connect the fully constructed object into the system. In the case of coroutines, this challenge is simpler since there is no non-determinism from preemption or scheduling. However, the underlying challenge remains the same for coroutines and threads.
    238 
    239 The runtime system needs to create the coroutine's stack and more importantly prepare it for the first resumption. The timing of the creation is non-trivial since users both expect to have fully constructed objects once execution enters the coroutine main and to be able to resume the coroutine from the constructor. As regular objects, constructors can leak coroutines before they are ready. There are several solutions to this problem but the chosen options effectively forces the design of the coroutine.
    240 
    241 Furthermore, \CFA faces an extra challenge as polymorphic routines create invisible thunks when casted to non-polymorphic routines and these thunks have function scope. For example, the following code, while looking benign, can run into undefined behaviour because of thunks:
    242 
    243 \begin{cfacode}
    244 //async: Runs function asynchronously on another thread
    245 forall(otype T)
    246 extern void async(void (*func)(T*), T* obj);
    247 
    248 forall(otype T)
    249 void noop(T*) {}
    250 
    251 void bar() {
    252         int a;
    253         async(noop, &a); //start thread running noop with argument a
    254 }
    255 \end{cfacode}
    256 
    257 The generated C code\footnote{Code trimmed down for brevity} creates a local thunk to hold type information:
    258 
    259 \begin{ccode}
    260 extern void async(/* omitted */, void (*func)(void *), void *obj);
    261 
    262 void noop(/* omitted */, void *obj){}
    263 
    264 void bar(){
    265         int a;
    266         void _thunk0(int *_p0){
    267                 /* omitted */
    268                 noop(/* omitted */, _p0);
    269         }
    270         /* omitted */
    271         async(/* omitted */, ((void (*)(void *))(&_thunk0)), (&a));
    272 }
    273 \end{ccode}
    274 The problem in this example is a storage management issue, the function pointer \code{_thunk0} is only valid until the end of the block, which limits the viable solutions because storing the function pointer for too long causes undefined behavior; i.e., the stack-based thunk being destroyed before it can be used. This challenge is an extension of challenges that come with second-class routines. Indeed, GCC nested routines also have the limitation that nested routine cannot be passed outside of the declaration scope. The case of coroutines and threads is simply an extension of this problem to multiple call-stacks.
    275 
    276 \subsection{Alternative: Composition}
    277 One solution to this challenge is to use composition/containment, where coroutine fields are added to manage the coroutine.
    278 
    279 \begin{cfacode}
    280 struct Fibonacci {
    281         int fn; //used for communication
    282         coroutine c; //composition
    283 };
    284 
    285 void FibMain(void *) {
    286         //...
    287 }
    288 
    289 void ?{}(Fibonacci & this) {
    290         this.fn = 0;
    291         //Call constructor to initialize coroutine
    292         (this.c){myMain};
    293 }
    294 \end{cfacode}
    295 The downside of this approach is that users need to correctly construct the coroutine handle before using it. As with any other object, doing so requires users to carefully choose the construction order to prevent usage of unconstructed objects. However, in the case of coroutines, users must also pass to the coroutine information about the coroutine main, like in the previous example. This opens the door for user errors and requires extra runtime storage to pass at runtime information that can be known statically.
    296248
    297249\subsection{Alternative: Reserved keyword}
     
    303255};
    304256\end{cfacode}
    305 The \code{coroutine} keyword means the compiler can find and inject code where needed. The downside of this approach is that it makes coroutine a special case in the language. Users wanting to extend coroutines or build their own for various reasons can only do so in ways offered by the language. Furthermore, implementing coroutines without language support also displays the power of the programming language used. While this is ultimately the option used for idiomatic \CFA code, coroutines and threads can still be constructed by users without using the language support. The reserved keywords are only present to improve ease of use for the common cases.
     257This means the compiler can solve problems by injecting code where needed. The downside of this approach is that it makes coroutine a special case in the language. Users who would want to extend coroutines or build their own for various reasons can only do so in ways offered by the language. Furthermore, implementing coroutines without language support also displays the power of the programming language used. While this is ultimately the option used for idiomatic \CFA code, coroutines and threads can both be constructed by users without using the language support. The reserved keywords are only present to improve ease of use for the common cases.
    306258
    307259\subsection{Alternative: Lambda Objects}
    308260
    309 For coroutines as for threads, many implementations are based on routine pointers or function objects\cite{Butenhof97, ANSI14:C++, MS:VisualC++, BoostCoroutines15}. For example, Boost implements coroutines in terms of four functor object types:
     261For coroutines as for threads, many implementations are based on routine pointers or function objects\cit. For example, Boost implements coroutines in terms of four functor object types:
    310262\begin{cfacode}
    311263asymmetric_coroutine<>::pull_type
     
    316268Often, the canonical threading paradigm in languages is based on function pointers, pthread being one of the most well known examples. The main problem of this approach is that the thread usage is limited to a generic handle that must otherwise be wrapped in a custom type. Since the custom type is simple to write in \CFA and solves several issues, added support for routine/lambda based coroutines adds very little.
    317269
    318 A variation of this would be to use a simple function pointer in the same way pthread does for threads :
     270A variation of this would be to use a simple function pointer in the same way pthread does for threads :
    319271\begin{cfacode}
    320272void foo( coroutine_t cid, void * arg ) {
     
    329281}
    330282\end{cfacode}
    331 This semantics is more common for thread interfaces than coroutines but works equally well. As discussed in section \ref{threads}, this approach is superseded by static approaches in terms of expressivity.
     283This semantic is more common for thread interfaces than coroutines but would work equally well. As discussed in section \ref{threads}, this approach is superseded by static approaches in terms of expressivity.
    332284
    333285\subsection{Alternative: Trait-based coroutines}
     
    398350\end{cfacode}
    399351
    400 In this example, threads of type \code{foo} start execution in the \code{void main(foo &)} routine, which prints \code{"Hello World!"}. While this thesis encourages this approach to enforce strongly-typed programming, users may prefer to use the routine-based thread semantics for the sake of simplicity. With the static semantics it is trivial to write a thread type that takes a function pointer as a parameter and executes it on its stack asynchronously.
     352In this example, threads of type \code{foo} start execution in the \code{void main(foo &)} routine, which prints \code{"Hello World!"}. While this thesis encourages this approach to enforce strongly-typed programming, users may prefer to use the routine-based thread semantics for the sake of simplicity. With these semantics it is trivial to write a thread type that takes a function pointer as a parameter and executes it on its stack asynchronously
    401353\begin{cfacode}
    402354typedef void (*voidFunc)(int);
     
    409361void ?{}(FuncRunner & this, voidFunc inFunc, int arg) {
    410362        this.func = inFunc;
    411         this.arg  = arg;
    412363}
    413364
    414365void main(FuncRunner & this) {
    415         //thread starts here and runs the function
    416366        this.func( this.arg );
    417367}
    418368\end{cfacode}
    419369
    420 A consequence of the strongly-typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \acrshort{api}.
     370A consequence of the strongly typed approach to main is that memory layout of parameters and return values to/from a thread are now explicitly specified in the \acrshort{api}.
    421371
    422372Of course for threads to be useful, it must be possible to start and stop threads and wait for them to complete execution. While using an \acrshort{api} such as \code{fork} and \code{join} is relatively common in the literature, such an interface is unnecessary. Indeed, the simplest approach is to use \acrshort{raii} principles and have threads \code{fork} after the constructor has completed and \code{join} before the destructor runs.
     
    439389\end{cfacode}
    440390
    441 This semantic has several advantages over explicit semantics: a thread is always started and stopped exactly once, users cannot make any programming errors, and it naturally scales to multiple threads meaning basic synchronisation is very simple.
     391This semantic has several advantages over explicit semantics: a thread is always started and stopped exactly once, users cannot make any programming errors, and it naturally scales to multiple threads meaning basic synchronisation is very simple.
    442392
    443393\begin{cfacode}
     
    461411\end{cfacode}
    462412
    463 However, one of the drawbacks of this approach is that threads now always form a lattice, that is they are always destroyed in the opposite order of construction because of block structure. This restriction is relaxed by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created.
     413However, one of the drawbacks of this approach is that threads now always form a lattice, that is they are always destroyed in opposite order of construction because of block structure. This restriction is relaxed by using dynamic allocation, so threads can outlive the scope in which they are created, much like dynamically allocating memory lets objects outlive the scope in which they are created
    464414
    465415\begin{cfacode}
  • doc/proposals/concurrency/text/cforall.tex

    r0fe4e62 rf5c3b6c  
    11% ======================================================================
    22% ======================================================================
    3 \chapter{Cforall Overview}
     3\chapter{Cforall crash course}
    44% ======================================================================
    55% ======================================================================
    66
    7 The following is a quick introduction to the \CFA language, specifically tailored to the features needed to support concurrency.
     7This thesis presents the design for a set of concurrency features in \CFA. Since it is a new dialect of C, the following is a quick introduction to the language, specifically tailored to the features needed to support concurrency.
    88
    9 \CFA is an extension of ISO-C and therefore supports all of the same paradigms as C. It is a non-object oriented system language, meaning most of the major abstractions have either no runtime overhead or can be opted out of easily. Like C, the basics of \CFA revolve around structures and routines, which are thin abstractions over machine code. The vast majority of the code produced by the \CFA translator respects memory-layouts and calling-conventions laid out by C. Interestingly, while \CFA is not an object-oriented language, lacking the concept of a receiver (e.g., this), it does have some notion of objects\footnote{C defines the term objects as : ``region of data storage in the execution environment, the contents of which can represent
    10 values''\cite[3.15]{C11}}, most importantly construction and destruction of objects. Most of the following code examples can be found on the \CFA website \cite{www-cfa}
     9\CFA is an extension of ISO-C and therefore supports all of the same paradigms as C. It is a non-object oriented system language, meaning most of the major abstractions have either no runtime overhead or can be opted out of easily. Like C, the basics of \CFA revolve around structures and routines, which are thin abstractions over machine code. The vast majority of the code produced by the \CFA translator respects memory-layouts and calling-conventions laid out by C. Interestingly, while \CFA is not an object-oriented language, lacking the concept of a receiver (e.g., this), it does have some notion of objects\footnote{C defines the term objects as : ``region of data storage in the execution environment, the contents of which can represent values''\cite[3.15]{C11}}, most importantly construction and destruction of objects. Most of the following pieces of code can be found on the \CFA website \cite{www-cfa}.
    1110
    1211\section{References}
    1312
    14 Like \CC, \CFA introduces rebindable references providing multiple dereferencing as an alternative to pointers. In regards to concurrency, the semantic differences between pointers and references are not particularly relevant, but since this document uses mostly references, here is a quick overview of the semantics:
     13Like \CC, \CFA introduces references as an alternative to pointers. In regards to concurrency, the semantic differences between pointers and references are not particularly relevant, but since this document uses mostly references, here is a quick overview of the semantics:
    1514\begin{cfacode}
    1615int x, *p1 = &x, **p2 = &p1, ***p3 = &p2,
    17         &r1 = x,    &&r2 = r1,   &&&r3 = r2;
     16&r1 = x,    &&r2 = r1,   &&&r3 = r2;
    1817***p3 = 3;                                                      //change x
    1918r3    = 3;                                                      //change x, ***r3
     
    2625sizeof(&ar[1]) == sizeof(int *);        //is true, i.e., the size of a reference
    2726\end{cfacode}
    28 The important take away from this code example is that references offer a handle to an object, much like pointers, but which is automatically dereferenced for convenience.
     27The important thing to take away from this code snippet is that references offer a handle to an object, much like pointers, but which is automatically dereferenced when convenient.
    2928
    3029\section{Overloading}
    3130
    32 Another important feature of \CFA is function overloading as in Java and \CC, where routines with the same name are selected based on the number and type of the arguments. As well, \CFA uses the return type as part of the selection criteria, as in Ada\cite{Ada}. For routines with multiple parameters and returns, the selection is complex.
     31Another important feature of \CFA is function overloading as in Java and \CC, where routines with the same name are selected based on the number and type of the arguments. As well, \CFA uses the return type as part of the selection criteria, as in Ada\cite{Ada}. For routines with multiple parameters and returns, the selection is complex.
    3332\begin{cfacode}
    3433//selection based on type and number of parameters
     
    4645double d = f(4);                //select (2)
    4746\end{cfacode}
    48 This feature is particularly important for concurrency since the runtime system relies on creating different types to represent concurrency objects. Therefore, overloading is necessary to prevent the need for long prefixes and other naming conventions that prevent name clashes. As seen in chapter \ref{basics}, routine \code{main} is an example that benefits from overloading.
     47This feature is particularly important for concurrency since the runtime system relies on creating different types to represent concurrency objects. Therefore, overloading is necessary to prevent the need for long prefixes and other naming conventions that prevent name clashes. As seen in chapter \ref{basics}, routine \code{main} is an example that benefits from overloading.
    4948
    5049\section{Operators}
    51 Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation occur, e.g.:
     50Overloading also extends to operators. The syntax for denoting operator-overloading is to name a routine with the symbol of the operator and question marks where the arguments of the operation would be, like so :
    5251\begin{cfacode}
    5352int ++? (int op);                       //unary prefix increment
     
    102101
    103102\section{Parametric Polymorphism}
    104 Routines in \CFA can also be reused for multiple types. This capability is done using the \code{forall} clause which gives \CFA its name. \code{forall} clauses allow separately compiled routines to support generic usage over multiple types. For example, the following sum function works for any type that supports construction from 0 and addition :
     103Routines in \CFA can also be reused for multiple types. This is done using the \code{forall} clause which gives \CFA its name. \code{forall} clauses allow separately compiled routines to support generic usage over multiple types. For example, the following sum function will work for any type which supports construction from 0 and addition :
    105104\begin{cfacode}
    106105//constraint type, 0 and +
     
    117116\end{cfacode}
    118117
    119 Since writing constraints on types can become cumbersome for more constrained functions, \CFA also has the concept of traits. Traits are named collection of constraints that can be used both instead and in addition to regular constraints:
     118Since writing constraints on types can become cumbersome for more constrained functions, \CFA also has the concept of traits. Traits are named collection of constraints which can be used both instead and in addition to regular constraints:
    120119\begin{cfacode}
    121120trait sumable( otype T ) {
     
    131130
    132131\section{with Clause/Statement}
    133 Since \CFA lacks the concept of a receiver, certain functions end-up needing to repeat variable names often. To remove this inconvenience, \CFA provides the \code{with} statement, which opens an aggregate scope making its fields directly accessible (like Pascal).
     132Since \CFA lacks the concept of a receiver, certain functions end-up needing to repeat variable names often, to solve this \CFA offers the \code{with} statement which opens an aggregate scope making its fields directly accessible (like Pascal).
    134133\begin{cfacode}
    135134struct S { int i, j; };
    136 int mem(S & this) with (this)           //with clause
     135int mem(S & this) with this             //with clause
    137136        i = 1;                                          //this->i
    138137        j = 2;                                          //this->j
     
    141140        struct S1 { ... } s1;
    142141        struct S2 { ... } s2;
    143         with (s1)                                       //with statement
     142        with s1                                         //with statement
    144143        {
    145144                //access fields of s1
    146145                //without qualification
    147                 with (s2)                                       //nesting
     146                with s2                                 //nesting
    148147                {
    149148                        //access fields of s1 and s2
     
    151150                }
    152151        }
    153         with (s1, s2)                           //scopes open in parallel
     152        with s1, s2                             //scopes open in parallel
    154153        {
    155154                //access fields of s1 and s2
  • doc/proposals/concurrency/text/concurrency.tex

    r0fe4e62 rf5c3b6c  
    44% ======================================================================
    55% ======================================================================
    6 Several tools can be used to solve concurrency challenges. Since many of these challenges appear with the use of mutable shared-state, some languages and libraries simply disallow mutable shared-state (Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka (Scala)~\cite{Akka}). In these paradigms, interaction among concurrent objects relies on message passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely related to networking concepts (channels\cite{CSP,Go} for example). However, in languages that use routine calls as their core abstraction-mechanism, these approaches force a clear distinction between concurrent and non-concurrent paradigms (i.e., message passing versus routine call). This distinction in turn means that, in order to be effective, programmers need to learn two sets of design patterns. While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.
     6Several tools can be used to solve concurrency challenges. Since many of these challenges appear with the use of mutable shared-state, some languages and libraries simply disallow mutable shared-state (Erlang~\cite{Erlang}, Haskell~\cite{Haskell}, Akka (Scala)~\cite{Akka}). In these paradigms, interaction among concurrent objects relies on message passing~\cite{Thoth,Harmony,V-Kernel} or other paradigms closely related to networking concepts (channels\cit for example). However, in languages that use routine calls as their core abstraction-mechanism, these approaches force a clear distinction between concurrent and non-concurrent paradigms (i.e., message passing versus routine call). This distinction in turn means that, in order to be effective, programmers need to learn two sets of design patterns. While this distinction can be hidden away in library code, effective use of the library still has to take both paradigms into account.
    77
    88Approaches based on shared memory are more closely related to non-concurrent paradigms since they often rely on basic constructs like routine calls and shared objects. At the lowest level, concurrent paradigms are implemented as atomic operations and locks. Many such mechanisms have been proposed, including semaphores~\cite{Dijkstra68b} and path expressions~\cite{Campbell74}. However, for productivity reasons it is desirable to have a higher-level construct be the core concurrency paradigm~\cite{HPP:Study}.
    99
    10 An approach that is worth mentioning because it is gaining in popularity is transactional memory~\cite{Dice10}[Check citation]. While this approach is even pursued by system languages like \CC\cite{Cpp-Transactions}, the performance and feature set are currently too restrictive to be the main concurrency paradigm for systems languages, which is why it was rejected as the core paradigm for concurrency in \CFA.
     10An approach that is worth mentioning because it is gaining in popularity is transactional memory~\cite{Dice10}[Check citation]. While this approach is even pursued by system languages like \CC\cit, the performance and feature set are currently too restrictive to be the main concurrency paradigm for systems languages, which is why it was rejected as the core paradigm for concurrency in \CFA.
    1111
    1212One of the most natural, elegant, and efficient mechanisms for synchronization and communication, especially for shared-memory systems, is the \emph{monitor}. Monitors were first proposed by Brinch Hansen~\cite{Hansen73} and later described and extended by C.A.R.~Hoare~\cite{Hoare74}. Many programming languages---e.g., Concurrent Pascal~\cite{ConcurrentPascal}, Mesa~\cite{Mesa}, Modula~\cite{Modula-2}, Turing~\cite{Turing:old}, Modula-3~\cite{Modula-3}, NeWS~\cite{NeWS}, Emerald~\cite{Emerald}, \uC~\cite{Buhr92a} and Java~\cite{Java}---provide monitors as explicit language constructs. In addition, operating-system kernels and device drivers have a monitor-like structure, although they often use lower-level primitives such as semaphores or locks to simulate monitors. For these reasons, this project proposes monitors as the core concurrency-construct.
     
    1919
    2020\subsection{Synchronization}
    21 As for mutual-exclusion, low-level synchronisation primitives often offer good performance and good flexibility at the cost of ease of use. Again, higher-level mechanisms often simplify usage by adding better coupling between synchronization and data, e.g.: message passing, or offering simpler solutions to otherwise involved challenges. As mentioned above, synchronization can be expressed as guaranteeing that event \textit{X} always happens before \textit{Y}. Most of the time, synchronisation happens within a critical section, where threads must acquire mutual-exclusion in a certain order. However, it may also be desirable to guarantee that event \textit{Z} does not occur between \textit{X} and \textit{Y}. Not satisfying this property is called barging. For example, where event \textit{X} tries to effect event \textit{Y} but another thread acquires the critical section and emits \textit{Z} before \textit{Y}. The classic example is the thread that finishes using a resource and unblocks a thread waiting to use the resource, but the unblocked thread must compete again to acquire the resource. Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs. This challenge is often split into two different methods, barging avoidance and barging prevention. Algorithms that use status flags and other flag variables to detect barging threads are said to be using barging avoidance while algorithms that baton-pass locks between threads instead of releasing the locks are said to be using barging prevention.
     21As for mutual-exclusion, low-level synchronisation primitives often offer good performance and good flexibility at the cost of ease of use. Again, higher-level mechanisms often simplify usage by adding better coupling between synchronization and data, e.g.: message passing, or offering simple solutions to otherwise involved challenges. An example is barging. As mentioned above, synchronization can be expressed as guaranteeing that event \textit{X} always happens before \textit{Y}. Most of the time, synchronisation happens around a critical section, where threads must acquire critical sections in a certain order. However, it may also be desirable to guarantee that event \textit{Z} does not occur between \textit{X} and \textit{Y}. Not satisfying this property is called barging. For example, where event \textit{X} tries to effect event \textit{Y} but another thread acquires the critical section and emits \textit{Z} before \textit{Y}. Preventing or detecting barging is an involved challenge with low-level locks, which can be made much easier by higher-level constructs. This challenge is often split into two different methods, barging avoidance and barging prevention. Algorithms that use status flags and other flag variables to detect barging threads are said to be using barging avoidance while algorithms that baton-pass locks between threads instead of releasing the locks are said to be using barging prevention.
    2222
    2323% ======================================================================
     
    7171\end{tabular}
    7272\end{center}
    73 Notice how the counter is used without any explicit synchronisation and yet supports thread-safe semantics for both reading and writing, which is similar in usage to \CC \code{atomic} template.
    74 
    75 Here, the constructor(\code{?\{\}}) uses the \code{nomutex} keyword to signify that it does not acquire the monitor mutual-exclusion when constructing. This semantics is because an object not yet con\-structed should never be shared and therefore does not require mutual exclusion. The prefix increment operator uses \code{mutex} to protect the incrementing process from race conditions. Finally, there is a conversion operator from \code{counter_t} to \code{size_t}. This conversion may or may not require the \code{mutex} keyword depending on whether or not reading a \code{size_t} is an atomic operation.
    76 
    77 For maximum usability, monitors use \gls{multi-acq} semantics, which means a single thread can acquire the same monitor multiple times without deadlock. For example, figure \ref{fig:search} uses recursion and \gls{multi-acq} to print values inside a binary tree.
     73Notice how the counter is used without any explicit synchronisation and yet supports thread-safe semantics for both reading and writing.
     74
     75Here, the constructor(\code{?\{\}}) uses the \code{nomutex} keyword to signify that it does not acquire the monitor mutual-exclusion when constructing. This semantics is because an object not yet constructed should never be shared and therefore does not require mutual exclusion. The prefix increment operator uses \code{mutex} to protect the incrementing process from race conditions. Finally, there is a conversion operator from \code{counter_t} to \code{size_t}. This conversion may or may not require the \code{mutex} keyword depending on whether or not reading a \code{size_t} is an atomic operation.
     76
     77For maximum usability, monitors use \gls{multi-acq} semantics, which means a single thread can acquire multiple times the same monitor without deadlock. For example, figure \ref{fig:search} uses recursion and \gls{multi-acq} to print values inside a binary tree.
    7878\begin{figure}
    7979\label{fig:search}
     
    9595\end{figure}
    9696
    97 Having both \code{mutex} and \code{nomutex} keywords is redundant based on the meaning of a routine having neither of these keywords. For example, given a routine without qualifiers \code{void foo(counter_t & this)}, then it is reasonable that it should default to the safest option \code{mutex}, whereas assuming \code{nomutex} is unsafe and may cause subtle errors. In fact, \code{nomutex} is the ``normal'' parameter behaviour, with the \code{nomutex} keyword effectively stating explicitly that ``this routine is not special''. Another alternative is making exactly one of these keywords mandatory, which provides the same semantics but without the ambiguity of supporting routines with neither keyword. Mandatory keywords would also have the added benefit of being self-documented but at the cost of extra typing. While there are several benefits to mandatory keywords, they do bring a few challenges. Mandatory keywords in \CFA would imply that the compiler must know without doubt whether or not a parameter is a monitor or not. Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred. For this reason, \CFA only has the \code{mutex} keyword and uses no keyword to mean \code{nomutex}.
     97Having both \code{mutex} and \code{nomutex} keywords is redundant based on the meaning of a routine having neither of these keywords. For example, given a routine without qualifiers \code{void foo(counter_t & this)}, then it is reasonable that it should default to the safest option \code{mutex}, whereas assuming \code{nomutex} is unsafe and may cause subtle errors. In fact, \code{nomutex} is the "normal" parameter behaviour, with the \code{nomutex} keyword effectively stating explicitly that "this routine is not special". Another alternative is making exactly one of these keywords mandatory, which would provide the same semantics but without the ambiguity of supporting routines with neither keyword. Mandatory keywords would also have the added benefit of being self-documented but at the cost of extra typing. While there are several benefits to mandatory keywords, they do bring a few challenges. Mandatory keywords in \CFA would imply that the compiler must know without doubt whether or not a parameter is a monitor or not. Since \CFA relies heavily on traits as an abstraction mechanism, the distinction between a type that is a monitor and a type that looks like a monitor can become blurred. For this reason, \CFA only has the \code{mutex} keyword and uses no keyword to mean \code{nomutex}.
    9898
    9999The next semantic decision is to establish when \code{mutex} may be used as a type qualifier. Consider the following declarations:
     
    113113int f5(monitor * mutex m []); //Not Okay : Array of unkown length
    114114\end{cfacode}
    115 Note that not all array functions are actually distinct in the type system. However, even if the code generation could tell the difference, the extra information is still not sufficient to extend meaningfully the monitor call semantic.
    116 
    117 Unlike object-oriented monitors, where calling a mutex member \emph{implicitly} acquires mutual-exclusion of the receiver object, \CFA uses an explicit mechanism to acquire mutual-exclusion. A consequence of this approach is that it extends naturally to multi-monitor calls.
     115Note that not all array functions are actually distinct in the type system sense. However, even if the code generation could tell the difference, the extra information is still not sufficient to extend meaningfully the monitor call semantic.
     116
     117Unlike object-oriented monitors, where calling a mutex member \emph{implicitly} acquires mutual-exclusion of the receiver object, \CFA uses an explicit mechanism to acquire mutual-exclusion. A consequence of this approach is that it extends naturally to multi-monitor calls.
    118118\begin{cfacode}
    119119int f(MonitorA & mutex a, MonitorB & mutex b);
     
    123123f(a,b);
    124124\end{cfacode}
    125 While OO monitors could be extended with a mutex qualifier for multiple-monitor calls, no example of this feature could be found. The capacity to acquire multiple locks before entering a critical section is called \emph{\gls{bulk-acq}}. In practice, writing multi-locking routines that do not lead to deadlocks is tricky. Having language support for such a feature is therefore a significant asset for \CFA. In the case presented above, \CFA guarantees that the order of acquisition is consistent across calls to different routines using the same monitors as arguments. This consistent ordering means acquiring multiple monitors in this way is safe from deadlock. However, users can still force the acquiring order. For example, notice which routines use \code{mutex}/\code{nomutex} and how this affects the acquiring order:
     125The capacity to acquire multiple locks before entering a critical section is called \emph{\gls{bulk-acq}}. In practice, writing multi-locking routines that do not lead to deadlocks is tricky. Having language support for such a feature is therefore a significant asset for \CFA. In the case presented above, \CFA guarantees that the order of acquisition is consistent across calls to routines using the same monitors as arguments. However, since \CFA monitors use \gls{multi-acq} locks, users can effectively force the acquiring order. For example, notice which routines use \code{mutex}/\code{nomutex} and how this affects the acquiring order:
    126126\begin{cfacode}
    127127void foo(A & mutex a, B & mutex b) { //acquire a & b
     
    139139The \gls{multi-acq} monitor lock allows a monitor lock to be acquired by both \code{bar} or \code{baz} and acquired again in \code{foo}. In the calls to \code{bar} and \code{baz} the monitors are acquired in opposite order.
    140140
    141 However, such use leads to the lock acquiring order problem. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle mistake means that calling these routines concurrently may lead to deadlock and is therefore undefined behavior. As shown\cite{Lister77}, solving this problem requires:
     141However, such use leads to the lock acquiring order problem. In the example above, the user uses implicit ordering in the case of function \code{foo} but explicit ordering in the case of \code{bar} and \code{baz}. This subtle mistake means that calling these routines concurrently may lead to deadlock and is therefore undefined behavior. As shown on several occasions\cit, solving this problem requires:
    142142\begin{enumerate}
    143143        \item Dynamically tracking of the monitor-call order.
    144144        \item Implement rollback semantics.
    145145\end{enumerate}
    146 While the first requirement is already a significant constraint on the system, implementing a general rollback semantics in a C-like language is still prohibitively complex \cite{Dice10}. In \CFA, users simply need to be careful when acquiring multiple monitors at the same time or only use \gls{bulk-acq} of all the monitors. While \CFA provides only a partial solution, many systems provide no solution and the \CFA partial solution handles many useful cases.
    147 
    148 For example, \gls{multi-acq} and \gls{bulk-acq} can be used together in interesting ways:
     146While the first requirement is already a significant constraint on the system, implementing a general rollback semantics in a C-like language is prohibitively complex \cit. In \CFA, users simply need to be careful when acquiring multiple monitors at the same time or only use \gls{bulk-acq} of all the monitors.
     147
     148\Gls{multi-acq} and \gls{bulk-acq} can be used together in interesting ways, for example:
    149149\begin{cfacode}
    150150monitor bank { ... };
     
    157157}
    158158\end{cfacode}
    159 This example shows a trivial solution to the bank-account transfer-problem\cite{BankTransfer}. Without \gls{multi-acq} and \gls{bulk-acq}, the solution to this problem is much more involved and requires careful engineering.
    160 
    161 \subsection{\code{mutex} statement} \label{mutex-stmt}
    162 
    163 The call semantics discussed above have one software engineering issue: only a named routine can acquire the mutual-exclusion of a set of monitors. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem\cite{2FTwoHardThings}. Listing \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitors is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.
     159This example shows a trivial solution to the bank account transfer problem\cit. Without \gls{multi-acq} and \gls{bulk-acq}, the solution to this problem is much more involved and requires careful engineering.
     160
     161\subsubsection{\code{mutex} statement} \label{mutex-stmt}
     162
     163The call semantics discussed above have one software engineering issue: only a named routine can acquire the mutual-exclusion of a set of monitors. \CFA offers the \code{mutex} statement to work around the need for unnecessary names, avoiding a major software engineering problem\cit. Listing \ref{lst:mutex-stmt} shows an example of the \code{mutex} statement, which introduces a new scope in which the mutual-exclusion of a set of monitors is acquired. Beyond naming, the \code{mutex} statement has no semantic difference from a routine call with \code{mutex} parameters.
    164164
    165165\begin{figure}
     
    218218\end{cfacode}
    219219
    220 Like threads and coroutines, monitors are defined in terms of traits with some additional language support in the form of the \code{monitor} keyword. The monitor trait is :
    221 \begin{cfacode}
    222 trait is_monitor(dtype T) {
    223         monitor_desc * get_monitor( T & );
    224         void ^?{}( T & mutex );
    225 };
    226 \end{cfacode}
    227 Note that the destructor of a monitor must be a \code{mutex} routine. This requirement ensures that the destructor has mutual-exclusion. As with any object, any call to a monitor, using \code{mutex} or otherwise, is Undefined Behaviour after the destructor has run.
    228 
    229 % ======================================================================
    230 % ======================================================================
    231 \section{Internal scheduling} \label{intsched}
    232 % ======================================================================
    233 % ======================================================================
    234 In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronisation. With monitors, this capability is generally achieved with internal or external scheduling as in \cite{Hoare74}. Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side.
     220
     221% ======================================================================
     222% ======================================================================
     223\section{Internal scheduling} \label{insched}
     224% ======================================================================
     225% ======================================================================
     226In addition to mutual exclusion, the monitors at the core of \CFA's concurrency can also be used to achieve synchronisation. With monitors, this capability is generally achieved with internal or external scheduling as in\cit. Since internal scheduling within a single monitor is mostly a solved problem, this thesis concentrates on extending internal scheduling to multiple monitors. Indeed, like the \gls{bulk-acq} semantics, internal scheduling extends to multiple monitors in a way that is natural to the user but requires additional complexity on the implementation side.
    235227
    236228First, here is a simple example of such a technique:
     
    256248\end{cfacode}
    257249
    258 There are two details to note here. First, the \code{signal} is a delayed operation, it only unblocks the waiting thread when it reaches the end of the critical section. This semantic is needed to respect mutual-exclusion. The alternative is to return immediately after the call to \code{signal}, which is significantly more restrictive. Second, in \CFA, while it is common to store a \code{condition} as a field of the monitor, a \code{condition} variable can be stored/created independently of a monitor. Here routine \code{foo} waits for the \code{signal} from \code{bar} before making further progress, effectively ensuring a basic ordering.
    259 
    260 An important aspect of the implementation is that \CFA does not allow barging, which means that once function \code{bar} releases the monitor, \code{foo} is guaranteed to resume immediately after (unless some other thread waited on the same condition). This guarantee offers the benefit of not having to loop around waits in order to guarantee that a condition is still met. The main reason \CFA offers this guarantee is that users can easily introduce barging if it becomes a necessity but adding barging prevention or barging avoidance is more involved without language support. Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design of \CFA concurrency.
     250There are two details to note here. First, the \code{signal} is a delayed operation, it only unblocks the waiting thread when it reaches the end of the critical section. This semantic is needed to respect mutual-exclusion. Second, in \CFA, a \code{condition} variable can be stored/created independently of a monitor. Here routine \code{foo} waits for the \code{signal} from \code{bar} before making further progress, effectively ensuring a basic ordering.
     251
     252An important aspect of the implementation is that \CFA does not allow barging, which means that once function \code{bar} releases the monitor, \code{foo} is guaranteed to resume immediately after (unless some other thread waited on the same condition). This guarantee offers the benefit of not having to loop around waits in order to guarantee that a condition is still met. The main reason \CFA offers this guarantee is that users can easily introduce barging if it becomes a necessity but adding barging prevention or barging avoidance is more involved without language support. Supporting barging prevention as well as extending internal scheduling to multiple monitors is the main source of complexity in the design of \CFA concurrency.
    261253
    262254% ======================================================================
     
    265257% ======================================================================
    266258% ======================================================================
    267 It is easier to understand the problem of multi-monitor scheduling using a series of pseudo-code examples. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling are done using an implicit condition variable, like Java built-in monitors. Indeed, \code{wait} statements always use the implicit condition as parameter and explicitly name the monitors (A and B) associated with the condition. Note that in \CFA, condition variables are tied to a set of monitors on first use (called branding), which means that using internal scheduling with distinct sets of monitors requires one condition variable per set of monitors.
     259It is easier to understand the problem of multi-monitor scheduling using a series of pseudo-code examples. Note that for simplicity in the following snippets of pseudo-code, waiting and signalling are done using an implicit condition variable, like Java built-in monitors. Indeed, \code{wait} statements always use a single condition as parameter and wait on the monitors associated with the condition.
    268260
    269261\begin{multicols}{2}
     
    303295\end{pseudo}
    304296\end{multicols}
    305 This version uses \gls{bulk-acq} (denoted using the {\sf\&} symbol), but the presence of multiple monitors does not add a particularly new meaning. Synchronization happens between the two threads in exactly the same way and order. The only difference is that mutual exclusion covers more monitors. On the implementation side, handling multiple monitors does add a degree of complexity as the next few examples demonstrate.
    306 
    307 While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well known deadlock problem is the Nested Monitor Problem \cite{Lister77}, which occurs when a \code{wait} is made by a thread that holds more than one monitor. For example, the following pseudo-code runs into the nested-monitor problem :
     297This version uses \gls{bulk-acq} (denoted using the \& symbol), but the presence of multiple monitors does not add a particularly new meaning. Synchronization happens between the two threads in exactly the same way and order. The only difference is that mutual exclusion covers more monitors. On the implementation side, handling multiple monitors does add a degree of complexity as the next few examples demonstrate.
     298
     299While deadlock issues can occur when nesting monitors, these issues are only a symptom of the fact that locks, and by extension monitors, are not perfectly composable. For monitors, a well-known deadlock problem is the Nested Monitor Problem\cit, which occurs when a \code{wait} is made by a thread that holds more than one monitor. For example, the following pseudo-code will run into the nested monitor problem :
    308300\begin{multicols}{2}
    309301\begin{pseudo}
     
    325317\end{pseudo}
    326318\end{multicols}
    327 
    328 The \code{wait} only releases monitor \code{B} so the signalling thread cannot acquire monitor \code{A} to get to the \code{signal}. Attempting release of all acquired monitors at the \code{wait} results in another set of problems such as releasing monitor \code{C}, which has nothing to do with the \code{signal}.
    329 
    330319However, for monitors as for locks, it is possible to write a program using nesting without encountering any problems if nesting is done correctly. For example, the next pseudo-code snippet acquires monitors {\sf A} then {\sf B} before waiting, while only acquiring {\sf B} when signalling, effectively avoiding the nested monitor problem.
    331320
     
    350339\end{multicols}
    351340
    352 % ======================================================================
    353 % ======================================================================
    354 \subsection{Internal Scheduling - in depth}
    355 % ======================================================================
    356 % ======================================================================
    357 
    358 A larger example is presented to show complex issues for \gls{bulk-acq} and all the implementation options are analyzed. Listing \ref{lst:int-bulk-pseudo} shows an example where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics, and listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code which implements the pseudo-code in listing \ref{lst:int-bulk-pseudo}. For the purpose of translating the given pseudo-code into \CFA-code any method of introducing a monitor into context, other than a \code{mutex} parameter, is acceptable, e.g., global variables, pointer parameters or using locals with the \code{mutex}-statement.
     341Listing \ref{lst:int-bulk-pseudo} shows an example where \gls{bulk-acq} adds a significant layer of complexity to the internal signalling semantics. Listing \ref{lst:int-bulk-cfa} shows the corresponding \CFA code which implements the pseudo-code in listing \ref{lst:int-bulk-pseudo}. Note that listing \ref{lst:int-bulk-cfa} uses non-\code{mutex} parameter to introduce monitor \code{b} into context. However, for the purpose of translating the given pseudo-code into \CFA-code any method of introducing new monitors into context, other than a \code{mutex} parameter, is acceptable, e.g. global variables, pointer parameters or using locals with the \code{mutex}-statement.
    359342
    360343\begin{figure}[!b]
     
    393376
    394377\begin{figure}[!b]
    395 \begin{center}
    396 \begin{cfacode}[xleftmargin=.4\textwidth]
    397 monitor A a;
    398 monitor B b;
    399 condition c;
    400 \end{cfacode}
    401 \end{center}
    402378\begin{multicols}{2}
    403379Waiting thread
    404380\begin{cfacode}
    405 mutex(a) {
     381monitor A;
     382monitor B;
     383extern condition c;
     384void foo(A & mutex a, B & b) {
    406385        //Code Section 1
    407386        mutex(a, b) {
     
    418397Signalling thread
    419398\begin{cfacode}
    420 mutex(a) {
     399monitor A;
     400monitor B;
     401extern condition c;
     402void foo(A & mutex a, B & b) {
    421403        //Code Section 5
    422404        mutex(a, b) {
     
    433415\end{figure}
    434416
    435 The complexity begins at code sections 4 and 8, which are where the existing semantics of internal scheduling need to be extended for multiple monitors. The root of the problem is that \gls{bulk-acq} is used in a context where one of the monitors is already acquired, which is why it is important to define the behaviour of the previous pseudo-code. When the signaller thread reaches the location where it should ``release \code{A & B}'' (line 16), it must actually transfer ownership of monitor \code{B} to the waiting thread. This ownership transfer is required in order to prevent barging. Since the signalling thread still needs monitor \code{A}, simply waking up the waiting thread is not an option because it violates mutual exclusion. There are three options.
     417It is particularly important to pay attention to code sections 4 and 8, which are where the existing semantics of internal scheduling need to be extended for multiple monitors. The root of the problem is that \gls{bulk-acq} is used in a context where one of the monitors is already acquired, which is why it is important to define the behaviour of the previous pseudo-code. When the signaller thread reaches the location where it should ``release A \& B'' (line 16), it must actually transfer ownership of monitor B to the waiting thread. This ownership transfer is required in order to prevent barging. Since the signalling thread still needs monitor A, simply waking up the waiting thread is not an option because it would violate mutual exclusion. There are three options.
    436418
    437419\subsubsection{Delaying signals}
    438 The obvious solution to the problem of multi-monitor scheduling is to keep ownership of all locks until the last lock is ready to be transferred. It can be argued that that moment is when the last lock is no longer needed because this semantics fits most closely to the behaviour of single-monitor scheduling. This solution has the main benefit of transferring ownership of groups of monitors, which simplifies the semantics from multiple objects to a single group of objects, effectively making the existing single-monitor semantic viable by simply changing monitors to monitor groups.
     420The first, more obvious, solution to the problem of multi-monitor scheduling is to keep ownership of all locks until the last lock is ready to be transferred. It can be argued that the correct time to transfer ownership is when the last lock is no longer needed, because this semantics fits most closely to the behaviour of single monitor scheduling. This solution has the main benefit of transferring ownership of groups of monitors, which simplifies the semantics from multiple objects to a single group of objects, effectively making the existing single monitor semantic viable by simply changing monitors to monitor groups.
    439421\begin{multicols}{2}
    440422Waiter
     
    461443\end{multicols}
    462444However, this solution can become much more complicated depending on what is executed while secretly holding B (at line 10). Indeed, nothing prevents signalling monitor A on a different condition variable:
    463 \begin{figure}
    464 \begin{multicols}{3}
    465 Thread $\alpha$
     445\begin{multicols}{2}
     446Thread 1
    466447\begin{pseudo}[numbers=left, firstnumber=1]
    467448acquire A
     
    472453\end{pseudo}
    473454
    474 \columnbreak
    475 
    476 Thread $\gamma$
    477 \begin{pseudo}[numbers=left, firstnumber=1]
     455Thread 2
     456\begin{pseudo}[numbers=left, firstnumber=6]
     457acquire A
     458        wait A
     459release A
     460\end{pseudo}
     461
     462\columnbreak
     463
     464Thread 3
     465\begin{pseudo}[numbers=left, firstnumber=9]
    478466acquire A
    479467        acquire A & B
    480468                signal A & B
    481469        release A & B
     470        //Secretly keep B here
    482471        signal A
    483472release A
    484 \end{pseudo}
    485 
    486 \columnbreak
    487 
    488 Thread $\beta$
    489 \begin{pseudo}[numbers=left, firstnumber=1]
    490 acquire A
    491         wait A
    492 release A
    493 \end{pseudo}
    494 
    495 \end{multicols}
    496 \caption{Dependency graph}
    497 \label{lst:dependency}
    498 \end{figure}
     473//Wakeup thread 1 or 2?
     474//Who wakes up the other thread?
     475\end{pseudo}
     476\end{multicols}
    499477
    500478The goal in this solution is to avoid the need to transfer ownership of a subset of the condition monitors. However, this goal is unreachable in the previous example. Depending on the order of signals (lines 12 and 15) two cases can happen.
     
    506484Note that ordering is not determined by a race condition but by whether signalled threads are enqueued in FIFO or FILO order. However, regardless of the answer, users can move line 15 before line 11 and get the reverse effect.
    507485
    508 In both cases, the threads need to be able to distinguish, on a per monitor basis, which ones need to be released and which ones need to be transferred, which means monitors cannot be handled as a single homogenous group and therefore effectively precludes this approach.
     486In both cases, the threads need to be able to distinguish, on a per monitor basis, which ones need to be released and which ones need to be transferred, which means monitors cannot be handled as a single homogenous group and therefore invalidates the main benefit of this approach.
    509487
    510488\subsubsection{Dependency graphs}
    511 In the listing \ref{lst:int-bulk-pseudo} pseudo-code, there is a solution which satisfies both barging prevention and mutual exclusion. If ownership of both monitors is transferred to the waiter when the signaller releases \code{A & B} and then the waiter transfers back ownership of \code{A} when it releases it, then the problem is solved (\code{B} is no longer in use at this point). Dynamically finding the correct order is therefore the second possible solution. The problem it encounters is that it effectively boils down to resolving a dependency graph of ownership requirements. Here even the simplest of code snippets requires two transfers and it seems to increase in a manner closer to polynomial. For example, the following code, which is just a direct extension to three monitors, requires at least three ownership transfers and has multiple solutions:
     489In the Listing 1 pseudo-code, there is a solution which satisfies both barging prevention and mutual exclusion. If ownership of both monitors is transferred to the waiter when the signaller releases A and then the waiter transfers back ownership of A when it releases it, then the problem is solved. Dynamically finding the correct order is therefore the second possible solution. The problem it encounters is that it effectively boils down to resolving a dependency graph of ownership requirements. Here even the simplest of code snippets requires two transfers and it seems to increase in a manner closer to polynomial. For example, the following code, which is just a direct extension to three monitors, requires at least three ownership transfers and has multiple solutions:
    512490
    513491\begin{multicols}{2}
     
    536514
    537515\begin{figure}
     516\begin{multicols}{3}
     517Thread $\alpha$
     518\begin{pseudo}[numbers=left, firstnumber=1]
     519acquire A
     520        acquire A & B
     521                wait A & B
     522        release A & B
     523release A
     524\end{pseudo}
     525
     526\columnbreak
     527
     528Thread $\gamma$
     529\begin{pseudo}[numbers=left, firstnumber=1]
     530acquire A
     531        acquire A & B
     532                signal A & B
     533        release A & B
     534        signal A
     535release A
     536\end{pseudo}
     537
     538\columnbreak
     539
     540Thread $\beta$
     541\begin{pseudo}[numbers=left, firstnumber=1]
     542acquire A
     543        wait A
     544release A
     545\end{pseudo}
     546
     547\end{multicols}
     548\caption{Dependency graph}
     549\label{lst:dependency}
     550\end{figure}
     551
     552\begin{figure}
    538553\begin{center}
    539554\input{dependency}
    540555\end{center}
     556\label{fig:dependency}
    541557\caption{Dependency graph of the statements in listing \ref{lst:dependency}}
    542 \label{fig:dependency}
    543558\end{figure}
    544559
    545 Listing \ref{lst:dependency} is the three-thread example rewritten for dependency graphs. Figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows are the dependencies of that statement (e.g., $\alpha1$ must happen before $\alpha2$). The extra challenge is that this dependency graph is effectively post-mortem, but the runtime system needs to be able to build and solve these graphs as the dependency unfolds. Resolving dependency graphs being a complex and expensive endeavour, this solution is not the preferred one.
     560Listing \ref{lst:dependency} is the three-thread example rewritten for dependency graphs as well as the corresponding dependency graph. Figure \ref{fig:dependency} shows the corresponding dependency graph that results, where every node is a statement of one of the three threads, and the arrows are the dependencies of that statement. The extra challenge is that this dependency graph is effectively post-mortem, but the run time system needs to be able to build and solve these graphs as the dependency unfolds. Resolving dependency graphs being a complex and expensive endeavour, this solution is not the preferred one.
    546561
    547562\subsubsection{Partial signalling} \label{partial-sig}
    548 Finally, the solution that is chosen for \CFA is to use partial signalling. Again using listing \ref{lst:int-bulk-pseudo}, the partial signalling solution transfers ownership of monitor B at line 10 but does not wake the waiting thread since it is still using monitor A. Only when it reaches line 11 does it actually wake up the waiting thread. This solution has the benefit that complexity is encapsulated into only two actions: passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met. This solution has a much simpler implementation than a dependency graph solving algorithm, which is why it was chosen. Furthermore, after being fully implemented, this solution does not appear to have any downsides worth mentioning.
     563Finally, the solution that is chosen for \CFA is to use partial signalling. Consider the following case:
     564
     565\begin{multicols}{2}
     566\begin{pseudo}[numbers=left]
     567acquire A
     568        acquire A & B
     569                wait A & B
     570        release A & B
     571release A
     572\end{pseudo}
     573
     574\columnbreak
     575
     576\begin{pseudo}[numbers=left, firstnumber=6]
     577acquire A
     578        acquire A & B
     579                signal A & B
     580        release A & B
     581        //... More code
     582release A
     583\end{pseudo}
     584\end{multicols}
     585The partial signalling solution transfers ownership of monitor B at line 10 but does not wake the waiting thread since it is still using monitor A. Only when it reaches line 11 does it actually wake up the waiting thread. This solution has the benefit that complexity is encapsulated into only two actions: passing monitors to the next owner when they should be released and conditionally waking threads if all conditions are met. This solution has a much simpler implementation than a dependency graph solving algorithm, which is why it was chosen.
    549586
    550587% ======================================================================
     
    553590% ======================================================================
    554591% ======================================================================
     592An important note is that, until now, signalling a monitor was a delayed operation. The ownership of the monitor is transferred only when the monitor would have otherwise been released, not at the point of the \code{signal} statement. However, in some cases, it may be more convenient for users to immediately transfer ownership to the thread that is waiting for cooperation, which is achieved using the \code{signal_block} routine\footnote{name to be discussed}.
     593
     594The example in listing \ref{lst:datingservice} highlights the difference in behaviour. As mentioned, \code{signal} only transfers ownership once the current critical section exits; this behaviour causes the need for additional synchronisation when a two-way handshake is needed. To avoid this extraneous synchronisation, the \code{condition} type offers the \code{signal_block} routine, which handles two-way handshakes as shown in the example. This removes the need for a second condition variable and simplifies programming. Like every other monitor semantic, \code{signal_block} uses barging prevention, which means mutual-exclusion is baton-passed both on the front-end and the back-end of the call to \code{signal_block}, meaning no other thread can acquire the monitor either before or after the call.
    555595\begin{figure}
    556596\begin{tabular}{|c|c|}
     
    582622                girlPhoneNo = phoneNo;
    583623
    584                 //wake boy from chair
     624                //wake boy from chair
    585625                signal(exchange);
    586626        }
     
    629669                girlPhoneNo = phoneNo;
    630670
    631                 //wake boy from chair
     671                //wake boy from chair
    632672                signal(exchange);
    633673        }
     
    656696\label{lst:datingservice}
    657697\end{figure}
    658 An important note is that, until now, signalling a monitor was a delayed operation. The ownership of the monitor is transferred only when the monitor would have otherwise been released, not at the point of the \code{signal} statement. However, in some cases, it may be more convenient for users to immediately transfer ownership to the thread that is waiting for cooperation, which is achieved using the \code{signal_block} routine\footnote{name to be discussed}.
    659 
    660 The example in listing \ref{lst:datingservice} highlights the difference in behaviour. As mentioned, \code{signal} only transfers ownership once the current critical section exits; this behaviour requires additional synchronisation when a two-way handshake is needed. To avoid this extraneous synchronisation, the \code{condition} type offers the \code{signal_block} routine, which handles the two-way handshake as shown in the example. This removes the need for a second condition variable and simplifies programming. Like every other monitor semantic, \code{signal_block} uses barging prevention, which means mutual-exclusion is baton-passed both on the front-end and the back-end of the call to \code{signal_block}, meaning no other thread can acquire the monitor either before or after the call.
    661698
    662699% ======================================================================
     
    665702% ======================================================================
    666703% ======================================================================
    667 An alternative to internal scheduling is external scheduling, e.g., in \uC.
     704An alternative to internal scheduling is to use external scheduling.
    668705\begin{center}
    669 \begin{tabular}{|c|c|c|}
    670 Internal Scheduling & External Scheduling & Go\\
     706\begin{tabular}{|c|c|}
     707Internal Scheduling & External Scheduling \\
    671708\hline
    672 \begin{ucppcode}[tabsize=3]
     709\begin{ucppcode}
    673710_Monitor Semaphore {
    674711        condition c;
     
    676713public:
    677714        void P() {
    678                 if(inUse)
    679                         wait(c);
     715                if(inUse) wait(c);
    680716                inUse = true;
    681717        }
     
    685721        }
    686722}
    687 \end{ucppcode}&\begin{ucppcode}[tabsize=3]
     723\end{ucppcode}&\begin{ucppcode}
    688724_Monitor Semaphore {
    689725
     
    691727public:
    692728        void P() {
    693                 if(inUse)
    694                         _Accept(V);
     729                if(inUse) _Accept(V);
    695730                inUse = true;
    696731        }
     
    700735        }
    701736}
    702 \end{ucppcode}&\begin{gocode}[tabsize=3]
    703 type MySem struct {
    704         inUse bool
    705         c     chan bool
    706 }
    707 
    708 // acquire
    709 func (s MySem) P() {
    710         if s.inUse {
    711                 select {
    712                 case <-s.c:
    713                 }
    714         }
    715         s.inUse = true
    716 }
    717 
    718 // release
    719 func (s MySem) V() {
    720         s.inUse = false
    721 
    722         //This actually deadlocks
    723         //when single thread
    724         s.c <- false
    725 }
    726 \end{gocode}
     737\end{ucppcode}
    727738\end{tabular}
    728739\end{center}
    729 This method is more constrained and explicit, which helps users tone down the nondeterministic nature of concurrency. Indeed, as the following examples demonstrate, external scheduling allows users to wait for events from other threads without the concern of unrelated events occurring. External scheduling can generally be done either in terms of control flow (e.g., \uC with \code{_Accept}) or in terms of data (e.g., Go with channels). Of course, both of these paradigms have their own strengths and weaknesses, but for this project control-flow semantics were chosen to stay consistent with the rest of the language's semantics. Two challenges specific to \CFA arise when trying to add external scheduling with loose object definitions and multi-monitor routines. The previous example shows a simple use of \code{_Accept} versus \code{wait}/\code{signal} and its advantages. Note that while other languages often use \code{accept}/\code{select} as the core external scheduling keyword, \CFA uses \code{waitfor} to prevent name collisions with existing socket \acrshort{api}s.
    730 
    731 For the \code{P} member above using internal scheduling, the call to \code{wait} only guarantees that \code{V} is the last routine to access the monitor, allowing a third routine, say \code{isInUse()}, to acquire mutual exclusion several times while routine \code{P} is waiting. On the other hand, external scheduling guarantees that while routine \code{P} is waiting, no routine other than \code{V} can acquire the monitor.
     740This method is more constrained and explicit, which helps users tone down the nondeterministic nature of concurrency. Indeed, as the following examples demonstrate, external scheduling allows users to wait for events from other threads without the concern of unrelated events occurring. External scheduling can generally be done either in terms of control flow (e.g., \uC with \code{_Accept}) or in terms of data (e.g., Go with channels). Of course, both of these paradigms have their own strengths and weaknesses, but for this project control-flow semantics were chosen to stay consistent with the rest of the language's semantics. Two challenges specific to \CFA arise when trying to add external scheduling with loose object definitions and multi-monitor routines. The previous example shows a simple use of \code{_Accept} versus \code{wait}/\code{signal} and its advantages. Note that while other languages often use \code{accept}/\code{select} as the core external scheduling keyword, \CFA uses \code{waitfor} to prevent name collisions with existing socket \acrshort{api}s.
     741
     742In the case of internal scheduling, the call to \code{wait} only guarantees that \code{V} is the last routine to access the monitor. This entails that a third routine, say \code{isInUse()}, may have acquired mutual exclusion several times while routine \code{P} was waiting. On the other hand, external scheduling guarantees that while routine \code{P} was waiting, no routine other than \code{V} could acquire the monitor.
    732743
    733744% ======================================================================
     
    736747% ======================================================================
    737748% ======================================================================
    738 In \uC, monitor declarations include an exhaustive list of monitor operations. Since \CFA is not object oriented, monitors become both more difficult to implement and less clear for a user:
     749In \uC, monitor declarations include an exhaustive list of monitor operations. Since \CFA is not object oriented, it becomes both more difficult to implement and less clear for the user:
    739750
    740751\begin{cfacode}
     
    771782For the first two conditions, it is easy to implement a check that can evaluate the condition in a few instructions. However, a fast check for \pscode{monitor accepts me} is much harder to implement depending on the constraints put on the monitors. Indeed, monitors are often expressed as an entry queue and some acceptor queue as in the following figure:
    772783
    773 \begin{figure}[H]
    774784\begin{center}
    775785{\resizebox{0.4\textwidth}{!}{\input{monitor}}}
    776786\end{center}
    777 \label{fig:monitor}
    778 \end{figure}
    779 
    780 There are other alternatives to these pictures, but in the case of this picture, implementing a fast accept check is relatively easy. Restricted to a fixed number of mutex members, N, the accept check reduces to updating a bitmask when the acceptor queue changes, a check that executes in a single instruction even with a fairly large number (e.g., 128) of mutex members. This technique cannot be used in \CFA because it relies on the fact that the monitor type enumerates (declares) all the acceptable routines. For OO languages this does not compromise much since monitors already have an exhaustive list of member routines. However, for \CFA this is not the case; routines can be added to a type anywhere after its declaration. It is important to note that the bitmask approach does not actually require an exhaustive list of routines, but it requires a dense unique ordering of routines with an upper-bound and that ordering must be consistent across translation units.
    781 The alternative is to alter the implementation like this:
     787
     788There are other alternatives to these pictures, but in the case of this picture, implementing a fast accept check is relatively easy. Indeed, simply updating a bitmask when the acceptor queue changes is enough to have a check that executes in a single instruction, even with a fairly large number (e.g., 128) of mutex members. This technique cannot be used in \CFA because it relies on the fact that the monitor type declares all the acceptable routines. For OO languages this does not compromise much since monitors already have an exhaustive list of member routines. However, for \CFA this is not the case; routines can be added to a type anywhere after its declaration. It is important to note that the bitmask approach does not actually require an exhaustive list of routines, but it requires a dense unique ordering of routines with an upper-bound and that ordering must be consistent across translation units.
     789The alternative is to have a picture like this one:
    782790
    783791\begin{center}
     
    785793\end{center}
    786794
    787 Generating a mask dynamically means that the storage for the mask information can vary between calls to \code{waitfor}, allowing for more flexibility and extensions. Storing an array of accepted function-pointers replaces the single instruction bitmask compare with dereferencing a pointer followed by a linear search. Furthermore, supporting nested external scheduling (e.g., listing \ref{lst:nest-ext}) may now require additional searches on calls to the \code{waitfor} statement to check if a routine is already queued in.
    788 
    789 \begin{figure}
     795Not storing the mask inside the monitor means that the storage for the mask information can vary between calls to \code{waitfor}, allowing for more flexibility and extensions. Storing an array of function-pointers would solve the issue of uniquely identifying acceptable routines. However, the single instruction bitmask compare has been replaced by dereferencing a pointer followed by a linear search. Furthermore, supporting nested external scheduling may now require additional searches on calls to \code{waitfor} to check if a routine is already queued in.
     796
     797Note that in the second picture, tasks need to always keep track of the routine through which they are attempting to acquire the monitor, and the routine mask needs to have both a function pointer and a set of monitors, as will be discussed in the next section. These details were omitted from the picture for the sake of simplifying the representation.
     798
     799At this point we must make a decision between flexibility and performance. Many design decisions in \CFA achieve both flexibility and performance, for example polymorphic routines add significant flexibility but inlining them means the optimizer can easily remove any runtime cost. Here however, the cost of flexibility cannot be trivially removed. In the end, the most flexible approach has been chosen since it allows users to write programs that would otherwise be prohibitively hard to write. This decision is based on the assumption that writing fast but inflexible locks is closer to a solved problem than writing locks that are as flexible as external scheduling in \CFA.
     800
     801% ======================================================================
     802% ======================================================================
     803\subsection{Multi-monitor scheduling}
     804% ======================================================================
     805% ======================================================================
     806
     807External scheduling, like internal scheduling, becomes significantly more complex when introducing multi-monitor syntax. Even in the simplest possible case, some new semantics need to be established:
    790808\begin{cfacode}
    791809monitor M {};
    792 void foo( M & mutex a ) {}
    793 void bar( M & mutex b ) {
    794         //Nested in the waitfor(bar, c) call
    795         waitfor(foo, b);
    796 }
    797 void baz( M & mutex c ) {
    798         waitfor(bar, c);
    799 }
    800 
    801 \end{cfacode}
    802 \caption{Example of nested external scheduling}
    803 \label{lst:nest-ext}
    804 \end{figure}
    805 
    806 Note that in the second picture, tasks need to always keep track of the routine through which they are attempting to acquire the monitor, and the routine mask needs to have both a function pointer and a set of monitors, as will be discussed in the next section. These details were omitted from the picture for the sake of simplifying the representation.
    807 
    808 At this point, a decision must be made between flexibility and performance. Many design decisions in \CFA achieve both flexibility and performance, for example polymorphic routines add significant flexibility but inlining them means the optimizer can easily remove any runtime cost. Here however, the cost of flexibility cannot be trivially removed. In the end, the most flexible approach has been chosen since it allows users to write programs that would otherwise be prohibitively hard to write. This decision is based on the assumption that writing fast but inflexible locks is closer to a solved problem than writing locks that are as flexible as external scheduling in \CFA.
    809 
    810 % ======================================================================
    811 % ======================================================================
    812 \subsection{Multi-monitor scheduling}
    813 % ======================================================================
    814 % ======================================================================
    815 
    816 External scheduling, like internal scheduling, becomes significantly more complex when introducing multi-monitor syntax. Even in the simplest possible case, some new semantics need to be established:
    817 \begin{cfacode}
    818 monitor M {};
    819810
    820811void f(M & mutex a);
    821812
    822 void g(M & mutex b, M & mutex c) {
    823         waitfor(f); //two monitors M => unknown which to pass to f(M & mutex)
     813void g(M & mutex a, M & mutex b) {
     814        waitfor(f); //ambiguous, keep a pass b or other way around?
    824815}
    825816\end{cfacode}
     
    837828\end{cfacode}
    838829
    839 This syntax is unambiguous. Both locks are acquired and kept by \code{g}. When routine \code{f} is called, the lock for monitor \code{b} is temporarily transferred from \code{g} to \code{f} (while \code{g} still holds lock \code{a}). This behavior can be extended to multi-monitor \code{waitfor} statement as follows.
     830This syntax is unambiguous. Both locks are acquired and kept. When routine \code{f} is called, the lock for monitor \code{b} is temporarily transferred from \code{g} to \code{f} (while \code{g} still holds lock \code{a}). This behavior can be extended to the multi-monitor \code{waitfor} statement as follows.
    840831
    841832\begin{cfacode}
     
    851842Note that the set of monitors passed to the \code{waitfor} statement must be entirely contained in the set of monitors already acquired in the routine. \code{waitfor} used in any other context is Undefined Behaviour.
    852843
    853 An important behavior to note is when a set of monitors only match partially :
     844An important behavior to note is what happens when a set of monitors only matches partially:
    854845
    855846\begin{cfacode}
     
    874865\end{cfacode}
    875866
    876 While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables. In both cases, partially matching monitor sets does not wake-up the waiting thread. It is also important to note that in the case of external scheduling, as for routine calls, the order of parameters is irrelevant; \code{waitfor(f,a,b)} and \code{waitfor(f,b,a)} are indistinguishable waiting conditions.
     867While the equivalent can happen when using internal scheduling, the fact that conditions are specific to a set of monitors means that users have to use two different condition variables. In both cases, partially matching monitor sets does not wake-up the waiting thread. It is also important to note that in the case of external scheduling, as for routine calls, the order of parameters is important; \code{waitfor(f,a,b)} and \code{waitfor(f,b,a)} are two distinct waiting conditions.
    877868
    878869% ======================================================================
     
    882873% ======================================================================
    883874
    884 Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. While the set of monitors can be any list of expressions, the function name is more restricted because the compiler validates at compile time the validity of the function type and the parameters used with the \code{waitfor} statement. It checks that the set of monitors passed in matches the requirements for a function call. Listing \ref{lst:waitfor} shows various usages of the \code{waitfor} statement and which are acceptable. The choice of the function type is made ignoring any non-\code{mutex} parameter. One limitation of the current implementation is that it does not handle overloading.
     875Syntactically, the \code{waitfor} statement takes a function identifier and a set of monitors. While the set of monitors can be any list of expressions, the function name is more restricted. This is because the compiler validates at compile time the validity of the \code{waitfor} statement. It checks that the set of monitors passed in matches the requirements for a function call. Listing \ref{lst:waitfor} shows various usages of the \code{waitfor} statement and which are acceptable. The choice of the function type is made ignoring any non-\code{mutex} parameter. One limitation of the current implementation is that it does not handle overloading.
    885876\begin{figure}
    886877\begin{cfacode}
     
    907898        waitfor(f2, a1, a2); //Incorrect : Mutex arguments don't match
    908899        waitfor(f1, 1);      //Incorrect : 1 not a mutex argument
    909         waitfor(f9, a1);     //Incorrect : f9 function does not exist
    910         waitfor(*fp, a1 );   //Incorrect : fp not an identifier
     900        waitfor(f9, a1);     //Incorrect : f9 not a function
     901        waitfor(*fp, a1 );   //Incorrect : fp not an identifier
    911902        waitfor(f4, a1);     //Incorrect : f4 ambiguous
    912903
     
    918909\end{figure}
    919910
    920 Finally, for added flexibility, \CFA supports constructing complex \code{waitfor} masks using \code{or}, \code{timeout} and \code{else}. Indeed, multiple \code{waitfor} can be chained together using \code{or}; this chain forms a single statement that uses baton-pass to any one function that fits one of the function+monitor set passed in. To enable users to tell which function was accepted, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement. When multiple \code{waitfor} are chained together, only the statement corresponding to the accepted function is executed. A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, that is, only check if a matching function call has already arrived and return immediately otherwise. Any and all of these clauses can be preceded by a \code{when} condition to dynamically construct the mask based on some current state. Listing \ref{lst:waitfor2} demonstrates several complex masks and some incorrect ones.
     911Finally, for added flexibility, \CFA supports constructing complex \code{waitfor} masks using \code{or}, \code{timeout} and \code{else}. Indeed, multiple \code{waitfor} can be chained together using \code{or}; this chain will form a single statement which will baton-pass to any one function that fits one of the function+monitor set which was passed in. To enable users to tell which was the accepted function, \code{waitfor}s are followed by a statement (including the null statement \code{;}) or a compound statement. When multiple \code{waitfor} are chained together, only the statement corresponding to the accepted function is executed. A \code{waitfor} chain can also be followed by a \code{timeout}, to signify an upper bound on the wait, or an \code{else}, to signify that the call should be non-blocking, that is, only check if a matching function call has already arrived and return immediately otherwise. Any and all of these clauses can be preceded by a \code{when} condition to dynamically construct the mask based on some current state. Listing \ref{lst:waitfor2} demonstrates several complex masks and some incorrect ones.
    921912
    922913\begin{figure}
     
    982973\label{lst:waitfor2}
    983974\end{figure}
    984 
    985 % ======================================================================
    986 % ======================================================================
    987 \subsection{Waiting for the destructor}
    988 % ======================================================================
    989 % ======================================================================
    990 An interesting use for the \code{waitfor} statement is destructor semantics. Indeed, the \code{waitfor} statement can accept any \code{mutex} routine, which includes the destructor (see section \ref{data}). However, with the semantics discussed until now, waiting for the destructor does not make any sense since using an object after its destructor is called is undefined behaviour. The simplest approach is to disallow \code{waitfor} on a destructor. However, a more expressive approach is to flip execution ordering when waiting for the destructor, meaning that waiting for the destructor allows the destructor to run after the current \code{mutex} routine, similarly to how a condition is signalled.
    991 \begin{figure}
    992 \begin{cfacode}
    993 monitor Executer {};
    994 struct  Action;
    995 
    996 void ^?{}   (Executer & mutex this);
    997 void execute(Executer & mutex this, const Action & );
    998 void run    (Executer & mutex this) {
    999         while(true) {
    1000                    waitfor(execute, this);
    1001                 or waitfor(^?{}   , this) {
    1002                         break;
    1003                 }
    1004         }
    1005 }
    1006 \end{cfacode}
    1007 \caption{Example of an executor which executes actions in series until the destructor is called.}
    1008 \label{lst:dtor-order}
    1009 \end{figure}
    1010 For example, listing \ref{lst:dtor-order} shows an example of an executor with an infinite loop, which waits for the destructor to break out of this loop. Switching the semantic meaning introduces an idiomatic way to terminate a task and/or wait for its termination via destruction.
  • doc/proposals/concurrency/text/future.tex

    r0fe4e62 rf5c3b6c  
    55% ======================================================================
    66
    7 \section{Flexible Scheduling} \label{futur:sched}
    8 An important part of concurrency is scheduling. Different scheduling algorithms can affect performance (both in terms of average and variation). However, no single scheduler is optimal for all workloads and therefore there is value in being able to change the scheduler for given programs. One solution is to offer various tweaking options to users, allowing the scheduler to be adjusted to the requirements of the workload. However, in order to be truly flexible, it would be interesting to allow users to add arbitrary data and arbitrary scheduling algorithms to the scheduler. For example, a web server could attach Type-of-Service information to threads and have a ``ToS aware'' scheduling algorithm tailored to this specific web server. This path of flexible schedulers will be explored for \CFA.
    9 
    10 \section{Non-Blocking IO} \label{futur:nbio}
    11 While most of the parallelism tools
    12 However, many modern workloads are not bound on computation but on IO operations, a common case being webservers and XaaS (anything as a service). These types of workloads often require significant engineering around amortising costs of blocking IO operations. While improving throughput of these operations is outside what \CFA can do as a language, it can help users to make better use of the CPU time otherwise spent waiting on IO operations. The current trend is to use asynchronous programming using tools like callbacks and/or futures and promises\cite{promises}. However, while these are valid solutions, they lead to code that is harder to read and maintain because it is much less linear.
    13 
    14 \section{Other concurrency tools} \label{futur:tools}
    15 While monitors offer a flexible and powerful concurrent core for \CFA, other concurrency tools are also necessary for a complete multi-paradigm concurrency package. Examples of such tools can include simple locks and condition variables, futures and promises\cite{promises}, and executors. These additional features are useful when monitors offer a level of abstraction which is inadequate for certain tasks.
    16 
    17 \section{Implicit threading} \label{futur:implcit}
    18 Simpler applications can benefit greatly from having implicit parallelism. That is, parallelism that does not rely on the user to write concurrency. This type of parallelism can be achieved both at the language level and at the library level. The canonical example of implicit parallelism is parallel for loops, which are the simplest example of a divide and conquer algorithm\cite{uC++book}. Listing \ref{lst:parfor} shows three different code examples that accomplish pointwise sums of large arrays. Note that none of these examples explicitly declare any concurrency or parallelism objects.
    19 
    20 \begin{figure}
    21 \begin{center}
    22 \begin{tabular}[t]{|c|c|c|}
    23 Sequential & Library Parallel & Language Parallel \\
    24 \begin{cfacode}[tabsize=3]
    25 void big_sum(
    26         int* a, int* b,
    27         int* o,
    28         size_t len)
    29 {
    30         for(
    31                 int i = 0;
    32                 i < len;
    33                 ++i )
    34         {
    35                 o[i]=a[i]+b[i];
    36         }
    37 }
     7Concurrency and parallelism are still very active fields that strongly benefit from hardware advances. As such, certain features that aren't necessarily mature enough in their current state could become relevant in the lifetime of \CFA.
     8\section{Non-Blocking IO}
    389
    3910
     11\section{Other concurrency tools}
    4012
    4113
    42 
    43 int* a[10000];
    44 int* b[10000];
    45 int* c[10000];
    46 //... fill in a & b
    47 big_sum(a,b,c,10000);
    48 \end{cfacode} &\begin{cfacode}[tabsize=3]
    49 void big_sum(
    50         int* a, int* b,
    51         int* o,
    52         size_t len)
    53 {
    54         range ar(a, a+len);
    55         range br(b, b+len);
    56         range or(o, o+len);
    57         parfor( ai, bi, oi,
    58         [](     int* ai,
    59                 int* bi,
    60                 int* oi)
    61         {
    62                 oi=ai+bi;
    63         });
    64 }
     14\section{Implicit threading}
     15% Finally, simpler applications can benefit greatly from having implicit parallelism. That is, parallelism that does not rely on the user to write concurrency. This type of parallelism can be achieved both at the language level and at the system level.
     16%
     17% \begin{center}
     18% \begin{tabular}[t]{|c|c|c|}
     19% Sequential & System Parallel & Language Parallel \\
     20% \begin{lstlisting}
     21% void big_sum(int* a, int* b,
     22%                int* out,
     23%                size_t length)
     24% {
     25%       for(int i = 0; i < length; ++i ) {
     26%               out[i] = a[i] + b[i];
     27%       }
     28% }
     29%
     30%
     31%
     32%
     33%
     34% int* a[10000];
     35% int* b[10000];
     36% int* c[10000];
     37% //... fill in a and b ...
     38% big_sum(a, b, c, 10000);
     39% \end{lstlisting} &\begin{lstlisting}
     40% void big_sum(int* a, int* b,
     41%                int* out,
     42%                size_t length)
     43% {
     44%       range ar(a, a + length);
     45%       range br(b, b + length);
     46%       range or(out, out + length);
     47%       parfor( ai, bi, oi,
     48%       [](int* ai, int* bi, int* oi) {
     49%               oi = ai + bi;
     50%       });
     51% }
     52%
     53% int* a[10000];
     54% int* b[10000];
     55% int* c[10000];
     56% //... fill in a and b ...
     57% big_sum(a, b, c, 10000);
     58% \end{lstlisting}&\begin{lstlisting}
     59% void big_sum(int* a, int* b,
     60%                int* out,
     61%                size_t length)
     62% {
     63%       for (ai, bi, oi) in (a, b, out) {
     64%               oi = ai + bi;
     65%       }
     66% }
     67%
     68%
     69%
     70%
     71%
     72% int* a[10000];
     73% int* b[10000];
     74% int* c[10000];
     75% //... fill in a and b ...
     76% big_sum(a, b, c, 10000);
     77% \end{lstlisting}
     78% \end{tabular}
     79% \end{center}
     80%
    6581
    6682
    67 int* a[10000];
    68 int* b[10000];
    69 int* c[10000];
    70 //... fill in a & b
    71 big_sum(a,b,c,10000);
    72 \end{cfacode}&\begin{cfacode}[tabsize=3]
    73 void big_sum(
    74         int* a, int* b,
    75         int* o,
    76         size_t len)
    77 {
    78         parfor (ai,bi,oi)
    79             in (a, b, o )
    80         {
    81                 oi = ai + bi;
    82         }
    83 }
     83\section{Multiple Paradigms}
    8484
    8585
    86 
    87 
    88 
    89 
    90 
    91 int* a[10000];
    92 int* b[10000];
    93 int* c[10000];
    94 //... fill in a & b
    95 big_sum(a,b,c,10000);
    96 \end{cfacode}
    97 \end{tabular}
    98 \end{center}
    99 \caption{For loop to sum numbers: Sequential, using library parallelism and language parallelism.}
    100 \label{lst:parfor}
    101 \end{figure}
    102 
    103 Implicit parallelism is a general solution and therefore has its limitations. However, it is a quick and simple approach to parallelism which may very well be sufficient for smaller applications and reduces the amount of boiler-plate that is needed to start benefiting from parallelism in modern CPUs.
    104 
    105 
     86\section{Transactions}
  • doc/proposals/concurrency/text/internals.tex

    r0fe4e62 rf5c3b6c  
    11
    22\chapter{Behind the scene}
    3 There are several challenges specific to \CFA when implementing concurrency. These challenges are a direct result of \gls{bulk-acq} and loose object-definitions. These two constraints are the root cause of most design decisions in the implementation. Furthermore, to avoid contention from dynamically allocating memory in a concurrent environment, the internal-scheduling design is (almost) entirely free of mallocs. This is to avoid the chicken and egg problem \cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal, means that memory management is a constant concern in the design of the system.
    43
    5 The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues. The queue design needs to be intrusive\cite{IntrusiveData} to avoid the need for memory allocation, which entails that all the nodes need specific fields to keep track of all needed information. Since many concurrency operations can use an unbound amount of memory (depending on \gls{bulk-acq}), statically defining information in the intrusive fields of threads is insufficient. The only variable sized container that does not require memory allocation is the callstack, which is heavily used in the implementation of internal scheduling. Particularly variable length arrays, which are used extensively.
    6 
    7 Since stack allocation is based around scope, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable length. The threads and the condition both allow a fixed amount of memory to be stored, while mutex-routines and the actual blocking call allow for an unbound amount (though the latter is preferable in terms of performance).
    8 
    9 Note that since the major contributions of this thesis are extending monitor semantics to \gls{bulk-acq} and loose object definitions, any challenges that do not result from these characteristics of \CFA are considered solved problems and are therefore not discussed further.
    104
    115% ======================================================================
    126% ======================================================================
    13 \section{Mutex routines}
     7\section{Implementation Details: Interaction with polymorphism}
    148% ======================================================================
    159% ======================================================================
     10Depending on the choice of semantics for when monitor locks are acquired, interaction between monitors and \CFA's concept of polymorphism can be complex to support. However, it is shown that entry-point locking solves most of the issues.
    1611
    17 The first step towards the monitor implementation is simple mutex-routines using monitors. In the single monitor case, this is done using the entry/exit procedure highlighted in listing \ref{lst:entry1}. This entry/exit procedure does not actually have to be extended to support multiple monitors, indeed it is sufficient to enter/leave monitors one-by-one as long as the order is correct to prevent deadlocks\cite{Havender68}. In \CFA, ordering of monitors relies on memory ordering, this is sufficient because all objects are guaranteed to have distinct non-overlapping memory layouts and mutual-exclusion for a monitor is only defined for its lifetime, meaning that destroying a monitor while it is acquired is undefined behavior. When a mutex call is made, the concerned monitors are aggregated into a variable-length pointer array and sorted based on pointer values. This array persists for the entire duration of the mutual-exclusion and its ordering is reused extensively.
     12First of all, interaction between \code{otype} polymorphism and monitors is impossible since monitors do not support copying. Therefore, the main question is how to support \code{dtype} polymorphism. Since a monitor's main purpose is to ensure mutual exclusion when accessing shared data, this implies that mutual exclusion is only required for routines that do in fact access shared data. However, since \code{dtype} polymorphism always handles incomplete types (by definition), no \code{dtype} polymorphic routine can access shared data since the data requires knowledge about the type. Therefore, the only concern when combining \code{dtype} polymorphism and monitors is to protect access to routines.
     13
     14Before looking into complex control-flow, it is important to present the difference between the two acquiring options : callsite and entry-point locking, i.e. acquiring the monitors before making a mutex routine call or as the first operation of the mutex routine-call. For example:
    1815\begin{figure}
    19 \begin{multicols}{2}
    20 Entry
    21 \begin{pseudo}
    22 if monitor is free
    23         enter
    24 elif already own the monitor
    25         continue
    26 else
    27         block
    28 increment recursions
    29 \end{pseudo}
    30 \columnbreak
    31 Exit
    32 \begin{pseudo}
    33 decrement recursion
    34 if recursion == 0
    35         if entry queue not empty
    36                 wake-up thread
    37 \end{pseudo}
    38 \end{multicols}
    39 \caption{Initial entry and exit routine for monitors}
    40 \label{lst:entry1}
    41 \end{figure}
    42 
    43 \subsection{ Details: Interaction with polymorphism}
    44 Depending on the choice of semantics for when monitor locks are acquired, interaction between monitors and \CFA's concept of polymorphism can be more complex to support. However, it is shown that entry-point locking solves most of the issues.
    45 
    46 First of all, interaction between \code{otype} polymorphism and monitors is impossible since monitors do not support copying. Therefore, the main question is how to support \code{dtype} polymorphism. It is important to present the difference between the two acquiring options : callsite and entry-point locking, i.e. acquiring the monitors before making a mutex routine call or as the first operation of the mutex routine-call. For example:
    47 \begin{figure}[H]
     16\label{fig:locking-site}
    4817\begin{center}
     18\setlength\tabcolsep{1.5pt}
    4919\begin{tabular}{|c|c|c|}
    5020Mutex & \gls{callsite-locking} & \gls{entry-point-locking} \\
     
    9666\end{tabular}
    9767\end{center}
    98 \caption{Call-site vs entry-point locking for mutex calls}
    99 \label{fig:locking-site}
     68\caption{Callsite vs entry-point locking for mutex calls}
    10069\end{figure}
    10170
    102 Note the \code{mutex} keyword relies on the type system, which means that in cases where a generic monitor routine is desired, writing the mutex routine is possible with the proper trait, for example:
     71
     72Note the \code{mutex} keyword relies on the type system, which means that in cases where a generic monitor routine is actually desired, writing a mutex routine is possible with the proper trait, which is possible because monitors are designed in terms of a trait. For example:
    10373\begin{cfacode}
    104 //Incorrect: T may not be monitor
     74//Incorrect: T is not a monitor
    10575forall(dtype T)
    10676void foo(T * mutex t);
     
    11181\end{cfacode}
    11282
    113 Both entry-point and callsite locking are feasible implementations. The current \CFA implementation uses entry-point locking because it requires less work when using \gls{raii}, effectively transferring the burden of implementation to object construction/destruction. The same could be said of callsite locking, the difference being that the latter does not necessarily have an existing scope that matches exactly the scope of the mutual exclusion, i.e.: the function body. Furthermore, entry-point locking requires less code generation since any useful routine is called at least as often as it is defined, there can be only one entry-point but many callsites.
    11483
    11584% ======================================================================
    11685% ======================================================================
    117 \section{Threading} \label{impl:thread}
     86\section{Internal scheduling: Implementation} \label{inschedimpl}
    11887% ======================================================================
    11988% ======================================================================
     89There are several challenges specific to \CFA when implementing internal scheduling. These challenges are direct results of \gls{bulk-acq} and loose object definitions. These two constraints are the root cause of most design decisions in the implementation of internal scheduling. Furthermore, to avoid the headaches of dynamically allocating memory in a concurrent environment, the internal-scheduling design is entirely free of mallocs and other dynamic memory allocation schemes. This is to avoid the chicken and egg problem \cite{Chicken} of having a memory allocator that relies on the threading system and a threading system that relies on the runtime. This extra goal, means that memory management is a constant concern in the design of the system.
    12090
    121 Figure \ref{fig:system1} shows a high-level picture of the \CFA runtime system in regards to concurrency. Each component of the picture is explained in detail in the following sections.
     91The main memory concern for concurrency is queues. All blocking operations are made by parking threads onto queues. These queues need to be intrusive\cit to avoid the need for memory allocation. This entails that all the nodes need specific fields to keep track of all needed information. Since internal scheduling can use an unbound amount of memory (depending on \gls{bulk-acq}), statically defining information in the intrusive fields of threads is insufficient. The only variable sized container that does not require memory allocation is the callstack, which is heavily used in the implementation of internal scheduling. Particularly the GCC extension variable length arrays which is used extensively.
    12292
    123 \begin{figure}
    124 \begin{center}
    125 {\resizebox{\textwidth}{!}{\input{system.pstex_t}}}
    126 \end{center}
    127 \caption{Overview of the entire system}
    128 \label{fig:system1}
    129 \end{figure}
     93Since stack allocation is based around scope, the first step of the implementation is to identify the scopes that are available to store the information, and which of these can have a variable length. In the case of external scheduling, the threads and the condition both allow a fixed amount of memory to be stored, while mutex-routines and the actual blocking call allow for an unbound amount (though adding too much to the mutex routine stack size can become expensive faster).
    13094
    131 \subsection{Context Switching}
    132 As mentioned in section \ref{coroutine}, coroutines are a stepping stone for implementing threading. This is because they share the same mechanism for context-switching between different stacks. To improve performance and simplicity, context-switching is implemented using the following assumption: all context-switches happen inside a specific function call. This assumption means that the context-switch only has to copy the callee-saved registers onto the stack and then switch the stack registers with the ones of the target coroutine/thread. Note that the instruction pointer can be left untouched since the context-switch is always inside the same function. Threads however do not context-switch between each other directly. They context-switch to the scheduler. This method is called a 2-step context-switch and has the advantage of having a clear distinction between user code and the kernel where scheduling and other system operations happen. Obviously, this has the cost of doubling the context-switch cost because threads must context-switch to an intermediate stack. However, the performance of the 2-step context-switch is still superior to a \code{pthread_yield} (see section \ref{results}). Additionally, for users in need of optimal performance, it is important to note that having a 2-step context-switch as the default does not prevent \CFA from offering a 1-step context-switch to use manually (or as part of monitors). This option is not currently present in \CFA but the changes required to add it are strictly additive.
     95The following figure is the traditional illustration of a monitor :
    13396
    134 \subsection{Processors}
    135 Parallelism in \CFA is built around using processors to specify how much parallelism is desired. \CFA processors are object wrappers around kernel threads, specifically pthreads in the current implementation of \CFA. Indeed, any parallelism must go through operating-system libraries. However, \glspl{uthread} are still the main source of concurrency, processors are simply the underlying source of parallelism. Indeed, processor \glspl{kthread} simply fetch a \glspl{uthread} from the scheduler and run, they are effectively executors for user-threads. The main benefit of this approach is that it offers a well defined boundary between kernel code and user code, for example, kernel thread quiescing, scheduling and interrupt handling. Processors internally use coroutines to take advantage of the existing context-switching semantics.
    136 
    137 \subsection{Stack management}
    138 One of the challenges of this system is to reduce the footprint as much as possible. Specifically, all pthreads created also have a stack created with them, which should be used as much as possible. Normally, coroutines also create their own stack to run on, however, in the case of the coroutines used for processors, these coroutines run directly on the kernel thread stack, effectively stealing the processor stack. The exception to this rule is the Main Processor, i.e. the initial kernel thread that is given to any program. In order to respect user expectations, the stack of the initial kernel thread, the main stack of the program, is used by the main user thread rather than the main processor.
    139 
    140 \subsection{Preemption} \label{preemption}
    141 Finally, an important aspect for any complete threading system is preemption. As mentioned in chapter \ref{basics}, preemption introduces an extra degree of uncertainty, which enables users to have multiple threads interleave transparently, rather than having to cooperate among threads for proper scheduling and CPU distribution. Indeed, preemption is desirable because it adds a degree of isolation among threads. In a fully cooperative system, any thread that runs into a long loop can starve other threads, while in a preemptive system starvation can still occur but it does not rely on every thread having to yield or block on a regular basis, which significantly reduces the programmer's burden. Obviously, preemption is not optimal for every workload, however any preemptive system can become a cooperative system by making the time-slices extremely large, which is why \CFA uses a preemptive threading system.
    142 
    143 Preemption in \CFA is based on kernel timers, which are used to run a discrete-event simulation. Every processor keeps track of the current time and registers an expiration time with the preemption system. When the preemption system receives a change in preemption, it sorts these expiration times in a list and sets a kernel timer for the closest one, effectively stepping between preemption events on each signal sent by the timer. These timers use the linux signal {\tt SIGALRM}, which is delivered to the process rather than the kernel-thread. This results in an implementation problem, because when delivering signals to a process, the kernel documentation states that the signal can be delivered to any kernel thread for which the signal is not blocked i.e. :
    144 \begin{quote}
    145 A process-directed signal may be delivered to any one of the threads that does not currently have the signal blocked. If more than one of the threads has the signal unblocked, then the kernel chooses an arbitrary thread to which to deliver the signal.
    146 SIGNAL(7) - Linux Programmer's Manual
    147 \end{quote}
    148 For the sake of simplicity and in order to prevent the case of having two threads receiving alarms simultaneously, \CFA programs block the {\tt SIGALRM} signal on every thread except one. Now because of how involuntary context-switches are handled, the kernel thread handling {\tt SIGALRM} cannot also be a processor thread.
    149 
    150 Involuntary context-switching is done by sending signal {\tt SIGUSR1} to the corresponding processor and having the thread yield from inside the signal handler. This effectively context-switches away from the signal-handler back to the kernel, and the signal-handler frame is eventually unwound when the thread is scheduled again. This approach means that a signal-handler can start on one kernel thread and terminate on a second kernel thread (but the same user thread). It is important to note that signal-handlers save and restore signal masks because user-thread migration can cause signal mask to migrate from one kernel thread to another. This behaviour is only a problem if all kernel threads among which a user thread can migrate differ in terms of signal masks\footnote{Sadly, official POSIX documentation is silent on what distinguishes ``async-signal-safe'' functions from other functions}. However, since the kernel thread handling preemption requires a different signal mask, executing user threads on the kernel alarm thread can cause deadlocks. For this reason, the alarm thread is on a tight loop around a system call to \code{sigwaitinfo}, requiring very little CPU time for preemption. One final detail about the alarm thread is how to wake it when additional communication is required (e.g., on thread termination). This unblocking is also done using {\tt SIGALRM}, but sent through the \code{pthread_sigqueue}. Indeed, \code{sigwait} can differentiate signals sent from \code{pthread_sigqueue} from signals sent from alarms or the kernel.
    151 
    152 \subsection{Scheduler}
    153 Finally, an aspect that was not mentioned yet is the scheduling algorithm. Currently, the \CFA scheduler uses a single ready queue for all processors, which is the simplest approach to scheduling. Further discussion on scheduling is present in section \ref{futur:sched}.
    154 
    155 % ======================================================================
    156 % ======================================================================
    157 \section{Internal scheduling} \label{impl:intsched}
    158 % ======================================================================
    159 % ======================================================================
    160 The following figure is the traditional illustration of a monitor (repeated from page~\pageref{fig:monitor} for convenience) :
    161 
    162 \begin{figure}[H]
    16397\begin{center}
    16498{\resizebox{0.4\textwidth}{!}{\input{monitor}}}
    16599\end{center}
    166 \caption{Traditional illustration of a monitor}
    167 \label{fig:monitor}
    168 \end{figure}
    169100
    170 This picture has several components, the two most important being the entry-queue and the AS-stack. The entry-queue is an (almost) FIFO list where threads waiting to enter are parked, while the acceptor-signalor (AS) stack is a FILO list used for threads that have been signalled or otherwise marked as running next.
     101For \CFA, the previous picture does not have support for blocking multiple monitors on a single condition. To support \gls{bulk-acq} two changes to this picture are required. First, it doesn't make sense to tie the condition to a single monitor since blocking two monitors as one would require arbitrarily picking a monitor to hold the condition. Secondly, the object waiting on the conditions and AS-stack cannot simply contain the waiting thread since a single thread can potentially wait on multiple monitors. As mentioned in section \ref{inschedimpl}, the handling in multiple monitors is done by partially passing, which entails that each concerned monitor needs to have a node object. However, for waiting on the condition, since all threads need to wait together, a single object needs to be queued in the condition. Moving out the condition and updating the node types yields :
    171102
    172 For \CFA, this picture does not have support for blocking multiple monitors on a single condition. To support \gls{bulk-acq} two changes to this picture are required. First, it is no longer helpful to attach the condition to a single monitor. Secondly, the thread waiting on the conditions has to be separated across multiple monitors, which yields :
    173 
    174 \begin{figure}[H]
    175103\begin{center}
    176104{\resizebox{0.8\textwidth}{!}{\input{int_monitor}}}
    177105\end{center}
    178 \caption{Illustration of \CFA monitor}
    179 \label{fig:monitor_cfa}
    180 \end{figure}
    181106
    182 This picture and the proper entry and leave algorithms are the fundamental implementation of internal scheduling (see listing \ref{lst:entry2}). Note that when threads are moved from the condition to the AS-stack, it splits the thread into two pieces. The thread is woken up when all the pieces have moved from the AS-stacks to the active thread seat. In this picture, the threads are split into halves but this is only because there are two monitors in this picture. For a specific signaling operation every monitor needs a piece of thread on its AS-stack.
     107\newpage
    183108
    184 \begin{figure}[b]
     109This picture and the proper entry and leave algorithms is the fundamental implementation of internal scheduling.
     110
    185111\begin{multicols}{2}
    186112Entry
    187 \begin{pseudo}
     113\begin{pseudo}[numbers=left]
    188114if monitor is free
    189115        enter
    190 elif already own the monitor
     116elif I already own the monitor
    191117        continue
    192118else
     
    197123\columnbreak
    198124Exit
    199 \begin{pseudo}
     125\begin{pseudo}[numbers=left, firstnumber=8]
    200126decrement recursion
    201127if recursion == 0
     
    209135\end{pseudo}
    210136\end{multicols}
    211 \caption{Entry and exit routine for monitors with internal scheduling}
    212 \label{lst:entry2}
    213 \end{figure}
    214137
    215 Some important things to notice about the exit routine. The solution discussed in \ref{intsched} can be seen in the exit routine of listing \ref{lst:entry2}. Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. This solution is deadlock safe as well as preventing any potential barging. The data structures used for the AS-stack are reused extensively for external scheduling, but in the case of internal scheduling, the data is allocated using variable-length arrays on the callstack of the \code{wait} and \code{signal_block} routines.
    216 
    217 \begin{figure}[H]
    218 \begin{center}
    219 {\resizebox{0.8\textwidth}{!}{\input{monitor_structs.pstex_t}}}
    220 \end{center}
    221 \caption{Data structures involved in internal/external scheduling}
    222 \label{fig:structs}
    223 \end{figure}
    224 
    225 Figure \ref{fig:structs} shows a high level representation of these data-structures. The main idea behind them is that, while figure \ref{fig:monitor_cfa} is a nice illustration in theory, in practice breaking a thread into multiple pieces to put onto intrusive stacks does not make sense. The \code{condition node} is the data structure that is queued into a condition variable and, when signaled, the condition queue is popped and each \code{condition criterion} is moved to the AS-stack. Once all the criteria have been popped from their respective AS-stacks, the thread is woken-up, which is what is shown in listing \ref{lst:entry2}.
     138Some important things to notice about the exit routine. The solution discussed in \ref{inschedimpl} can be seen on line 11 of the previous pseudo code. Basically, the solution boils down to having a separate data structure for the condition queue and the AS-stack, and unconditionally transferring ownership of the monitors but only unblocking the thread when the last monitor has transferred ownership. This solution is safe as well as preventing any potential barging.
    226139
    227140% ======================================================================
    228141% ======================================================================
    229 \section{External scheduling}
     142\section{Implementation Details: External scheduling queues}
    230143% ======================================================================
    231144% ======================================================================
    232 Similarly to internal scheduling, external scheduling for multiple monitors relies on the idea that waiting-thread queues are no longer specific to a single monitor, as mentioned in section \ref{extsched}. For internal scheduling, these queues are part of condition variables which are still unique for a given scheduling operation (e.g., no single statement uses multiple conditions). However, in the case of external scheduling, there is no equivalent object which is associated with \code{waitfor} statements. This absence means the queues holding the waiting threads must be stored inside at least one of the monitors that is acquired. The monitors being the only objects that have sufficient lifetime and are available on both sides of the \code{waitfor} statement. This requires an algorithm to choose which monitor holds the relevant queue. It is also important that said algorithm be independent of the order in which users list parameters. The proposed algorithm is to fall back on monitor lock ordering and specify that the monitor that is acquired first is the one with the relevant waiting queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint.
     145To support multi-monitor external scheduling means that some kind of entry-queues must be used that is aware of both monitors. However, acceptable routines must be aware of the entry queues which means they must be stored inside at least one of the monitors that will be acquired. This in turn adds the requirement of a systematic algorithm for disambiguating which queue is relevant regardless of user ordering. The proposed algorithm is to fall back on monitors lock ordering and specify that the monitor that is acquired first is the lock with the relevant entry queue. This assumes that the lock acquiring order is static for the lifetime of all concerned objects but that is a reasonable constraint. This algorithm choice has two consequences, the entry queue of the highest priority monitor is no longer a true FIFO queue and the queue of the lowest priority monitor is both required and probably unused. The queue can no longer be a FIFO queue because instead of simply containing the waiting threads in order of arrival, they also contain the second mutex. Therefore, another thread with the same highest priority monitor but a different lowest priority monitor may arrive first but enter the critical section after a thread with the correct pairing. Secondly, since it may not be known at compile time which monitor will be the lowest priority monitor, every monitor needs to have the correct queues even though it is probable that half the multi-monitor queues will go unused for the entire duration of the program.
    233146
    234 This algorithm choice has two consequences :
    235 \begin{itemize}
    236         \item The queue of the highest priority monitor is no longer a true FIFO queue because threads can be moved to the front of the queue. These queues need to contain a set of monitors for each of the waiting threads. Therefore, another thread whose set contains the same highest priority monitor but different lower priority monitors may arrive first but enter the critical section after a thread with the correct pairing.
    237         \item The queue of the lowest priority monitor is both required and potentially unused. Indeed, since it is not known at compile time which monitor will be the lowest priority monitor, every monitor needs to have the correct queues even though it is possible that some queues will go unused for the entire duration of the program, for example if a monitor is only used in a specific pair.
    238 \end{itemize}
    239147
    240 Therefore, the following modifications need to be made to support external scheduling :
    241 \begin{itemize}
    242         \item The threads waiting on the entry-queue need to keep track of which routine is trying to enter, and using which set of monitors. The \code{mutex} routine already has all the required information on its stack so the thread only needs to keep a pointer to that information.
    243         \item The monitors need to keep a mask of acceptable routines. This mask contains for each acceptable routine, a routine pointer and an array of monitors to go with it. It also needs storage to keep track of which routine was accepted. Since this information is not specific to any monitor, the monitors actually contain a pointer to an integer on the stack of the waiting thread. Note that the complete mask can be pushed to any owned monitors, regardless of \code{when} statements, the \code{waitfor} statement is used in a context where the thread already has full ownership of (at least) every concerned monitor and therefore monitors will refuse all calls no matter what.
    244         \item The entry/exit routine need to be updated as shown in listing \ref{lst:entry3}.
    245 \end{itemize}
    246 
    247 \subsection{External scheduling - destructors}
    248 Finally, to support the ordering inversion of destructors, the code generation needs to be modified to use a special entry routine. This routine is needed because of the storage requirements of the call order inversion. Indeed, when waiting for the destructors, storage is needed for the waiting context and the lifetime of said storage needs to outlive the waiting operation it is needed for. For regular \code{waitfor} statements, the callstack of the routine itself matches this requirement but it is no longer the case when waiting for the destructor since it is pushed on to the AS-stack for later. The waitfor semantics can then be adjusted correspondingly, as seen in listing \ref{lst:entry-dtor}.
    249 
    250 \begin{figure}
    251 \begin{multicols}{2}
    252 Entry
    253 \begin{pseudo}
    254 if monitor is free
    255         enter
    256 elif already own the monitor
    257         continue
    258 elif matches waitfor mask
    259         push criterions to AS-stack
    260         continue
    261 else
    262         block
    263 increment recursion
    264 \end{pseudo}
    265 \columnbreak
    266 Exit
    267 \begin{pseudo}
    268 decrement recursion
    269 if recursion == 0
    270         if signal_stack not empty
    271                 set_owner to thread
    272                 if all monitors ready
    273                         wake-up thread
    274                 endif
    275         endif
    276 
    277         if entry queue not empty
    278                 wake-up thread
    279         endif
    280 \end{pseudo}
    281 \end{multicols}
    282 \caption{Entry and exit routine for monitors with internal scheduling and external scheduling}
    283 \label{lst:entry3}
    284 \end{figure}
    285 
    286 \begin{figure}
    287 \begin{multicols}{2}
    288 Destructor Entry
    289 \begin{pseudo}
    290 if monitor is free
    291         enter
    292 elif already own the monitor
    293         increment recursion
    294         return
    295 create wait context
    296 if matches waitfor mask
    297         reset mask
    298         push self to AS-stack
    299         baton pass
    300 else
    301         wait
    302 increment recursion
    303 \end{pseudo}
    304 \columnbreak
    305 Waitfor
    306 \begin{pseudo}
    307 if matching thread is already there
    308         if found destructor
    309                 push destructor to AS-stack
    310                 unlock all monitors
    311         else
    312                 push self to AS-stack
    313                 baton pass
    314         endif
    315         return
    316 endif
    317 if non-blocking
    318         Unlock all monitors
    319         Return
    320 endif
    321 
    322 push self to AS-stack
    323 set waitfor mask
    324 block
    325 return
    326 \end{pseudo}
    327 \end{multicols}
    328 \caption{Pseudo code for the \code{waitfor} routine and the \code{mutex} entry routine for destructors}
    329 \label{lst:entry-dtor}
    330 \end{figure}
     148\section{Internals}
     149The complete mask can be pushed to any one, since we are in a context where we already have full ownership of (at least) every concerned monitor and therefore monitors will refuse all calls no matter what.
  • doc/proposals/concurrency/text/intro.tex

    r0fe4e62 rf5c3b6c  
    33% ======================================================================
    44
    5 This thesis provides a minimal concurrency \acrshort{api} that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of concurrency. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. The high-level approach and its minimal \acrshort{api} are tested in a dialect of C, called \CFA. Furthermore, the proposed \acrshort{api} doubles as an early definition of the \CFA language and library. This thesis also comes with an implementation of the concurrency library for \CFA as well as all the required language features added to the source-to-source translator.
     5This thesis provides a minimal concurrency \acrshort{api} that is simple, efficient and can be reused to build higher-level features. The simplest possible concurrency system is a thread and a lock but this low-level approach is hard to master. An easier approach for users is to support higher-level constructs as the basis of concurrency. Indeed, for highly productive concurrent programming, high-level approaches are much more popular~\cite{HPP:Study}. Examples are task based, message passing and implicit threading. The high-level approach and its minimal \acrshort{api} are tested in a dialect of C, called \CFA. [Is there value to say that this thesis is also an early definition of the \CFA language and library in regards to concurrency?]
    66
    77There are actually two problems that need to be solved in the design of concurrency for a programming language: which concurrency and which parallelism tools are available to the programmer. While these two concepts are often combined, they are in fact distinct, requiring different tools~\cite{Buhr05a}. Concurrency tools need to handle mutual exclusion and synchronization, while parallelism tools are about performance, cost and resource utilization.
  • doc/proposals/concurrency/text/parallelism.tex

    r0fe4e62 rf5c3b6c  
    1515Examples of languages that support \glspl{uthread} are Erlang~\cite{Erlang} and \uC~\cite{uC++book}.
    1616
    17 \subsection{Fibers : user-level threads without preemption} \label{fibers}
    18 A popular variant of \glspl{uthread} is what is often referred to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantical differences with \glspl{uthread}. The significant difference between \glspl{uthread} and \glspl{fiber} is the lack of \gls{preemption} in the latter. Advocates of \glspl{fiber} list their high performance and ease of implementation as major strengths of \glspl{fiber} but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignores fibers.
     17\subsection{Fibers : user-level threads without preemption}
     18A popular variant of \glspl{uthread} is what is often referred to as \glspl{fiber}. However, \glspl{fiber} do not present meaningful semantical differences with \glspl{uthread}. Advocates of \glspl{fiber} list their high performance and ease of implementation as major strengths of \glspl{fiber} but the performance difference between \glspl{uthread} and \glspl{fiber} is controversial, and the ease of implementation, while true, is a weak argument in the context of language design. Therefore this proposal largely ignores fibers.
    1919
    2020An example of a language that uses fibers is Go~\cite{Go}
     
    2626
    2727\subsection{Paradigm performance}
    28 While the choice between the three paradigms listed above may have significant performance implication, it is difficult to pin down the performance implications of choosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantees that the \gls{pool} based system has the best performance thanks to the lower memory overhead (i.e., no thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilisation, but a context switch is more expensive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are large enough the paradigm choice is largely amortised by the actual work done.
     28While the choice between the three paradigms listed above may have significant performance implication, it is difficult to pin down the performance implications of choosing a model at the language level. Indeed, in many situations one of these paradigms may show better performance but it all strongly depends on the workload. Having a large amount of mostly independent units of work to execute almost guarantees that the \gls{pool} based system has the best performance thanks to the lower memory overhead (i.e., no thread stack per job). However, interactions among jobs can easily exacerbate contention. User-level threads allow fine-grain context switching, which results in better resource utilisation, but a context switch is more expensive and the extra control means users need to tweak more variables to get the desired performance. Finally, if the units of uninterrupted work are large enough the paradigm choice is largely amortised by the actual work done.
     29
     30\TODO
    2931
    3032\section{The \protect\CFA\ Kernel : Processors, Clusters and Threads}\label{kernel}
    3133
    32 \Glspl{cfacluster} have not been fully implemented in the context of this thesis; currently \CFA only supports one \gls{cfacluster}, the initial one. The objective of \gls{cfacluster} is to group \gls{kthread} with identical settings together. \Glspl{uthread} can be scheduled on a \glspl{kthread} of a given \gls{cfacluster}, allowing organization between \glspl{kthread} and \glspl{uthread}. It is important that \glspl{kthread} belonging to a same \glspl{cfacluster} have homogeneous settings, otherwise migrating a \gls{uthread} from one \gls{kthread} to the other can cause issues.
    3334
    3435\subsection{Future Work: Machine setup}\label{machine}
    35 While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups. For example, systems using \acrshort{numa} configurations may benefit from users being able to tie clusters and\/or kernel threads to certain CPU cores. OS support for CPU affinity is now common \cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx} which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
     36While this was not done in the context of this thesis, another important aspect of clusters is affinity. While many common desktop and laptop PCs have homogeneous CPUs, other devices often have more heterogeneous setups. For example, systems using \acrshort{numa} configurations may benefit from users being able to tie clusters and/or kernel threads to certain CPU cores. OS support for CPU affinity is now common \cite{affinityLinux, affinityWindows, affinityFreebsd, affinityNetbsd, affinityMacosx}, which means it is both possible and desirable for \CFA to offer an abstraction mechanism for portable CPU affinity.
    3637
    37 % \subsection{Paradigms}\label{cfaparadigms}
    38 % Given these building blocks, it is possible to reproduce all three of the popular paradigms. Indeed, \glspl{uthread} is the default paradigm in \CFA. However, disabling \gls{preemption} on the \gls{cfacluster} means \glspl{cfathread} effectively become \glspl{fiber}. Since several \glspl{cfacluster} with different scheduling policy can coexist in the same application, this allows \glspl{fiber} and \glspl{uthread} to coexist in the runtime of an application. Finally, it is possible to build executors for thread pools from \glspl{uthread} or \glspl{fiber}.
     38\subsection{Paradigms}\label{cfaparadigms}
     39Given these building blocks, it is possible to reproduce all three of the popular paradigms. Indeed, \glspl{uthread} is the default paradigm in \CFA. However, disabling \glspl{preemption} on the \gls{cfacluster} means \glspl{cfathread} effectively become \glspl{fiber}. Since several \glspl{cfacluster} with different scheduling policy can coexist in the same application, this allows \glspl{fiber} and \glspl{uthread} to coexist in the runtime of an application. Finally, it is possible to build executors for thread pools from \glspl{uthread} or \glspl{fiber}.
  • doc/proposals/concurrency/text/together.tex

    r0fe4e62 rf5c3b6c  
    77
    88\section{Threads as monitors}
    9 As it was subtly alluded to in section \ref{threads}, \code{threads} in \CFA are in fact monitors, which means that all monitor features are available when using threads. For example, here is a very simple two thread pipeline that could be used for a simulator of a game engine :
     9As it was subtly alluded to in section \ref{threads}, \code{threads} in \CFA are in fact monitors. This means that all the monitor features are available when using threads. For example, here is a very simple two thread pipeline that could be used for a simulator of a game engine :
    1010\begin{cfacode}
    1111// Visualization declaration
     
    3636}
    3737\end{cfacode}
    38 One of the obvious complaints of the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on forever. Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner :
     38One of the obvious complaints of the previous code snippet (other than its toy-like simplicity) is that it does not handle exit conditions and just goes on for ever. Luckily, the monitor semantics can also be used to clearly enforce a shutdown order in a concise manner :
    3939\begin{cfacode}
    4040// Visualization declaration
     
    7272        }
    7373}
    74 
    75 // Call destructor for simulator once simulator finishes
    76 // Call destructor for renderer to signify shutdown
    7774\end{cfacode}
    7875
    7976\section{Fibers \& Threads}
    80 As mentioned in section \ref{preemption}, \CFA uses preemptive threads by default but can use fibers on demand. Currently, using fibers is done by adding the following line of code to the program~:
    81 \begin{cfacode}
    82 unsigned int default_preemption() {
    83         return 0;
    84 }
    85 \end{cfacode}
    86 This function is called by the kernel to fetch the default preemption rate, where 0 signifies an infinite time-slice, i.e., no preemption. However, once clusters are fully implemented, it will be possible to create fibers and uthreads in the same system :
    87 \begin{figure}
    88 \begin{cfacode}
    89 //Cluster forward declaration
    90 struct cluster;
    91 
    92 //Processor forward declaration
    93 struct processor;
    94 
    95 //Construct clusters with a preemption rate
    96 void ?{}(cluster& this, unsigned int rate);
    97 //Construct processor and add it to cluster
    98 void ?{}(processor& this, cluster& cluster);
    99 //Construct thread and schedule it on cluster
    100 void ?{}(thread& this, cluster& cluster);
    101 
    102 //Declare two clusters
    103 cluster thread_cluster = { 10`ms };                     //Preempt every 10 ms
    104 cluster fibers_cluster = { 0 };                         //Never preempt
    105 
    106 //Construct 4 processors
    107 processor processors[4] = {
    108         //2 for the thread cluster
    109         thread_cluster;
    110         thread_cluster;
    111         //2 for the fibers cluster
    112         fibers_cluster;
    113         fibers_cluster;
    114 };
    115 
    116 //Declares thread
    117 thread UThread {};
    118 void ?{}(UThread& this) {
    119         //Construct underlying thread to automatically
    120         //be scheduled on the thread cluster
    121         (this){ thread_cluster }
    122 }
    123 
    124 void main(UThread & this);
    125 
    126 //Declares fibers
    127 thread Fiber {};
    128 void ?{}(Fiber& this) {
    129         //Construct underlying thread to automatically
    130         //be scheduled on the fiber cluster
    131         (this.__thread){ fibers_cluster }
    132 }
    133 
    134 void main(Fiber & this);
    135 \end{cfacode}
    136 \end{figure}
  • doc/proposals/concurrency/thesis.tex

    r0fe4e62 rf5c3b6c  
    3535\usepackage[pagewise]{lineno}
    3636\usepackage{fancyhdr}
    37 \usepackage{float}
    3837\renewcommand{\linenumberfont}{\scriptsize\sffamily}
    39 \usepackage{siunitx}
    40 \sisetup{ binary-units=true }
    4138\input{style}                                                   % bespoke macros used in the document
    4239\usepackage[dvips,plainpages=false,pdfpagelabels,pdfpagemode=UseNone,colorlinks=true,pagebackref=true,linkcolor=blue,citecolor=blue,urlcolor=blue,pagebackref=true,breaklinks=true]{hyperref}
     
    110107\input{together}
    111108
    112 \input{results}
    113 
    114109\input{future}
    115110
  • doc/proposals/concurrency/version

    r0fe4e62 rf5c3b6c  
    1 0.11.129
     10.10.212
  • src/Common/Debug.h

    r0fe4e62 rf5c3b6c  
    2424#include "SynTree/Declaration.h"
    2525
    26 #define DEBUG
     26/// debug codegen a translation unit
     27static inline void debugCodeGen( const std::list< Declaration * > & translationUnit, const std::string & label ) {
     28        std::list< Declaration * > decls;
    2729
    28 namespace Debug {
    29         /// debug codegen a translation unit
    30         static inline void codeGen( __attribute__((unused)) const std::list< Declaration * > & translationUnit, __attribute__((unused)) const std::string & label ) {
    31         #ifdef DEBUG
    32                 std::list< Declaration * > decls;
     30        filter( translationUnit.begin(), translationUnit.end(), back_inserter( decls ), []( Declaration * decl ) {
     31                return ! LinkageSpec::isBuiltin( decl->get_linkage() );
     32        });
    3333
    34                 filter( translationUnit.begin(), translationUnit.end(), back_inserter( decls ), []( Declaration * decl ) {
    35                         return ! LinkageSpec::isBuiltin( decl->get_linkage() );
    36                 });
    37 
    38                 std::cerr << "======" << label << "======" << std::endl;
    39                 CodeGen::generate( decls, std::cerr, false, true );
    40         #endif
    41         } // dump
    42 
    43         static inline void treeDump( __attribute__((unused)) const std::list< Declaration * > & translationUnit, __attribute__((unused)) const std::string & label ) {
    44         #ifdef DEBUG
    45                 std::list< Declaration * > decls;
    46 
    47                 filter( translationUnit.begin(), translationUnit.end(), back_inserter( decls ), []( Declaration * decl ) {
    48                         return ! LinkageSpec::isBuiltin( decl->get_linkage() );
    49                 });
    50 
    51                 std::cerr << "======" << label << "======" << std::endl;
    52                 printAll( decls, std::cerr );
    53         #endif
    54         } // dump
    55 }
     34        std::cerr << "======" << label << "======" << std::endl;
     35        CodeGen::generate( decls, std::cerr, false, true );
     36} // dump
    5637
    5738// Local Variables: //
  • src/Concurrency/Keywords.cc

    r0fe4e62 rf5c3b6c  
    553553                        ),
    554554                        new ListInit(
    555                                 map_range < std::list<Initializer*> > ( args, [](DeclarationWithType * var ){
     555                                map_range < std::list<Initializer*> > ( args, [this](DeclarationWithType * var ){
    556556                                        Type * type = var->get_type()->clone();
    557557                                        type->set_mutex( false );
  • src/InitTweak/GenInit.cc

    r0fe4e62 rf5c3b6c  
    214214                }
    215215                // a type is managed if it appears in the map of known managed types, or if it contains any polymorphism (is a type variable or generic type containing a type variable)
    216                 return managedTypes.find( SymTab::Mangler::mangleConcrete( type ) ) != managedTypes.end() || GenPoly::isPolyType( type );
     216                return managedTypes.find( SymTab::Mangler::mangle( type ) ) != managedTypes.end() || GenPoly::isPolyType( type );
    217217        }
    218218
     
    232232                        Type * type = InitTweak::getPointerBase( params.front()->get_type() );
    233233                        assert( type );
    234                         managedTypes.insert( SymTab::Mangler::mangleConcrete( type ) );
     234                        managedTypes.insert( SymTab::Mangler::mangle( type ) );
    235235                }
    236236        }
     
    242242                        if ( ObjectDecl * field = dynamic_cast< ObjectDecl * >( member ) ) {
    243243                                if ( isManaged( field ) ) {
    244                                         // generic parameters should not play a role in determining whether a generic type is constructed - construct all generic types, so that
    245                                         // polymorphic constructors make generic types managed types
    246244                                        StructInstType inst( Type::Qualifiers(), aggregateDecl );
    247                                         managedTypes.insert( SymTab::Mangler::mangleConcrete( &inst ) );
     245                                        managedTypes.insert( SymTab::Mangler::mangle( &inst ) );
    248246                                        break;
    249247                                }
  • src/InitTweak/InitTweak.cc

    r0fe4e62 rf5c3b6c  
    9999        class InitExpander::ExpanderImpl {
    100100        public:
    101                 virtual ~ExpanderImpl() = default;
    102101                virtual std::list< Expression * > next( std::list< Expression * > & indices ) = 0;
    103102                virtual Statement * buildListInit( UntypedExpr * callExpr, std::list< Expression * > & indices ) = 0;
     
    107106        public:
    108107                InitImpl( Initializer * init ) : init( init ) {}
    109                 virtual ~InitImpl() = default;
    110108
    111109                virtual std::list< Expression * > next( __attribute((unused)) std::list< Expression * > & indices ) {
     
    124122        public:
    125123                ExprImpl( Expression * expr ) : arg( expr ) {}
    126                 virtual ~ExprImpl() { delete arg; }
     124
     125                ~ExprImpl() { delete arg; }
    127126
    128127                virtual std::list< Expression * > next( std::list< Expression * > & indices ) {
  • src/ResolvExpr/AlternativeFinder.cc

    r0fe4e62 rf5c3b6c  
    2222#include <memory>                  // for allocator_traits<>::value_type
    2323#include <utility>                 // for pair
    24 #include <vector>                  // for vector
    2524
    2625#include "Alternative.h"           // for AltList, Alternative
     
    334333                tmpCost.incPoly( -tmpCost.get_polyCost() );
    335334                if ( tmpCost != Cost::zero ) {
     335                // if ( convCost != Cost::zero ) {
    336336                        Type *newType = formalType->clone();
    337337                        env.apply( newType );
     
    405405///     needAssertions.insert( needAssertions.end(), (*tyvar)->get_assertions().begin(), (*tyvar)->get_assertions().end() );
    406406                }
     407        }
     408
     409        /// instantiate a single argument by matching actuals from [actualIt, actualEnd) against formalType,
     410        /// producing expression(s) in out and their total cost in cost.
     411        template< typename AltIterator, typename OutputIterator >
     412        bool instantiateArgument( Type * formalType, Initializer * defaultValue, AltIterator & actualIt, AltIterator actualEnd, OpenVarSet & openVars, TypeEnvironment & resultEnv, AssertionSet & resultNeed, AssertionSet & resultHave, const SymTab::Indexer & indexer, Cost & cost, OutputIterator out ) {
     413                if ( TupleType * tupleType = dynamic_cast< TupleType * >( formalType ) ) {
     414                        // formalType is a TupleType - group actuals into a TupleExpr whose type unifies with the TupleType
     415                        std::list< Expression * > exprs;
     416                        for ( Type * type : *tupleType ) {
     417                                if ( ! instantiateArgument( type, defaultValue, actualIt, actualEnd, openVars, resultEnv, resultNeed, resultHave, indexer, cost, back_inserter( exprs ) ) ) {
     418                                        deleteAll( exprs );
     419                                        return false;
     420                                }
     421                        }
     422                        *out++ = new TupleExpr( exprs );
     423                } else if ( TypeInstType * ttype = Tuples::isTtype( formalType ) ) {
     424                        // xxx - mixing default arguments with variadic??
     425                        std::list< Expression * > exprs;
     426                        for ( ; actualIt != actualEnd; ++actualIt ) {
     427                                exprs.push_back( actualIt->expr->clone() );
     428                                cost += actualIt->cost;
     429                        }
     430                        Expression * arg = nullptr;
     431                        if ( exprs.size() == 1 && Tuples::isTtype( exprs.front()->get_result() ) ) {
     432                                // the case where a ttype value is passed directly is special, e.g. for argument forwarding purposes
     433                                // xxx - what if passing multiple arguments, last of which is ttype?
     434                                // xxx - what would happen if unify was changed so that unifying tuple types flattened both before unifying lists? then pass in TupleType(ttype) below.
     435                                arg = exprs.front();
     436                        } else {
     437                                arg = new TupleExpr( exprs );
     438                        }
     439                        assert( arg && arg->get_result() );
     440                        if ( ! unify( ttype, arg->get_result(), resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
     441                                return false;
     442                        }
     443                        *out++ = arg;
     444                } else if ( actualIt != actualEnd ) {
     445                        // both actualType and formalType are atomic (non-tuple) types - if they unify
     446                        // then accept actual as an argument, otherwise return false (fail to instantiate argument)
     447                        Expression * actual = actualIt->expr;
     448                        Type * actualType = actual->get_result();
     449
     450                        PRINT(
     451                                std::cerr << "formal type is ";
     452                                formalType->print( std::cerr );
     453                                std::cerr << std::endl << "actual type is ";
     454                                actualType->print( std::cerr );
     455                                std::cerr << std::endl;
     456                        )
     457                        if ( ! unify( formalType, actualType, resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
     458                                // std::cerr << "unify failed" << std::endl;
     459                                return false;
     460                        }
     461                        // move the expression from the alternative to the output iterator
     462                        *out++ = actual;
     463                        actualIt->expr = nullptr;
     464                        cost += actualIt->cost;
     465                        ++actualIt;
     466                } else {
     467                        // End of actuals - Handle default values
     468                        if ( SingleInit *si = dynamic_cast<SingleInit *>( defaultValue )) {
     469                                if ( CastExpr * castExpr = dynamic_cast< CastExpr * >( si->get_value() ) ) {
     470                                        // so far, only constant expressions are accepted as default values
     471                                        if ( ConstantExpr *cnstexpr = dynamic_cast<ConstantExpr *>( castExpr->get_arg() ) ) {
     472                                                if ( Constant *cnst = dynamic_cast<Constant *>( cnstexpr->get_constant() ) ) {
     473                                                        if ( unify( formalType, cnst->get_type(), resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
     474                                                                *out++ = cnstexpr->clone();
     475                                                                return true;
     476                                                        } // if
     477                                                } // if
     478                                        } // if
     479                                }
     480                        } // if
     481                        return false;
     482                } // if
     483                return true;
     484        }
     485
     486        bool AlternativeFinder::instantiateFunction( std::list< DeclarationWithType* >& formals, const AltList &actuals, bool isVarArgs, OpenVarSet& openVars, TypeEnvironment &resultEnv, AssertionSet &resultNeed, AssertionSet &resultHave, AltList & out ) {
     487                simpleCombineEnvironments( actuals.begin(), actuals.end(), resultEnv );
     488                // make sure we don't widen any existing bindings
     489                for ( TypeEnvironment::iterator i = resultEnv.begin(); i != resultEnv.end(); ++i ) {
     490                        i->allowWidening = false;
     491                }
     492                resultEnv.extractOpenVars( openVars );
     493
     494                // flatten actuals so that each actual has an atomic (non-tuple) type
     495                AltList exploded;
     496                Tuples::explode( actuals, indexer, back_inserter( exploded ) );
     497
     498                AltList::iterator actualExpr = exploded.begin();
     499                AltList::iterator actualEnd = exploded.end();
     500                for ( DeclarationWithType * formal : formals ) {
     501                        // match flattened actuals with formal parameters - actuals will be grouped to match
     502                        // with formals as appropriate
     503                        Cost cost = Cost::zero;
     504                        std::list< Expression * > newExprs;
     505                        ObjectDecl * obj = strict_dynamic_cast< ObjectDecl * >( formal );
     506                        if ( ! instantiateArgument( obj->get_type(), obj->get_init(), actualExpr, actualEnd, openVars, resultEnv, resultNeed, resultHave, indexer, cost, back_inserter( newExprs ) ) ) {
     507                                deleteAll( newExprs );
     508                                return false;
     509                        }
     510                        // success - produce argument as a new alternative
     511                        assert( newExprs.size() == 1 );
     512                        out.push_back( Alternative( newExprs.front(), resultEnv, cost ) );
     513                }
     514                if ( actualExpr != actualEnd ) {
     515                        // there are still actuals remaining, but we've run out of formal parameters to match against
     516                        // this is okay only if the function is variadic
     517                        if ( ! isVarArgs ) {
     518                                return false;
     519                        }
     520                        out.splice( out.end(), exploded, actualExpr, actualEnd );
     521                }
     522                return true;
    407523        }
    408524
     
    559675        }
    560676
    561         /// Gets a default value from an initializer, nullptr if not present
    562         ConstantExpr* getDefaultValue( Initializer* init ) {
    563                 if ( SingleInit* si = dynamic_cast<SingleInit*>( init ) ) {
    564                         if ( CastExpr* ce = dynamic_cast<CastExpr*>( si->get_value() ) ) {
    565                                 return dynamic_cast<ConstantExpr*>( ce->get_arg() );
    566                         }
    567                 }
    568                 return nullptr;
    569         }
    570 
    571         /// State to iteratively build a match of parameter expressions to arguments
    572         struct ArgPack {
    573                 AltList actuals;                 ///< Arguments included in this pack
    574                 TypeEnvironment env;             ///< Environment for this pack
    575                 AssertionSet need;               ///< Assertions outstanding for this pack
    576                 AssertionSet have;               ///< Assertions found for this pack
    577                 OpenVarSet openVars;             ///< Open variables for this pack
    578                 unsigned nextArg;                ///< Index of next argument in arguments list
    579                 std::vector<Alternative> expls;  ///< Exploded actuals left over from last match
    580                 unsigned nextExpl;               ///< Index of next exploded alternative to use
    581                 std::vector<unsigned> tupleEls;  /// Number of elements in current tuple element(s)
    582 
    583                 ArgPack(const TypeEnvironment& env, const AssertionSet& need, const AssertionSet& have,
    584                                 const OpenVarSet& openVars)
    585                         : actuals(), env(env), need(need), have(have), openVars(openVars), nextArg(0),
    586                           expls(), nextExpl(0), tupleEls() {}
    587 
    588                 /// Starts a new tuple expression
    589                 void beginTuple() {
    590                         if ( ! tupleEls.empty() ) ++tupleEls.back();
    591                         tupleEls.push_back(0);
    592                 }
    593 
    594                 /// Ends a tuple expression, consolidating the appropriate actuals
    595                 void endTuple() {
    596                         // set up new Tuple alternative
    597                         std::list<Expression*> exprs;
    598                         Cost cost = Cost::zero;
    599 
    600                         // transfer elements into alternative
    601                         for (unsigned i = 0; i < tupleEls.back(); ++i) {
    602                                 exprs.push_front( actuals.back().expr );
    603                                 actuals.back().expr = nullptr;
    604                                 cost += actuals.back().cost;
    605                                 actuals.pop_back();
    606                         }
    607                         tupleEls.pop_back();
    608 
    609                         // build new alternative
    610                         actuals.emplace_back( new TupleExpr( exprs ), this->env, cost );
    611                 }
    612 
    613                 /// Clones and adds an actual, returns this
    614                 ArgPack& withArg( Expression* expr, Cost cost = Cost::zero ) {
    615                         actuals.emplace_back( expr->clone(), this->env, cost );
    616                         if ( ! tupleEls.empty() ) ++tupleEls.back();
    617                         return *this;
    618                 }
    619         };
    620 
    621         /// Instantiates an argument to match a formal, returns false if no results left
    622         bool instantiateArgument( Type* formalType, Initializer* initializer,
    623                         const std::vector< AlternativeFinder >& args,
    624                         std::vector<ArgPack>& results, std::vector<ArgPack>& nextResults,
    625                         const SymTab::Indexer& indexer ) {
    626                 if ( TupleType* tupleType = dynamic_cast<TupleType*>( formalType ) ) {
    627                         // formalType is a TupleType - group actuals into a TupleExpr
    628                         for ( ArgPack& result : results ) { result.beginTuple(); }
    629                         for ( Type* type : *tupleType ) {
    630                                 // xxx - dropping initializer changes behaviour from previous, but seems correct
    631                                 if ( ! instantiateArgument( type, nullptr, args, results, nextResults, indexer ) )
    632                                         return false;
    633                         }
    634                         for ( ArgPack& result : results ) { result.endTuple(); }
    635                         return true;
    636                 } else if ( TypeInstType* ttype = Tuples::isTtype( formalType ) ) {
    637                         // formalType is a ttype, consumes all remaining arguments
    638                         // xxx - mixing default arguments with variadic??
    639                         std::vector<ArgPack> finalResults{};  /// list of completed tuples
    640                         // start tuples
    641                         for ( ArgPack& result : results ) {
    642                                 result.beginTuple();
    643 
    644                                 // use rest of exploded tuple if present
    645                                 while ( result.nextExpl < result.expls.size() ) {
    646                                         const Alternative& actual = result.expls[result.nextExpl];
    647                                         result.env.addActual( actual.env, result.openVars );
    648                                         result.withArg( actual.expr );
    649                                         ++result.nextExpl;
    650                                 }
    651                         }
    652                         // iterate until all results completed
    653                         while ( ! results.empty() ) {
    654                                 // add another argument to results
    655                                 for ( ArgPack& result : results ) {
    656                                         // finish result when out of arguments
    657                                         if ( result.nextArg >= args.size() ) {
    658                                                 Type* argType = result.actuals.back().expr->get_result();
    659                                                 if ( result.tupleEls.back() == 1 && Tuples::isTtype( argType ) ) {
    660                                                         // the case where a ttype value is passed directly is special, e.g. for
    661                                                         // argument forwarding purposes
    662                                                         // xxx - what if passing multiple arguments, last of which is ttype?
    663                                                         // xxx - what would happen if unify was changed so that unifying tuple
    664                                                         // types flattened both before unifying lists? then pass in TupleType
    665                                                         // (ttype) below.
    666                                                         result.tupleEls.pop_back();
    667                                                 } else {
    668                                                         // collapse leftover arguments into tuple
    669                                                         result.endTuple();
    670                                                         argType = result.actuals.back().expr->get_result();
    671                                                 }
    672                                                 // check unification for ttype before adding to final
    673                                                 if ( unify( ttype, argType, result.env, result.need, result.have,
    674                                                                 result.openVars, indexer ) ) {
    675                                                         finalResults.push_back( std::move(result) );
    676                                                 }
    677                                                 continue;
    678                                         }
    679 
    680                                         // add each possible next argument
    681                                         for ( const Alternative& actual : args[result.nextArg] ) {
    682                                                 ArgPack aResult = result;  // copy to clone everything
    683                                                 // add details of actual to result
    684                                                 aResult.env.addActual( actual.env, aResult.openVars );
    685                                                 Cost cost = actual.cost;
    686 
    687                                                 // explode argument
    688                                                 std::vector<Alternative> exploded;
    689                                                 Tuples::explode( actual, indexer, back_inserter( exploded ) );
    690 
    691                                                 // add exploded argument to tuple
    692                                                 for ( Alternative& aActual : exploded ) {
    693                                                         aResult.withArg( aActual.expr, cost );
    694                                                         cost = Cost::zero;
    695                                                 }
    696                                                 ++aResult.nextArg;
    697                                                 nextResults.push_back( std::move(aResult) );
    698                                         }
    699                                 }
    700 
    701                                 // reset for next round
    702                                 results.swap( nextResults );
    703                                 nextResults.clear();
    704                         }
    705                         results.swap( finalResults );
    706                         return ! results.empty();
    707                 }
    708 
    709                 // iterate each current subresult
    710                 for ( unsigned iResult = 0; iResult < results.size(); ++iResult ) {
    711                         ArgPack& result = results[iResult];
    712 
    713                         if ( result.nextExpl < result.expls.size() ) {
    714                                 // use remainder of exploded tuple if present
    715                                 const Alternative& actual = result.expls[result.nextExpl];
    716                                 result.env.addActual( actual.env, result.openVars );
    717                                 Type* actualType = actual.expr->get_result();
    718 
    719                                 PRINT(
    720                                         std::cerr << "formal type is ";
    721                                         formalType->print( std::cerr );
    722                                         std::cerr << std::endl << "actual type is ";
    723                                         actualType->print( std::cerr );
    724                                         std::cerr << std::endl;
    725                                 )
    726 
    727                                 if ( unify( formalType, actualType, result.env, result.need, result.have,
    728                                                 result.openVars, indexer ) ) {
    729                                         ++result.nextExpl;
    730                                         nextResults.push_back( std::move(result.withArg( actual.expr )) );
    731                                 }
    732 
    733                                 continue;
    734                         } else if ( result.nextArg >= args.size() ) {
    735                                 // use default initializers if out of arguments
    736                                 if ( ConstantExpr* cnstExpr = getDefaultValue( initializer ) ) {
    737                                         if ( Constant* cnst = dynamic_cast<Constant*>( cnstExpr->get_constant() ) ) {
    738                                                 if ( unify( formalType, cnst->get_type(), result.env, result.need,
    739                                                                 result.have, result.openVars, indexer ) ) {
    740                                                         nextResults.push_back( std::move(result.withArg( cnstExpr )) );
    741                                                 }
    742                                         }
    743                                 }
    744                                 continue;
    745                         }
    746 
    747                         // Check each possible next argument
    748                         for ( const Alternative& actual : args[result.nextArg] ) {
    749                                 ArgPack aResult = result;  // copy to clone everything
    750                                 // add details of actual to result
    751                                 aResult.env.addActual( actual.env, aResult.openVars );
    752 
    753                                 // explode argument
    754                                 std::vector<Alternative> exploded;
    755                                 Tuples::explode( actual, indexer, back_inserter( exploded ) );
    756                                 if ( exploded.empty() ) {
    757                                         // skip empty tuple arguments
    758                                         ++aResult.nextArg;
    759                                         results.push_back( std::move(aResult) );
    760                                         continue;
    761                                 }
    762 
    763                                 // consider only first exploded actual
    764                                 const Alternative& aActual = exploded.front();
    765                                 Type* actualType = aActual.expr->get_result()->clone();
    766 
    767                                 PRINT(
    768                                         std::cerr << "formal type is ";
    769                                         formalType->print( std::cerr );
    770                                         std::cerr << std::endl << "actual type is ";
    771                                         actualType->print( std::cerr );
    772                                         std::cerr << std::endl;
    773                                 )
    774 
    775                                 // attempt to unify types
    776                                 if ( unify( formalType, actualType, aResult.env, aResult.need, aResult.have, aResult.openVars, indexer ) ) {
    777                                         // add argument
    778                                         aResult.withArg( aActual.expr, actual.cost );
    779                                         ++aResult.nextArg;
    780                                         if ( exploded.size() > 1 ) {
    781                                                 // other parts of tuple left over
    782                                                 aResult.expls = std::move( exploded );
    783                                                 aResult.nextExpl = 1;
    784                                         }
    785                                         nextResults.push_back( std::move(aResult) );
    786                                 }
    787                         }
    788                 }
    789 
    790                 // reset for next parameter
    791                 results.swap( nextResults );
    792                 nextResults.clear();
    793 
    794                 return ! results.empty();
    795         }
    796 
    797         template<typename OutputIterator>
    798         void AlternativeFinder::makeFunctionAlternatives( const Alternative &func,
    799                         FunctionType *funcType, const std::vector< AlternativeFinder > &args,
    800                         OutputIterator out ) {
    801                 OpenVarSet funcOpenVars;
    802                 AssertionSet funcNeed, funcHave;
    803                 TypeEnvironment funcEnv( func.env );
    804                 makeUnifiableVars( funcType, funcOpenVars, funcNeed );
    805                 // add all type variables as open variables now so that those not used in the parameter
    806                 // list are still considered open.
    807                 funcEnv.add( funcType->get_forall() );
    808 
     677        template< typename OutputIterator >
     678        void AlternativeFinder::makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const AltList &actualAlt, OutputIterator out ) {
     679                OpenVarSet openVars;
     680                AssertionSet resultNeed, resultHave;
     681                TypeEnvironment resultEnv( func.env );
     682                makeUnifiableVars( funcType, openVars, resultNeed );
     683                resultEnv.add( funcType->get_forall() ); // add all type variables as open variables now so that those not used in the parameter list are still considered open
     684                AltList instantiatedActuals; // filled by instantiate function
    809685                if ( targetType && ! targetType->isVoid() && ! funcType->get_returnVals().empty() ) {
    810686                        // attempt to narrow based on expected target type
    811687                        Type * returnType = funcType->get_returnVals().front()->get_type();
    812                         if ( ! unify( returnType, targetType, funcEnv, funcNeed, funcHave, funcOpenVars,
    813                                         indexer ) ) {
    814                                 // unification failed, don't pursue this function alternative
     688                        if ( ! unify( returnType, targetType, resultEnv, resultNeed, resultHave, openVars, indexer ) ) {
     689                                // unification failed, don't pursue this alternative
    815690                                return;
    816691                        }
    817692                }
    818693
    819                 // iteratively build matches, one parameter at a time
    820                 std::vector<ArgPack> results{ ArgPack{ funcEnv, funcNeed, funcHave, funcOpenVars } };
    821                 std::vector<ArgPack> nextResults{};
    822                 for ( DeclarationWithType* formal : funcType->get_parameters() ) {
    823                         ObjectDecl* obj = strict_dynamic_cast< ObjectDecl* >( formal );
    824                         if ( ! instantiateArgument(
    825                                         obj->get_type(), obj->get_init(), args, results, nextResults, indexer ) )
    826                                 return;
    827                 }
    828 
    829                 // filter out results that don't use all the arguments, and aren't variadic
    830                 std::vector<ArgPack> finalResults{};
    831                 if ( funcType->get_isVarArgs() ) {
    832                         for ( ArgPack& result : results ) {
    833                                 // use rest of exploded tuple if present
    834                                 while ( result.nextExpl < result.expls.size() ) {
    835                                         const Alternative& actual = result.expls[result.nextExpl];
    836                                         result.env.addActual( actual.env, result.openVars );
    837                                         result.withArg( actual.expr );
    838                                         ++result.nextExpl;
    839                                 }
    840                         }
    841 
    842                         while ( ! results.empty() ) {
    843                                 // build combinations for all remaining arguments
    844                                 for ( ArgPack& result : results ) {
    845                                         // keep if used all arguments
    846                                         if ( result.nextArg >= args.size() ) {
    847                                                 finalResults.push_back( std::move(result) );
    848                                                 continue;
    849                                         }
    850 
    851                                         // add each possible next argument
    852                                         for ( const Alternative& actual : args[result.nextArg] ) {
    853                                                 ArgPack aResult = result; // copy to clone everything
    854                                                 // add details of actual to result
    855                                                 aResult.env.addActual( actual.env, aResult.openVars );
    856                                                 Cost cost = actual.cost;
    857 
    858                                                 // explode argument
    859                                                 std::vector<Alternative> exploded;
    860                                                 Tuples::explode( actual, indexer, back_inserter( exploded ) );
    861 
    862                                                 // add exploded argument to arg list
    863                                                 for ( Alternative& aActual : exploded ) {
    864                                                         aResult.withArg( aActual.expr, cost );
    865                                                         cost = Cost::zero;
    866                                                 }
    867                                                 ++aResult.nextArg;
    868                                                 nextResults.push_back( std::move(aResult) );
    869                                         }
    870                                 }
    871 
    872                                 // reset for next round
    873                                 results.swap( nextResults );
    874                                 nextResults.clear();
    875                         }
    876                 } else {
    877                         // filter out results that don't use all the arguments
    878                         for ( ArgPack& result : results ) {
    879                                 if ( result.nextExpl >= result.expls.size() && result.nextArg >= args.size() ) {
    880                                         finalResults.push_back( std::move(result) );
    881                                 }
    882                         }
    883                 }
    884 
    885                 // validate matching combos, add to final result list
    886                 for ( ArgPack& result : finalResults ) {
     694                if ( instantiateFunction( funcType->get_parameters(), actualAlt, funcType->get_isVarArgs(), openVars, resultEnv, resultNeed, resultHave, instantiatedActuals ) ) {
    887695                        ApplicationExpr *appExpr = new ApplicationExpr( func.expr->clone() );
    888                         Alternative newAlt( appExpr, result.env, sumCost( result.actuals ) );
    889                         makeExprList( result.actuals, appExpr->get_args() );
     696                        Alternative newAlt( appExpr, resultEnv, sumCost( instantiatedActuals ) );
     697                        makeExprList( instantiatedActuals, appExpr->get_args() );
    890698                        PRINT(
    891699                                std::cerr << "instantiate function success: " << appExpr << std::endl;
    892700                                std::cerr << "need assertions:" << std::endl;
    893                                 printAssertionSet( result.need, std::cerr, 8 );
     701                                printAssertionSet( resultNeed, std::cerr, 8 );
    894702                        )
    895                         inferParameters( result.need, result.have, newAlt, result.openVars, out );
     703                        inferParameters( resultNeed, resultHave, newAlt, openVars, out );
    896704                }
    897705        }
     
    903711                if ( funcFinder.alternatives.empty() ) return;
    904712
    905                 std::vector< AlternativeFinder > argAlternatives;
    906                 findSubExprs( untypedExpr->begin_args(), untypedExpr->end_args(),
    907                         back_inserter( argAlternatives ) );
     713                std::list< AlternativeFinder > argAlternatives;
     714                findSubExprs( untypedExpr->begin_args(), untypedExpr->end_args(), back_inserter( argAlternatives ) );
     715
     716                std::list< AltList > possibilities;
     717                combos( argAlternatives.begin(), argAlternatives.end(), back_inserter( possibilities ) );
    908718
    909719                // take care of possible tuple assignments
    910720                // if not tuple assignment, assignment is taken care of as a normal function call
    911                 Tuples::handleTupleAssignment( *this, untypedExpr, argAlternatives );
     721                Tuples::handleTupleAssignment( *this, untypedExpr, possibilities );
    912722
    913723                // find function operators
     
    934744                                                Alternative newFunc( *func );
    935745                                                referenceToRvalueConversion( newFunc.expr );
    936                                                 makeFunctionAlternatives( newFunc, function, argAlternatives,
    937                                                         std::back_inserter( candidates ) );
     746                                                for ( std::list< AltList >::iterator actualAlt = possibilities.begin(); actualAlt != possibilities.end(); ++actualAlt ) {
     747                                                        // XXX
     748                                                        //Designators::check_alternative( function, *actualAlt );
     749                                                        makeFunctionAlternatives( newFunc, function, *actualAlt, std::back_inserter( candidates ) );
     750                                                }
    938751                                        }
    939752                                } else if ( TypeInstType *typeInst = dynamic_cast< TypeInstType* >( func->expr->get_result()->stripReferences() ) ) { // handle ftype (e.g. *? on function pointer)
     
    943756                                                        Alternative newFunc( *func );
    944757                                                        referenceToRvalueConversion( newFunc.expr );
    945                                                         makeFunctionAlternatives( newFunc, function, argAlternatives,
    946                                                                 std::back_inserter( candidates ) );
     758                                                        for ( std::list< AltList >::iterator actualAlt = possibilities.begin(); actualAlt != possibilities.end(); ++actualAlt ) {
     759                                                                makeFunctionAlternatives( newFunc, function, *actualAlt, std::back_inserter( candidates ) );
     760                                                        } // for
    947761                                                } // if
    948762                                        } // if
    949763                                }
     764
     765                                // try each function operator ?() with the current function alternative and each of the argument combinations
     766                                for ( AltList::iterator funcOp = funcOpFinder.alternatives.begin(); funcOp != funcOpFinder.alternatives.end(); ++funcOp ) {
     767                                        // check if the type is pointer to function
     768                                        if ( PointerType *pointer = dynamic_cast< PointerType* >( funcOp->expr->get_result()->stripReferences() ) ) {
     769                                                if ( FunctionType *function = dynamic_cast< FunctionType* >( pointer->get_base() ) ) {
     770                                                        Alternative newFunc( *funcOp );
     771                                                        referenceToRvalueConversion( newFunc.expr );
     772                                                        for ( std::list< AltList >::iterator actualAlt = possibilities.begin(); actualAlt != possibilities.end(); ++actualAlt ) {
     773                                                                AltList currentAlt;
     774                                                                currentAlt.push_back( *func );
     775                                                                currentAlt.insert( currentAlt.end(), actualAlt->begin(), actualAlt->end() );
     776                                                                makeFunctionAlternatives( newFunc, function, currentAlt, std::back_inserter( candidates ) );
     777                                                        } // for
     778                                                } // if
     779                                        } // if
     780                                } // for
    950781                        } catch ( SemanticError &e ) {
    951782                                errors.append( e );
    952783                        }
    953784                } // for
    954 
    955                 // try each function operator ?() with each function alternative
    956                 if ( ! funcOpFinder.alternatives.empty() ) {
    957                         // add function alternatives to front of argument list
    958                         argAlternatives.insert( argAlternatives.begin(), std::move(funcFinder) );
    959 
    960                         for ( AltList::iterator funcOp = funcOpFinder.alternatives.begin();
    961                                         funcOp != funcOpFinder.alternatives.end(); ++funcOp ) {
    962                                 try {
    963                                         // check if type is a pointer to function
    964                                         if ( PointerType* pointer = dynamic_cast<PointerType*>(
    965                                                         funcOp->expr->get_result()->stripReferences() ) ) {
    966                                                 if ( FunctionType* function =
    967                                                                 dynamic_cast<FunctionType*>( pointer->get_base() ) ) {
    968                                                         Alternative newFunc( *funcOp );
    969                                                         referenceToRvalueConversion( newFunc.expr );
    970                                                         makeFunctionAlternatives( newFunc, function, argAlternatives,
    971                                                                 std::back_inserter( candidates ) );
    972                                                 }
    973                                         }
    974                                 } catch ( SemanticError &e ) {
    975                                         errors.append( e );
    976                                 }
    977                         }
    978                 }
    979785
    980786                // Implement SFINAE; resolution errors are only errors if there aren't any non-erroneous resolutions
     
    1007813                candidates.splice( candidates.end(), alternatives );
    1008814
    1009                 // use a new list so that alternatives are not examined by addAnonConversions twice.
    1010                 AltList winners;
    1011                 findMinCost( candidates.begin(), candidates.end(), std::back_inserter( winners ) );
     815                findMinCost( candidates.begin(), candidates.end(), std::back_inserter( alternatives ) );
    1012816
    1013817                // function may return struct or union value, in which case we need to add alternatives for implicit
    1014818                // conversions to each of the anonymous members, must happen after findMinCost since anon conversions
    1015819                // are never the cheapest expression
    1016                 for ( const Alternative & alt : winners ) {
     820                for ( const Alternative & alt : alternatives ) {
    1017821                        addAnonConversions( alt );
    1018822                }
    1019                 alternatives.splice( alternatives.begin(), winners );
    1020823
    1021824                if ( alternatives.empty() && targetType && ! targetType->isVoid() ) {
  • src/ResolvExpr/AlternativeFinder.h

    r0fe4e62 rf5c3b6c  
    3434          public:
    3535                AlternativeFinder( const SymTab::Indexer &indexer, const TypeEnvironment &env );
    36 
    37                 AlternativeFinder( const AlternativeFinder& o )
    38                         : indexer(o.indexer), alternatives(o.alternatives), env(o.env),
    39                           targetType(o.targetType) {}
    40                
    41                 AlternativeFinder( AlternativeFinder&& o )
    42                         : indexer(o.indexer), alternatives(std::move(o.alternatives)), env(o.env),
    43                           targetType(o.targetType) {}
    44                
    45                 AlternativeFinder& operator= ( const AlternativeFinder& o ) {
    46                         if (&o == this) return *this;
    47                        
    48                         // horrific nasty hack to rebind references...
    49                         alternatives.~AltList();
    50                         new(this) AlternativeFinder(o);
    51                         return *this;
    52                 }
    53 
    54                 AlternativeFinder& operator= ( AlternativeFinder&& o ) {
    55                         if (&o == this) return *this;
    56                        
    57                         // horrific nasty hack to rebind references...
    58                         alternatives.~AltList();
    59                         new(this) AlternativeFinder(std::move(o));
    60                         return *this;
    61                 }
    62 
    6336                void find( Expression *expr, bool adjust = false, bool prune = true, bool failFast = true );
    6437                /// Calls find with the adjust flag set; adjustment turns array and function types into equivalent pointer types
     
    12699                /// Adds alternatives for offsetof expressions, given the base type and name of the member
    127100                template< typename StructOrUnionType > void addOffsetof( StructOrUnionType *aggInst, const std::string &name );
    128                 template<typename OutputIterator>
    129                 void makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const std::vector< AlternativeFinder >& args, OutputIterator out );
     101                bool instantiateFunction( std::list< DeclarationWithType* >& formals, const AltList &actuals, bool isVarArgs, OpenVarSet& openVars, TypeEnvironment &resultEnv, AssertionSet &resultNeed, AssertionSet &resultHave, AltList & out );
     102                template< typename OutputIterator >
     103                void makeFunctionAlternatives( const Alternative &func, FunctionType *funcType, const AltList &actualAlt, OutputIterator out );
    130104                template< typename OutputIterator >
    131105                void inferParameters( const AssertionSet &need, AssertionSet &have, const Alternative &newAlt, OpenVarSet &openVars, OutputIterator out );
  • src/ResolvExpr/CurrentObject.cc

    r0fe4e62 rf5c3b6c  
    260260
    261261                AggregateIterator( const std::string & kind, const std::string & name, Type * inst, const MemberList & members ) : kind( kind ), name( name ), inst( inst ), members( members ), curMember( members.begin() ), sub( makeGenericSubstitution( inst ) ) {
    262                         PRINT( std::cerr << "Creating " << kind << "(" << name << ")"; )
    263262                        init();
    264263                }
  • src/ResolvExpr/RenameVars.cc

    r0fe4e62 rf5c3b6c  
    2929        RenameVars global_renamer;
    3030
    31         RenameVars::RenameVars() : level( 0 ), resetCount( 0 ) {
     31        RenameVars::RenameVars() : level( 0 ) {
    3232                mapStack.push_front( std::map< std::string, std::string >() );
    3333        }
     
    3535        void RenameVars::reset() {
    3636                level = 0;
    37                 resetCount++;
    3837        }
    3938
     
    131130                        for ( Type::ForallList::iterator i = type->get_forall().begin(); i != type->get_forall().end(); ++i ) {
    132131                                std::ostringstream output;
    133                                 output << "_" << resetCount << "_" << level << "_" << (*i)->get_name();
     132                                output << "_" << level << "_" << (*i)->get_name();
    134133                                std::string newname( output.str() );
    135134                                mapStack.front()[ (*i)->get_name() ] = newname;
  • src/ResolvExpr/RenameVars.h

    r0fe4e62 rf5c3b6c  
    4848                void typeBefore( Type *type );
    4949                void typeAfter( Type *type );
    50                 int level, resetCount;
     50                int level;
    5151                std::list< std::map< std::string, std::string > > mapStack;
    5252        };
  • src/ResolvExpr/TypeEnvironment.cc

    r0fe4e62 rf5c3b6c  
    201201        }
    202202
    203         void TypeEnvironment::addActual( const TypeEnvironment& actualEnv, OpenVarSet& openVars ) {
    204                 for ( const EqvClass& c : actualEnv ) {
    205                         EqvClass c2 = c;
    206                         c2.allowWidening = false;
    207                         for ( const std::string& var : c2.vars ) {
    208                                 openVars[ var ] = c2.data;
    209                         }
    210                         env.push_back( std::move(c2) );
    211                 }
    212         }
    213 
    214203} // namespace ResolvExpr
    215204
  • src/ResolvExpr/TypeEnvironment.h

    r0fe4e62 rf5c3b6c  
    8686                TypeEnvironment *clone() const { return new TypeEnvironment( *this ); }
    8787
    88                 /// Iteratively adds the environment of a new actual (with allowWidening = false),
    89                 /// and extracts open variables.
    90                 void addActual( const TypeEnvironment& actualEnv, OpenVarSet& openVars );
    91 
    9288                typedef std::list< EqvClass >::iterator iterator;
    9389                iterator begin() { return env.begin(); }
  • src/SymTab/Mangler.cc

    r0fe4e62 rf5c3b6c  
    3232namespace SymTab {
    3333        std::string Mangler::mangleType( Type * ty ) {
    34                 Mangler mangler( false, true, true );
     34                Mangler mangler( false, true );
    3535                maybeAccept( ty, mangler );
    3636                return mangler.get_mangleName();
    3737        }
    3838
    39         std::string Mangler::mangleConcrete( Type* ty ) {
    40                 Mangler mangler( false, false, false );
    41                 maybeAccept( ty, mangler );
    42                 return mangler.get_mangleName();
    43         }
    44 
    45         Mangler::Mangler( bool mangleOverridable, bool typeMode, bool mangleGenericParams )
    46                 : nextVarNum( 0 ), isTopLevel( true ), mangleOverridable( mangleOverridable ), typeMode( typeMode ), mangleGenericParams( mangleGenericParams ) {}
     39        Mangler::Mangler( bool mangleOverridable, bool typeMode )
     40                : nextVarNum( 0 ), isTopLevel( true ), mangleOverridable( mangleOverridable ), typeMode( typeMode ) {}
    4741
    4842        Mangler::Mangler( const Mangler &rhs ) : mangleName() {
     
    172166
    173167                mangleName << ( refType->get_name().length() + prefix.length() ) << prefix << refType->get_name();
    174 
    175                 if ( mangleGenericParams ) {
    176                         std::list< Expression* >& params = refType->get_parameters();
    177                         if ( ! params.empty() ) {
    178                                 mangleName << "_";
    179                                 for ( std::list< Expression* >::const_iterator param = params.begin(); param != params.end(); ++param ) {
    180                                         TypeExpr *paramType = dynamic_cast< TypeExpr* >( *param );
    181                                         assertf(paramType, "Aggregate parameters should be type expressions: %s", toString(*param).c_str());
    182                                         maybeAccept( paramType->get_type(), *this );
    183                                 }
    184                                 mangleName << "_";
     168        }
     169
     170        void Mangler::mangleGenericRef( ReferenceToType * refType, std::string prefix ) {
     171                printQualifiers( refType );
     172
     173                std::ostringstream oldName( mangleName.str() );
     174                mangleName.clear();
     175
     176                mangleName << prefix << refType->get_name();
     177
     178                std::list< Expression* >& params = refType->get_parameters();
     179                if ( ! params.empty() ) {
     180                        mangleName << "_";
     181                        for ( std::list< Expression* >::const_iterator param = params.begin(); param != params.end(); ++param ) {
     182                                TypeExpr *paramType = dynamic_cast< TypeExpr* >( *param );
     183                                assertf(paramType, "Aggregate parameters should be type expressions: %s", toString(*param).c_str());
     184                                maybeAccept( paramType->get_type(), *this );
    185185                        }
     186                        mangleName << "_";
    186187                }
     188
     189                oldName << mangleName.str().length() << mangleName.str();
     190                mangleName.str( oldName.str() );
    187191        }
    188192
    189193        void Mangler::visit( StructInstType * aggregateUseType ) {
    190                 mangleRef( aggregateUseType, "s" );
     194                if ( typeMode ) mangleGenericRef( aggregateUseType, "s" );
     195                else mangleRef( aggregateUseType, "s" );
    191196        }
    192197
    193198        void Mangler::visit( UnionInstType * aggregateUseType ) {
    194                 mangleRef( aggregateUseType, "u" );
     199                if ( typeMode ) mangleGenericRef( aggregateUseType, "u" );
     200                else mangleRef( aggregateUseType, "u" );
    195201        }
    196202
     
    279285                                varNums[ (*i)->name ] = std::pair< int, int >( nextVarNum++, (int)(*i)->get_kind() );
    280286                                for ( std::list< DeclarationWithType* >::iterator assert = (*i)->assertions.begin(); assert != (*i)->assertions.end(); ++assert ) {
    281                                         Mangler sub_mangler( mangleOverridable, typeMode, mangleGenericParams );
     287                                        Mangler sub_mangler( mangleOverridable, typeMode );
    282288                                        sub_mangler.nextVarNum = nextVarNum;
    283289                                        sub_mangler.isTopLevel = false;
  • src/SymTab/Mangler.h

    r0fe4e62 rf5c3b6c  
    3030                /// Mangle syntax tree object; primary interface to clients
    3131                template< typename SynTreeClass >
    32             static std::string mangle( SynTreeClass *decl, bool mangleOverridable = true, bool typeMode = false, bool mangleGenericParams = true );
     32            static std::string mangle( SynTreeClass *decl, bool mangleOverridable = true, bool typeMode = false );
    3333                /// Mangle a type name; secondary interface
    3434                static std::string mangleType( Type* ty );
    35                 /// Mangle ignoring generic type parameters
    36                 static std::string mangleConcrete( Type* ty );
    37 
    3835
    3936                virtual void visit( ObjectDecl *declaration );
     
    6562                bool mangleOverridable;         ///< Specially mangle overridable built-in methods
    6663                bool typeMode;                  ///< Produce a unique mangled name for a type
    67                 bool mangleGenericParams;       ///< Include generic parameters in name mangling if true
    6864
    69                 Mangler( bool mangleOverridable, bool typeMode, bool mangleGenericParams );
     65                Mangler( bool mangleOverridable, bool typeMode );
    7066                Mangler( const Mangler & );
    7167
    7268                void mangleDecl( DeclarationWithType *declaration );
    7369                void mangleRef( ReferenceToType *refType, std::string prefix );
     70                void mangleGenericRef( ReferenceToType *refType, std::string prefix );
    7471
    7572                void printQualifiers( Type *type );
     
    7774
    7875        template< typename SynTreeClass >
    79         std::string Mangler::mangle( SynTreeClass *decl, bool mangleOverridable, bool typeMode, bool mangleGenericParams ) {
    80                 Mangler mangler( mangleOverridable, typeMode, mangleGenericParams );
     76        std::string Mangler::mangle( SynTreeClass *decl, bool mangleOverridable, bool typeMode ) {
     77                Mangler mangler( mangleOverridable, typeMode );
    8178                maybeAccept( decl, mangler );
    8279                return mangler.get_mangleName();
  • src/SymTab/Validate.cc

    r0fe4e62 rf5c3b6c  
    268268                HoistStruct::hoistStruct( translationUnit ); // must happen after EliminateTypedef, so that aggregate typedefs occur in the correct order
    269269                ReturnTypeFixer::fix( translationUnit ); // must happen before autogen
    270                 acceptAll( translationUnit, epc ); // must happen before VerifyCtorDtorAssign, because void return objects should not exist; before LinkReferenceToTypes because it is an indexer and needs correct types for mangling
    271270                acceptAll( translationUnit, lrt ); // must happen before autogen, because sized flag needs to propagate to generated functions
    272271                acceptAll( translationUnit, genericParams );  // check as early as possible - can't happen before LinkReferenceToTypes
     272                acceptAll( translationUnit, epc ); // must happen before VerifyCtorDtorAssign, because void return objects should not exist
    273273                VerifyCtorDtorAssign::verify( translationUnit );  // must happen before autogen, because autogen examines existing ctor/dtors
    274274                ReturnChecker::checkFunctionReturns( translationUnit );
  • src/Tuples/TupleAssignment.cc

    r0fe4e62 rf5c3b6c  
    2020#include <memory>                          // for unique_ptr, allocator_trai...
    2121#include <string>                          // for string
    22 #include <vector>
    2322
    2423#include "CodeGen/OperatorTable.h"
     
    3433#include "ResolvExpr/Resolver.h"           // for resolveCtorInit
    3534#include "ResolvExpr/TypeEnvironment.h"    // for TypeEnvironment
    36 #include "ResolvExpr/typeops.h"            // for combos
    3735#include "SynTree/Declaration.h"           // for ObjectDecl
    3836#include "SynTree/Expression.h"            // for Expression, CastExpr, Name...
     
    5452                // dispatcher for Tuple (multiple and mass) assignment operations
    5553                TupleAssignSpotter( ResolvExpr::AlternativeFinder & );
    56                 void spot( UntypedExpr * expr, std::vector<ResolvExpr::AlternativeFinder> &args );
     54                void spot( UntypedExpr * expr, const std::list<ResolvExpr::AltList> &possibilities );
    5755
    5856          private:
     
    6159                struct Matcher {
    6260                  public:
    63                         Matcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList& lhs, const
    64                                 ResolvExpr::AltList& rhs );
     61                        Matcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts );
    6562                        virtual ~Matcher() {}
    6663                        virtual void match( std::list< Expression * > &out ) = 0;
     
    7572                struct MassAssignMatcher : public Matcher {
    7673                  public:
    77                         MassAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList& lhs,
    78                                 const ResolvExpr::AltList& rhs ) : Matcher(spotter, lhs, rhs) {}
     74                        MassAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts );
    7975                        virtual void match( std::list< Expression * > &out );
    8076                };
     
    8278                struct MultipleAssignMatcher : public Matcher {
    8379                  public:
    84                         MultipleAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList& lhs,
    85                                 const ResolvExpr::AltList& rhs ) : Matcher(spotter, lhs, rhs) {}
     80                        MultipleAssignMatcher( TupleAssignSpotter &spot, const ResolvExpr::AltList & alts );
    8681                        virtual void match( std::list< Expression * > &out );
    8782                };
     
    119114        }
    120115
    121         void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * expr,
    122                                 std::vector<ResolvExpr::AlternativeFinder> &args ) {
     116        void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * expr, const std::list<ResolvExpr::AltList> &possibilities ) {
    123117                TupleAssignSpotter spotter( currentFinder );
    124                 spotter.spot( expr, args );
     118                spotter.spot( expr, possibilities );
    125119        }
    126120
     
    128122                : currentFinder(f) {}
    129123
    130         void TupleAssignSpotter::spot( UntypedExpr * expr,
    131                         std::vector<ResolvExpr::AlternativeFinder> &args ) {
     124        void TupleAssignSpotter::spot( UntypedExpr * expr, const std::list<ResolvExpr::AltList> &possibilities ) {
    132125                if (  NameExpr *op = dynamic_cast< NameExpr * >(expr->get_function()) ) {
    133126                        if ( CodeGen::isCtorDtorAssign( op->get_name() ) ) {
    134                                 fname = op->get_name();
    135 
    136                                 // AlternativeFinder will naturally handle this case, if it's legal
    137                                 if ( args.size() == 0 ) return;
    138 
    139                                 // if an assignment only takes 1 argument, that's odd, but maybe someone wrote
    140                                 // the function, in which case AlternativeFinder will handle it normally
    141                                 if ( args.size() == 1 && CodeGen::isAssignment( fname ) ) return;
    142 
    143                                 // look over all possible left-hand-sides
    144                                 for ( ResolvExpr::Alternative& lhsAlt : args[0] ) {
    145                                         // skip non-tuple LHS
    146                                         if ( ! refToTuple(lhsAlt.expr) ) continue;
    147 
    148                                         // explode is aware of casts - ensure every LHS expression is sent into explode
    149                                         // with a reference cast
    150                                         // xxx - this seems to change the alternatives before the normal
    151                                         //  AlternativeFinder flow; maybe this is desired?
    152                                         if ( ! dynamic_cast<CastExpr*>( lhsAlt.expr ) ) {
    153                                                 lhsAlt.expr = new CastExpr( lhsAlt.expr,
    154                                                                 new ReferenceType( Type::Qualifiers(),
    155                                                                         lhsAlt.expr->get_result()->clone() ) );
     127                               fname = op->get_name();
     128                                PRINT( std::cerr << "TupleAssignment: " << fname << std::endl; )
     129                                for ( std::list<ResolvExpr::AltList>::const_iterator ali = possibilities.begin(); ali != possibilities.end(); ++ali ) {
     130                                        if ( ali->size() == 0 ) continue; // AlternativeFinder will naturally handle this case, if it's legal
     131                                        if ( ali->size() <= 1 && CodeGen::isAssignment( op->get_name() ) ) {
     132                                                // what does it mean if an assignment takes 1 argument? maybe someone defined such a function, in which case AlternativeFinder will naturally handle it
     133                                                continue;
    156134                                        }
    157135
    158                                         // explode the LHS so that each field of a tuple-valued-expr is assigned
    159                                         ResolvExpr::AltList lhs;
    160                                         explode( lhsAlt, currentFinder.get_indexer(), back_inserter(lhs), true );
    161                                         for ( ResolvExpr::Alternative& alt : lhs ) {
    162                                                 // each LHS value must be a reference - some come in with a cast expression,
    163                                                 // if not just cast to reference here
    164                                                 if ( ! dynamic_cast<ReferenceType*>( alt.expr->get_result() ) ) {
    165                                                         alt.expr = new CastExpr( alt.expr,
    166                                                                 new ReferenceType( Type::Qualifiers(),
    167                                                                         alt.expr->get_result()->clone() ) );
     136                                        assert( ! ali->empty() );
     137                                        // grab args 2-N and group into a TupleExpr
     138                                        const ResolvExpr::Alternative & alt1 = ali->front();
     139                                        auto begin = std::next(ali->begin(), 1), end = ali->end();
     140                                        PRINT( std::cerr << "alt1 is " << alt1.expr << std::endl; )
     141                                        if ( refToTuple(alt1.expr) ) {
     142                                                PRINT( std::cerr << "and is reference to tuple" << std::endl; )
     143                                                if ( isMultAssign( begin, end ) ) {
     144                                                        PRINT( std::cerr << "possible multiple assignment" << std::endl; )
     145                                                        matcher.reset( new MultipleAssignMatcher( *this, *ali ) );
     146                                                } else {
     147                                                        // mass assignment
     148                                                        PRINT( std::cerr << "possible mass assignment" << std::endl; )
     149                                                        matcher.reset( new MassAssignMatcher( *this,  *ali ) );
    168150                                                }
    169                                         }
    170 
    171                                         if ( args.size() == 1 ) {
    172                                                 // mass default-initialization/destruction
    173                                                 ResolvExpr::AltList rhs{};
    174                                                 matcher.reset( new MassAssignMatcher( *this, lhs, rhs ) );
    175151                                                match();
    176                                         } else if ( args.size() > 2 ) {
    177                                                 // expand all possible RHS possibilities
    178                                                 // TODO build iterative version of this instead of using combos
    179                                                 std::vector< ResolvExpr::AltList > rhsAlts;
    180                                                 combos( std::next(args.begin(), 1), args.end(),
    181                                                         std::back_inserter( rhsAlts ) );
    182                                                 for ( const ResolvExpr::AltList& rhsAlt : rhsAlts ) {
    183                                                         // multiple assignment
    184                                                         ResolvExpr::AltList rhs;
    185                                                         explode( rhsAlt, currentFinder.get_indexer(),
    186                                                                 std::back_inserter(rhs), true );
    187                                                         matcher.reset( new MultipleAssignMatcher( *this, lhs, rhs ) );
    188                                                         match();
    189                                                 }
    190                                         } else {
    191                                                 for ( const ResolvExpr::Alternative& rhsAlt : args[1] ) {
    192                                                         ResolvExpr::AltList rhs;
    193                                                         if ( isTuple(rhsAlt.expr) ) {
    194                                                                 // multiple assignment
    195                                                                 explode( rhsAlt, currentFinder.get_indexer(), 
    196                                                                         std::back_inserter(rhs), true );
    197                                                                 matcher.reset( new MultipleAssignMatcher( *this, lhs, rhs ) );
    198                                                         } else {
    199                                                                 // mass assignment
    200                                                                 rhs.push_back( rhsAlt );
    201                                                                 matcher.reset( new MassAssignMatcher( *this, lhs, rhs ) );
    202                                                         }
    203                                                         match();
    204                                                 }
    205152                                        }
    206153                                }
     
    222169                ResolvExpr::AltList current;
    223170                // now resolve new assignments
    224                 for ( std::list< Expression * >::iterator i = new_assigns.begin();
    225                                 i != new_assigns.end(); ++i ) {
     171                for ( std::list< Expression * >::iterator i = new_assigns.begin(); i != new_assigns.end(); ++i ) {
    226172                        PRINT(
    227173                                std::cerr << "== resolving tuple assign ==" << std::endl;
     
    229175                        )
    230176
    231                         ResolvExpr::AlternativeFinder finder{ currentFinder.get_indexer(),
    232                                 currentFinder.get_environ() };
     177                        ResolvExpr::AlternativeFinder finder( currentFinder.get_indexer(), currentFinder.get_environ() );
    233178                        try {
    234179                                finder.findWithAdjustment(*i);
     
    251196                // combine assignment environments into combined expression environment
    252197                simpleCombineEnvironments( current.begin(), current.end(), matcher->compositeEnv );
    253                 currentFinder.get_alternatives().push_front( ResolvExpr::Alternative(
    254                         new TupleAssignExpr(solved_assigns, matcher->tmpDecls), matcher->compositeEnv,
    255                         ResolvExpr::sumCost( current ) + matcher->baseCost ) );
    256         }
    257 
    258         TupleAssignSpotter::Matcher::Matcher( TupleAssignSpotter &spotter,
    259                 const ResolvExpr::AltList &lhs, const ResolvExpr::AltList &rhs )
    260         : lhs(lhs), rhs(rhs), spotter(spotter),
    261           baseCost( ResolvExpr::sumCost( lhs ) + ResolvExpr::sumCost( rhs ) ) {
    262                 simpleCombineEnvironments( lhs.begin(), lhs.end(), compositeEnv );
    263                 simpleCombineEnvironments( rhs.begin(), rhs.end(), compositeEnv );
     198                currentFinder.get_alternatives().push_front( ResolvExpr::Alternative(new TupleAssignExpr(solved_assigns, matcher->tmpDecls), matcher->compositeEnv, ResolvExpr::sumCost( current  ) + matcher->baseCost ) );
     199        }
     200
     201        TupleAssignSpotter::Matcher::Matcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList &alts ) : spotter(spotter), baseCost( ResolvExpr::sumCost( alts ) ) {
     202                assert( ! alts.empty() );
     203                // combine argument environments into combined expression environment
     204                simpleCombineEnvironments( alts.begin(), alts.end(), compositeEnv );
     205
     206                ResolvExpr::Alternative lhsAlt = alts.front();
     207                // explode is aware of casts - ensure every LHS expression is sent into explode with a reference cast
     208                if ( ! dynamic_cast< CastExpr * >( lhsAlt.expr ) ) {
     209                        lhsAlt.expr = new CastExpr( lhsAlt.expr, new ReferenceType( Type::Qualifiers(), lhsAlt.expr->get_result()->clone() ) );
     210                }
     211
     212                // explode the lhs so that each field of the tuple-valued-expr is assigned.
     213                explode( lhsAlt, spotter.currentFinder.get_indexer(), back_inserter(lhs), true );
     214
     215                for ( ResolvExpr::Alternative & alt : lhs ) {
     216                        // every LHS value must be a reference - some come in with a cast expression; if one doesn't, just cast it to a reference here.
     217                        if ( ! dynamic_cast< ReferenceType * >( alt.expr->get_result() ) ) {
     218                                alt.expr = new CastExpr( alt.expr, new ReferenceType( Type::Qualifiers(), alt.expr->get_result()->clone() ) );
     219                        }
     220                }
     221        }
     222
     223        TupleAssignSpotter::MassAssignMatcher::MassAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts ) : Matcher( spotter, alts ) {
     224                assert( alts.size() == 1 || alts.size() == 2 );
     225                if ( alts.size() == 2 ) {
     226                        rhs.push_back( alts.back() );
     227                }
     228        }
     229
     230        TupleAssignSpotter::MultipleAssignMatcher::MultipleAssignMatcher( TupleAssignSpotter &spotter, const ResolvExpr::AltList & alts ) : Matcher( spotter, alts ) {
     231                // explode the rhs so that each field of the tuple-valued-expr is assigned.
     232                explode( std::next(alts.begin(), 1), alts.end(), spotter.currentFinder.get_indexer(), back_inserter(rhs), true );
    264233        }
    265234
  • src/Tuples/Tuples.h

    r0fe4e62 rf5c3b6c  
    1717
    1818#include <string>
    19 #include <vector>
    2019
    2120#include "SynTree/Expression.h"
     
    2726namespace Tuples {
    2827        // TupleAssignment.cc
    29         void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * assign,
    30                 std::vector< ResolvExpr::AlternativeFinder >& args );
    31        
     28        void handleTupleAssignment( ResolvExpr::AlternativeFinder & currentFinder, UntypedExpr * assign, const std::list<ResolvExpr::AltList> & possibilities );
     29
    3230        // TupleExpansion.cc
    3331        /// expands z.[a, b.[x, y], c] into [z.a, z.b.x, z.b.y, z.c], inserting UniqueExprs as appropriate
  • src/benchmark/Makefile.am

    r0fe4e62 rf5c3b6c  
    2323STATS    = ${TOOLSDIR}stat.py
    2424repeats  = 30
    25 TIME_FORMAT = "%E"
    26 PRINT_FORMAT = '%20s\t'
    2725
    2826.NOTPARALLEL:
     
    3028noinst_PROGRAMS =
    3129
    32 all : ctxswitch$(EXEEXT) mutex$(EXEEXT) signal$(EXEEXT) waitfor$(EXEEXT) creation$(EXEEXT)
     30bench$(EXEEXT) :
     31        @for ccflags in "-debug" "-nodebug"; do \
     32                echo ${CC} ${AM_CFLAGS} ${CFLAGS} ${ccflags} @CFA_FLAGS@ -lrt bench.c;\
     33                ${CC} ${AM_CFLAGS} ${CFLAGS} $${ccflags} -lrt bench.c;\
     34                ./a.out ; \
     35        done ; \
     36        rm -f ./a.out ;
    3337
    34 %.run : %$(EXEEXT) ${REPEAT}
    35         @rm -f .result.log
    36         @echo "------------------------------------------------------"
    37         @echo $<
    38         @${REPEAT} ${repeats} ./a.out | tee -a .result.log
    39         @${STATS} .result.log
    40         @echo "------------------------------------------------------"
    41         @rm -f a.out .result.log
    42 
    43 %.runquiet :
    44         @+make $(basename $@)
     38csv-data$(EXEEXT):
     39        @${CC} ${AM_CFLAGS} ${CFLAGS} ${ccflags} @CFA_FLAGS@ -nodebug -lrt -quiet -DN=50000000 csv-data.c
    4540        @./a.out
    46         @rm -f a.out
    47 
    48 %.make :
    49         @printf "${PRINT_FORMAT}" $(basename $(subst compile-,,$@))
    50         @+/usr/bin/time -f ${TIME_FORMAT} make $(basename $@) 2>&1
    51 
    52 ${REPEAT} :
    53         @+make -C ${TOOLSDIR} repeat
    54 
    55 ## =========================================================================================================
    56 
    57 jenkins$(EXEEXT):
    58         @echo "{"
    59         @echo -e '\t"githash": "'${githash}'",'
    60         @echo -e '\t"arch": "'   ${arch}   '",'
    61         @echo -e '\t"compile": {'
    62         @+make compile TIME_FORMAT='%e,' PRINT_FORMAT='\t\t\"%s\" :'
    63         @echo -e '\t\t"dummy" : {}'
    64         @echo -e '\t},'
    65         @echo -e '\t"ctxswitch": {'
    66         @echo -en '\t\t"coroutine":'
    67         @+make ctxswitch-cfa_coroutine.runquiet
    68         @echo -en '\t\t,"thread":'
    69         @+make ctxswitch-cfa_thread.runquiet
    70         @echo -e '\t},'
    71         @echo -e '\t"mutex": ['
    72         @echo -en '\t\t'
    73         @+make mutex-cfa1.runquiet
    74         @echo -en '\t\t,'
    75         @+make mutex-cfa2.runquiet
    76         @echo -e '\t],'
    77         @echo -e '\t"scheduling": ['
    78         @echo -en '\t\t'
    79         @+make signal-cfa1.runquiet
    80         @echo -en '\t\t,'
    81         @+make signal-cfa2.runquiet
    82         @echo -en '\t\t,'
    83         @+make waitfor-cfa1.runquiet
    84         @echo -en '\t\t,'
    85         @+make waitfor-cfa2.runquiet
    86         @echo -e '\n\t],'
    87         @echo -e '\t"epoch": ' $(shell date +%s)
    88         @echo "}"
     41        @rm -f ./a.out
    8942
    9043## =========================================================================================================
     
    9750
    9851ctxswitch-cfa_coroutine$(EXEEXT):
    99         @${CC}        ctxswitch/cfa_cor.c   -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     52        ${CC}        ctxswitch/cfa_cor.c   -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    10053
    10154ctxswitch-cfa_thread$(EXEEXT):
    102         @${CC}        ctxswitch/cfa_thrd.c  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     55        ${CC}        ctxswitch/cfa_thrd.c  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    10356
    10457ctxswitch-upp_coroutine$(EXEEXT):
    105         @u++          ctxswitch/upp_cor.cc  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     58        u++          ctxswitch/upp_cor.cc  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    10659
    10760ctxswitch-upp_thread$(EXEEXT):
    108         @u++          ctxswitch/upp_thrd.cc -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     61        u++          ctxswitch/upp_thrd.cc -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    10962
    11063ctxswitch-pthread$(EXEEXT):
    111         @@BACKEND_CC@ ctxswitch/pthreads.c  -DBENCH_N=50000000  -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     64        @BACKEND_CC@ ctxswitch/pthreads.c  -DBENCH_N=50000000  -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     65
     66## =========================================================================================================
     67creation$(EXEEXT) :\
     68        creation-pthread.run            \
     69        creation-cfa_coroutine.run      \
     70        creation-cfa_thread.run         \
     71        creation-upp_coroutine.run      \
     72        creation-upp_thread.run
     73
     74creation-cfa_coroutine$(EXEEXT):
     75        ${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     76
     77creation-cfa_thread$(EXEEXT):
     78        ${CC}        creation/cfa_thrd.c  -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     79
     80creation-upp_coroutine$(EXEEXT):
     81        u++          creation/upp_cor.cc  -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     82
     83creation-upp_thread$(EXEEXT):
     84        u++          creation/upp_thrd.cc -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     85
     86creation-pthread$(EXEEXT):
     87        @BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    11288
    11389## =========================================================================================================
     
    12197
    12298mutex-function$(EXEEXT):
    123         @@BACKEND_CC@ mutex/function.c    -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     99        @BACKEND_CC@ mutex/function.c    -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    124100
    125101mutex-pthread_lock$(EXEEXT):
    126         @@BACKEND_CC@ mutex/pthreads.c    -DBENCH_N=50000000    -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     102        @BACKEND_CC@ mutex/pthreads.c    -DBENCH_N=50000000    -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    127103
    128104mutex-upp$(EXEEXT):
    129         @u++          mutex/upp.cc        -DBENCH_N=50000000    -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     105        u++          mutex/upp.cc        -DBENCH_N=50000000    -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    130106
    131107mutex-cfa1$(EXEEXT):
    132         @${CC}        mutex/cfa1.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     108        ${CC}        mutex/cfa1.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    133109
    134110mutex-cfa2$(EXEEXT):
    135         @${CC}        mutex/cfa2.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     111        ${CC}        mutex/cfa2.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    136112
    137113mutex-cfa4$(EXEEXT):
    138         @${CC}        mutex/cfa4.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     114        ${CC}        mutex/cfa4.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    139115
    140116## =========================================================================================================
     
    146122
    147123signal-upp$(EXEEXT):
    148         @u++          schedint/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     124        u++          schedint/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    149125
    150126signal-cfa1$(EXEEXT):
    151         @${CC}        schedint/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     127        ${CC}        schedint/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    152128
    153129signal-cfa2$(EXEEXT):
    154         @${CC}        schedint/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     130        ${CC}        schedint/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    155131
    156132signal-cfa4$(EXEEXT):
    157         @${CC}        schedint/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     133        ${CC}        schedint/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    158134
    159135## =========================================================================================================
     
    165141
    166142waitfor-upp$(EXEEXT):
    167         @u++          schedext/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     143        u++          schedext/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    168144
    169145waitfor-cfa1$(EXEEXT):
    170         @${CC}        schedext/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     146        ${CC}        schedext/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    171147
    172148waitfor-cfa2$(EXEEXT):
    173         @${CC}        schedext/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     149        ${CC}        schedext/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    174150
    175151waitfor-cfa4$(EXEEXT):
    176         @${CC}        schedext/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    177 
    178 ## =========================================================================================================
    179 creation$(EXEEXT) :\
    180         creation-pthread.run                    \
    181         creation-cfa_coroutine.run              \
    182         creation-cfa_coroutine_eager.run        \
    183         creation-cfa_thread.run                 \
    184         creation-upp_coroutine.run              \
    185         creation-upp_thread.run
    186 
    187 creation-cfa_coroutine$(EXEEXT):
    188         @${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    189 
    190 creation-cfa_coroutine_eager$(EXEEXT):
    191         @${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags} -DEAGER
    192 
    193 creation-cfa_thread$(EXEEXT):
    194         @${CC}        creation/cfa_thrd.c  -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    195 
    196 creation-upp_coroutine$(EXEEXT):
    197         @u++          creation/upp_cor.cc  -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    198 
    199 creation-upp_thread$(EXEEXT):
    200         @u++          creation/upp_thrd.cc -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    201 
    202 creation-pthread$(EXEEXT):
    203         @@BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     152        ${CC}        schedext/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    204153
    205154## =========================================================================================================
    206155
    207 compile$(EXEEXT) :\
    208         compile-array.make      \
    209         compile-attributes.make \
    210         compile-empty.make      \
    211         compile-expression.make \
    212         compile-io.make         \
    213         compile-monitor.make    \
    214         compile-operators.make  \
    215         compile-typeof.make
     156%.run : %$(EXEEXT) ${REPEAT}
     157        @rm -f .result.log
     158        @echo "------------------------------------------------------"
     159        @echo $<
     160        @${REPEAT} ${repeats} ./a.out | tee -a .result.log
     161        @${STATS} .result.log
     162        @echo "------------------------------------------------------"
     163        @rm -f a.out .result.log
    216164
    217 
    218 compile-array$(EXEEXT):
    219         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/array.c
    220 
    221 compile-attributes$(EXEEXT):
    222         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/attributes.c
    223 
    224 compile-empty$(EXEEXT):
    225         @${CC} -nodebug -quiet -fsyntax-only -w compile/empty.c
    226 
    227 compile-expression$(EXEEXT):
    228         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/expression.c
    229 
    230 compile-io$(EXEEXT):
    231         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/io.c
    232 
    233 compile-monitor$(EXEEXT):
    234         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/monitor.c
    235 
    236 compile-operators$(EXEEXT):
    237         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/operators.c
    238 
    239 compile-thread$(EXEEXT):
    240         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/thread.c
    241 
    242 compile-typeof$(EXEEXT):
    243         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/typeof.c
    244 
     165${REPEAT} :
     166        @+make -C ${TOOLSDIR} repeat
  • src/benchmark/Makefile.in

    r0fe4e62 rf5c3b6c  
    124124  esac
    125125am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
    126 am__DIST_COMMON = $(srcdir)/Makefile.in compile
     126am__DIST_COMMON = $(srcdir)/Makefile.in
    127127DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
    128128ACLOCAL = @ACLOCAL@
     
    253253STATS = ${TOOLSDIR}stat.py
    254254repeats = 30
    255 TIME_FORMAT = "%E"
    256 PRINT_FORMAT = '%20s\t'
    257255all: all-am
    258256
     
    446444.NOTPARALLEL:
    447445
    448 all : ctxswitch$(EXEEXT) mutex$(EXEEXT) signal$(EXEEXT) waitfor$(EXEEXT) creation$(EXEEXT)
     446bench$(EXEEXT) :
     447        @for ccflags in "-debug" "-nodebug"; do \
     448                echo ${CC} ${AM_CFLAGS} ${CFLAGS} ${ccflags} @CFA_FLAGS@ -lrt bench.c;\
     449                ${CC} ${AM_CFLAGS} ${CFLAGS} $${ccflags} -lrt bench.c;\
     450                ./a.out ; \
     451        done ; \
     452        rm -f ./a.out ;
     453
     454csv-data$(EXEEXT):
     455        @${CC} ${AM_CFLAGS} ${CFLAGS} ${ccflags} @CFA_FLAGS@ -nodebug -lrt -quiet -DN=50000000 csv-data.c
     456        @./a.out
     457        @rm -f ./a.out
     458
     459ctxswitch$(EXEEXT): \
     460        ctxswitch-pthread.run           \
     461        ctxswitch-cfa_coroutine.run     \
     462        ctxswitch-cfa_thread.run        \
     463        ctxswitch-upp_coroutine.run     \
     464        ctxswitch-upp_thread.run
     465
     466ctxswitch-cfa_coroutine$(EXEEXT):
     467        ${CC}        ctxswitch/cfa_cor.c   -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     468
     469ctxswitch-cfa_thread$(EXEEXT):
     470        ${CC}        ctxswitch/cfa_thrd.c  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     471
     472ctxswitch-upp_coroutine$(EXEEXT):
     473        u++          ctxswitch/upp_cor.cc  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     474
     475ctxswitch-upp_thread$(EXEEXT):
     476        u++          ctxswitch/upp_thrd.cc -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     477
     478ctxswitch-pthread$(EXEEXT):
     479        @BACKEND_CC@ ctxswitch/pthreads.c  -DBENCH_N=50000000  -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     480
     481creation$(EXEEXT) :\
     482        creation-pthread.run            \
     483        creation-cfa_coroutine.run      \
     484        creation-cfa_thread.run         \
     485        creation-upp_coroutine.run      \
     486        creation-upp_thread.run
     487
     488creation-cfa_coroutine$(EXEEXT):
     489        ${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     490
     491creation-cfa_thread$(EXEEXT):
     492        ${CC}        creation/cfa_thrd.c  -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     493
     494creation-upp_coroutine$(EXEEXT):
     495        u++          creation/upp_cor.cc  -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     496
     497creation-upp_thread$(EXEEXT):
     498        u++          creation/upp_thrd.cc -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     499
     500creation-pthread$(EXEEXT):
     501        @BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     502
     503mutex$(EXEEXT) :\
     504        mutex-function.run      \
     505        mutex-pthread_lock.run  \
     506        mutex-upp.run           \
     507        mutex-cfa1.run          \
     508        mutex-cfa2.run          \
     509        mutex-cfa4.run
     510
     511mutex-function$(EXEEXT):
     512        @BACKEND_CC@ mutex/function.c    -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     513
     514mutex-pthread_lock$(EXEEXT):
     515        @BACKEND_CC@ mutex/pthreads.c    -DBENCH_N=50000000    -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     516
     517mutex-upp$(EXEEXT):
     518        u++          mutex/upp.cc        -DBENCH_N=50000000    -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     519
     520mutex-cfa1$(EXEEXT):
     521        ${CC}        mutex/cfa1.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     522
     523mutex-cfa2$(EXEEXT):
     524        ${CC}        mutex/cfa2.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     525
     526mutex-cfa4$(EXEEXT):
     527        ${CC}        mutex/cfa4.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     528
     529signal$(EXEEXT) :\
     530        signal-upp.run          \
     531        signal-cfa1.run         \
     532        signal-cfa2.run         \
     533        signal-cfa4.run
     534
     535signal-upp$(EXEEXT):
     536        u++          schedint/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     537
     538signal-cfa1$(EXEEXT):
     539        ${CC}        schedint/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     540
     541signal-cfa2$(EXEEXT):
     542        ${CC}        schedint/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     543
     544signal-cfa4$(EXEEXT):
     545        ${CC}        schedint/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     546
     547waitfor$(EXEEXT) :\
     548        waitfor-upp.run         \
     549        waitfor-cfa1.run                \
     550        waitfor-cfa2.run                \
     551        waitfor-cfa4.run
     552
     553waitfor-upp$(EXEEXT):
     554        u++          schedext/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     555
     556waitfor-cfa1$(EXEEXT):
     557        ${CC}        schedext/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     558
     559waitfor-cfa2$(EXEEXT):
     560        ${CC}        schedext/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
     561
     562waitfor-cfa4$(EXEEXT):
     563        ${CC}        schedext/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    449564
    450565%.run : %$(EXEEXT) ${REPEAT}
     
    457572        @rm -f a.out .result.log
    458573
    459 %.runquiet :
    460         @+make $(basename $@)
    461         @./a.out
    462         @rm -f a.out
    463 
    464 %.make :
    465         @printf "${PRINT_FORMAT}" $(basename $(subst compile-,,$@))
    466         @+/usr/bin/time -f ${TIME_FORMAT} make $(basename $@) 2>&1
    467 
    468574${REPEAT} :
    469575        @+make -C ${TOOLSDIR} repeat
    470 
    471 jenkins$(EXEEXT):
    472         @echo "{"
    473         @echo -e '\t"githash": "'${githash}'",'
    474         @echo -e '\t"arch": "'   ${arch}   '",'
    475         @echo -e '\t"compile": {'
    476         @+make compile TIME_FORMAT='%e,' PRINT_FORMAT='\t\t\"%s\" :'
    477         @echo -e '\t\t"dummy" : {}'
    478         @echo -e '\t},'
    479         @echo -e '\t"ctxswitch": {'
    480         @echo -en '\t\t"coroutine":'
    481         @+make ctxswitch-cfa_coroutine.runquiet
    482         @echo -en '\t\t,"thread":'
    483         @+make ctxswitch-cfa_thread.runquiet
    484         @echo -e '\t},'
    485         @echo -e '\t"mutex": ['
    486         @echo -en '\t\t'
    487         @+make mutex-cfa1.runquiet
    488         @echo -en '\t\t,'
    489         @+make mutex-cfa2.runquiet
    490         @echo -e '\t],'
    491         @echo -e '\t"scheduling": ['
    492         @echo -en '\t\t'
    493         @+make signal-cfa1.runquiet
    494         @echo -en '\t\t,'
    495         @+make signal-cfa2.runquiet
    496         @echo -en '\t\t,'
    497         @+make waitfor-cfa1.runquiet
    498         @echo -en '\t\t,'
    499         @+make waitfor-cfa2.runquiet
    500         @echo -e '\n\t],'
    501         @echo -e '\t"epoch": ' $(shell date +%s)
    502         @echo "}"
    503 
    504 ctxswitch$(EXEEXT): \
    505         ctxswitch-pthread.run           \
    506         ctxswitch-cfa_coroutine.run     \
    507         ctxswitch-cfa_thread.run        \
    508         ctxswitch-upp_coroutine.run     \
    509         ctxswitch-upp_thread.run
    510 
    511 ctxswitch-cfa_coroutine$(EXEEXT):
    512         @${CC}        ctxswitch/cfa_cor.c   -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    513 
    514 ctxswitch-cfa_thread$(EXEEXT):
    515         @${CC}        ctxswitch/cfa_thrd.c  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    516 
    517 ctxswitch-upp_coroutine$(EXEEXT):
    518         @u++          ctxswitch/upp_cor.cc  -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    519 
    520 ctxswitch-upp_thread$(EXEEXT):
    521         @u++          ctxswitch/upp_thrd.cc -DBENCH_N=50000000  -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    522 
    523 ctxswitch-pthread$(EXEEXT):
    524         @@BACKEND_CC@ ctxswitch/pthreads.c  -DBENCH_N=50000000  -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    525 
    526 mutex$(EXEEXT) :\
    527         mutex-function.run      \
    528         mutex-pthread_lock.run  \
    529         mutex-upp.run           \
    530         mutex-cfa1.run          \
    531         mutex-cfa2.run          \
    532         mutex-cfa4.run
    533 
    534 mutex-function$(EXEEXT):
    535         @@BACKEND_CC@ mutex/function.c    -DBENCH_N=500000000   -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    536 
    537 mutex-pthread_lock$(EXEEXT):
    538         @@BACKEND_CC@ mutex/pthreads.c    -DBENCH_N=50000000    -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    539 
    540 mutex-upp$(EXEEXT):
    541         @u++          mutex/upp.cc        -DBENCH_N=50000000    -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    542 
    543 mutex-cfa1$(EXEEXT):
    544         @${CC}        mutex/cfa1.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    545 
    546 mutex-cfa2$(EXEEXT):
    547         @${CC}        mutex/cfa2.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    548 
    549 mutex-cfa4$(EXEEXT):
    550         @${CC}        mutex/cfa4.c        -DBENCH_N=5000000     -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    551 
    552 signal$(EXEEXT) :\
    553         signal-upp.run          \
    554         signal-cfa1.run         \
    555         signal-cfa2.run         \
    556         signal-cfa4.run
    557 
    558 signal-upp$(EXEEXT):
    559         @u++          schedint/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    560 
    561 signal-cfa1$(EXEEXT):
    562         @${CC}        schedint/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    563 
    564 signal-cfa2$(EXEEXT):
    565         @${CC}        schedint/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    566 
    567 signal-cfa4$(EXEEXT):
    568         @${CC}        schedint/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    569 
    570 waitfor$(EXEEXT) :\
    571         waitfor-upp.run         \
    572         waitfor-cfa1.run                \
    573         waitfor-cfa2.run                \
    574         waitfor-cfa4.run
    575 
    576 waitfor-upp$(EXEEXT):
    577         @u++          schedext/upp.cc     -DBENCH_N=5000000     -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    578 
    579 waitfor-cfa1$(EXEEXT):
    580         @${CC}        schedext/cfa1.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    581 
    582 waitfor-cfa2$(EXEEXT):
    583         @${CC}        schedext/cfa2.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    584 
    585 waitfor-cfa4$(EXEEXT):
    586         @${CC}        schedext/cfa4.c     -DBENCH_N=500000      -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    587 
    588 creation$(EXEEXT) :\
    589         creation-pthread.run                    \
    590         creation-cfa_coroutine.run              \
    591         creation-cfa_coroutine_eager.run        \
    592         creation-cfa_thread.run                 \
    593         creation-upp_coroutine.run              \
    594         creation-upp_thread.run
    595 
    596 creation-cfa_coroutine$(EXEEXT):
    597         @${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    598 
    599 creation-cfa_coroutine_eager$(EXEEXT):
    600         @${CC}        creation/cfa_cor.c   -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags} -DEAGER
    601 
    602 creation-cfa_thread$(EXEEXT):
    603         @${CC}        creation/cfa_thrd.c  -DBENCH_N=10000000   -I. -nodebug -lrt -quiet @CFA_FLAGS@ ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    604 
    605 creation-upp_coroutine$(EXEEXT):
    606         @u++          creation/upp_cor.cc  -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    607 
    608 creation-upp_thread$(EXEEXT):
    609         @u++          creation/upp_thrd.cc -DBENCH_N=50000000   -I. -nodebug -lrt -quiet             ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    610 
    611 creation-pthread$(EXEEXT):
    612         @@BACKEND_CC@ creation/pthreads.c  -DBENCH_N=250000     -I. -lrt -pthread                    ${AM_CFLAGS} ${CFLAGS} ${ccflags}
    613 
    614 compile$(EXEEXT) :\
    615         compile-array.make      \
    616         compile-attributes.make \
    617         compile-empty.make      \
    618         compile-expression.make \
    619         compile-io.make         \
    620         compile-monitor.make    \
    621         compile-operators.make  \
    622         compile-typeof.make
    623 
    624 compile-array$(EXEEXT):
    625         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/array.c
    626 
    627 compile-attributes$(EXEEXT):
    628         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/attributes.c
    629 
    630 compile-empty$(EXEEXT):
    631         @${CC} -nodebug -quiet -fsyntax-only -w compile/empty.c
    632 
    633 compile-expression$(EXEEXT):
    634         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/expression.c
    635 
    636 compile-io$(EXEEXT):
    637         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/io.c
    638 
    639 compile-monitor$(EXEEXT):
    640         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/monitor.c
    641 
    642 compile-operators$(EXEEXT):
    643         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/operators.c
    644 
    645 compile-thread$(EXEEXT):
    646         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/thread.c
    647 
    648 compile-typeof$(EXEEXT):
    649         @${CC} -nodebug -quiet -fsyntax-only -w ../tests/typeof.c
    650576
    651577# Tell versions [3.59,3.63) of GNU make to not export all variables.
  • src/benchmark/creation/cfa_cor.c

    r0fe4e62 rf5c3b6c  
    55
    66coroutine MyCoroutine {};
    7 void ?{} (MyCoroutine & this) {
    8 #ifdef EAGER
    9         prime(this);
    10 #endif
    11 }
     7void ?{} (MyCoroutine & this) { prime(this); }
    128void main(MyCoroutine & this) {}
    139
  • src/benchmark/csv-data.c

    r0fe4e62 rf5c3b6c  
    2828// coroutine context switch
    2929long long int measure_coroutine() {
    30         const unsigned int NoOfTimes = 50000000;
     30        const unsigned int NoOfTimes = N;
    3131        long long int StartTime, EndTime;
    3232
     
    4343// thread context switch
    4444long long int measure_thread() {
    45         const unsigned int NoOfTimes = 50000000;
     45        const unsigned int NoOfTimes = N;
    4646        long long int StartTime, EndTime;
    4747
     
    6161
    6262long long int measure_1_monitor_entry() {
    63         const unsigned int NoOfTimes = 5000000;
     63        const unsigned int NoOfTimes = N;
    6464        long long int StartTime, EndTime;
    6565        mon_t mon;
     
    7979
    8080long long int measure_2_monitor_entry() {
    81         const unsigned int NoOfTimes = 5000000;
     81        const unsigned int NoOfTimes = N;
    8282        long long int StartTime, EndTime;
    8383        mon_t mon1, mon2;
     
    9494//-----------------------------------------------------------------------------
    9595// single internal sched entry
    96 const unsigned int NoOfTimes = 500000;
    97 
    9896mon_t mon1;
    9997
     
    109107
    110108void side1A( mon_t & mutex a, long long int * out ) {
    111         const unsigned int NoOfTimes = 500000;
    112         long long int StartTime, EndTime;
    113 
    114         StartTime = Time();
    115         for( int i = 0;; i++ ) {
    116                 signal(cond1a);
    117                 if( i > NoOfTimes ) break;
    118                 wait(cond1b);
    119         }
    120         EndTime = Time();
    121 
    122         *out = ( EndTime - StartTime ) / NoOfTimes;
     109        long long int StartTime, EndTime;
     110
     111        StartTime = Time();
     112        for( int i = 0;; i++ ) {
     113                signal(&cond1a);
     114                if( i > N ) break;
     115                wait(&cond1b);
     116        }
     117        EndTime = Time();
     118
     119        *out = ( EndTime - StartTime ) / N;
    123120}
    124121
    125122void side1B( mon_t & mutex a ) {
    126123        for( int i = 0;; i++ ) {
    127                 signal(cond1b);
    128                 if( i > N ) break;
    129                 wait(cond1a);
     124                signal(&cond1b);
     125                if( i > N ) break;
     126                wait(&cond1a);
    130127        }
    131128}
     
    144141
    145142//-----------------------------------------------------------------------------
    146 // multi internal sched
     143// multi internal sched entry
    147144mon_t mon2;
    148145
     
    158155
    159156void side2A( mon_t & mutex a, mon_t & mutex b, long long int * out ) {
    160         const unsigned int NoOfTimes = 500000;
    161         long long int StartTime, EndTime;
    162 
    163         StartTime = Time();
    164         for( int i = 0;; i++ ) {
    165                 signal(cond2a);
    166                 if( i > NoOfTimes ) break;
    167                 wait(cond2b);
    168         }
    169         EndTime = Time();
    170 
    171         *out = ( EndTime - StartTime ) / NoOfTimes;
     157        long long int StartTime, EndTime;
     158
     159        StartTime = Time();
     160        for( int i = 0;; i++ ) {
     161                signal(&cond2a);
     162                if( i > N ) break;
     163                wait(&cond2b);
     164        }
     165        EndTime = Time();
     166
     167        *out = ( EndTime - StartTime ) / N;
    172168}
    173169
    174170void side2B( mon_t & mutex a, mon_t & mutex b ) {
    175171        for( int i = 0;; i++ ) {
    176                 signal(cond2b);
    177                 if( i > N ) break;
    178                 wait(cond2a);
     172                signal(&cond2b);
     173                if( i > N ) break;
     174                wait(&cond2a);
    179175        }
    180176}
     
    193189
    194190//-----------------------------------------------------------------------------
    195 // single external sched
    196 
    197 volatile int go = 0;
    198 
    199 void __attribute__((noinline)) call( mon_t & mutex m1 ) {}
    200 
    201 long long int  __attribute__((noinline)) wait( mon_t & mutex m1 ) {
    202         go = 1;
    203         const unsigned int NoOfTimes = 5000000;
    204         long long int StartTime, EndTime;
    205 
    206         StartTime = Time();
    207         for (size_t i = 0; i < NoOfTimes; i++) {
    208                 waitfor(call, m1);
    209         }
    210 
    211         EndTime = Time();
    212         go = 0;
    213         return ( EndTime - StartTime ) / NoOfTimes;
    214 }
    215 
    216 thread thrd3 {};
    217 void ^?{}( thrd3 & mutex this ) {}
    218 void main( thrd3 & this ) {
    219         while(go == 0) { yield(); }
    220         while(go == 1) { call(mon1); }
    221 
    222 }
    223 
    224 long long int measure_1_sched_ext() {
    225         go = 0;
    226         thrd3 t;
    227         return wait(mon1);
    228 }
    229 
    230 //-----------------------------------------------------------------------------
    231 // multi external sched
    232 
    233 void __attribute__((noinline)) call( mon_t & mutex m1, mon_t & mutex m2 ) {}
    234 
    235 long long int  __attribute__((noinline)) wait( mon_t & mutex m1, mon_t & mutex m2 ) {
    236         go = 1;
    237         const unsigned int NoOfTimes = 5000000;
    238         long long int StartTime, EndTime;
    239 
    240         StartTime = Time();
    241         for (size_t i = 0; i < NoOfTimes; i++) {
    242                 waitfor(call, m1, m2);
    243         }
    244 
    245         EndTime = Time();
    246         go = 0;
    247         return ( EndTime - StartTime ) / NoOfTimes;
    248 }
    249 
    250 thread thrd4 {};
    251 void ^?{}( thrd4 & mutex this ) {}
    252 void main( thrd4 & this ) {
    253         while(go == 0) { yield(); }
    254         while(go == 1) { call(mon1, mon2); }
    255 
    256 }
    257 
    258 long long int measure_2_sched_ext() {
    259         go = 0;
    260         thrd3 t;
    261         return wait(mon1, mon2);
    262 }
    263 
    264 //-----------------------------------------------------------------------------
    265191// main loop
    266192int main()
    267193{
    268         sout | "\tepoch:" | time(NULL) | ',' | endl;
    269         sout | "\tctxswitch: {" | endl;
    270         sout | "\t\tcoroutine: "| measure_coroutine() | ',' | endl;
    271         sout | "\t\tthread:" | measure_thread() | ',' | endl;
    272         sout | "\t}," | endl;
    273         sout | "\tmutex: ["     | measure_1_monitor_entry()     | ',' | measure_2_monitor_entry()       | "]," | endl;
    274         sout | "\tscheduling: ["| measure_1_sched_int()         | ',' | measure_2_sched_int()   | ','  |
    275                                           measure_1_sched_ext()         | ',' | measure_2_sched_ext()   | "]," | endl;
    276 }
     194        sout | time(NULL) | ',';
     195        sout | measure_coroutine() | ',';
     196        sout | measure_thread() | ',';
     197        sout | measure_1_monitor_entry() | ',';
     198        sout | measure_2_monitor_entry() | ',';
     199        sout | measure_1_sched_int() | ',';
     200        sout | measure_2_sched_int() | endl;
     201}
  • src/benchmark/schedint/cfa1.c

    r0fe4e62 rf5c3b6c  
    1515
    1616void __attribute__((noinline)) call( M & mutex a1 ) {
    17         signal(c);
     17        signal(&c);
    1818}
    1919
     
    2222        BENCH(
    2323                for (size_t i = 0; i < n; i++) {
    24                         wait(c);
     24                        wait(&c);
    2525                },
    2626                result
  • src/benchmark/schedint/cfa2.c

    r0fe4e62 rf5c3b6c  
    1515
    1616void __attribute__((noinline)) call( M & mutex a1, M & mutex a2 ) {
    17         signal(c);
     17        signal(&c);
    1818}
    1919
     
    2222        BENCH(
    2323                for (size_t i = 0; i < n; i++) {
    24                         wait(c);
     24                        wait(&c);
    2525                },
    2626                result
  • src/benchmark/schedint/cfa4.c

    r0fe4e62 rf5c3b6c  
    1515
    1616void __attribute__((noinline)) call( M & mutex a1, M & mutex a2, M & mutex a3, M & mutex a4 ) {
    17         signal(c);
     17        signal(&c);
    1818}
    1919
     
    2222        BENCH(
    2323                for (size_t i = 0; i < n; i++) {
    24                         wait(c);
     24                        wait(&c);
    2525                },
    2626                result
  • src/libcfa/Makefile.am

    r0fe4e62 rf5c3b6c  
    9595
    9696cfa_includedir = $(CFA_INCDIR)
    97 nobase_cfa_include_HEADERS =    \
    98         ${headers}                      \
    99         ${stdhdr}                       \
    100         math                            \
    101         gmp                             \
    102         bits/defs.h             \
    103         bits/locks.h            \
    104         concurrency/invoke.h    \
    105         libhdr.h                        \
    106         libhdr/libalign.h       \
    107         libhdr/libdebug.h       \
    108         libhdr/libtools.h
     97nobase_cfa_include_HEADERS = ${headers} ${stdhdr} math gmp concurrency/invoke.h
    10998
    11099CLEANFILES = libcfa-prelude.c
  • src/libcfa/Makefile.in

    r0fe4e62 rf5c3b6c  
    264264        containers/result containers/vector concurrency/coroutine \
    265265        concurrency/thread concurrency/kernel concurrency/monitor \
    266         ${shell echo stdhdr/*} math gmp bits/defs.h bits/locks.h \
    267         concurrency/invoke.h libhdr.h libhdr/libalign.h \
    268         libhdr/libdebug.h libhdr/libtools.h
     266        ${shell echo stdhdr/*} math gmp concurrency/invoke.h
    269267HEADERS = $(nobase_cfa_include_HEADERS)
    270268am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
     
    432430stdhdr = ${shell echo stdhdr/*}
    433431cfa_includedir = $(CFA_INCDIR)
    434 nobase_cfa_include_HEADERS = \
    435         ${headers}                      \
    436         ${stdhdr}                       \
    437         math                            \
    438         gmp                             \
    439         bits/defs.h             \
    440         bits/locks.h            \
    441         concurrency/invoke.h    \
    442         libhdr.h                        \
    443         libhdr/libalign.h       \
    444         libhdr/libdebug.h       \
    445         libhdr/libtools.h
    446 
     432nobase_cfa_include_HEADERS = ${headers} ${stdhdr} math gmp concurrency/invoke.h
    447433CLEANFILES = libcfa-prelude.c
    448434all: all-am
  • src/libcfa/concurrency/alarm.c

    r0fe4e62 rf5c3b6c  
    186186
    187187        disable_interrupts();
    188         lock( event_kernel->lock DEBUG_CTX2 );
     188        lock( &event_kernel->lock DEBUG_CTX2 );
    189189        {
    190190                verify( validate( alarms ) );
     
    196196                }
    197197        }
    198         unlock( event_kernel->lock );
     198        unlock( &event_kernel->lock );
    199199        this->set = true;
    200200        enable_interrupts( DEBUG_CTX );
     
    203203void unregister_self( alarm_node_t * this ) {
    204204        disable_interrupts();
    205         lock( event_kernel->lock DEBUG_CTX2 );
     205        lock( &event_kernel->lock DEBUG_CTX2 );
    206206        {
    207207                verify( validate( &event_kernel->alarms ) );
    208208                remove( &event_kernel->alarms, this );
    209209        }
    210         unlock( event_kernel->lock );
     210        unlock( &event_kernel->lock );
    211211        enable_interrupts( DEBUG_CTX );
    212212        this->set = false;
  • src/libcfa/concurrency/coroutine.c

    r0fe4e62 rf5c3b6c  
    156156                this->limit = (char *)libCeiling( (unsigned long)this->storage, 16 ); // minimum alignment
    157157        } // if
    158         assertf( this->size >= MinStackSize, "Stack size %zd provides less than minimum of %d bytes for a stack.", this->size, MinStackSize );
     158        assertf( this->size >= MinStackSize, "Stack size %d provides less than minimum of %d bytes for a stack.", this->size, MinStackSize );
    159159
    160160        this->base = (char *)this->limit + this->size;
  • src/libcfa/concurrency/invoke.h

    r0fe4e62 rf5c3b6c  
    1414//
    1515
    16 #include "bits/defs.h"
    17 #include "bits/locks.h"
     16#include <stdbool.h>
     17#include <stdint.h>
    1818
    1919#ifdef __CFORALL__
     
    2525#define _INVOKE_H_
    2626
    27         typedef void (*fptr_t)();
    28         typedef int_fast16_t __lock_size_t;
     27      #define unlikely(x)    __builtin_expect(!!(x), 0)
     28      #define thread_local _Thread_local
    2929
    30         struct __thread_queue_t {
    31                 struct thread_desc * head;
    32                 struct thread_desc ** tail;
    33         };
     30      typedef void (*fptr_t)();
    3431
    35         struct __condition_stack_t {
    36                 struct __condition_criterion_t * top;
    37         };
     32      struct spinlock {
     33            volatile int lock;
     34            #ifdef __CFA_DEBUG__
     35                  const char * prev_name;
     36                  void* prev_thrd;
     37            #endif
     38      };
    3839
    39         #ifdef __CFORALL__
    40         extern "Cforall" {
    41                 void ?{}( struct __thread_queue_t & );
    42                 void append( struct __thread_queue_t &, struct thread_desc * );
    43                 struct thread_desc * pop_head( struct __thread_queue_t & );
    44                 struct thread_desc * remove( struct __thread_queue_t &, struct thread_desc ** );
     40      struct __thread_queue_t {
     41            struct thread_desc * head;
     42            struct thread_desc ** tail;
     43      };
    4544
    46                 void ?{}( struct __condition_stack_t & );
    47                 void push( struct __condition_stack_t &, struct __condition_criterion_t * );
    48                 struct __condition_criterion_t * pop( struct __condition_stack_t & );
    49         }
    50         #endif
     45      struct __condition_stack_t {
     46            struct __condition_criterion_t * top;
     47      };
    5148
    52         struct coStack_t {
    53                 // size of stack
    54                 size_t size;
     49      #ifdef __CFORALL__
     50      extern "Cforall" {
     51            void ?{}( struct __thread_queue_t & );
     52            void append( struct __thread_queue_t *, struct thread_desc * );
     53            struct thread_desc * pop_head( struct __thread_queue_t * );
     54            struct thread_desc * remove( struct __thread_queue_t *, struct thread_desc ** );
    5555
    56                 // pointer to stack
    57                 void *storage;
     56            void ?{}( struct __condition_stack_t & );
     57            void push( struct __condition_stack_t *, struct __condition_criterion_t * );
     58            struct __condition_criterion_t * pop( struct __condition_stack_t * );
    5859
    59                 // stack grows towards stack limit
    60                 void *limit;
     60            void ?{}(spinlock & this);
     61            void ^?{}(spinlock & this);
     62      }
     63      #endif
    6164
    62                 // base of stack
    63                 void *base;
     65      struct coStack_t {
     66            unsigned int size;                        // size of stack
     67            void *storage;                            // pointer to stack
     68            void *limit;                              // stack grows towards stack limit
     69            void *base;                               // base of stack
     70            void *context;                            // address of cfa_context_t
     71            void *top;                                // address of top of storage
     72            bool userStack;                           // whether or not the user allocated the stack
     73      };
    6474
    65                 // address of cfa_context_t
    66                 void *context;
     75      enum coroutine_state { Halted, Start, Inactive, Active, Primed };
    6776
    68                 // address of top of storage
    69                 void *top;
     77      struct coroutine_desc {
     78            struct coStack_t stack;                   // stack information of the coroutine
     79            const char *name;                         // textual name for coroutine/task, initialized by uC++ generated code
     80            int errno_;                               // copy of global UNIX variable errno
     81            enum coroutine_state state;               // current execution status for coroutine
     82            struct coroutine_desc * starter;          // first coroutine to resume this one
     83            struct coroutine_desc * last;             // last coroutine to resume this one
     84      };
    7085
    71                 // whether or not the user allocated the stack
    72                 bool userStack;
    73         };
     86      struct __waitfor_mask_t {
     87            short * accepted;                         // the index of the accepted function, -1 if none
     88            struct __acceptable_t * clauses;          // list of acceptable functions, null if any
     89            short size;                               // number of acceptable functions
     90      };
    7491
    75         enum coroutine_state { Halted, Start, Inactive, Active, Primed };
     92      struct monitor_desc {
     93            struct spinlock lock;                     // spinlock to protect internal data
     94            struct thread_desc * owner;               // current owner of the monitor
     95            struct __thread_queue_t entry_queue;      // queue of threads that are blocked waiting for the monitor
     96            struct __condition_stack_t signal_stack;  // stack of conditions to run next once we exit the monitor
     97            unsigned int recursion;                   // monitor routines can be called recursively, we need to keep track of that
     98            struct __waitfor_mask_t mask;             // mask used to know if some thread is waiting for something while holding the monitor
     99            struct __condition_node_t * dtor_node;    // node used to signal the dtor in a waitfor dtor
     100      };
    76101
    77         struct coroutine_desc {
    78                 // stack information of the coroutine
    79                 struct coStack_t stack;
     102      struct __monitor_group_t {
     103            struct monitor_desc ** list;              // currently held monitors
     104            short                  size;              // number of currently held monitors
     105            fptr_t                 func;              // last function that acquired monitors
     106      };
    80107
    81                 // textual name for coroutine/task, initialized by uC++ generated code
    82                 const char *name;
     108      struct thread_desc {
     109            // Core threading fields
     110            struct coroutine_desc  self_cor;          // coroutine body used to store context
     111            struct monitor_desc    self_mon;          // monitor body used for mutual exclusion
     112            struct monitor_desc *  self_mon_p;        // pointer to monitor with sufficient lifetime for current monitors
     113            struct __monitor_group_t monitors;        // monitors currently held by this thread
    83114
    84                 // copy of global UNIX variable errno
    85                 int errno_;
     115            // Link lists fields
     116            struct thread_desc * next;                // intrusive link field for threads
    86117
    87                 // current execution status for coroutine
    88                 enum coroutine_state state;
    89118
    90                 // first coroutine to resume this one
    91                 struct coroutine_desc * starter;
    92 
    93                 // last coroutine to resume this one
    94                 struct coroutine_desc * last;
    95         };
    96 
    97         struct __waitfor_mask_t {
    98                 // the index of the accepted function, -1 if none
    99                 short * accepted;
    100 
    101                 // list of acceptable functions, null if any
    102                 struct __acceptable_t * clauses;
    103 
    104                 // number of acceptable functions
    105                 __lock_size_t size;
    106         };
    107 
    108         struct monitor_desc {
    109                 // spinlock to protect internal data
    110                 struct __spinlock_t lock;
    111 
    112                 // current owner of the monitor
    113                 struct thread_desc * owner;
    114 
    115                 // queue of threads that are blocked waiting for the monitor
    116                 struct __thread_queue_t entry_queue;
    117 
    118                 // stack of conditions to run next once we exit the monitor
    119                 struct __condition_stack_t signal_stack;
    120 
    121                 // monitor routines can be called recursively, we need to keep track of that
    122                 unsigned int recursion;
    123 
    124                 // mask used to know if some thread is waiting for something while holding the monitor
    125                 struct __waitfor_mask_t mask;
    126 
    127                 // node used to signal the dtor in a waitfor dtor
    128                 struct __condition_node_t * dtor_node;
    129         };
    130 
    131         struct __monitor_group_t {
    132                 // currently held monitors
    133                 struct monitor_desc ** list;
    134 
    135                 // number of currently held monitors
    136                 __lock_size_t size;
    137 
    138                 // last function that acquired monitors
    139                 fptr_t func;
    140         };
    141 
    142         struct thread_desc {
    143                 // Core threading fields
    144                 // coroutine body used to store context
    145                 struct coroutine_desc  self_cor;
    146 
    147                 // monitor body used for mutual exclusion
    148                 struct monitor_desc    self_mon;
    149 
    150                 // pointer to monitor with sufficient lifetime for current monitors
    151                 struct monitor_desc *  self_mon_p;
    152 
    153                 // monitors currently held by this thread
    154                 struct __monitor_group_t monitors;
    155 
    156                 // Link lists fields
    157                 // intrusive link field for threads
    158                 struct thread_desc * next;
    159119     };
    160120
    161121     #ifdef __CFORALL__
    162122     extern "Cforall" {
    163                 static inline monitor_desc * ?[?]( const __monitor_group_t & this, ptrdiff_t index ) {
    164                         return this.list[index];
    165                 }
     123            static inline monitor_desc * ?[?]( const __monitor_group_t & this, ptrdiff_t index ) {
     124                  return this.list[index];
     125            }
    166126
    167                 static inline bool ?==?( const __monitor_group_t & lhs, const __monitor_group_t & rhs ) {
    168                         if( (lhs.list != 0) != (rhs.list != 0) ) return false;
    169                         if( lhs.size != rhs.size ) return false;
    170                         if( lhs.func != rhs.func ) return false;
     127            static inline bool ?==?( const __monitor_group_t & lhs, const __monitor_group_t & rhs ) {
     128                  if( (lhs.list != 0) != (rhs.list != 0) ) return false;
     129                  if( lhs.size != rhs.size ) return false;
     130                  if( lhs.func != rhs.func ) return false;
    171131
    172                         // Check that all the monitors match
    173                         for( int i = 0; i < lhs.size; i++ ) {
    174                                 // If not a match, check next function
    175                                 if( lhs[i] != rhs[i] ) return false;
    176                         }
     132                  // Check that all the monitors match
     133                  for( int i = 0; i < lhs.size; i++ ) {
     134                        // If not a match, check next function
     135                        if( lhs[i] != rhs[i] ) return false;
     136                  }
    177137
    178                         return true;
    179                 }
    180         }
    181         #endif
     138                  return true;
     139            }
     140      }
     141      #endif
    182142
    183143#endif //_INVOKE_H_
     
    186146#define _INVOKE_PRIVATE_H_
    187147
    188         struct machine_context_t {
    189                 void *SP;
    190                 void *FP;
    191                 void *PC;
    192         };
     148      struct machine_context_t {
     149            void *SP;
     150            void *FP;
     151            void *PC;
     152      };
    193153
    194         // assembler routines that perform the context switch
    195         extern void CtxInvokeStub( void );
    196         void CtxSwitch( void * from, void * to ) asm ("CtxSwitch");
      154      // assembler routines that perform the context switch
     155      extern void CtxInvokeStub( void );
     156      void CtxSwitch( void * from, void * to ) asm ("CtxSwitch");
    197157
    198         #if   defined( __x86_64__ )
    199         #define CtxGet( ctx ) __asm__ ( \
    200                         "movq %%rsp,%0\n"   \
    201                         "movq %%rbp,%1\n"   \
    202                 : "=rm" (ctx.SP), "=rm" (ctx.FP) )
    203         #elif defined( __i386__ )
    204         #define CtxGet( ctx ) __asm__ ( \
    205                         "movl %%esp,%0\n"   \
    206                         "movl %%ebp,%1\n"   \
    207                 : "=rm" (ctx.SP), "=rm" (ctx.FP) )
    208         #endif
     158      #if   defined( __x86_64__ )
     159      #define CtxGet( ctx ) __asm__ ( \
     160                  "movq %%rsp,%0\n"   \
     161                  "movq %%rbp,%1\n"   \
     162            : "=rm" (ctx.SP), "=rm" (ctx.FP) )
     163      #elif defined( __i386__ )
     164      #define CtxGet( ctx ) __asm__ ( \
     165                  "movl %%esp,%0\n"   \
     166                  "movl %%ebp,%1\n"   \
     167            : "=rm" (ctx.SP), "=rm" (ctx.FP) )
     168      #endif
    209169
    210170#endif //_INVOKE_PRIVATE_H_
  • src/libcfa/concurrency/kernel

    r0fe4e62 rf5c3b6c  
    2626//-----------------------------------------------------------------------------
    2727// Locks
    28 // // Lock the spinlock, spin if already acquired
    29 // void lock      ( spinlock * DEBUG_CTX_PARAM2 );
    30 
    31 // // Lock the spinlock, yield repeatedly if already acquired
    32 // void lock_yield( spinlock * DEBUG_CTX_PARAM2 );
    33 
    34 // // Lock the spinlock, return false if already acquired
    35 // bool try_lock  ( spinlock * DEBUG_CTX_PARAM2 );
    36 
    37 // // Unlock the spinlock
    38 // void unlock    ( spinlock * );
     28void lock      ( spinlock * DEBUG_CTX_PARAM2 );       // Lock the spinlock, spin if already acquired
     29void lock_yield( spinlock * DEBUG_CTX_PARAM2 );       // Lock the spinlock, yield repeatedly if already acquired
     30bool try_lock  ( spinlock * DEBUG_CTX_PARAM2 );       // Lock the spinlock, return false if already acquired
     31void unlock    ( spinlock * );                        // Unlock the spinlock
    3932
    4033struct semaphore {
    41         __spinlock_t lock;
     34        spinlock lock;
    4235        int count;
    4336        __thread_queue_t waiting;
     
    4639void  ?{}(semaphore & this, int count = 1);
    4740void ^?{}(semaphore & this);
    48 void   P (semaphore & this);
    49 void   V (semaphore & this);
     41void P(semaphore * this);
     42void V(semaphore * this);
    5043
    5144
     
    5346// Cluster
    5447struct cluster {
    55         // Ready queue locks
    56         __spinlock_t ready_queue_lock;
    57 
    58         // Ready queue for threads
    59         __thread_queue_t ready_queue;
    60 
    61         // Preemption rate on this cluster
    62         unsigned long long int preemption;
     48        spinlock ready_queue_lock;                      // Ready queue locks
     49        __thread_queue_t ready_queue;                   // Ready queue for threads
     50        unsigned long long int preemption;              // Preemption rate on this cluster
    6351};
    6452
    65 void ?{} (cluster & this);
     53void ?{}(cluster & this);
    6654void ^?{}(cluster & this);
    6755
     
    7462        FinishOpCode action_code;
    7563        thread_desc * thrd;
    76         __spinlock_t * lock;
    77         __spinlock_t ** locks;
     64        spinlock * lock;
     65        spinlock ** locks;
    7866        unsigned short lock_count;
    7967        thread_desc ** thrds;
     
    9179struct processor {
    9280        // Main state
    93         // Coroutine ctx which keeps the state of the processor
    94         struct processorCtx_t * runner;
    95 
    96         // Cluster from which to get threads
    97         cluster * cltr;
    98 
    99         // Handle to pthreads
    100         pthread_t kernel_thread;
      81        struct processorCtx_t * runner;                 // Coroutine ctx which keeps the state of the processor
     82        cluster * cltr;                                 // Cluster from which to get threads
     83        pthread_t kernel_thread;                        // Handle to pthreads
    10184
    10285        // Termination
    103         // Set to true to notify the processor should terminate
    104         volatile bool do_terminate;
    105 
    106         // Termination synchronisation
    107         semaphore terminated;
     86        volatile bool do_terminate;                     // Set to true to notify the processor should terminate
     87        semaphore terminated;                           // Termination synchronisation
    10888
    10989        // RunThread data
    110         // Action to do after a thread is ran
    111         struct FinishAction finish;
     90        struct FinishAction finish;                     // Action to do after a thread is ran
    11291
    11392        // Preemption data
    114         // Node which is added in the discrete event simulation
    115         struct alarm_node_t * preemption_alarm;
    116 
    117         // If true, a preemption was triggered in an unsafe region, the processor must preempt as soon as possible
    118         bool pending_preemption;
     93        struct alarm_node_t * preemption_alarm;         // Node which is added in the discrete event simulation
     94        bool pending_preemption;                        // If true, a preemption was triggered in an unsafe region, the processor must preempt as soon as possible
    11995
    12096#ifdef __CFA_DEBUG__
    121         // Last function to enable preemption on this processor
    122         char * last_enable;
     97        char * last_enable;                             // Last function to enable preemption on this processor
    12398#endif
    12499};
    125100
    126 void  ?{}(processor & this);
    127 void  ?{}(processor & this, cluster * cltr);
     101void ?{}(processor & this);
     102void ?{}(processor & this, cluster * cltr);
    128103void ^?{}(processor & this);
    129104
  • src/libcfa/concurrency/kernel.c

    r0fe4e62 rf5c3b6c  
    158158                LIB_DEBUG_PRINT_SAFE("Kernel : core %p signaling termination\n", &this);
    159159                this.do_terminate = true;
    160                 P( this.terminated );
     160                P( &this.terminated );
    161161                pthread_join( this.kernel_thread, NULL );
    162162        }
     
    216216        }
    217217
    218         V( this->terminated );
     218        V( &this->terminated );
    219219
    220220        LIB_DEBUG_PRINT_SAFE("Kernel : core %p terminated\n", this);
     
    242242void finishRunning(processor * this) {
    243243        if( this->finish.action_code == Release ) {
    244                 unlock( *this->finish.lock );
     244                unlock( this->finish.lock );
    245245        }
    246246        else if( this->finish.action_code == Schedule ) {
     
    248248        }
    249249        else if( this->finish.action_code == Release_Schedule ) {
    250                 unlock( *this->finish.lock );
     250                unlock( this->finish.lock );
    251251                ScheduleThread( this->finish.thrd );
    252252        }
    253253        else if( this->finish.action_code == Release_Multi ) {
    254254                for(int i = 0; i < this->finish.lock_count; i++) {
    255                         unlock( *this->finish.locks[i] );
     255                        unlock( this->finish.locks[i] );
    256256                }
    257257        }
    258258        else if( this->finish.action_code == Release_Multi_Schedule ) {
    259259                for(int i = 0; i < this->finish.lock_count; i++) {
    260                         unlock( *this->finish.locks[i] );
     260                        unlock( this->finish.locks[i] );
    261261                }
    262262                for(int i = 0; i < this->finish.thrd_count; i++) {
     
    334334        verifyf( thrd->next == NULL, "Expected null got %p", thrd->next );
    335335
    336         lock(   this_processor->cltr->ready_queue_lock DEBUG_CTX2 );
    337         append( this_processor->cltr->ready_queue, thrd );
    338         unlock( this_processor->cltr->ready_queue_lock );
     336        lock(   &this_processor->cltr->ready_queue_lock DEBUG_CTX2 );
     337        append( &this_processor->cltr->ready_queue, thrd );
     338        unlock( &this_processor->cltr->ready_queue_lock );
    339339
    340340        verify( disable_preempt_count > 0 );
     
    343343thread_desc * nextThread(cluster * this) {
    344344        verify( disable_preempt_count > 0 );
    345         lock( this->ready_queue_lock DEBUG_CTX2 );
    346         thread_desc * head = pop_head( this->ready_queue );
    347         unlock( this->ready_queue_lock );
     345        lock( &this->ready_queue_lock DEBUG_CTX2 );
     346        thread_desc * head = pop_head( &this->ready_queue );
     347        unlock( &this->ready_queue_lock );
    348348        verify( disable_preempt_count > 0 );
    349349        return head;
     
    358358}
    359359
    360 void BlockInternal( __spinlock_t * lock ) {
     360void BlockInternal( spinlock * lock ) {
    361361        disable_interrupts();
    362362        this_processor->finish.action_code = Release;
     
    384384}
    385385
    386 void BlockInternal( __spinlock_t * lock, thread_desc * thrd ) {
     386void BlockInternal( spinlock * lock, thread_desc * thrd ) {
    387387        assert(thrd);
    388388        disable_interrupts();
     
    398398}
    399399
    400 void BlockInternal(__spinlock_t * locks [], unsigned short count) {
     400void BlockInternal(spinlock ** locks, unsigned short count) {
    401401        disable_interrupts();
    402402        this_processor->finish.action_code = Release_Multi;
     
    411411}
    412412
    413 void BlockInternal(__spinlock_t * locks [], unsigned short lock_count, thread_desc * thrds [], unsigned short thrd_count) {
     413void BlockInternal(spinlock ** locks, unsigned short lock_count, thread_desc ** thrds, unsigned short thrd_count) {
    414414        disable_interrupts();
    415415        this_processor->finish.action_code = Release_Multi_Schedule;
     
    426426}
    427427
    428 void LeaveThread(__spinlock_t * lock, thread_desc * thrd) {
     428void LeaveThread(spinlock * lock, thread_desc * thrd) {
    429429        verify( disable_preempt_count > 0 );
    430430        this_processor->finish.action_code = thrd ? Release_Schedule : Release;
     
    516516}
    517517
    518 static __spinlock_t kernel_abort_lock;
    519 static __spinlock_t kernel_debug_lock;
     518static spinlock kernel_abort_lock;
     519static spinlock kernel_debug_lock;
    520520static bool kernel_abort_called = false;
    521521
     
    523523        // abort cannot be recursively entered by the same or different processors because all signal handlers return when
    524524        // the globalAbort flag is true.
    525         lock( kernel_abort_lock DEBUG_CTX2 );
     525        lock( &kernel_abort_lock DEBUG_CTX2 );
    526526
    527527        // first task to abort ?
    528528        if ( !kernel_abort_called ) {                   // first task to abort
    529529                kernel_abort_called = true;
    530                 unlock( kernel_abort_lock );
     530                unlock( &kernel_abort_lock );
    531531        }
    532532        else {
    533                 unlock( kernel_abort_lock );
     533                unlock( &kernel_abort_lock );
    534534
    535535                sigset_t mask;
     
    561561extern "C" {
    562562        void __lib_debug_acquire() {
    563                 lock( kernel_debug_lock DEBUG_CTX2 );
     563                lock( &kernel_debug_lock DEBUG_CTX2 );
    564564        }
    565565
    566566        void __lib_debug_release() {
    567                 unlock( kernel_debug_lock );
     567                unlock( &kernel_debug_lock );
    568568        }
    569569}
     
    574574//-----------------------------------------------------------------------------
    575575// Locks
     576void ?{}( spinlock & this ) {           // Constructor: start the spinlock in the released state
     577        this.lock = 0;                  // 0 is the value lock()/try_lock() test for before attempting to acquire
     578}
     579void ^?{}( spinlock & this ) {          // Destructor: intentionally empty, nothing is released or checked here
     580
     581}
     582
     583bool try_lock( spinlock * this DEBUG_CTX_PARAM2 ) {     // Single non-blocking acquire attempt; true iff this call took the lock. Unlike lock(), no prev_name/prev_thrd is recorded.
     584        return this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0;    // plain read first, so the atomic swap (GCC builtin: store 1, yield prior value) only runs when the lock looks free
     585}
     586
     587void lock( spinlock * this DEBUG_CTX_PARAM2 ) { // Busy-wait until the spinlock is acquired; returns holding it
     588        for ( unsigned int i = 1;; i += 1 ) {   // i counts attempts but is never read in this body
     589                if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) { break; }  // same test as try_lock(): acquired when the swapped-out value was 0
     590        }
     591        LIB_DEBUG_DO(   // NOTE(review): presumably expands only in debug builds, and `caller` is presumably introduced by DEBUG_CTX_PARAM2 -- confirm against their definitions
     592                this->prev_name = caller;
     593                this->prev_thrd = this_thread;  // remember this_thread as the most recent acquirer
     594        )
     595}
     596
     597void lock_yield( spinlock * this DEBUG_CTX_PARAM2 ) {   // Like lock(), but calls yield() after every failed attempt instead of spinning back-to-back
     598        for ( unsigned int i = 1;; i += 1 ) {   // i counts attempts but is never read in this body
     599                if ( this->lock == 0 && __sync_lock_test_and_set_4( &this->lock, 1 ) == 0 ) { break; }  // acquired when the swapped-out value was 0
     600                yield();        // attempt failed: yield before retrying
     601        }
     602        LIB_DEBUG_DO(   // NOTE(review): presumably expands only in debug builds, and `caller` is presumably introduced by DEBUG_CTX_PARAM2 -- confirm against their definitions
     603                this->prev_name = caller;
     604                this->prev_thrd = this_thread;  // remember this_thread as the most recent acquirer
     605        )
     606}
     607
     608
     609void unlock( spinlock * this ) {        // Release the spinlock; does not check that the caller actually holds it
     610        __sync_lock_release_4( &this->lock );   // GCC builtin: resets the lock word to 0, the value lock()/try_lock() wait for
     611}
     612
    576613void  ?{}( semaphore & this, int count = 1 ) {
    577614        (this.lock){};
     
    581618void ^?{}(semaphore & this) {}
    582619
    583 void P(semaphore & this) {
    584         lock( this.lock DEBUG_CTX2 );
    585         this.count -= 1;
    586         if ( this.count < 0 ) {
     620void P(semaphore * this) {
     621        lock( &this->lock DEBUG_CTX2 );
     622        this->count -= 1;
     623        if ( this->count < 0 ) {
    587624                // queue current task
    588                 append( this.waiting, (thread_desc *)this_thread );
     625                append( &this->waiting, (thread_desc *)this_thread );
    589626
    590627                // atomically release spin lock and block
    591                 BlockInternal( &this.lock );
     628                BlockInternal( &this->lock );
    592629        }
    593630        else {
    594             unlock( this.lock );
    595         }
    596 }
    597 
    598 void V(semaphore & this) {
     631            unlock( &this->lock );
     632        }
     633}
     634
     635void V(semaphore * this) {
    599636        thread_desc * thrd = NULL;
    600         lock( this.lock DEBUG_CTX2 );
    601         this.count += 1;
    602         if ( this.count <= 0 ) {
     637        lock( &this->lock DEBUG_CTX2 );
     638        this->count += 1;
     639        if ( this->count <= 0 ) {
    603640                // remove task at head of waiting list
    604                 thrd = pop_head( this.waiting );
    605         }
    606 
    607         unlock( this.lock );
     641                thrd = pop_head( &this->waiting );
     642        }
     643
     644        unlock( &this->lock );
    608645
    609646        // make new owner
     
    618655}
    619656
    620 void append( __thread_queue_t & this, thread_desc * t ) {
    621         verify(this.tail != NULL);
    622         *this.tail = t;
    623         this.tail = &t->next;
    624 }
    625 
    626 thread_desc * pop_head( __thread_queue_t & this ) {
    627         thread_desc * head = this.head;
     657void append( __thread_queue_t * this, thread_desc * t ) {
     658        verify(this->tail != NULL);
     659        *this->tail = t;
     660        this->tail = &t->next;
     661}
     662
     663thread_desc * pop_head( __thread_queue_t * this ) {
     664        thread_desc * head = this->head;
    628665        if( head ) {
    629                 this.head = head->next;
     666                this->head = head->next;
    630667                if( !head->next ) {
    631                         this.tail = &this.head;
     668                        this->tail = &this->head;
    632669                }
    633670                head->next = NULL;
     
    636673}
    637674
    638 thread_desc * remove( __thread_queue_t & this, thread_desc ** it ) {
     675thread_desc * remove( __thread_queue_t * this, thread_desc ** it ) {
    639676        thread_desc * thrd = *it;
    640677        verify( thrd );
     
    642679        (*it) = thrd->next;
    643680
    644         if( this.tail == &thrd->next ) {
    645                 this.tail = it;
     681        if( this->tail == &thrd->next ) {
     682                this->tail = it;
    646683        }
    647684
    648685        thrd->next = NULL;
    649686
    650         verify( (this.head == NULL) == (&this.head == this.tail) );
    651         verify( *this.tail == NULL );
     687        verify( (this->head == NULL) == (&this->head == this->tail) );
     688        verify( *this->tail == NULL );
    652689        return thrd;
    653690}
     
    657694}
    658695
    659 void push( __condition_stack_t & this, __condition_criterion_t * t ) {
     696void push( __condition_stack_t * this, __condition_criterion_t * t ) {
    660697        verify( !t->next );
    661         t->next = this.top;
    662         this.top = t;
    663 }
    664 
    665 __condition_criterion_t * pop( __condition_stack_t & this ) {
    666         __condition_criterion_t * top = this.top;
     698        t->next = this->top;
     699        this->top = t;
     700}
     701
     702__condition_criterion_t * pop( __condition_stack_t * this ) {
     703        __condition_criterion_t * top = this->top;
    667704        if( top ) {
    668                 this.top = top->next;
     705                this->top = top->next;
    669706                top->next = NULL;
    670707        }
  • src/libcfa/concurrency/kernel_private.h

    r0fe4e62 rf5c3b6c  
    4545//Block current thread and release/wake-up the following resources
    4646void BlockInternal(void);
    47 void BlockInternal(__spinlock_t * lock);
     47void BlockInternal(spinlock * lock);
    4848void BlockInternal(thread_desc * thrd);
    49 void BlockInternal(__spinlock_t * lock, thread_desc * thrd);
    50 void BlockInternal(__spinlock_t * locks [], unsigned short count);
    51 void BlockInternal(__spinlock_t * locks [], unsigned short count, thread_desc * thrds [], unsigned short thrd_count);
    52 void LeaveThread(__spinlock_t * lock, thread_desc * thrd);
     49void BlockInternal(spinlock * lock, thread_desc * thrd);
     50void BlockInternal(spinlock ** locks, unsigned short count);
     51void BlockInternal(spinlock ** locks, unsigned short count, thread_desc ** thrds, unsigned short thrd_count);
     52void LeaveThread(spinlock * lock, thread_desc * thrd);
    5353
    5454//-----------------------------------------------------------------------------
     
    6666struct event_kernel_t {
    6767        alarm_list_t alarms;
    68         __spinlock_t lock;
     68        spinlock lock;
    6969};
    7070
  • src/libcfa/concurrency/monitor

    r0fe4e62 rf5c3b6c  
    3939}
    4040
     41// static inline int ?<?(monitor_desc* lhs, monitor_desc* rhs) {
     42//      return ((intptr_t)lhs) < ((intptr_t)rhs);
     43// }
     44
    4145struct monitor_guard_t {
    4246        monitor_desc ** m;
    43         __lock_size_t  count;
     47        int count;
    4448        monitor_desc ** prev_mntrs;
    45         __lock_size_t   prev_count;
     49        unsigned short  prev_count;
    4650        fptr_t          prev_func;
    4751};
    4852
    49 void ?{}( monitor_guard_t & this, monitor_desc ** m, __lock_size_t count, void (*func)() );
     53void ?{}( monitor_guard_t & this, monitor_desc ** m, int count, void (*func)() );
    5054void ^?{}( monitor_guard_t & this );
    5155
     
    5357        monitor_desc * m;
    5458        monitor_desc ** prev_mntrs;
    55         __lock_size_t   prev_count;
     59        unsigned short  prev_count;
    5660        fptr_t          prev_func;
    5761};
     
    7074
    7175struct __condition_criterion_t {
    72         // Whether or not the criterion is met (True if met)
    73         bool ready;
    74 
    75         // The monitor this criterion concerns
    76         monitor_desc * target;
    77 
    78         // The parent node to which this criterion belongs
    79         struct __condition_node_t * owner;
    80 
    81         // Intrusive linked list Next field
    82         __condition_criterion_t * next;
     76        bool ready;                                             //Whether or not the criterion is met (True if met)
     77        monitor_desc * target;                          //The monitor this criterion concerns
     78        struct __condition_node_t * owner;              //The parent node to which this criterion belongs
     79        __condition_criterion_t * next;         //Intrusive linked list Next field
    8380};
    8481
    8582struct __condition_node_t {
    86         // Thread that needs to be woken when all criteria are met
    87         thread_desc * waiting_thread;
    88 
    89         // Array of criteria (Criterions are contiguous in memory)
    90         __condition_criterion_t * criteria;
    91 
    92         // Number of criterions in the criteria
    93         __lock_size_t count;
    94 
    95         // Intrusive linked list Next field
    96         __condition_node_t * next;
    97 
    98         // Custom user info accessible before signalling
    99         uintptr_t user_info;
     83        thread_desc * waiting_thread;                   //Thread that needs to be woken when all criteria are met
     84        __condition_criterion_t * criteria;     //Array of criteria (Criterions are contiguous in memory)
     85        unsigned short count;                           //Number of criterions in the criteria
     86        __condition_node_t * next;                      //Intrusive linked list Next field
     87        uintptr_t user_info;                            //Custom user info accessible before signalling
    10088};
    10189
     
    10593};
    10694
    107 void ?{}(__condition_node_t & this, thread_desc * waiting_thread, __lock_size_t count, uintptr_t user_info );
     95void ?{}(__condition_node_t & this, thread_desc * waiting_thread, unsigned short count, uintptr_t user_info );
    10896void ?{}(__condition_criterion_t & this );
    10997void ?{}(__condition_criterion_t & this, monitor_desc * target, __condition_node_t * owner );
    11098
    11199void ?{}( __condition_blocked_queue_t & );
    112 void append( __condition_blocked_queue_t &, __condition_node_t * );
    113 __condition_node_t * pop_head( __condition_blocked_queue_t & );
     100void append( __condition_blocked_queue_t *, __condition_node_t * );
     101__condition_node_t * pop_head( __condition_blocked_queue_t * );
    114102
    115103struct condition {
    116         // Link list which contains the blocked threads as-well as the information needed to unblock them
    117         __condition_blocked_queue_t blocked;
    118 
    119         // Array of monitor pointers (Monitors are NOT contiguous in memory)
    120         monitor_desc ** monitors;
    121 
    122         // Number of monitors in the array
    123         __lock_size_t monitor_count;
     104        __condition_blocked_queue_t blocked;    //Link list which contains the blocked threads as-well as the information needed to unblock them
     105        monitor_desc ** monitors;                       //Array of monitor pointers (Monitors are NOT contiguous in memory)
     106        unsigned short monitor_count;                   //Number of monitors in the array
    124107};
    125108
     
    133116}
    134117
    135               void wait        ( condition & this, uintptr_t user_info = 0 );
    136               bool signal      ( condition & this );
    137               bool signal_block( condition & this );
    138 static inline bool is_empty    ( condition & this ) { return !this.blocked.head; }
    139          uintptr_t front       ( condition & this );
     118void wait( condition * this, uintptr_t user_info = 0 );
     119bool signal( condition * this );
     120bool signal_block( condition * this );
     121static inline bool is_empty( condition * this ) { return !this->blocked.head; }
     122uintptr_t front( condition * this );
    140123
    141124//-----------------------------------------------------------------------------
  • src/libcfa/concurrency/monitor.c

    r0fe4e62 rf5c3b6c  
    1717
    1818#include <stdlib>
    19 #include <inttypes.h>
    2019
    2120#include "libhdr.h"
     
    2726// Forward declarations
    2827static inline void set_owner ( monitor_desc * this, thread_desc * owner );
    29 static inline void set_owner ( monitor_desc * storage [], __lock_size_t count, thread_desc * owner );
    30 static inline void set_mask  ( monitor_desc * storage [], __lock_size_t count, const __waitfor_mask_t & mask );
     28static inline void set_owner ( monitor_desc ** storage, short count, thread_desc * owner );
     29static inline void set_mask  ( monitor_desc ** storage, short count, const __waitfor_mask_t & mask );
    3130static inline void reset_mask( monitor_desc * this );
    3231
     
    3433static inline bool is_accepted( monitor_desc * this, const __monitor_group_t & monitors );
    3534
    36 static inline void lock_all  ( __spinlock_t * locks [], __lock_size_t count );
    37 static inline void lock_all  ( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count );
    38 static inline void unlock_all( __spinlock_t * locks [], __lock_size_t count );
    39 static inline void unlock_all( monitor_desc * locks [], __lock_size_t count );
    40 
    41 static inline void save   ( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*out*/ recursions [], __waitfor_mask_t /*out*/ masks [] );
    42 static inline void restore( monitor_desc * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
    43 
    44 static inline void init     ( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
    45 static inline void init_push( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
     35static inline void lock_all( spinlock ** locks, unsigned short count );
     36static inline void lock_all( monitor_desc ** source, spinlock ** /*out*/ locks, unsigned short count );
     37static inline void unlock_all( spinlock ** locks, unsigned short count );
     38static inline void unlock_all( monitor_desc ** locks, unsigned short count );
     39
     40static inline void save   ( monitor_desc ** ctx, short count, spinlock ** locks, unsigned int * /*out*/ recursions, __waitfor_mask_t * /*out*/ masks );
     41static inline void restore( monitor_desc ** ctx, short count, spinlock ** locks, unsigned int * /*in */ recursions, __waitfor_mask_t * /*in */ masks );
     42
     43static inline void init     ( int count, monitor_desc ** monitors, __condition_node_t * waiter, __condition_criterion_t * criteria );
     44static inline void init_push( int count, monitor_desc ** monitors, __condition_node_t * waiter, __condition_criterion_t * criteria );
    4645
    4746static inline thread_desc *        check_condition   ( __condition_criterion_t * );
    48 static inline void                 brand_condition   ( condition & );
    49 static inline [thread_desc *, int] search_entry_queue( const __waitfor_mask_t &, monitor_desc * monitors [], __lock_size_t count );
     47static inline void                 brand_condition   ( condition * );
     48static inline [thread_desc *, int] search_entry_queue( const __waitfor_mask_t &, monitor_desc ** monitors, int count );
    5049
    5150forall(dtype T | sized( T ))
    52 static inline __lock_size_t insert_unique( T * array [], __lock_size_t & size, T * val );
    53 static inline __lock_size_t count_max    ( const __waitfor_mask_t & mask );
    54 static inline __lock_size_t aggregate    ( monitor_desc * storage [], const __waitfor_mask_t & mask );
    55 
    56 #ifndef __CFA_LOCK_NO_YIELD
    57 #define DO_LOCK lock_yield
    58 #else
    59 #define DO_LOCK lock
    60 #endif
     51static inline short insert_unique( T ** array, short & size, T * val );
     52static inline short count_max    ( const __waitfor_mask_t & mask );
     53static inline short aggregate    ( monitor_desc ** storage, const __waitfor_mask_t & mask );
    6154
    6255//-----------------------------------------------------------------------------
     
    6558        __condition_node_t waiter = { thrd, count, user_info };   /* Create the node specific to this wait operation                                     */ \
    6659        __condition_criterion_t criteria[count];                  /* Create the criteria this wait operation needs to wake up                            */ \
    67         init( count, monitors, waiter, criteria );                /* Link everything together                                                            */ \
     60        init( count, monitors, &waiter, criteria );               /* Link everything together                                                            */ \
    6861
    6962#define wait_ctx_primed(thrd, user_info)                        /* Create the necessary information to use the signaller stack                         */ \
    7063        __condition_node_t waiter = { thrd, count, user_info };   /* Create the node specific to this wait operation                                     */ \
    7164        __condition_criterion_t criteria[count];                  /* Create the criteria this wait operation needs to wake up                            */ \
    72         init_push( count, monitors, waiter, criteria );           /* Link everything together and push it to the AS-Stack                                */ \
     65        init_push( count, monitors, &waiter, criteria );          /* Link everything together and push it to the AS-Stack                                */ \
    7366
    7467#define monitor_ctx( mons, cnt )                                /* Define that create the necessary struct for internal/external scheduling operations */ \
    7568        monitor_desc ** monitors = mons;                          /* Save the targeted monitors                                                          */ \
    76         __lock_size_t count = cnt;                                /* Save the count to a local variable                                                  */ \
     69        unsigned short count = cnt;                               /* Save the count to a local variable                                                  */ \
    7770        unsigned int recursions[ count ];                         /* Save the current recursion levels to restore them later                             */ \
    78         __waitfor_mask_t masks [ count ];                         /* Save the current waitfor masks to restore them later                                */ \
    79         __spinlock_t *   locks [ count ];                         /* We need to pass-in an array of locks to BlockInternal                               */ \
     71        __waitfor_mask_t masks[ count ];                          /* Save the current waitfor masks to restore them later                                */ \
     72        spinlock *   locks    [ count ];                         /* We need to pass-in an array of locks to BlockInternal                               */ \
    8073
    8174#define monitor_save    save   ( monitors, count, locks, recursions, masks )
     
    9083        // Enter single monitor
    9184        static void __enter_monitor_desc( monitor_desc * this, const __monitor_group_t & group ) {
    92                 // Lock the monitor spinlock
    93                 DO_LOCK( this->lock DEBUG_CTX2 );
     85                // Lock the monitor spinlock, lock_yield to reduce contention
     86                lock_yield( &this->lock DEBUG_CTX2 );
    9487                thread_desc * thrd = this_thread;
    9588
     
    121114
    122115                        // Some one else has the monitor, wait in line for it
    123                         append( this->entry_queue, thrd );
     116                        append( &this->entry_queue, thrd );
    124117                        BlockInternal( &this->lock );
    125118
     
    133126
    134127                // Release the lock and leave
    135                 unlock( this->lock );
     128                unlock( &this->lock );
    136129                return;
    137130        }
    138131
    139132        static void __enter_monitor_dtor( monitor_desc * this, fptr_t func ) {
    140                 // Lock the monitor spinlock
    141                 DO_LOCK( this->lock DEBUG_CTX2 );
     133                // Lock the monitor spinlock, lock_yield to reduce contention
     134                lock_yield( &this->lock DEBUG_CTX2 );
    142135                thread_desc * thrd = this_thread;
    143136
     
    151144                        set_owner( this, thrd );
    152145
    153                         unlock( this->lock );
     146                        unlock( &this->lock );
    154147                        return;
    155148                }
     
    160153                }
    161154
    162                 __lock_size_t count = 1;
     155                int count = 1;
    163156                monitor_desc ** monitors = &this;
    164157                __monitor_group_t group = { &this, 1, func };
     
    167160
    168161                        // Wake the thread that is waiting for this
    169                         __condition_criterion_t * urgent = pop( this->signal_stack );
     162                        __condition_criterion_t * urgent = pop( &this->signal_stack );
    170163                        verify( urgent );
    171164
     
    189182
    190183                        // Some one else has the monitor, wait in line for it
    191                         append( this->entry_queue, thrd );
     184                        append( &this->entry_queue, thrd );
    192185                        BlockInternal( &this->lock );
    193186
     
    202195        // Leave single monitor
    203196        void __leave_monitor_desc( monitor_desc * this ) {
    204                 // Lock the monitor spinlock, DO_LOCK to reduce contention
    205                 DO_LOCK( this->lock DEBUG_CTX2 );
     197                // Lock the monitor spinlock, lock_yield to reduce contention
     198                lock_yield( &this->lock DEBUG_CTX2 );
    206199
    207200                LIB_DEBUG_PRINT_SAFE("Kernel : %10p Leaving mon %p (%p)\n", this_thread, this, this->owner);
     
    216209                if( this->recursion != 0) {
    217210                        LIB_DEBUG_PRINT_SAFE("Kernel :  recursion still %d\n", this->recursion);
    218                         unlock( this->lock );
     211                        unlock( &this->lock );
    219212                        return;
    220213                }
     
    224217
    225218                // We can now let other threads in safely
    226                 unlock( this->lock );
     219                unlock( &this->lock );
    227220
    228221                //We need to wake-up the thread
     
    249242
    250243                // Lock the monitor now
    251                 DO_LOCK( this->lock DEBUG_CTX2 );
     244                lock_yield( &this->lock DEBUG_CTX2 );
    252245
    253246                disable_interrupts();
     
    279272// relies on the monitor array being sorted
    280273static inline void enter( __monitor_group_t monitors ) {
    281         for( __lock_size_t i = 0; i < monitors.size; i++) {
     274        for(int i = 0; i < monitors.size; i++) {
    282275                __enter_monitor_desc( monitors.list[i], monitors );
    283276        }
     
    286279// Leave multiple monitor
    287280// relies on the monitor array being sorted
    288 static inline void leave(monitor_desc * monitors [], __lock_size_t count) {
    289         for( __lock_size_t i = count - 1; i >= 0; i--) {
     281static inline void leave(monitor_desc ** monitors, int count) {
     282        for(int i = count - 1; i >= 0; i--) {
    290283                __leave_monitor_desc( monitors[i] );
    291284        }
     
    294287// Ctor for monitor guard
    295288// Sorts monitors before entering
    296 void ?{}( monitor_guard_t & this, monitor_desc * m [], __lock_size_t count, fptr_t func ) {
     289void ?{}( monitor_guard_t & this, monitor_desc ** m, int count, fptr_t func ) {
    297290        // Store current array
    298291        this.m = m;
     
    303296
    304297        // Save previous thread context
    305         this.[prev_mntrs, prev_count, prev_func] = this_thread->monitors.[list, size, func];
     298        this.prev_mntrs = this_thread->monitors.list;
     299        this.prev_count = this_thread->monitors.size;
     300        this.prev_func  = this_thread->monitors.func;
    306301
    307302        // Update thread context (needed for conditions)
    308         this_thread->monitors.[list, size, func] = [m, count, func];
     303        this_thread->monitors.list = m;
     304        this_thread->monitors.size = count;
     305        this_thread->monitors.func = func;
    309306
    310307        // LIB_DEBUG_PRINT_SAFE("MGUARD : enter %d\n", count);
     
    328325
    329326        // Restore thread context
    330         this_thread->monitors.[list, size, func] = this.[prev_mntrs, prev_count, prev_func];
    331 }
     327        this_thread->monitors.list = this.prev_mntrs;
     328        this_thread->monitors.size = this.prev_count;
     329        this_thread->monitors.func = this.prev_func;
     330}
     331
    332332
    333333// Ctor for monitor guard
    334334// Sorts monitors before entering
    335 void ?{}( monitor_dtor_guard_t & this, monitor_desc * m [], fptr_t func ) {
     335void ?{}( monitor_dtor_guard_t & this, monitor_desc ** m, fptr_t func ) {
    336336        // Store current array
    337337        this.m = *m;
    338338
    339339        // Save previous thread context
    340         this.[prev_mntrs, prev_count, prev_func] = this_thread->monitors.[list, size, func];
     340        this.prev_mntrs = this_thread->monitors.list;
     341        this.prev_count = this_thread->monitors.size;
     342        this.prev_func  = this_thread->monitors.func;
    341343
    342344        // Update thread context (needed for conditions)
    343         this_thread->monitors.[list, size, func] = [m, 1, func];
     345        this_thread->monitors.list = m;
     346        this_thread->monitors.size = 1;
     347        this_thread->monitors.func = func;
    344348
    345349        __enter_monitor_dtor( this.m, func );
    346350}
     351
    347352
    348353// Dtor for monitor guard
     
    352357
    353358        // Restore thread context
    354         this_thread->monitors.[list, size, func] = this.[prev_mntrs, prev_count, prev_func];
     359        this_thread->monitors.list = this.prev_mntrs;
     360        this_thread->monitors.size = this.prev_count;
     361        this_thread->monitors.func = this.prev_func;
    355362}
    356363
    357364//-----------------------------------------------------------------------------
    358365// Internal scheduling types
    359 void ?{}(__condition_node_t & this, thread_desc * waiting_thread, __lock_size_t count, uintptr_t user_info ) {
     366void ?{}(__condition_node_t & this, thread_desc * waiting_thread, unsigned short count, uintptr_t user_info ) {
    360367        this.waiting_thread = waiting_thread;
    361368        this.count = count;
     
    371378}
    372379
    373 void ?{}(__condition_criterion_t & this, monitor_desc * target, __condition_node_t & owner ) {
     380void ?{}(__condition_criterion_t & this, monitor_desc * target, __condition_node_t * owner ) {
    374381        this.ready  = false;
    375382        this.target = target;
    376         this.owner  = &owner;
     383        this.owner  = owner;
    377384        this.next   = NULL;
    378385}
     
    380387//-----------------------------------------------------------------------------
    381388// Internal scheduling
    382 void wait( condition & this, uintptr_t user_info = 0 ) {
     389void wait( condition * this, uintptr_t user_info = 0 ) {
    383390        brand_condition( this );
    384391
    385392        // Check that everything is as expected
    386         assertf( this.monitors != NULL, "Waiting with no monitors (%p)", this.monitors );
    387         verifyf( this.monitor_count != 0, "Waiting with 0 monitors (%"PRIiFAST16")", this.monitor_count );
    388         verifyf( this.monitor_count < 32u, "Excessive monitor count (%"PRIiFAST16")", this.monitor_count );
     393        assertf( this->monitors != NULL, "Waiting with no monitors (%p)", this->monitors );
     394        verifyf( this->monitor_count != 0, "Waiting with 0 monitors (%i)", this->monitor_count );
     395        verifyf( this->monitor_count < 32u, "Excessive monitor count (%i)", this->monitor_count );
    389396
    390397        // Create storage for monitor context
    391         monitor_ctx( this.monitors, this.monitor_count );
     398        monitor_ctx( this->monitors, this->monitor_count );
    392399
    393400        // Create the node specific to this wait operation
     
    396403        // Append the current wait operation to the ones already queued on the condition
    397404        // We don't need locks for that since conditions must always be waited on inside monitor mutual exclusion
    398         append( this.blocked, &waiter );
     405        append( &this->blocked, &waiter );
    399406
    400407        // Lock all monitors (aggregates the locks as well)
     
    402409
    403410        // Find the next thread(s) to run
    404         __lock_size_t thread_count = 0;
     411        short thread_count = 0;
    405412        thread_desc * threads[ count ];
    406413        __builtin_memset( threads, 0, sizeof( threads ) );
     
    410417
    411418        // Remove any duplicate threads
    412         for( __lock_size_t i = 0; i < count; i++) {
     419        for( int i = 0; i < count; i++) {
    413420                thread_desc * new_owner = next_thread( monitors[i] );
    414421                insert_unique( threads, thread_count, new_owner );
     
    422429}
    423430
    424 bool signal( condition & this ) {
     431bool signal( condition * this ) {
    425432        if( is_empty( this ) ) { return false; }
    426433
    427434        //Check that everything is as expected
    428         verify( this.monitors );
    429         verify( this.monitor_count != 0 );
     435        verify( this->monitors );
     436        verify( this->monitor_count != 0 );
    430437
    431438        //Some more checking in debug
    432439        LIB_DEBUG_DO(
    433440                thread_desc * this_thrd = this_thread;
    434                 if ( this.monitor_count != this_thrd->monitors.size ) {
    435                         abortf( "Signal on condition %p made with different number of monitor(s), expected %i got %i", &this, this.monitor_count, this_thrd->monitors.size );
    436                 }
    437 
    438                 for(int i = 0; i < this.monitor_count; i++) {
    439                         if ( this.monitors[i] != this_thrd->monitors.list[i] ) {
    440                                 abortf( "Signal on condition %p made with different monitor, expected %p got %i", &this, this.monitors[i], this_thrd->monitors.list[i] );
     441                if ( this->monitor_count != this_thrd->monitors.size ) {
     442                        abortf( "Signal on condition %p made with different number of monitor(s), expected %i got %i", this, this->monitor_count, this_thrd->monitors.size );
     443                }
     444
     445                for(int i = 0; i < this->monitor_count; i++) {
     446                        if ( this->monitors[i] != this_thrd->monitors.list[i] ) {
     447                                abortf( "Signal on condition %p made with different monitor, expected %p got %i", this, this->monitors[i], this_thrd->monitors.list[i] );
    441448                        }
    442449                }
    443450        );
    444451
    445         __lock_size_t count = this.monitor_count;
     452        unsigned short count = this->monitor_count;
    446453
    447454        // Lock all monitors
    448         lock_all( this.monitors, NULL, count );
     455        lock_all( this->monitors, NULL, count );
    449456
    450457        //Pop the head of the waiting queue
    451         __condition_node_t * node = pop_head( this.blocked );
     458        __condition_node_t * node = pop_head( &this->blocked );
    452459
    453460        //Add the thread to the proper AS stack
     
    455462                __condition_criterion_t * crit = &node->criteria[i];
    456463                assert( !crit->ready );
    457                 push( crit->target->signal_stack, crit );
     464                push( &crit->target->signal_stack, crit );
    458465        }
    459466
    460467        //Release
    461         unlock_all( this.monitors, count );
     468        unlock_all( this->monitors, count );
    462469
    463470        return true;
    464471}
    465472
    466 bool signal_block( condition & this ) {
    467         if( !this.blocked.head ) { return false; }
     473bool signal_block( condition * this ) {
     474        if( !this->blocked.head ) { return false; }
    468475
    469476        //Check that everything is as expected
    470         verifyf( this.monitors != NULL, "Waiting with no monitors (%p)", this.monitors );
    471         verifyf( this.monitor_count != 0, "Waiting with 0 monitors (%"PRIiFAST16")", this.monitor_count );
     477        verifyf( this->monitors != NULL, "Waiting with no monitors (%p)", this->monitors );
     478        verifyf( this->monitor_count != 0, "Waiting with 0 monitors (%i)", this->monitor_count );
    472479
    473480        // Create storage for monitor context
    474         monitor_ctx( this.monitors, this.monitor_count );
     481        monitor_ctx( this->monitors, this->monitor_count );
    475482
    476483        // Lock all monitors (aggregates the locks them as well)
     
    484491
    485492        //Find the thread to run
    486         thread_desc * signallee = pop_head( this.blocked )->waiting_thread;
     493        thread_desc * signallee = pop_head( &this->blocked )->waiting_thread;
    487494        set_owner( monitors, count, signallee );
    488495
    489         LIB_DEBUG_PRINT_BUFFER_DECL( "Kernel : signal_block condition %p (s: %p)\n", &this, signallee );
     496        LIB_DEBUG_PRINT_BUFFER_DECL( "Kernel : signal_block condition %p (s: %p)\n", this, signallee );
    490497
    491498        //Everything is ready to go to sleep
     
    505512
    506513// Access the user_info of the thread waiting at the front of the queue
    507 uintptr_t front( condition & this ) {
     514uintptr_t front( condition * this ) {
    508515        verifyf( !is_empty(this),
    509516                "Attempt to access user data on an empty condition.\n"
    510517                "Possible cause is not checking if the condition is empty before reading stored data."
    511518        );
    512         return this.blocked.head->user_info;
     519        return this->blocked.head->user_info;
    513520}
    514521
     
    530537        // This statment doesn't have a contiguous list of monitors...
    531538        // Create one!
    532         __lock_size_t max = count_max( mask );
     539        short max = count_max( mask );
    533540        monitor_desc * mon_storage[max];
    534541        __builtin_memset( mon_storage, 0, sizeof( mon_storage ) );
    535         __lock_size_t actual_count = aggregate( mon_storage, mask );
    536 
    537         LIB_DEBUG_PRINT_BUFFER_DECL( "Kernel : waitfor %d (s: %d, m: %d)\n", actual_count, mask.size, (__lock_size_t)max);
     542        short actual_count = aggregate( mon_storage, mask );
     543
     544        LIB_DEBUG_PRINT_BUFFER_DECL( "Kernel : waitfor %d (s: %d, m: %d)\n", actual_count, mask.size, (short)max);
    538545
    539546        if(actual_count == 0) return;
     
    562569
    563570                                __condition_criterion_t * dtor_crit = mon2dtor->dtor_node->criteria;
    564                                 push( mon2dtor->signal_stack, dtor_crit );
     571                                push( &mon2dtor->signal_stack, dtor_crit );
    565572
    566573                                unlock_all( locks, count );
     
    622629        set_mask( monitors, count, mask );
    623630
    624         for( __lock_size_t i = 0; i < count; i++) {
     631        for(int i = 0; i < count; i++) {
    625632                verify( monitors[i]->owner == this_thread );
    626633        }
     
    654661}
    655662
    656 static inline void set_owner( monitor_desc * monitors [], __lock_size_t count, thread_desc * owner ) {
     663static inline void set_owner( monitor_desc ** monitors, short count, thread_desc * owner ) {
    657664        monitors[0]->owner     = owner;
    658665        monitors[0]->recursion = 1;
    659         for( __lock_size_t i = 1; i < count; i++ ) {
     666        for( int i = 1; i < count; i++ ) {
    660667                monitors[i]->owner     = owner;
    661668                monitors[i]->recursion = 0;
     
    663670}
    664671
    665 static inline void set_mask( monitor_desc * storage [], __lock_size_t count, const __waitfor_mask_t & mask ) {
    666         for( __lock_size_t i = 0; i < count; i++) {
     672static inline void set_mask( monitor_desc ** storage, short count, const __waitfor_mask_t & mask ) {
     673        for(int i = 0; i < count; i++) {
    667674                storage[i]->mask = mask;
    668675        }
     
    678685        //Check the signaller stack
    679686        LIB_DEBUG_PRINT_SAFE("Kernel :  mon %p AS-stack top %p\n", this, this->signal_stack.top);
    680         __condition_criterion_t * urgent = pop( this->signal_stack );
     687        __condition_criterion_t * urgent = pop( &this->signal_stack );
    681688        if( urgent ) {
    682689                //The signaller stack is not empty,
     
    690697        // No signaller thread
    691698        // Get the next thread in the entry_queue
    692         thread_desc * new_owner = pop_head( this->entry_queue );
     699        thread_desc * new_owner = pop_head( &this->entry_queue );
    693700        set_owner( this, new_owner );
    694701
     
    698705static inline bool is_accepted( monitor_desc * this, const __monitor_group_t & group ) {
    699706        __acceptable_t * it = this->mask.clauses; // Optim
    700         __lock_size_t count = this->mask.size;
     707        int count = this->mask.size;
    701708
    702709        // Check if there are any acceptable functions
     
    707714
    708715        // For all acceptable functions check if this is the current function.
    709         for( __lock_size_t i = 0; i < count; i++, it++ ) {
     716        for( short i = 0; i < count; i++, it++ ) {
    710717                if( *it == group ) {
    711718                        *this->mask.accepted = i;
     
    718725}
    719726
    720 static inline void init( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
    721         for( __lock_size_t i = 0; i < count; i++) {
     727static inline void init( int count, monitor_desc ** monitors, __condition_node_t * waiter, __condition_criterion_t * criteria ) {
     728        for(int i = 0; i < count; i++) {
    722729                (criteria[i]){ monitors[i], waiter };
    723730        }
    724731
    725         waiter.criteria = criteria;
    726 }
    727 
    728 static inline void init_push( __lock_size_t count, monitor_desc * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] ) {
    729         for( __lock_size_t i = 0; i < count; i++) {
     732        waiter->criteria = criteria;
     733}
     734
     735static inline void init_push( int count, monitor_desc ** monitors, __condition_node_t * waiter, __condition_criterion_t * criteria ) {
     736        for(int i = 0; i < count; i++) {
    730737                (criteria[i]){ monitors[i], waiter };
    731738                LIB_DEBUG_PRINT_SAFE( "Kernel :  target %p = %p\n", criteria[i].target, &criteria[i] );
    732                 push( criteria[i].target->signal_stack, &criteria[i] );
    733         }
    734 
    735         waiter.criteria = criteria;
    736 }
    737 
    738 static inline void lock_all( __spinlock_t * locks [], __lock_size_t count ) {
    739         for( __lock_size_t i = 0; i < count; i++ ) {
    740                 DO_LOCK( *locks[i] DEBUG_CTX2 );
    741         }
    742 }
    743 
    744 static inline void lock_all( monitor_desc * source [], __spinlock_t * /*out*/ locks [], __lock_size_t count ) {
    745         for( __lock_size_t i = 0; i < count; i++ ) {
    746                 __spinlock_t * l = &source[i]->lock;
    747                 DO_LOCK( *l DEBUG_CTX2 );
     739                push( &criteria[i].target->signal_stack, &criteria[i] );
     740        }
     741
     742        waiter->criteria = criteria;
     743}
     744
     745static inline void lock_all( spinlock ** locks, unsigned short count ) {
     746        for( int i = 0; i < count; i++ ) {
     747                lock_yield( locks[i] DEBUG_CTX2 );
     748        }
     749}
     750
     751static inline void lock_all( monitor_desc ** source, spinlock ** /*out*/ locks, unsigned short count ) {
     752        for( int i = 0; i < count; i++ ) {
     753                spinlock * l = &source[i]->lock;
     754                lock_yield( l DEBUG_CTX2 );
    748755                if(locks) locks[i] = l;
    749756        }
    750757}
    751758
    752 static inline void unlock_all( __spinlock_t * locks [], __lock_size_t count ) {
    753         for( __lock_size_t i = 0; i < count; i++ ) {
    754                 unlock( *locks[i] );
    755         }
    756 }
    757 
    758 static inline void unlock_all( monitor_desc * locks [], __lock_size_t count ) {
    759         for( __lock_size_t i = 0; i < count; i++ ) {
    760                 unlock( locks[i]->lock );
    761         }
    762 }
    763 
    764 static inline void save(
    765         monitor_desc * ctx [],
    766         __lock_size_t count,
    767         __attribute((unused)) __spinlock_t * locks [],
    768         unsigned int /*out*/ recursions [],
    769         __waitfor_mask_t /*out*/ masks []
    770 ) {
    771         for( __lock_size_t i = 0; i < count; i++ ) {
     759static inline void unlock_all( spinlock ** locks, unsigned short count ) {
     760        for( int i = 0; i < count; i++ ) {
     761                unlock( locks[i] );
     762        }
     763}
     764
     765static inline void unlock_all( monitor_desc ** locks, unsigned short count ) {
     766        for( int i = 0; i < count; i++ ) {
     767                unlock( &locks[i]->lock );
     768        }
     769}
     770
     771static inline void save( monitor_desc ** ctx, short count, __attribute((unused)) spinlock ** locks, unsigned int * /*out*/ recursions, __waitfor_mask_t * /*out*/ masks ) {
     772        for( int i = 0; i < count; i++ ) {
    772773                recursions[i] = ctx[i]->recursion;
    773774                masks[i]      = ctx[i]->mask;
     
    775776}
    776777
    777 static inline void restore(
    778         monitor_desc * ctx [],
    779         __lock_size_t count,
    780         __spinlock_t * locks [],
    781         unsigned int /*out*/ recursions [],
    782         __waitfor_mask_t /*out*/ masks []
    783 ) {
     778static inline void restore( monitor_desc ** ctx, short count, spinlock ** locks, unsigned int * /*out*/ recursions, __waitfor_mask_t * /*out*/ masks ) {
    784779        lock_all( locks, count );
    785         for( __lock_size_t i = 0; i < count; i++ ) {
     780        for( int i = 0; i < count; i++ ) {
    786781                ctx[i]->recursion = recursions[i];
    787782                ctx[i]->mask      = masks[i];
     
    816811}
    817812
    818 static inline void brand_condition( condition & this ) {
     813static inline void brand_condition( condition * this ) {
    819814        thread_desc * thrd = this_thread;
    820         if( !this.monitors ) {
     815        if( !this->monitors ) {
    821816                // LIB_DEBUG_PRINT_SAFE("Branding\n");
    822817                assertf( thrd->monitors.list != NULL, "No current monitor to brand condition %p", thrd->monitors.list );
    823                 this.monitor_count = thrd->monitors.size;
    824 
    825                 this.monitors = malloc( this.monitor_count * sizeof( *this.monitors ) );
    826                 for( int i = 0; i < this.monitor_count; i++ ) {
    827                         this.monitors[i] = thrd->monitors.list[i];
    828                 }
    829         }
    830 }
    831 
    832 static inline [thread_desc *, int] search_entry_queue( const __waitfor_mask_t & mask, monitor_desc * monitors [], __lock_size_t count ) {
    833 
    834         __thread_queue_t & entry_queue = monitors[0]->entry_queue;
     818                this->monitor_count = thrd->monitors.size;
     819
     820                this->monitors = malloc( this->monitor_count * sizeof( *this->monitors ) );
     821                for( int i = 0; i < this->monitor_count; i++ ) {
     822                        this->monitors[i] = thrd->monitors.list[i];
     823                }
     824        }
     825}
     826
     827static inline [thread_desc *, int] search_entry_queue( const __waitfor_mask_t & mask, monitor_desc ** monitors, int count ) {
     828
     829        __thread_queue_t * entry_queue = &monitors[0]->entry_queue;
    835830
    836831        // For each thread in the entry-queue
    837         for(    thread_desc ** thrd_it = &entry_queue.head;
     832        for(    thread_desc ** thrd_it = &entry_queue->head;
    838833                *thrd_it;
    839834                thrd_it = &(*thrd_it)->next
     
    857852
    858853forall(dtype T | sized( T ))
    859 static inline __lock_size_t insert_unique( T * array [], __lock_size_t & size, T * val ) {
     854static inline short insert_unique( T ** array, short & size, T * val ) {
    860855        if( !val ) return size;
    861856
    862         for( __lock_size_t i = 0; i <= size; i++) {
     857        for(int i = 0; i <= size; i++) {
    863858                if( array[i] == val ) return size;
    864859        }
     
    869864}
    870865
    871 static inline __lock_size_t count_max( const __waitfor_mask_t & mask ) {
    872         __lock_size_t max = 0;
    873         for( __lock_size_t i = 0; i < mask.size; i++ ) {
     866static inline short count_max( const __waitfor_mask_t & mask ) {
     867        short max = 0;
     868        for( int i = 0; i < mask.size; i++ ) {
    874869                max += mask.clauses[i].size;
    875870        }
     
    877872}
    878873
    879 static inline __lock_size_t aggregate( monitor_desc * storage [], const __waitfor_mask_t & mask ) {
    880         __lock_size_t size = 0;
    881         for( __lock_size_t i = 0; i < mask.size; i++ ) {
     874static inline short aggregate( monitor_desc ** storage, const __waitfor_mask_t & mask ) {
     875        short size = 0;
     876        for( int i = 0; i < mask.size; i++ ) {
    882877                __libcfa_small_sort( mask.clauses[i].list, mask.clauses[i].size );
    883                 for( __lock_size_t j = 0; j < mask.clauses[i].size; j++) {
     878                for( int j = 0; j < mask.clauses[i].size; j++) {
    884879                        insert_unique( storage, size, mask.clauses[i].list[j] );
    885880                }
     
    895890}
    896891
    897 void append( __condition_blocked_queue_t & this, __condition_node_t * c ) {
    898         verify(this.tail != NULL);
    899         *this.tail = c;
    900         this.tail = &c->next;
    901 }
    902 
    903 __condition_node_t * pop_head( __condition_blocked_queue_t & this ) {
    904         __condition_node_t * head = this.head;
     892void append( __condition_blocked_queue_t * this, __condition_node_t * c ) {
     893        verify(this->tail != NULL);
     894        *this->tail = c;
     895        this->tail = &c->next;
     896}
     897
     898__condition_node_t * pop_head( __condition_blocked_queue_t * this ) {
     899        __condition_node_t * head = this->head;
    905900        if( head ) {
    906                 this.head = head->next;
     901                this->head = head->next;
    907902                if( !head->next ) {
    908                         this.tail = &this.head;
     903                        this->tail = &this->head;
    909904                }
    910905                head->next = NULL;
  • src/libcfa/concurrency/preemption.c

    r0fe4e62 rf5c3b6c  
    355355                case SI_KERNEL:
    356356                        // LIB_DEBUG_PRINT_SAFE("Kernel : Preemption thread tick\n");
    357                         lock( event_kernel->lock DEBUG_CTX2 );
     357                        lock( &event_kernel->lock DEBUG_CTX2 );
    358358                        tick_preemption();
    359                         unlock( event_kernel->lock );
     359                        unlock( &event_kernel->lock );
    360360                        break;
    361361                // Signal was not sent by the kernel but by an other thread
  • src/main.cc

    r0fe4e62 rf5c3b6c  
    206206                        FILE * extras = fopen( libcfap | treep ? "../prelude/extras.cf" : CFA_LIBDIR "/extras.cf", "r" );
    207207                        assertf( extras, "cannot open extras.cf\n" );
    208                         parse( extras, LinkageSpec::BuiltinC );
     208                        parse( extras, LinkageSpec::C );
    209209
    210210                        if ( ! libcfap ) {
  • src/prelude/builtins.c

    r0fe4e62 rf5c3b6c  
    8080} // ?\?
    8181
    82 // FIXME (x \ (unsigned long int)y) relies on X ?\?(T, unsigned long) a function that is neither
    83 // defined, nor passed as an assertion parameter. Without user-defined conversions, cannot specify
    84 // X as a type that casts to double, yet it doesn't make sense to write functions with that type
    85 // signature where X is double.
    86 
    87 // static inline forall( otype T | { void ?{}( T & this, one_t ); T ?*?( T, T ); double ?/?( double, T ); } )
    88 // double ?\?( T x, signed long int y ) {
    89 //     if ( y >=  0 ) return (double)(x \ (unsigned long int)y);
    90 //     else return 1.0 / x \ (unsigned long int)(-y);
    91 // } // ?\?
     82static inline forall( otype T | { void ?{}( T & this, one_t ); T ?*?( T, T ); double ?/?( double, T ); } )
     83double ?\?( T x, signed long int y ) {
     84    if ( y >=  0 ) return (double)(x \ (unsigned long int)y);
     85    else return 1.0 / x \ (unsigned long int)(-y);
     86} // ?\?
    9287
    9388static inline long int ?\=?( long int & x, unsigned long int y ) { x = x \ y; return x; }
  • src/tests/.expect/32/KRfunctions.txt

    r0fe4e62 rf5c3b6c  
     1__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
     2__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     3__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     4__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     5__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     6extern signed int printf(const char *__restrict __format, ...);
    17signed int __f0__Fi_iPCii__1(signed int __a__i_1, const signed int *__b__PCi_1, signed int __c__i_1){
    28    __attribute__ ((unused)) signed int ___retval_f0__i_1;
  • src/tests/.expect/32/attributes.txt

    r0fe4e62 rf5c3b6c  
     1__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
     2__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     3__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     4__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     5__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     6extern signed int printf(const char *__restrict __format, ...);
    17signed int __la__Fi___1(){
    28    __attribute__ ((unused)) signed int ___retval_la__i_1;
  • src/tests/.expect/32/declarationSpecifier.txt

    r0fe4e62 rf5c3b6c  
     1__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
     2__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     3__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     4__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     5__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     6extern signed int printf(const char *__restrict __format, ...);
    17volatile const signed short int __x1__CVs_1;
    28static volatile const signed short int __x2__CVs_1;
     
    695701}
    696702static inline int invoke_main(int argc, char* argv[], char* envp[]) { (void)argc; (void)argv; (void)envp; return __main__Fi_iPPCc__1(argc, argv); }
     703__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
     704__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     705__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     706__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     707__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     708extern signed int printf(const char *__restrict __format, ...);
    697709static inline signed int invoke_main(signed int argc, char **argv, char **envp);
    698710signed int main(signed int __argc__i_1, char **__argv__PPc_1, char **__envp__PPc_1){
  • src/tests/.expect/32/extension.txt

    r0fe4e62 rf5c3b6c  
     1__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
     2__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     3__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     4__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     5__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     6extern signed int printf(const char *__restrict __format, ...);
    17__extension__ signed int __a__i_1;
    28__extension__ signed int __b__i_1;
  • src/tests/.expect/32/gccExtensions.txt

    r0fe4e62 rf5c3b6c  
     1__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
     2__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     3__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     4__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     5__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     6extern signed int printf(const char *__restrict __format, ...);
    17extern signed int __x__i_1 asm ( "xx" );
    28signed int __main__Fi_iPPCc__1(signed int __argc__i_1, const char **__argv__PPCc_1){
     
    168174}
    169175static inline int invoke_main(int argc, char* argv[], char* envp[]) { (void)argc; (void)argv; (void)envp; return __main__Fi_iPPCc__1(argc, argv); }
     176__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
     177__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     178__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     179__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     180__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     181extern signed int printf(const char *__restrict __format, ...);
    170182static inline signed int invoke_main(signed int argc, char **argv, char **envp);
    171183signed int main(signed int __argc__i_1, char **__argv__PPc_1, char **__envp__PPc_1){
  • src/tests/.expect/32/literals.txt

    r0fe4e62 rf5c3b6c  
     1__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
     2__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     3__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     4__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     5__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     6extern signed int printf(const char *__restrict __format, ...);
    17void __for_each__A0_2_0_0____operator_assign__PFd0_Rd0d0____constructor__PF_Rd0____constructor__PF_Rd0d0____destructor__PF_Rd0____operator_assign__PFd1_Rd1d1____constructor__PF_Rd1____constructor__PF_Rd1d1____destructor__PF_Rd1____operator_preincr__PFd0_Rd0____operator_predecr__PFd0_Rd0____operator_equal__PFi_d0d0____operator_notequal__PFi_d0d0____operator_deref__PFRd1_d0__F_d0d0PF_d1___1(__attribute__ ((unused)) void (*_adapterF_9telt_type__P)(void (*__anonymous_object0)(), void *__anonymous_object1), __attribute__ ((unused)) void *(*_adapterFP9telt_type_14titerator_type_M_P)(void (*__anonymous_object2)(), void *__anonymous_object3), __attribute__ ((unused)) signed int (*_adapterFi_14titerator_type14titerator_type_M_PP)(void (*__anonymous_object4)(), void *__anonymous_object5, void *__anonymous_object6), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type_P_M)(void (*__anonymous_object7)(), __attribute__ ((unused)) void *___retval__operator_preincr__14titerator_type_1, void *__anonymous_object8), __attribute__ ((unused)) void (*_adapterF_P9telt_type9telt_type__MP)(void (*__anonymous_object9)(), void *__anonymous_object10, void *__anonymous_object11), __attribute__ ((unused)) void (*_adapterF9telt_type_P9telt_type9telt_type_P_MP)(void (*__anonymous_object12)(), __attribute__ ((unused)) void *___retval__operator_assign__9telt_type_1, void *__anonymous_object13, void *__anonymous_object14), __attribute__ ((unused)) void (*_adapterF_P14titerator_type14titerator_type__MP)(void (*__anonymous_object15)(), void *__anonymous_object16, void *__anonymous_object17), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type14titerator_type_P_MP)(void (*__anonymous_object18)(), __attribute__ ((unused)) void *___retval__operator_assign__14titerator_type_1, void *__anonymous_object19, void *__anonymous_object20), __attribute__ ((unused)) unsigned long int _sizeof_14titerator_type, __attribute__ ((unused)) unsigned long int 
_alignof_14titerator_type, __attribute__ ((unused)) unsigned long int _sizeof_9telt_type, __attribute__ ((unused)) unsigned long int _alignof_9telt_type, __attribute__ ((unused)) void *(*___operator_assign__PF14titerator_type_R14titerator_type14titerator_type__1)(void *__anonymous_object21, void *__anonymous_object22), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type__1)(void *__anonymous_object23), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type14titerator_type__1)(void *__anonymous_object24, void *__anonymous_object25), __attribute__ ((unused)) void (*___destructor__PF_R14titerator_type__1)(void *__anonymous_object26), __attribute__ ((unused)) void *(*___operator_assign__PF9telt_type_R9telt_type9telt_type__1)(void *__anonymous_object27, void *__anonymous_object28), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type__1)(void *__anonymous_object29), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type9telt_type__1)(void *__anonymous_object30, void *__anonymous_object31), __attribute__ ((unused)) void (*___destructor__PF_R9telt_type__1)(void *__anonymous_object32), __attribute__ ((unused)) void *(*___operator_preincr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object33), __attribute__ ((unused)) void *(*___operator_predecr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object34), __attribute__ ((unused)) signed int (*___operator_equal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object35, void *__anonymous_object36), __attribute__ ((unused)) signed int (*___operator_notequal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object37, void *__anonymous_object38), __attribute__ ((unused)) void *(*___operator_deref__PFR9telt_type_14titerator_type__1)(void *__anonymous_object39), void *__begin__14titerator_type_1, void *__end__14titerator_type_1, void (*__func__PF_9telt_type__1)(void *__anonymous_object40));
    28void __for_each_reverse__A0_2_0_0____operator_assign__PFd0_Rd0d0____constructor__PF_Rd0____constructor__PF_Rd0d0____destructor__PF_Rd0____operator_assign__PFd1_Rd1d1____constructor__PF_Rd1____constructor__PF_Rd1d1____destructor__PF_Rd1____operator_preincr__PFd0_Rd0____operator_predecr__PFd0_Rd0____operator_equal__PFi_d0d0____operator_notequal__PFi_d0d0____operator_deref__PFRd1_d0__F_d0d0PF_d1___1(__attribute__ ((unused)) void (*_adapterF_9telt_type__P)(void (*__anonymous_object41)(), void *__anonymous_object42), __attribute__ ((unused)) void *(*_adapterFP9telt_type_14titerator_type_M_P)(void (*__anonymous_object43)(), void *__anonymous_object44), __attribute__ ((unused)) signed int (*_adapterFi_14titerator_type14titerator_type_M_PP)(void (*__anonymous_object45)(), void *__anonymous_object46, void *__anonymous_object47), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type_P_M)(void (*__anonymous_object48)(), __attribute__ ((unused)) void *___retval__operator_preincr__14titerator_type_1, void *__anonymous_object49), __attribute__ ((unused)) void (*_adapterF_P9telt_type9telt_type__MP)(void (*__anonymous_object50)(), void *__anonymous_object51, void *__anonymous_object52), __attribute__ ((unused)) void (*_adapterF9telt_type_P9telt_type9telt_type_P_MP)(void (*__anonymous_object53)(), __attribute__ ((unused)) void *___retval__operator_assign__9telt_type_1, void *__anonymous_object54, void *__anonymous_object55), __attribute__ ((unused)) void (*_adapterF_P14titerator_type14titerator_type__MP)(void (*__anonymous_object56)(), void *__anonymous_object57, void *__anonymous_object58), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type14titerator_type_P_MP)(void (*__anonymous_object59)(), __attribute__ ((unused)) void *___retval__operator_assign__14titerator_type_1, void *__anonymous_object60, void *__anonymous_object61), __attribute__ ((unused)) unsigned long int _sizeof_14titerator_type, __attribute__ ((unused)) unsigned 
long int _alignof_14titerator_type, __attribute__ ((unused)) unsigned long int _sizeof_9telt_type, __attribute__ ((unused)) unsigned long int _alignof_9telt_type, __attribute__ ((unused)) void *(*___operator_assign__PF14titerator_type_R14titerator_type14titerator_type__1)(void *__anonymous_object62, void *__anonymous_object63), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type__1)(void *__anonymous_object64), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type14titerator_type__1)(void *__anonymous_object65, void *__anonymous_object66), __attribute__ ((unused)) void (*___destructor__PF_R14titerator_type__1)(void *__anonymous_object67), __attribute__ ((unused)) void *(*___operator_assign__PF9telt_type_R9telt_type9telt_type__1)(void *__anonymous_object68, void *__anonymous_object69), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type__1)(void *__anonymous_object70), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type9telt_type__1)(void *__anonymous_object71, void *__anonymous_object72), __attribute__ ((unused)) void (*___destructor__PF_R9telt_type__1)(void *__anonymous_object73), __attribute__ ((unused)) void *(*___operator_preincr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object74), __attribute__ ((unused)) void *(*___operator_predecr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object75), __attribute__ ((unused)) signed int (*___operator_equal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object76, void *__anonymous_object77), __attribute__ ((unused)) signed int (*___operator_notequal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object78, void *__anonymous_object79), __attribute__ ((unused)) void *(*___operator_deref__PFR9telt_type_14titerator_type__1)(void *__anonymous_object80), void *__begin__14titerator_type_1, void *__end__14titerator_type_1, void (*__func__PF_9telt_type__1)(void *__anonymous_object81));
     
    13711377}
    13721378static inline int invoke_main(int argc, char* argv[], char* envp[]) { (void)argc; (void)argv; (void)envp; return __main__Fi___1(); }
     1379__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned int __size);
     1380__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     1381__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     1382__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     1383__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     1384extern signed int printf(const char *__restrict __format, ...);
    13731385static inline signed int invoke_main(signed int argc, char **argv, char **envp);
    13741386signed int main(signed int __argc__i_1, char **__argv__PPc_1, char **__envp__PPc_1){
  • src/tests/.expect/64/KRfunctions.txt

    r0fe4e62 rf5c3b6c  
     1__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
     2__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     3__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     4__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     5__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     6extern signed int printf(const char *__restrict __format, ...);
    17signed int __f0__Fi_iPCii__1(signed int __a__i_1, const signed int *__b__PCi_1, signed int __c__i_1){
    28    __attribute__ ((unused)) signed int ___retval_f0__i_1;
  • src/tests/.expect/64/attributes.txt

    r0fe4e62 rf5c3b6c  
     1__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
     2__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     3__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     4__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     5__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     6extern signed int printf(const char *__restrict __format, ...);
    17signed int __la__Fi___1(){
    28    __attribute__ ((unused)) signed int ___retval_la__i_1;
  • src/tests/.expect/64/declarationSpecifier.txt

    r0fe4e62 rf5c3b6c  
     1__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
     2__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     3__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     4__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     5__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     6extern signed int printf(const char *__restrict __format, ...);
    17volatile const signed short int __x1__CVs_1;
    28static volatile const signed short int __x2__CVs_1;
     
    695701}
    696702static inline int invoke_main(int argc, char* argv[], char* envp[]) { (void)argc; (void)argv; (void)envp; return __main__Fi_iPPCc__1(argc, argv); }
     703__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
     704__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     705__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     706__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     707__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     708extern signed int printf(const char *__restrict __format, ...);
    697709static inline signed int invoke_main(signed int argc, char **argv, char **envp);
    698710signed int main(signed int __argc__i_1, char **__argv__PPc_1, char **__envp__PPc_1){
  • src/tests/.expect/64/extension.txt

    r0fe4e62 rf5c3b6c  
     1__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
     2__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     3__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     4__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     5__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     6extern signed int printf(const char *__restrict __format, ...);
    17__extension__ signed int __a__i_1;
    28__extension__ signed int __b__i_1;
  • src/tests/.expect/64/gccExtensions.txt

    r0fe4e62 rf5c3b6c  
     1__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
     2__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     3__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     4__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     5__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     6extern signed int printf(const char *__restrict __format, ...);
    17extern signed int __x__i_1 asm ( "xx" );
    28signed int __main__Fi_iPPCc__1(signed int __argc__i_1, const char **__argv__PPCc_1){
     
    168174}
    169175static inline int invoke_main(int argc, char* argv[], char* envp[]) { (void)argc; (void)argv; (void)envp; return __main__Fi_iPPCc__1(argc, argv); }
     176__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
     177__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     178__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     179__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     180__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     181extern signed int printf(const char *__restrict __format, ...);
    170182static inline signed int invoke_main(signed int argc, char **argv, char **envp);
    171183signed int main(signed int __argc__i_1, char **__argv__PPc_1, char **__envp__PPc_1){
  • src/tests/.expect/64/literals.txt

    r0fe4e62 rf5c3b6c  
     1__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
     2__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     3__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     4__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     5__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     6extern signed int printf(const char *__restrict __format, ...);
    17void __for_each__A0_2_0_0____operator_assign__PFd0_Rd0d0____constructor__PF_Rd0____constructor__PF_Rd0d0____destructor__PF_Rd0____operator_assign__PFd1_Rd1d1____constructor__PF_Rd1____constructor__PF_Rd1d1____destructor__PF_Rd1____operator_preincr__PFd0_Rd0____operator_predecr__PFd0_Rd0____operator_equal__PFi_d0d0____operator_notequal__PFi_d0d0____operator_deref__PFRd1_d0__F_d0d0PF_d1___1(__attribute__ ((unused)) void (*_adapterF_9telt_type__P)(void (*__anonymous_object0)(), void *__anonymous_object1), __attribute__ ((unused)) void *(*_adapterFP9telt_type_14titerator_type_M_P)(void (*__anonymous_object2)(), void *__anonymous_object3), __attribute__ ((unused)) signed int (*_adapterFi_14titerator_type14titerator_type_M_PP)(void (*__anonymous_object4)(), void *__anonymous_object5, void *__anonymous_object6), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type_P_M)(void (*__anonymous_object7)(), __attribute__ ((unused)) void *___retval__operator_preincr__14titerator_type_1, void *__anonymous_object8), __attribute__ ((unused)) void (*_adapterF_P9telt_type9telt_type__MP)(void (*__anonymous_object9)(), void *__anonymous_object10, void *__anonymous_object11), __attribute__ ((unused)) void (*_adapterF9telt_type_P9telt_type9telt_type_P_MP)(void (*__anonymous_object12)(), __attribute__ ((unused)) void *___retval__operator_assign__9telt_type_1, void *__anonymous_object13, void *__anonymous_object14), __attribute__ ((unused)) void (*_adapterF_P14titerator_type14titerator_type__MP)(void (*__anonymous_object15)(), void *__anonymous_object16, void *__anonymous_object17), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type14titerator_type_P_MP)(void (*__anonymous_object18)(), __attribute__ ((unused)) void *___retval__operator_assign__14titerator_type_1, void *__anonymous_object19, void *__anonymous_object20), __attribute__ ((unused)) unsigned long int _sizeof_14titerator_type, __attribute__ ((unused)) unsigned long int 
_alignof_14titerator_type, __attribute__ ((unused)) unsigned long int _sizeof_9telt_type, __attribute__ ((unused)) unsigned long int _alignof_9telt_type, __attribute__ ((unused)) void *(*___operator_assign__PF14titerator_type_R14titerator_type14titerator_type__1)(void *__anonymous_object21, void *__anonymous_object22), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type__1)(void *__anonymous_object23), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type14titerator_type__1)(void *__anonymous_object24, void *__anonymous_object25), __attribute__ ((unused)) void (*___destructor__PF_R14titerator_type__1)(void *__anonymous_object26), __attribute__ ((unused)) void *(*___operator_assign__PF9telt_type_R9telt_type9telt_type__1)(void *__anonymous_object27, void *__anonymous_object28), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type__1)(void *__anonymous_object29), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type9telt_type__1)(void *__anonymous_object30, void *__anonymous_object31), __attribute__ ((unused)) void (*___destructor__PF_R9telt_type__1)(void *__anonymous_object32), __attribute__ ((unused)) void *(*___operator_preincr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object33), __attribute__ ((unused)) void *(*___operator_predecr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object34), __attribute__ ((unused)) signed int (*___operator_equal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object35, void *__anonymous_object36), __attribute__ ((unused)) signed int (*___operator_notequal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object37, void *__anonymous_object38), __attribute__ ((unused)) void *(*___operator_deref__PFR9telt_type_14titerator_type__1)(void *__anonymous_object39), void *__begin__14titerator_type_1, void *__end__14titerator_type_1, void (*__func__PF_9telt_type__1)(void *__anonymous_object40));
    28void __for_each_reverse__A0_2_0_0____operator_assign__PFd0_Rd0d0____constructor__PF_Rd0____constructor__PF_Rd0d0____destructor__PF_Rd0____operator_assign__PFd1_Rd1d1____constructor__PF_Rd1____constructor__PF_Rd1d1____destructor__PF_Rd1____operator_preincr__PFd0_Rd0____operator_predecr__PFd0_Rd0____operator_equal__PFi_d0d0____operator_notequal__PFi_d0d0____operator_deref__PFRd1_d0__F_d0d0PF_d1___1(__attribute__ ((unused)) void (*_adapterF_9telt_type__P)(void (*__anonymous_object41)(), void *__anonymous_object42), __attribute__ ((unused)) void *(*_adapterFP9telt_type_14titerator_type_M_P)(void (*__anonymous_object43)(), void *__anonymous_object44), __attribute__ ((unused)) signed int (*_adapterFi_14titerator_type14titerator_type_M_PP)(void (*__anonymous_object45)(), void *__anonymous_object46, void *__anonymous_object47), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type_P_M)(void (*__anonymous_object48)(), __attribute__ ((unused)) void *___retval__operator_preincr__14titerator_type_1, void *__anonymous_object49), __attribute__ ((unused)) void (*_adapterF_P9telt_type9telt_type__MP)(void (*__anonymous_object50)(), void *__anonymous_object51, void *__anonymous_object52), __attribute__ ((unused)) void (*_adapterF9telt_type_P9telt_type9telt_type_P_MP)(void (*__anonymous_object53)(), __attribute__ ((unused)) void *___retval__operator_assign__9telt_type_1, void *__anonymous_object54, void *__anonymous_object55), __attribute__ ((unused)) void (*_adapterF_P14titerator_type14titerator_type__MP)(void (*__anonymous_object56)(), void *__anonymous_object57, void *__anonymous_object58), __attribute__ ((unused)) void (*_adapterF14titerator_type_P14titerator_type14titerator_type_P_MP)(void (*__anonymous_object59)(), __attribute__ ((unused)) void *___retval__operator_assign__14titerator_type_1, void *__anonymous_object60, void *__anonymous_object61), __attribute__ ((unused)) unsigned long int _sizeof_14titerator_type, __attribute__ ((unused)) unsigned 
long int _alignof_14titerator_type, __attribute__ ((unused)) unsigned long int _sizeof_9telt_type, __attribute__ ((unused)) unsigned long int _alignof_9telt_type, __attribute__ ((unused)) void *(*___operator_assign__PF14titerator_type_R14titerator_type14titerator_type__1)(void *__anonymous_object62, void *__anonymous_object63), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type__1)(void *__anonymous_object64), __attribute__ ((unused)) void (*___constructor__PF_R14titerator_type14titerator_type__1)(void *__anonymous_object65, void *__anonymous_object66), __attribute__ ((unused)) void (*___destructor__PF_R14titerator_type__1)(void *__anonymous_object67), __attribute__ ((unused)) void *(*___operator_assign__PF9telt_type_R9telt_type9telt_type__1)(void *__anonymous_object68, void *__anonymous_object69), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type__1)(void *__anonymous_object70), __attribute__ ((unused)) void (*___constructor__PF_R9telt_type9telt_type__1)(void *__anonymous_object71, void *__anonymous_object72), __attribute__ ((unused)) void (*___destructor__PF_R9telt_type__1)(void *__anonymous_object73), __attribute__ ((unused)) void *(*___operator_preincr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object74), __attribute__ ((unused)) void *(*___operator_predecr__PF14titerator_type_R14titerator_type__1)(void *__anonymous_object75), __attribute__ ((unused)) signed int (*___operator_equal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object76, void *__anonymous_object77), __attribute__ ((unused)) signed int (*___operator_notequal__PFi_14titerator_type14titerator_type__1)(void *__anonymous_object78, void *__anonymous_object79), __attribute__ ((unused)) void *(*___operator_deref__PFR9telt_type_14titerator_type__1)(void *__anonymous_object80), void *__begin__14titerator_type_1, void *__end__14titerator_type_1, void (*__func__PF_9telt_type__1)(void *__anonymous_object81));
     
    13711377}
    13721378static inline int invoke_main(int argc, char* argv[], char* envp[]) { (void)argc; (void)argv; (void)envp; return __main__Fi___1(); }
     1379__attribute__ ((__nothrow__,__leaf__,__malloc__)) extern void *malloc(unsigned long int __size);
     1380__attribute__ ((__nothrow__,__leaf__)) extern void free(void *__ptr);
     1381__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void abort(void);
     1382__attribute__ ((__nothrow__,__leaf__,__nonnull__(1))) extern signed int atexit(void (*__func)(void));
     1383__attribute__ ((__nothrow__,__leaf__,__noreturn__)) extern void exit(signed int __status);
     1384extern signed int printf(const char *__restrict __format, ...);
    13731385static inline signed int invoke_main(signed int argc, char **argv, char **envp);
    13741386signed int main(signed int __argc__i_1, char **__argv__PPc_1, char **__envp__PPc_1){
  • src/tests/boundedBuffer.c

    r0fe4e62 rf5c3b6c  
    1 //
     1// 
    22// The contents of this file are covered under the licence agreement in the
    33// file "LICENCE" distributed with Cforall.
    4 //
    5 // boundedBuffer.c --
    6 //
     4// 
     5// boundedBuffer.c -- 
     6// 
    77// Author           : Peter A. Buhr
    88// Created On       : Mon Oct 30 12:45:13 2017
     
    1010// Last Modified On : Mon Oct 30 23:02:46 2017
    1111// Update Count     : 9
    12 //
     12// 
    1313
    1414#include <stdlib>
     
    3131
    3232void insert( Buffer & mutex buffer, int elem ) {
    33         if ( buffer.count == 20 ) wait( buffer.empty );
     33        if ( buffer.count == 20 ) wait( &buffer.empty );
    3434        buffer.elements[buffer.back] = elem;
    3535        buffer.back = ( buffer.back + 1 ) % 20;
    3636        buffer.count += 1;
    37         signal( buffer.full );
     37        signal( &buffer.full );
    3838}
    3939int remove( Buffer & mutex buffer ) {
    40         if ( buffer.count == 0 ) wait( buffer.full );
     40        if ( buffer.count == 0 ) wait( &buffer.full );
    4141        int elem = buffer.elements[buffer.front];
    4242        buffer.front = ( buffer.front + 1 ) % 20;
    4343        buffer.count -= 1;
    44         signal( buffer.empty );
     44        signal( &buffer.empty );
    4545        return elem;
    4646}
  • src/tests/datingService.c

    r0fe4e62 rf5c3b6c  
    1 //                               -*- Mode: C -*-
    2 //
     1//                               -*- Mode: C -*- 
     2// 
    33// The contents of this file are covered under the licence agreement in the
    44// file "LICENCE" distributed with Cforall.
    5 //
    6 // datingService.c --
    7 //
     5// 
     6// datingService.c -- 
     7// 
    88// Author           : Peter A. Buhr
    99// Created On       : Mon Oct 30 12:56:20 2017
     
    1111// Last Modified On : Mon Oct 30 23:02:11 2017
    1212// Update Count     : 15
    13 //
     13// 
    1414
    1515#include <stdlib>                                                                               // random
     
    1818#include <thread>
    1919#include <unistd.h>                                                                             // getpid
     20
     21bool empty( condition & c ) {
     22        return c.blocked.head == NULL;
     23}
    2024
    2125enum { NoOfPairs = 20 };
     
    2731
    2832unsigned int girl( DatingService & mutex ds, unsigned int PhoneNo, unsigned int ccode ) {
    29         if ( is_empty( ds.Boys[ccode] ) ) {
    30                 wait( ds.Girls[ccode] );
     33        if ( empty( ds.Boys[ccode] ) ) {
     34                wait( &ds.Girls[ccode] );
    3135                ds.GirlPhoneNo = PhoneNo;
    3236        } else {
    3337                ds.GirlPhoneNo = PhoneNo;
    34                 signal_block( ds.Boys[ccode] );
     38                signal_block( &ds.Boys[ccode] );
    3539        } // if
    3640        return ds.BoyPhoneNo;
     
    3842
    3943unsigned int boy( DatingService & mutex ds, unsigned int PhoneNo, unsigned int ccode ) {
    40         if ( is_empty( ds.Girls[ccode] ) ) {
    41                 wait( ds.Boys[ccode] );
     44        if ( empty( ds.Girls[ccode] ) ) {
     45                wait( &ds.Boys[ccode] );
    4246                ds.BoyPhoneNo = PhoneNo;
    4347        } else {
    4448                ds.BoyPhoneNo = PhoneNo;
    45                 signal_block( ds.Girls[ccode] );
     49                signal_block( &ds.Girls[ccode] );
    4650        } // if
    4751        return ds.GirlPhoneNo;
  • src/tests/sched-int-barge.c

    r0fe4e62 rf5c3b6c  
    7373        if( action == c.do_wait1 || action == c.do_wait2 ) {
    7474                c.state = WAIT;
    75                 wait( cond );
     75                wait( &cond );
    7676
    7777                if(c.state != SIGNAL) {
     
    8383                c.state = SIGNAL;
    8484
    85                 signal( cond );
    86                 signal( cond );
     85                signal( &cond );
     86                signal( &cond );
    8787        }
    8888        else {
  • src/tests/sched-int-block.c

    r0fe4e62 rf5c3b6c  
    4747//------------------------------------------------------------------------------
    4848void wait_op( global_data_t & mutex a, global_data_t & mutex b, unsigned i ) {
    49         wait( cond, (uintptr_t)this_thread );
     49        wait( &cond, (uintptr_t)this_thread );
    5050
    5151        yield( random( 10 ) );
     
    7474        [a.last_thread, b.last_thread, a.last_signaller, b.last_signaller] = this_thread;
    7575
    76         if( !is_empty( cond ) ) {
     76        if( !is_empty( &cond ) ) {
    7777
    78                 thread_desc * next = front( cond );
     78                thread_desc * next = front( &cond );
    7979
    80                 if( ! signal_block( cond ) ) {
     80                if( ! signal_block( &cond ) ) {
    8181                        sout | "ERROR expected to be able to signal" | endl;
    8282                        abort();
  • src/tests/sched-int-disjoint.c

    r0fe4e62 rf5c3b6c  
    5959// Waiting logic
    6060bool wait( global_t & mutex m, global_data_t & mutex d ) {
    61         wait( cond );
     61        wait( &cond );
    6262        if( d.state != SIGNAL ) {
    6363                sout | "ERROR barging!" | endl;
     
    8080//------------------------------------------------------------------------------
    8181// Signalling logic
    82 void signal( condition & cond, global_t & mutex a, global_data_t & mutex b ) {
     82void signal( condition * cond, global_t & mutex a, global_data_t & mutex b ) {
    8383        b.state = SIGNAL;
    8484        signal( cond );
     
    8686
    8787void logic( global_t & mutex a ) {
    88         signal( cond, a, data );
     88        signal( &cond, a, data );
    8989
    9090        yield( random( 10 ) );
  • src/tests/sched-int-wait.c

    r0fe4e62 rf5c3b6c  
    4141//----------------------------------------------------------------------------------------------------
    4242// Tools
    43 void signal( condition & cond, global_t & mutex a, global_t & mutex b ) {
     43void signal( condition * cond, global_t & mutex a, global_t & mutex b ) {
    4444        signal( cond );
    4545}
    4646
    47 void signal( condition & cond, global_t & mutex a, global_t & mutex b, global_t & mutex c ) {
     47void signal( condition * cond, global_t & mutex a, global_t & mutex b, global_t & mutex c ) {
    4848        signal( cond );
    4949}
    5050
    51 void wait( condition & cond, global_t & mutex a, global_t & mutex b ) {
     51void wait( condition * cond, global_t & mutex a, global_t & mutex b ) {
    5252        wait( cond );
    5353}
    5454
    55 void wait( condition & cond, global_t & mutex a, global_t & mutex b, global_t & mutex c ) {
     55void wait( condition * cond, global_t & mutex a, global_t & mutex b, global_t & mutex c ) {
    5656        wait( cond );
    5757}
     
    6565                switch( action ) {
    6666                        case 0:
    67                                 signal( condABC, globalA, globalB, globalC );
     67                                signal( &condABC, globalA, globalB, globalC );
    6868                                break;
    6969                        case 1:
    70                                 signal( condAB , globalA, globalB );
     70                                signal( &condAB , globalA, globalB );
    7171                                break;
    7272                        case 2:
    73                                 signal( condBC , globalB, globalC );
     73                                signal( &condBC , globalB, globalC );
    7474                                break;
    7575                        case 3:
    76                                 signal( condAC , globalA, globalC );
     76                                signal( &condAC , globalA, globalC );
    7777                                break;
    7878                        default:
     
    8888void main( WaiterABC & this ) {
    8989        for( int i = 0; i < N; i++ ) {
    90                 wait( condABC, globalA, globalB, globalC );
     90                wait( &condABC, globalA, globalB, globalC );
    9191        }
    9292
     
    9898void main( WaiterAB & this ) {
    9999        for( int i = 0; i < N; i++ ) {
    100                 wait( condAB , globalA, globalB );
     100                wait( &condAB , globalA, globalB );
    101101        }
    102102
     
    108108void main( WaiterAC & this ) {
    109109        for( int i = 0; i < N; i++ ) {
    110                 wait( condAC , globalA, globalC );
     110                wait( &condAC , globalA, globalC );
    111111        }
    112112
     
    118118void main( WaiterBC & this ) {
    119119        for( int i = 0; i < N; i++ ) {
    120                 wait( condBC , globalB, globalC );
     120                wait( &condBC , globalB, globalC );
    121121        }
    122122
  • src/tests/thread.c

    r0fe4e62 rf5c3b6c  
    1515                yield();
    1616        }
    17         V(*this.lock);
     17        V(this.lock);
    1818}
    1919
    2020void main(Second& this) {
    21         P(*this.lock);
     21        P(this.lock);
    2222        for(int i = 0; i < 10; i++) {
    2323                sout | "Second : Suspend No." | i + 1 | endl;
Note: See TracChangeset for help on using the changeset viewer.