Changeset 74ec742

benchmark/Cargo.toml.in

-              r29d8c02
+              r74ec742
 authors = ["Cforall"]
 edition = "2018"
+[[bin]]
+name = "rdq-churn-tokio"
+path = "@abs_srcdir@/readyQ/churn.rs"
 [[bin]]

benchmark/Makefile.am

-              r29d8c02
+              r74ec742
 RDQBENCHES = \
         rdq-churn-cfa \
+        rdq-churn-tokio \
+        rdq-churn-go \
+        rdq-churn-fibre \
         rdq-cycle-cfa \
         rdq-cycle-tokio \

benchmark/plot.py

-              r29d8c02
+              r74ec742
 class Field:
         # Plot metadata for one data column; instances are looked up by column name via field_names.
         #   unit  - unit string handed to EngFormatter(unit=...) for the axis tick labels
         #   _min  - stored as .min; used as the lower bound passed to plt.ylim(...)
         #   _log  - stored as .log; when true the axis is switched to log scale
         #   _name - stored as .name; when set it is used as the axis label instead of the raw column name
         # This changeset view shows both constructor signatures: the unmarked line is the
         # signature without _name, the '+' line adds the optional _name parameter.
         def __init__(self, unit, _min, _log):
+        def __init__(self, unit, _min, _log, _name=None):
                 self.unit = unit
                 self.min  = _min
                 self.log  = _log
+                self.name = _name
 field_names = {
 …
         "Ops per procs"         : Field('Ops'   , 0, False),
         "Ops per threads"       : Field('Ops'   , 0, False),
         "ns per ops/procs"      : Field('ns'    , 0, False),
         "Number of threads"     : Field('thrd'  , 1, False),
+        "ns per ops/procs"      : Field(''    , 0, False, _name = "Latency (ns $/$ (Processor $\\times$ Operation))" ),
+        "Number of threads"     : Field(''      , 1, False),
         "Total Operations(ops)" : Field('Ops'   , 0, False),
         "Ops/sec/procs"         : Field('Ops'   , 0, False),
         "Total blocks"          : Field('Blocks', 0, False),
         "Ops per second"        : Field('Ops'   , 0, False),
+        "Ops per second"        : Field(''   , 0, False),
         "Cycle size (# thrds)"  : Field('thrd'  , 1, False),
         "Duration (ms)"         : Field('ms'    , 0, False),
+        "Target QPS"            : Field('QPS'   , 0, False),
+        "Actual QPS"            : Field('QPS'   , 0, False),
+        "Target QPS"            : Field(''      , 0, False),
+        "Actual QPS"            : Field(''      , 0, False),
+        "Average Read Latency"  : Field('us'    , 0, True),
         "Median Read Latency"   : Field('us'    , 0, True),
         "Tail Read Latency"     : Field('us'    , 0, True),
+        "Average Update Latency": Field('us'    , 0, True),
         "Median Update Latency" : Field('us'    , 0, True),
         "Tail Update Latency"   : Field('us'    , 0, True),
+        "Update Ratio"          : Field('\%'    , 0, False),
+}
 def plot(in_data, x, y, out):
+def plot(in_data, x, y, options):
         fig, ax = plt.subplots()
         colors = itertools.cycle(['#0095e3','#006cb4','#69df00','#0aa000','#fb0300','#e30002','#fd8f00','#ff7f00','#8f00d6','#4b009a','#ffff00','#b13f00'])
 …
         print("Making Plots")
         for name, data in series.items():
+        for name, data in sorted(series.items()):
                 _col = next(colors)
                 plt.scatter(data['x'], data['y'], color=_col, label=name, marker='x')
 …
         print("Finishing Plots")
         plt.ylabel(y)
+        plt.ylabel(field_names[y].name if field_names[y].name else y)
         # plt.xticks(range(1, math.ceil(mx) + 1))
         plt.xlabel(x)
+        plt.xlabel(field_names[x].name if field_names[x].name else x)
         plt.grid(b = True)
         ax.xaxis.set_major_formatter( EngFormatter(unit=field_names[x].unit) )
+        if field_names[x].log:
+        if options.logx:
+                ax.set_xscale('log')
+        elif field_names[x].log:
                 ax.set_xscale('log')
         else:
 …
         ax.yaxis.set_major_formatter( EngFormatter(unit=field_names[y].unit) )
+        if field_names[y].log:
+        if options.logy:
+                ax.set_yscale('log')
+        elif field_names[y].log:
                 ax.set_yscale('log')
         else:
                 plt.ylim(field_names[y].min, my*1.2)
+                plt.ylim(field_names[y].min, options.MaxY if options.MaxY else my*1.2)
         plt.legend(loc='upper left')
         print("Results Ready")
         if out:
                 plt.savefig(out)
+        if options.out:
+                plt.savefig(options.out, bbox_inches='tight')
         else:
                 plt.show()
 …
         parser.add_argument('-y', nargs='?', type=str, default="", help="Which field to use as the Y axis")
         parser.add_argument('-x', nargs='?', type=str, default="", help="Which field to use as the X axis")
+        parser.add_argument('--logx', action='store_true', help="if set, makes the x-axis logscale")
+        parser.add_argument('--logy', action='store_true', help="if set, makes the y-axis logscale")
+        parser.add_argument('--MaxY', nargs='?', type=int, help="maximum value of the y-axis")
         options =  parser.parse_args()
 …
         plot(data, wantx, wanty, options.out)
+        plot(data, wantx, wanty, options)

benchmark/process-mutilate.py

-              r29d8c02
+              r74ec742
 parser = argparse.ArgumentParser(description='Python Script to convert output from mutilate to rmit like output')
 parser.add_argument('--out', nargs='?', type=argparse.FileType('w'), default=sys.stdout)
+parser.add_argument('--var', nargs='?', type=str, default='Target QPS')
 try:
         options =  parser.parse_args()
 …
         try:
+                latAvs = fields[1]
                 lat50s = fields[6]
                 lat99s = fields[9]
 …
         try:
+                latAv = locale.atof(latAvs)
                 lat50 = locale.atof(lat50s)
                 lat99 = locale.atof(lat99s)
 …
                 raise Warning("Warning: \"{}\" \"{}\"! can't convert to float".format(lat50s, lat99s))
         return lat50, lat99
+        return latAv, lat50, lat99
 def want0(line):
 …
                 try:
                         if   line.startswith("read"):
                                 rlat50, rlat99 = precentile(line)
+                                rlatAv, rlat50, rlat99 = precentile(line)
                         elif line.startswith("update"):
                                 ulat50, ulat99 = precentile(line)
+                                ulatAv, ulat50, ulat99 = precentile(line)
                         elif line.startswith("Total QPS"):
 …
         try:
+                out['Average Read Latency'] = rlatAv
                 out['Median Read Latency'] = rlat50
                 out['Tail Read Latency'] = rlat99
 …
         try:
+                out['Average Update Latency'] = ulatAv
                 out['Median Update Latency'] = ulat50
                 out['Tail Update Latency'] = ulat99
 …
                         continue
                 d = { 'Target QPS': int(rate) }
+                d = { options.var : int(rate) }
                 w = extract( f, d )

benchmark/readyQ/bench.go

r29d8c02	r74ec742
71	71	duration = 5
72	72	clock_mode = true
73		fmt.Printf("Running for %f seconds\n", duration)
	73	fmt.Printf("Running for %f seconds (default)\n", duration)
74	74	}
75	75

benchmark/readyQ/churn.cfa

-              r29d8c02
+              r74ec742
 #include "rq_bench.hfa"
+#include <locks.hfa>
 unsigned spot_cnt = 2;
 bench_sem * volatile * spots;
+semaphore * spots;
 thread BThrd {
         unsigned long long count;
         unsigned long long blocks;
         bench_sem sem;
+        bool skip;
 };
 …
         this.count  = 0;
         this.blocks = 0;
+        this.skip = false;
+}
 …
 void main( BThrd & this ) with( this ) {
         wait( sem );
+        park();
         for() {
+                uint32_t r = prng();
+                bench_sem * next = __atomic_exchange_n(&spots[r % spot_cnt], &sem, __ATOMIC_SEQ_CST);
+                if(next) post( *next );
+                blocks += wait( sem );
+                uint32_t r = prng(this);
+                semaphore & sem = spots[r % spot_cnt];
+                if(!skip) V( sem );
+                blocks += P( sem );
+                skip = false;
                 count ++;
                 if( clock_mode && stop) break;
 …
                 { 's', "spots", "Number of spots in the system", spot_cnt }
         };
         BENCH_OPT_PARSE("cforall cycle benchmark");
+        BENCH_OPT_PARSE("cforall churn benchmark");
+        {
 …
                         spots = aalloc(spot_cnt);
                         for(i; spot_cnt) {
                                 spots[i] = 0p;
+                                (spots[i]){ 0 };
+                        }
 …
                         BThrd * threads[nthreads];
                         for(i; nthreads ) {
+                                threads[i] = malloc();
+                                (*threads[i]){};
+                                BThrd & t = *(threads[i] = malloc());
+                                (t){};
+                                t.skip = i < spot_cnt;
+                        }
                         printf("Starting\n");
 …
                         for(i; nthreads) {
                                 post( threads[i]->sem );
+                                unpark( *threads[i] );
+                        }
                         wait(start, is_tty);
 …
                         printf("\nDone\n");
+                        for(i; spot_cnt) {
+                                for(10000) V( spots[i] );
+                        }
                         for(i; nthreads) {
-                                post( threads[i]->sem );
                                 BThrd & thrd = join( *threads[i] );
                                 global_counter += thrd.count;

benchmark/readyQ/cycle.cpp

r29d8c02	r74ec742
46	46	}
47	47	for(unsigned i = 0; i < tthreads; i++) {
48		threads[i] = new Fibre( reinterpret_cast<void ()(void )>(partner_main), &thddata[i] );
	48	threads[i] = new Fibre();
	49	threads[i]->run( partner_main, &thddata[i] );
49	50	}
50	51	printf("Starting\n");

benchmark/readyQ/rq_bench.hpp

-              r29d8c02
+              r74ec742
+        }
+        if(strcmp(arg, "Y") == 0) {
+                value = true;
+                return true;
+        }
+        if(strcmp(arg, "y") == 0) {
+                value = true;
+                return true;
+        }
         if(strcmp(arg, "no") == 0) {
+                value = false;
+                return true;
+        }
+        if(strcmp(arg, "N") == 0) {
+                value = false;
+                return true;
+        }
+        if(strcmp(arg, "n") == 0) {
                 value = false;
                 return true;

benchmark/readyQ/transfer.cfa

-              r29d8c02
+              r74ec742
 bool exhaust = false;
+volatile bool estop = false;
 thread$ * the_main;
 …
 static void waitgroup() {
         Time start = timeHiRes();
         for(i; nthreads) {
+        OUTER: for(i; nthreads) {
                 PRINT( sout | "Waiting for :" | i | "(" | threads[i]->idx | ")"; )
                 while( threads[i]->idx != lead_idx ) {
 …
                                 print_stats_now( bench_cluster, CFA_STATS_READY_Q | CFA_STATS_IO );
                                 serr | "Programs has been blocked for more than 5 secs";
+                                exit(1);
+                                estop = true;
+                                unpark( the_main );
+                                break OUTER;
+                        }
+                }
 …
 static void lead(MyThread & this) {
         this.idx = ++lead_idx;
         if(lead_idx > stop_count) {
+        if(lead_idx > stop_count || estop) {
                 PRINT( sout | "Leader" | this.id | "done"; )
                 unpark( the_main );
 …
                         wait( this );
+                }
                 if(lead_idx > stop_count) break;
+                if(lead_idx > stop_count || estop) break;
+        }
+}
 …
         sout | "Number of processors    : " | nprocs;
         sout | "Number of threads       : " | nthreads;
         sout | "Total Operations(ops)   : " | stop_count;
+        sout | "Total Operations(ops)   : " | lead_idx - 1;
         sout | "Threads parking on wait : " | (exhaust ? "yes" : "no");
         sout | "Rechecking              : " | rechecks;
+        sout | "ns per transfer         : " | (end - start)`dms / lead_idx;

benchmark/readyQ/transfer.cpp

-              r29d8c02
+              r74ec742
 bool exhaust = false;
+volatile bool estop = false;
 bench_sem the_main;
 …
                                 if( to_miliseconds(timeHiRes() - start) > 5'000 ) {
                                         std::cerr << "Programs has been blocked for more than 5 secs" << std::endl;
+                                        std::exit(1);
+                                        estop = true;
+                                        the_main.post();
+                                        goto END;
+                                }
+                        }
+                }
+                END:;
                 PRINT( std::cout | "Waiting done"; )
+        }
 …
         void lead() {
                 this->idx = ++lead_idx;
                 if(lead_idx > stop_count) {
+                if(lead_idx > stop_count || estop) {
                         PRINT( std::cout << "Leader " << this->id << " done" << std::endl; )
                         the_main.post();
 …
+        }
         static void main(void * arg) {
                 MyThread & self = *reinterpret_cast<MyThread*>(arg);
+        static void main(MyThread * arg) {
+                MyThread & self = *arg;
                 self.park();
 …
                                 self.wait();
+                        }
                         if(lead_idx > stop_count) break;
+                        if(lead_idx > stop_count || estop) break;
+                }
+        }
 …
                         for(size_t i = 0; i < nthreads; i++) {
                                 threads[i] = new MyThread( i );
+                                handles[i] = new Fibre( MyThread::main, threads[i] );
+                                handles[i] = new Fibre();
+                                handles[i]->run( MyThread::main, threads[i] );
+                        }
 …
                                 PRINT( std::cout << i << " joined" << std::endl; )
                                 rechecks += thrd.rechecks;
-                                // delete( handles[i] );
                                 delete( threads[i] );
+                        }
 …
         std::cout << "Number of processors    : " << nprocs << std::endl;
         std::cout << "Number of threads       : " << nthreads << std::endl;
         std::cout << "Total Operations(ops)   : " << stop_count << std::endl;
+        std::cout << "Total Operations(ops)   : " << (lead_idx - 1) << std::endl;
         std::cout << "Threads parking on wait : " << (exhaust ? "yes" : "no") << std::endl;
         std::cout << "Rechecking              : " << rechecks << std::endl;
+        std::cout << "ns per transfer         : " << std::fixed << (((double)(end - start)) / (lead_idx)) << std::endl;

benchmark/readyQ/transfer.go

-              r29d8c02
+              r74ec742
         "math/rand"
         "os"
+        "regexp"
         "runtime"
         "sync/atomic"
 …
         id uint64
         idx uint64
+        estop uint64
         seed uint64
+}
 …
 func NewLeader(size uint64) (*LeaderInfo) {
         this := &LeaderInfo{0, 0, uint64(os.Getpid())}
+        this := &LeaderInfo{0, 0, 0, uint64(os.Getpid())}
         r := rand.Intn(10)
 …
+}
 func waitgroup(idx uint64, threads [] MyThread) {
+func waitgroup(leader * LeaderInfo, idx uint64, threads [] MyThread, main_sem chan struct {}) {
         start := time.Now()
+        Outer:
         for i := 0; i < len(threads); i++ {
                 // fmt.Fprintf(os.Stderr, "Waiting for :%d (%d)\n", threads[i].id, atomic.LoadUint64(&threads[i].idx) );
 …
                         if delta.Seconds() > 5 {
                                 fmt.Fprintf(os.Stderr, "Programs has been blocked for more than 5 secs")
+                                os.Exit(1)
+                                atomic.StoreUint64(&leader.estop, 1);
+                                main_sem <- (struct {}{})
+                                break Outer
+                        }
+                }
 …
                 if i != me {
                         // debug!( "Leader waking {}", i);
+                        defer func() {
+                                if err := recover(); err != nil {
+                                        fmt.Fprintf(os.Stderr, "Panic occurred: %s\n", err)
+                                }
+                        }()
                         threads[i].sem <- (struct {}{})
+                }
 …
         atomic.StoreUint64(&leader.idx, nidx);
         if nidx > stop_count {
+        if nidx > stop_count || atomic.LoadUint64(&leader.estop) != 0 {
                 // debug!( "Leader {} done", this.id);
                 main_sem <- (struct {}{})
 …
         // debug!( "====================\nLeader no {} : {}", nidx, this.id);
         waitgroup(nidx, threads);
+        waitgroup(leader, nidx, threads, main_sem);
         leader.next( uint64(len(threads)) );
 …
                         waitleader( exhaust, leader, &threads[me], &r )
+                }
                 if atomic.LoadUint64(&leader.idx) > stop_count { break; }
+                if atomic.LoadUint64(&leader.idx) > stop_count || atomic.LoadUint64(&leader.estop) != 0 { break; }
+        }
 …
 func main() {
         // Benchmark specific command line arguments
         exhaustOpt := flag.Bool("e", false, "Whether or not threads that have seen the new epoch should park instead of yielding.")
+        exhaustOpt := flag.String("e", "no", "Whether or not threads that have seen the new epoch should park instead of yielding.")
         // General benchmark initialization and deinitialization
+        defer bench_init()()
+        exhaust := *exhaustOpt;
+        bench_init()
+        exhaustVal := *exhaustOpt;
+        var exhaust bool
+        re_yes := regexp.MustCompile("[Yy]|[Yy][Ee][Ss]")
+        re_no  := regexp.MustCompile("[Nn]|[Nn][Oo]")
+        if re_yes.Match([]byte(exhaustVal)) {
+                exhaust = true
+        } else if re_no.Match([]byte(exhaustVal)) {
+                exhaust = false
+        } else {
+                fmt.Fprintf(os.Stderr, "Unrecognized exhaust(-e) option '%s'\n", exhaustVal)
+                os.Exit(1)
+        }
         if clock_mode {
                 fmt.Fprintf(os.Stderr, "Programs does not support fixed duration mode")
+                fmt.Fprintf(os.Stderr, "Programs does not support fixed duration mode\n")
                 os.Exit(1)
+        }
 …
                 ws = "no"
+        }
         p.Printf("Duration (ms)           : %f\n", delta.Milliseconds() )
+        p.Printf("Duration (ms)           : %d\n", delta.Milliseconds() )
         p.Printf("Number of processors    : %d\n", nprocs )
         p.Printf("Number of threads       : %d\n", nthreads )
         p.Printf("Total Operations(ops)   : %15d\n", stop_count )
+        p.Printf("Total Operations(ops)   : %15d\n", (leader.idx - 1) )
         p.Printf("Threads parking on wait : %s\n", ws)
         p.Printf("Rechecking              : %d\n", rechecks )
+}
+        p.Printf("ns per transfer         : %f\n", float64(delta.Nanoseconds()) / float64(leader.idx) )
+}

benchmark/readyQ/transfer.rs

-              r29d8c02
+              r74ec742
 use std::hint;
 use std::sync::Arc;
 use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use std::time::{Instant,Duration};
 …
                         match val {
                                 "yes" => true,
+                                "Y" => true,
+                                "y" => true,
                                 "no"  => false,
+                                "N"  => false,
+                                "n"  => false,
                                 "maybe" | "I don't know" | "Can you repeat the question?" => {
                                         eprintln!("Lines for 'Malcolm in the Middle' are not acceptable values of parameter 'exhaust'");
 …
         id: AtomicUsize,
         idx: AtomicUsize,
+        estop: AtomicBool,
         seed: u128,
+}
 …
                         id: AtomicUsize::new(nthreads),
                         idx: AtomicUsize::new(0),
+                        estop: AtomicBool::new(false),
                         seed: process::id() as u128
                 };
 …
+}
 fn waitgroup(idx: usize, threads: &Vec<Arc<MyThread>>) {
+fn waitgroup(leader: &LeaderInfo, idx: usize, threads: &Vec<Arc<MyThread>>, main_sem: &sync::Semaphore) {
         let start = Instant::now();
         for t in threads {
+        'outer: for t in threads {
                 debug!( "Waiting for :{} ({})", t.id, t.idx.load(Ordering::Relaxed) );
                 while t.idx.load(Ordering::Relaxed) != idx {
 …
                         if start.elapsed() > Duration::from_secs(5) {
                                 eprintln!("Programs has been blocked for more than 5 secs");
+                                std::process::exit(1);
+                                leader.estop.store(true, Ordering::Relaxed);
+                                main_sem.add_permits(1);
+                                break 'outer;
+                        }
+                }
 …
         leader.idx.store(nidx, Ordering::Relaxed);
         if nidx as u64 > exp.stop_count {
+        if nidx as u64 > exp.stop_count || leader.estop.load(Ordering::Relaxed) {
                 debug!( "Leader {} done", this.id);
                 main_sem.add_permits(1);
 …
         debug!( "====================\nLeader no {} : {}", nidx, this.id);
         waitgroup(nidx, threads);
+        waitgroup(leader, nidx, threads, main_sem);
         leader.next( threads.len() );
 …
                         wait( exhaust, &leader, &threads[me], &mut rechecks ).await;
+                }
                 if leader.idx.load(Ordering::Relaxed) as u64 > exp.stop_count { break; }
+                if leader.idx.load(Ordering::Relaxed) as u64 > exp.stop_count || leader.estop.load(Ordering::Relaxed) { break; }
+        }
 …
         println!("Number of processors    : {}", (nprocs).to_formatted_string(&Locale::en));
         println!("Number of threads       : {}", (nthreads).to_formatted_string(&Locale::en));
         println!("Total Operations(ops)   : {:>15}", (exp.stop_count).to_formatted_string(&Locale::en));
+        println!("Total Operations(ops)   : {:>15}", (leader.idx.load(Ordering::Relaxed) - 1).to_formatted_string(&Locale::en));
         println!("Threads parking on wait : {}", if exhaust { "yes" } else { "no" });
         println!("Rechecking              : {}", rechecks );
+}
+        println!("ns per transfer         : {}", ((duration.as_nanos() as f64) / leader.idx.load(Ordering::Relaxed) as f64));
+}

benchmark/readyQ/yield.cfa

-              r29d8c02
+              r74ec742
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+extern "C" {
+        #include <locale.h>
+        #include <getopt.h>
+}
+#include <unistd.h>
+#include <clock.hfa>
+#include <time.hfa>
+#include <stats.hfa>
+#include "../benchcltr.hfa"
+extern bool traceHeapOn();
+volatile bool run = false;
+volatile unsigned long long global_counter;
+#include "rq_bench.hfa"
 // Benchmark thread type, declared with a 128-byte alignment attribute.
 // NOTE(review): the alignment is presumably there to keep per-thread counters on separate cache lines - confirm.
 // Two counter fields are visible in this changeset view:
 //   counter - incremented by the loop in main() that spins on the global `run` flag
 //   count   - incremented by the loop in main() that terminates on stop / stop_count
 thread __attribute__((aligned(128))) Yielder {
         unsigned long long counter;
+        unsigned long long count;
 };
 // Constructor for Yielder: zeroes the yield counter(s) and constructs the underlying
 // thread object with the name "Yielder Thread".
 // The changeset view shows two thread-construction lines: the unmarked one passes
 // *the_benchmark_cluster, the '+' one passes bench_cluster.
 // NOTE(review): the second argument is presumably the cluster the thread is scheduled on - confirm.
 void ?{}( Yielder & this ) {
         this.counter = 0;
         ((thread&)this){ "Yielder Thread", *the_benchmark_cluster };
+        ((thread&)this){ "Yielder Thread", bench_cluster };
+        this.count = 0;
+}
 // Thread body for Yielder. The thread parks first, so it does nothing until the
 // program's main() calls unpark() on it.
 // This changeset view interleaves two variants of the body under one set of braces:
 //   * a `for()` loop that yields and bumps this.count, leaving when `stop` is set in
 //     clock mode or when this.count reaches stop_count otherwise;
 //   * a `while` loop that yields and bumps this.counter for as long as the global
 //     `run` flag reads true, followed by atomic updates of global_counter and threads_left.
 void main( Yielder & this ) {
         park();
+        /* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
+        for() {
+                yield();
+                this.count++;
+                if( clock_mode && stop) break;
+                if(!clock_mode && this.count >= stop_count) break;
+        }
+        while(__atomic_load_n(&run, __ATOMIC_RELAXED)) {
+                yield();
+                this.counter++;
+        }
         // Publish this thread's total into the shared counter, then decrement the shared threads_left count.
+        __atomic_fetch_add(&global_counter, this.counter, __ATOMIC_SEQ_CST);
+        __atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST);
+}
 int main(int argc, char * argv[]) {
-        unsigned num_io = 1;
-        io_context_params params;
         cfa_option opt[] = {
                 BENCH_OPT_CFA
+                BENCH_OPT
         };
+        int opt_cnt = sizeof(opt) / sizeof(cfa_option);
+        char **left;
+        parse_args( argc, argv, opt, opt_cnt, "[OPTIONS]...\ncforall yield benchmark", left );
+        BENCH_OPT_PARSE("cforall yield benchmark");
+        {
                 printf("Running %d threads on %d processors for %f seconds\n", nthreads, nprocs, duration);
+                unsigned long long global_counter = 0;
                 Time start, end;
                 BenchCluster cl = { num_io, params, CFA_STATS_READY_Q };
+                BenchCluster bc = { nprocs };
+                {
+                        BenchProc procs[nprocs];
+                        {
+                                Yielder threads[nthreads];
+                                printf("Starting\n");
+                        threads_left = nthreads;
+                        Yielder threads[nthreads];
+                        printf("Starting\n");
+                                bool is_tty = isatty(STDOUT_FILENO);
+                                start = timeHiRes();
+                                run = true;
+                        bool is_tty = isatty(STDOUT_FILENO);
+                        start = timeHiRes();
                                 for(i; nthreads) {
                                         unpark( threads[i] );
+                                }
                                 wait(duration, start, end, is_tty);
+                        for(i; nthreads) {
+                                unpark( threads[i] );
+                        }
+                        wait(start, is_tty);
+                                run = false;
+                                end = timeHiRes();
+                                printf("\nDone\n");
+                        stop = true;
+                        end = timeHiRes();
+                        printf("\nDone\n");
+                        for(i; nthreads) {
+                                Yielder & y = join( threads[i] );
+                                global_counter += y.count;
+                        }
+                }
+                printf("Duration (ms)       : %'ld\n", (end - start)`dms);
+                printf("Number of processors: %'d\n", nprocs);
+                printf("Number of threads   : %'d\n", nthreads);
+                printf("Total yields        : %'15llu\n", global_counter);
+                printf("Yields per second   : %'18.2lf\n", ((double)global_counter) / (end - start)`s);
+                printf("ns per yields       : %'18.2lf\n", ((double)(end - start)`ns) / global_counter);
+                printf("Yields per procs    : %'15llu\n", global_counter / nprocs);
+                printf("Yields/sec/procs    : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`s);
+                printf("ns per yields/procs : %'18.2lf\n", ((double)(end - start)`ns) / (global_counter / nprocs));
+                printf("Duration (ms)        : %'ld\n", (end - start)`dms);
+                printf("Number of processors : %'d\n", nprocs);
+                printf("Number of threads    : %'d\n", nthreads);
+                printf("Total Operations(ops): %'15llu\n", global_counter);
+                printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / (end - start)`s);
+                printf("ns per ops           : %'18.2lf\n", (end - start)`dns / global_counter);
+                printf("Ops per threads      : %'15llu\n", global_counter / nthreads);
+                printf("Ops per procs        : %'15llu\n", global_counter / nprocs);
+                printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`s);
+                printf("ns per ops/procs     : %'18.2lf\n", (end - start)`dns / (global_counter / nprocs));
                 fflush(stdout);
+        }

benchmark/readyQ/yield.cpp

-              r29d8c02
+              r74ec742
+#include <cassert>
+#include <cstdlib>
+#include <cstdio>
+#include <cstring>
+#include <climits>
+extern "C" {
+        #include <locale.h>
+        #include <getopt.h>
+}
+#include <unistd.h>
+#include <chrono>
+using Clock = std::chrono::high_resolution_clock;
+using duration_t = std::chrono::duration<double>;
+using std::chrono::nanoseconds;
// Unit-conversion helper: wraps `seconds` in a std::chrono::duration<T> (period of one
// second), converts it to a duration whose period is `Ratio`, and returns the tick count.
// Example: duration_cast<std::milli>(1.5) yields 1500.0.
+template<typename Ratio, typename T>
+T duration_cast(T seconds) {
+        return std::chrono::duration_cast<std::chrono::duration<T, Ratio>>(std::chrono::duration<T>(seconds)).count();
+}
+#include "rq_bench.hpp"
+#include <libfibre/fibre.h>
 volatile bool run = false;
 volatile unsigned long long global_counter;
-#include "libfibre/fibre.h"
+FredBarrier * barrier;
// Yield counter handed to each fibre (one element of a counters[] array per fibre).
// The attribute requests 128-byte alignment for every instance.
// NOTE(review): presumably this is to avoid false sharing between fibres - confirm.
+struct __attribute__((aligned(128))) counter_t {
+        int value = 0;  // starts at zero; fibre_main(counter_t *) increments it after each forced yield
+};
// Fibre body for the yield benchmark. This changeset view interleaves two variants,
// each with its own signature line but sharing the single closing brace at the end:
//   * fibre_main(): parks, then yields in a loop while counting in a local `count`;
//     it leaves the loop when `stop` is set in clock mode, or once `count` reaches
//     stop_count otherwise.
//   * fibre_main(counter_t *): waits on the global `barrier`, then force-yields and
//     increments counter->value for as long as the global `run` flag reads true.
// The trailing atomics add the fibre's total to global_counter (one line per variant)
// and decrement threads_left.
+void fibre_main() {
+        fibre_park();
+        unsigned long long count = 0;
+        for(;;) {
+                Fibre::yield();
+                count++;
+                if( clock_mode && stop) break;
+                if(!clock_mode && count >= stop_count) break;
+        }
+void fibre_main( counter_t * counter ) {
+        barrier->wait();
+        // /* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );
+        while(__atomic_load_n(&run, __ATOMIC_RELAXED)) {
+                Fibre::forceYield();
+                // fibre_yield();
+                counter->value++;
+        }
+        __atomic_fetch_add(&global_counter, counter->value, __ATOMIC_SEQ_CST);
+        __atomic_fetch_add(&global_counter, count, __ATOMIC_SEQ_CST);
+        __atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST);
+}
 int main(int argc, char * argv[]) {
+        double duration = 5;
+        int nprocs = 1;
+        int nthreads = 1;
+        std::cout.imbue(std::locale(""));
+        setlocale(LC_ALL, "");
+        for(;;) {
+                static struct option options[] = {
+                        {"duration",  required_argument, 0, 'd'},
+                        {"nprocs",    required_argument, 0, 'p'},
+                        {"nthreads",  required_argument, 0, 't'},
+                        {0, 0, 0, 0}
+                };
+                int idx = 0;
+                int opt = getopt_long(argc, argv, "d:p:t:", options, &idx);
+                const char * arg = optarg ? optarg : "";
+                char * end;
+                switch(opt) {
+                        case -1:
+                                goto run;
+                        // Numeric Arguments
+                        case 'd':
+                                duration = strtod(arg, &end);
+                                if(*end != '\0') {
+                                        fprintf(stderr, "Duration must be a valid double, was %s\n", arg);
+                                        goto usage;
+                                }
+                                break;
+                        case 't':
+                                nthreads = strtoul(arg, &end, 10);
+                                if(*end != '\0' || nthreads < 1) {
+                                        fprintf(stderr, "Number of threads must be a positive integer, was %s\n", arg);
+                                        goto usage;
+                                }
+                                break;
+                        case 'p':
+                                nprocs = strtoul(arg, &end, 10);
+                                if(*end != '\0' || nprocs < 1) {
+                                        fprintf(stderr, "Number of processors must be a positive integer, was %s\n", arg);
+                                        goto usage;
+                                }
+                                break;
+                        // Other cases
+                        default: /* ? */
+                                fprintf( stderr, "Unkown option '%c'\n", opt);
+                        usage:
+                                fprintf( stderr, "Usage: %s [options]\n", argv[0]);
+                                fprintf( stderr, "\n" );
+                                fprintf( stderr, "  -d, --duration=DURATION  Duration of the experiment, in seconds\n" );
+                                fprintf( stderr, "  -t, --nthreads=NTHREADS  Number of kernel threads\n" );
+                                fprintf( stderr, "  -q, --nqueues=NQUEUES    Number of queues per threads\n" );
+                                exit(1);
+                }
+        }
+        run:
+        option_t opt[] = {
+                BENCH_OPT
+        };
+        BENCH_OPT_PARSE("libfibre yield benchmark");
+        {
                 printf("Running %d threads on %d processors for %lf seconds\n", nthreads, nprocs, duration);
                 FibreInit();
                 barrier = new FredBarrier(nthreads + 1);
+                FibreInit(1, nprocs);
+                uint64_t start, end;
+                {
+                        Context::CurrCluster().addWorkers(nprocs);
+                        {
+                                counter_t counters[nthreads];
+                                Fibre threads[nthreads];
+                                for(int i = 0; i < nthreads; i++) {
+                                        threads[i].run(fibre_main, &counters[i]);
+                                }
+                                printf("Starting\n");
+                                bool is_tty = isatty(STDOUT_FILENO);
+                                auto before = Clock::now();
+                                run = true;
+                        threads_left = nthreads;
+                        Fibre * threads[nthreads];
+                        for(unsigned i = 0; i < nthreads; i++) {
+                                threads[i] = new Fibre();
+                                threads[i]->run(fibre_main);
+                        }
+                        printf("Starting\n");
+                        bool is_tty = isatty(STDOUT_FILENO);
+                        start = timeHiRes();
+                                barrier->wait();
+                                for(;;) {
+                                        usleep(500'000);
+                                        auto now = Clock::now();
+                                        duration_t durr = now - before;
+                                        if( durr.count() > duration ) {
+                                                break;
+                                        }
+                                        if(is_tty) {
+                                                std::cout << "\r" << std::setprecision(4) << durr.count();
+                                                std::cout.flush();
+                                        }
+                                }
+                        for(unsigned i = 0; i < nthreads; i++ ) {
+                                fibre_unpark( threads[i] );
+                        }
+                        wait<Fibre>(start, is_tty);
+                                auto after = Clock::now();
+                                duration_t durr = after - before;
+                                duration = durr.count();
+                                run = false;
+                                printf("\nDone\n");
+                                for(auto & thread : threads) {
+                                        thread.join();
+                                }
+                                // for(const auto & counter : counters) {
+                                //      std::cout << counter.value << std::endl;
+                                // }
+                        stop = true;
+                        end = timeHiRes();
+                        for(unsigned i = 0; i < nthreads; i++ ) {
+                                fibre_join( threads[i], nullptr );
+                        }
+                }
                 auto dur_nano = duration_cast<std::nano>(duration);
                 auto dur_dms  = duration_cast<std::milli>(duration);
                 printf("Duration (ms)       : %'.2lf\n", dur_dms );
                 printf("Total yields        : %'15llu\n", global_counter );
                 printf("Yields per procs    : %'15llu\n", global_counter / nprocs );
                 printf("Yields per second   : %'18.2lf\n", ((double)global_counter) / duration );
                 printf("Yields/sec/procs    : %'18.2lf\n", (((double)global_counter) / nprocs) / duration );
                 printf("ns per yields       : %'18.2lf\n", dur_nano / global_counter );
                 printf("ns per yields/procs : %'18.2lf\n", dur_nano / (global_counter / nprocs) );
+                printf("Duration (ms)        : %'ld\n", to_miliseconds(end - start));
+                printf("Number of processors : %'d\n", nprocs);
+                printf("Number of threads    : %'d\n", nthreads);
+                printf("Total Operations(ops): %'15llu\n", global_counter);
+                printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / to_fseconds(end - start));
+                printf("ns per ops           : %'18.2lf\n", ((double)(end - start)) / global_counter);
+                printf("Ops per threads      : %'15llu\n", global_counter / nthreads);
+                printf("Ops per procs        : %'15llu\n", global_counter / nprocs);
+                printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / to_fseconds(end - start));
+                printf("ns per ops/procs     : %'18.2lf\n", ((double)(end - start)) / (global_counter / nprocs));
+                fflush(stdout);
+        }
+}

benchmark/readyQ/yield.rs

-              r29d8c02
+              r74ec742
         });
         println!("Duration (ms)       : {}", (duration.as_millis()).to_formatted_string(&Locale::en));
         println!("Number of processors: {}", (nprocs).to_formatted_string(&Locale::en));
         println!("Number of threads   : {}", (nthreads).to_formatted_string(&Locale::en));
         println!("Total yields        : {:>15}", (global_counter).to_formatted_string(&Locale::en));
         println!("Yields per second   : {:>15}", (((global_counter as f64) / duration.as_secs() as f64) as u64).to_formatted_string(&Locale::en));
         println!("ns per yields       : {:>15}", ((duration.as_nanos() as f64 / global_counter as f64) as u64).to_formatted_string(&Locale::en));
         println!("Yields per threads  : {:>15}", (global_counter / nthreads as u64).to_formatted_string(&Locale::en));
         println!("Yields per procs    : {:>15}", (global_counter / nprocs as u64).to_formatted_string(&Locale::en));
         println!("Yields/sec/procs    : {:>15}", ((((global_counter as f64) / nprocs as f64) / duration.as_secs() as f64) as u64).to_formatted_string(&Locale::en));
         println!("ns per yields/procs : {:>15}", ((duration.as_nanos() as f64 / (global_counter as f64 / nprocs as f64)) as u64).to_formatted_string(&Locale::en));
+        println!("Duration (ms)        : {}", (duration.as_millis()).to_formatted_string(&Locale::en));
+        println!("Number of processors : {}", (nprocs).to_formatted_string(&Locale::en));
+        println!("Number of threads    : {}", (nthreads).to_formatted_string(&Locale::en));
+        println!("Total Operations(ops): {:>15}", (global_counter).to_formatted_string(&Locale::en));
+        println!("Ops per second       : {:>15}", (((global_counter as f64) / duration.as_secs() as f64) as u64).to_formatted_string(&Locale::en));
+        println!("ns per ops           : {:>15}", ((duration.as_nanos() as f64 / global_counter as f64) as u64).to_formatted_string(&Locale::en));
+        println!("Ops per threads      : {:>15}", (global_counter / nthreads as u64).to_formatted_string(&Locale::en));
+        println!("Ops per procs        : {:>15}", (global_counter / nprocs as u64).to_formatted_string(&Locale::en));
+        println!("Ops/sec/procs        : {:>15}", ((((global_counter as f64) / nprocs as f64) / duration.as_secs() as f64) as u64).to_formatted_string(&Locale::en));
+        println!("ns per ops/procs     : {:>15}", ((duration.as_nanos() as f64 / (global_counter as f64 / nprocs as f64)) as u64).to_formatted_string(&Locale::en));
+}

benchmark/rmit.py

-              r29d8c02
+              r74ec742
                 pass
+        if re.search("^[0-9-,]+$", values):
+        if values.startswith('\\'):
+                return key, values[1:].split(',')
+        elif re.search("^[0-9-,]+$", values):
                 values = parse_range(values)
                 return key, [v for v in values]
 …
         return eval(fmt)
+# Evaluate all the options
+# options can be of the form key = val or key = some_math(other_key)
+# produce a list of all the options to replace some_math(other_key) with actual value
 def eval_options(opts):
+        # Find all the options with dependencies
         dependents = [d for d in opts.values() if type(d) is DependentOpt]
+        # we need to find all the stragglers
         processed = []
+        nopts = []
+        # extract all the necessary inputs
+        input_keys = {}
         for d in dependents:
+                # Mark the dependent as seen
                 processed.append(d.key)
+                lists = []
+                # process each of the dependencies
                 for dvar in d.vars:
+                        # Check that it depends on something that exists
                         if not dvar in opts.keys():
                                 print('ERROR: extra pattern option {}:{} uses unknown key {}'.format(d.key,d.value,dvar), file=sys.stderr)
                                 sys.exit(1)
+                        lists.append([(dvar, o) for o in opts[dvar]])
+                        # Check that it's not nested
+                        if type(dvar) is DependentOpt:
+                                print('ERROR: dependent options cannot be nested {}:{} uses key {}'.format(d.key,d.value,dvar), file=sys.stderr)
+                                sys.exit(1)
+                        # Add the values to the input keys
+                        if dvar not in input_keys:
+                                input_keys[dvar] = opts[dvar]
+                        else :
+                                if input_keys[dvar] != opts[dvar]:
+                                        print('INTERNAL ERROR: repeat input do not match {}:{} vs {}'.format(dvar,opts[dvar],input_keys[dvar]), file=sys.stderr)
+                                        sys.exit(1)
+                        # Mark the input as seen
                         processed.append(dvar)
+                kopt = []
+                for vals in list(itertools.product(*lists)):
+                        res = ['-{}'.format(d.key), "{}".format(eval_one(d.value, vals))]
+                        for k, v in vals:
+                                res.extend(['-{}'.format(k), "{}".format(v)])
+                        kopt.append(res)
+                nopts.append(kopt)
+        for k, vals in opts.items():
+                if k not in processed:
+                        kopt = []
+                        for v in vals:
+                                kopt.append(['-{}'.format(k), "{}".format(v)])
+                        nopts.append(kopt)
+        return nopts
+        # add in all the stragglers; they shouldn't cause too many problems
+        for k, v in opts.items():
+                if type(v) is DependentOpt:
+                        continue
+                if k in processed:
+                        # consistency check
+                        if k not in input_keys:
+                                print('INTERNAL ERROR: key \'{}\' marked as processed but not in input_keys'.format(k), file=sys.stderr)
+                                sys.exit(1)
+                        continue
+                # consistency check
+                if k in input_keys:
+                        print('INTERNAL ERROR: key \'{}\' in input_keys but not marked as processed'.format(k), file=sys.stderr)
+                        sys.exit(1)
+                # add the straggler
+                input_keys[k] = v
+        # flatten the dict into a list of pairs so it's easier to work with
+        input_list = []
+        for k, v in input_keys.items():
+                input_list.append([(k, o) for o in v])
+        # evaluate all the dependents
+        # they are not allowed to produce new values so it's a one-to-one mapping from here
+        evaluated = []
+        for inputs in list(itertools.product(*input_list)):
+                this_eval = list(inputs)
+                for d in dependents:
+                        this_eval.append((d.key, eval_one(d.value, inputs)))
+                evaluated.append(this_eval)
+        # reformat everything to a list of arguments
+        formated = []
+        for o in evaluated:
+                inner = []
+                for k,v in o:
+                        inner.append("-{}".format(k))
+                        inner.append("{}".format(v))
+                # print(inner)
+                formated.append(inner)
+        return formated
 # returns the first option with key 'opt'
 …
         known_hosts = {
                 "jax": {
                         range(  1,  24) : "48-71",
                         range( 25,  48) : "48-71,144-167",
                         range( 49,  96) : "48-95,144-191",
                         range( 97, 144) : "24-95,120-191",
                         range(145, 192) : "0-95,96-191",
+                        range(  1,  25) : "48-71",
+                        range( 25,  49) : "48-71,144-167",
+                        range( 49,  97) : "48-95,144-191",
+                        range( 97, 145) : "24-95,120-191",
+                        range(145, 193) : "0-95,96-191",
                 },
+        }
 …
         except:
-                print('ERROR: invalid arguments', file=sys.stderr)
-                parser.print_help(sys.stderr)
                 sys.exit(1)
 …
         # Figure out all the combinations to run
         actions = []
         for p in itertools.product(range(options.trials), commands, *opts):
+        for p in itertools.product(range(options.trials), commands, opts):
                 act = [p[1]]
                 for o in p[2:]:
 …
         if options.file != sys.stdout:
                 print("Done");                                                                                ")
+                print("Done                                                                                ")

doc/theses/thierry_delisle_PhD/thesis/Makefile

-              r29d8c02
+              r74ec742
         emptytree \
         fairness \
+        idle \
+        idle1 \
+        idle2 \
+        idle_state \
         io_uring \
         pivot_ring \
 …
         cycle \
         result.cycle.jax.ops \
+        result.yield.jax.ops \
+        result.churn.jax.ops \
+        result.cycle.jax.ns \
+        result.yield.jax.ns \
+        result.churn.jax.ns \
+        result.cycle.low.jax.ops \
+        result.yield.low.jax.ops \
+        result.churn.low.jax.ops \
+        result.cycle.low.jax.ns \
+        result.yield.low.jax.ns \
+        result.churn.low.jax.ns \
+        result.memcd.updt.qps \
+        result.memcd.updt.lat \
+        result.memcd.rate.qps \
+        result.memcd.rate.99th \
+}
 …
         python3 $< $@
+build/result.%.ns.svg : data/% | ${Build}
+        ../../../../benchmark/plot.py -f $< -o $@ -y "ns per ops"
+cycle_jax_ops_FLAGS = --MaxY=120000000
+cycle_low_jax_ops_FLAGS = --MaxY=120000000
+cycle_jax_ns_FLAGS = --MaxY=2000
+cycle_low_jax_ns_FLAGS = --MaxY=2000
+build/result.%.ops.svg : data/% | ${Build}
+        ../../../../benchmark/plot.py -f $< -o $@ -y "Ops per second"
+yield_jax_ops_FLAGS = --MaxY=150000000
+yield_low_jax_ops_FLAGS = --MaxY=150000000
+yield_jax_ns_FLAGS = --MaxY=1500
+yield_low_jax_ns_FLAGS = --MaxY=1500
+build/result.%.ns.svg : data/% Makefile | ${Build}
+        ../../../../benchmark/plot.py -f $< -o $@ -y "ns per ops/procs" $($(subst .,_,$*)_ns_FLAGS)
+build/result.%.ops.svg : data/% Makefile | ${Build}
+        ../../../../benchmark/plot.py -f $< -o $@ -y "Ops per second" $($(subst .,_,$*)_ops_FLAGS)
+build/result.memcd.updt.qps.svg : data/memcd.updt Makefile | ${Build}
+        ../../../../benchmark/plot.py -f $< -o $@ -y "Actual QPS" -x "Update Ratio"
+build/result.memcd.updt.lat.svg : data/memcd.updt Makefile | ${Build}
+        ../../../../benchmark/plot.py -f $< -o $@ -y "Average Read Latency" -x "Update Ratio"
+build/result.memcd.rate.qps.svg : data/memcd.rate Makefile | ${Build}
+        ../../../../benchmark/plot.py -f $< -o $@ -y "Actual QPS" -x "Target QPS"
+build/result.memcd.rate.99th.svg : data/memcd.rate Makefile | ${Build}
+        ../../../../benchmark/plot.py -f $< -o $@ -y "Tail Read Latency" -x "Target QPS"
 ## pstex with inverted colors

doc/theses/thierry_delisle_PhD/thesis/data/cycle.jax

r29d8c02	r74ec742
1		[["rdq-cycle-go", "./rdq-cycle-go -t 4 -p 4 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 4.0, "Number of threads": 20.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 43606897.0, "Ops per second": 8720908.73, "ns per ops": 114.67, "Ops per threads": 2180344.0, "Ops per procs": 10901724.0, "Ops/sec/procs": 2180227.18, "ns per ops/procs": 458.67}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 16 -p 16 -d 5 -r 5", {"Duration (ms)": 5010.922033, "Number of processors": 16.0, "Number of threads": 80.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 93993568.0, "Total blocks": 93993209.0, "Ops per second": 18757739.07, "ns per ops": 53.31, "Ops per threads": 1174919.0, "Ops per procs": 5874598.0, "Ops/sec/procs": 1172358.69, "ns per ops/procs": 852.98}],["rdq-cycle-go", "./rdq-cycle-go -t 16 -p 16 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 16.0, "Number of threads": 80.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 136763517.0, "Ops per second": 27351079.35, "ns per ops": 36.56, "Ops per threads": 1709543.0, "Ops per procs": 8547719.0, "Ops/sec/procs": 1709442.46, "ns per ops/procs": 584.99}],["rdq-cycle-go", "./rdq-cycle-go -t 1 -p 1 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 1.0, "Number of threads": 5.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 27778961.0, "Ops per second": 5555545.09, "ns per ops": 180.0, "Ops per threads": 5555792.0, "Ops per procs": 27778961.0, "Ops/sec/procs": 5555545.09, "ns per ops/procs": 180.0}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 4 -p 4 -d 5 -r 5", {"Duration (ms)": 5009.290878, "Number of processors": 4.0, "Number of threads": 20.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 43976310.0, "Total blocks": 43976217.0, "Ops per second": 8778949.17, "ns per ops": 113.91, "Ops per threads": 2198815.0, "Ops per procs": 10994077.0, "Ops/sec/procs": 2194737.29, "ns per ops/procs": 455.64}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 4 -p 4 -d 5 -r 5", 
{"Duration (ms)": 5009.151542, "Number of processors": 4.0, "Number of threads": 20.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 44132300.0, "Total blocks": 44132201.0, "Ops per second": 8810334.37, "ns per ops": 113.5, "Ops per threads": 2206615.0, "Ops per procs": 11033075.0, "Ops/sec/procs": 2202583.59, "ns per ops/procs": 454.01}],["rdq-cycle-go", "./rdq-cycle-go -t 4 -p 4 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 4.0, "Number of threads": 20.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 46353896.0, "Ops per second": 9270294.11, "ns per ops": 107.87, "Ops per threads": 2317694.0, "Ops per procs": 11588474.0, "Ops/sec/procs": 2317573.53, "ns per ops/procs": 431.49}],["rdq-cycle-go", "./rdq-cycle-go -t 1 -p 1 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 1.0, "Number of threads": 5.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 27894379.0, "Ops per second": 5578591.58, "ns per ops": 179.26, "Ops per threads": 5578875.0, "Ops per procs": 27894379.0, "Ops/sec/procs": 5578591.58, "ns per ops/procs": 179.26}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 1 -p 1 -d 5 -r 5", {"Duration (ms)": 5008.743463, "Number of processors": 1.0, "Number of threads": 5.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 32825528.0, "Total blocks": 32825527.0, "Ops per second": 6553645.29, "ns per ops": 152.59, "Ops per threads": 6565105.0, "Ops per procs": 32825528.0, "Ops/sec/procs": 6553645.29, "ns per ops/procs": 152.59}],["rdq-cycle-go", "./rdq-cycle-go -t 16 -p 16 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 16.0, "Number of threads": 80.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 138213098.0, "Ops per second": 27640977.5, "ns per ops": 36.18, "Ops per threads": 1727663.0, "Ops per procs": 8638318.0, "Ops/sec/procs": 1727561.09, "ns per ops/procs": 578.85}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 4 -p 4 -d 5 -r 5", {"Duration (ms)": 5007.914168, "Number of processors": 4.0, "Number of 
threads": 20.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 44109513.0, "Total blocks": 44109419.0, "Ops per second": 8807961.06, "ns per ops": 113.53, "Ops per threads": 2205475.0, "Ops per procs": 11027378.0, "Ops/sec/procs": 2201990.27, "ns per ops/procs": 454.13}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 16 -p 16 -d 5 -r 5", {"Duration (ms)": 5012.121876, "Number of processors": 16.0, "Number of threads": 80.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 94130673.0, "Total blocks": 94130291.0, "Ops per second": 18780603.37, "ns per ops": 53.25, "Ops per threads": 1176633.0, "Ops per procs": 5883167.0, "Ops/sec/procs": 1173787.71, "ns per ops/procs": 851.94}],["rdq-cycle-go", "./rdq-cycle-go -t 16 -p 16 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 16.0, "Number of threads": 80.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 140936367.0, "Ops per second": 28185668.38, "ns per ops": 35.48, "Ops per threads": 1761704.0, "Ops per procs": 8808522.0, "Ops/sec/procs": 1761604.27, "ns per ops/procs": 567.66}],["rdq-cycle-go", "./rdq-cycle-go -t 4 -p 4 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 4.0, "Number of threads": 20.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 44279585.0, "Ops per second": 8855475.01, "ns per ops": 112.92, "Ops per threads": 2213979.0, "Ops per procs": 11069896.0, "Ops/sec/procs": 2213868.75, "ns per ops/procs": 451.7}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 1 -p 1 -d 5 -r 5", {"Duration (ms)": 5008.37392, "Number of processors": 1.0, "Number of threads": 5.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 32227534.0, "Total blocks": 32227533.0, "Ops per second": 6434730.02, "ns per ops": 155.41, "Ops per threads": 6445506.0, "Ops per procs": 32227534.0, "Ops/sec/procs": 6434730.02, "ns per ops/procs": 155.41}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 16 -p 16 -d 5 -r 5", {"Duration (ms)": 5011.019789, "Number of processors": 16.0, "Number of threads": 80.0, "Cycle size 
(# thrds)": 5.0, "Total Operations(ops)": 90600569.0, "Total blocks": 90600173.0, "Ops per second": 18080265.66, "ns per ops": 55.31, "Ops per threads": 1132507.0, "Ops per procs": 5662535.0, "Ops/sec/procs": 1130016.6, "ns per ops/procs": 884.94}],["rdq-cycle-cfa", "./rdq-cycle-cfa -t 1 -p 1 -d 5 -r 5", {"Duration (ms)": 5008.52474, "Number of processors": 1.0, "Number of threads": 5.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 32861776.0, "Total blocks": 32861775.0, "Ops per second": 6561168.75, "ns per ops": 152.41, "Ops per threads": 6572355.0, "Ops per procs": 32861776.0, "Ops/sec/procs": 6561168.75, "ns per ops/procs": 152.41}],["rdq-cycle-go", "./rdq-cycle-go -t 1 -p 1 -d 5 -r 5", {"Duration (ms)": 5000.0, "Number of processors": 1.0, "Number of threads": 5.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 28097680.0, "Ops per second": 5619274.9, "ns per ops": 177.96, "Ops per threads": 5619536.0, "Ops per procs": 28097680.0, "Ops/sec/procs": 5619274.9, "ns per ops/procs": 177.96}]]
	1	[["rdq-cycle-go", "./rdq-cycle-go -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10001.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 1138076440.0, "Ops per second": 113792094.48, "ns per ops": 8.79, "Ops per threads": 94839.0, "Ops per procs": 47419851.0, "Ops/sec/procs": 4741337.27, "ns per ops/procs": 210.91}],["rdq-cycle-go", "./rdq-cycle-go -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 200285.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 17638575791.0, "Ops per second": 88067238.72, "ns per ops": 11.35, "Ops per threads": 2204821.0, "Ops per procs": 1102410986.0, "Ops/sec/procs": 5504202.42, "ns per ops/procs": 181.68}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10100.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 54856916.0, "Ops per second": 5485691.0, "ns per ops": 184.0, "Ops per threads": 109713.0, "Ops per procs": 54856916.0, "Ops/sec/procs": 5485691.0, "ns per ops/procs": 184.0}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10025.449006, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 558836360.0, "Total blocks": 558836360.0, "Ops per second": 55741778.71, "ns per ops": 17.94, "Ops per threads": 69854.0, "Ops per procs": 34927272.0, "Ops/sec/procs": 3483861.17, "ns per ops/procs": 287.04}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10038.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 58647049.0, "Total blocks": 58647049.0, "Ops per second": 5842287.68, "ns per ops": 171.17, "Ops per threads": 7330.0, "Ops per procs": 3665440.0, "Ops/sec/procs": 365142.98, "ns per ops/procs": 2738.65}],["rdq-cycle-cfa", 
"./rdq-cycle-cfa -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10003.489711, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 728096996.0, "Total blocks": 728096996.0, "Ops per second": 72784299.98, "ns per ops": 13.74, "Ops per threads": 60674.0, "Ops per procs": 30337374.0, "Ops/sec/procs": 3032679.17, "ns per ops/procs": 329.74}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10021.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 63157049.0, "Total blocks": 63157049.0, "Ops per second": 6302255.13, "ns per ops": 158.67, "Ops per threads": 15789.0, "Ops per procs": 7894631.0, "Ops/sec/procs": 787781.89, "ns per ops/procs": 1269.39}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10009.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 62412200.0, "Total blocks": 62411700.0, "Ops per second": 6235572.31, "ns per ops": 160.37, "Ops per threads": 124824.0, "Ops per procs": 62412200.0, "Ops/sec/procs": 6235572.31, "ns per ops/procs": 160.37}],["rdq-cycle-go", "./rdq-cycle-go -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10000.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 464608617.0, "Ops per second": 46457191.42, "ns per ops": 21.53, "Ops per threads": 116152.0, "Ops per procs": 58076077.0, "Ops/sec/procs": 5807148.93, "ns per ops/procs": 172.2}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10099.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 391521066.0, "Ops per second": 39152106.0, "ns per ops": 25.0, "Ops per threads": 97880.0, "Ops per procs": 48940133.0, "Ops/sec/procs": 4894013.0, "ns per ops/procs": 206.0}],["rdq-cycle-tokio", 
"./rdq-cycle-tokio -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10099.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 963549550.0, "Ops per second": 96354955.0, "ns per ops": 10.0, "Ops per threads": 80295.0, "Ops per procs": 40147897.0, "Ops/sec/procs": 4014789.0, "ns per ops/procs": 251.0}],["rdq-cycle-go", "./rdq-cycle-go -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10001.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 867718190.0, "Ops per second": 86761170.55, "ns per ops": 11.53, "Ops per threads": 108464.0, "Ops per procs": 54232386.0, "Ops/sec/procs": 5422573.16, "ns per ops/procs": 184.41}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10100.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 962016289.0, "Ops per second": 96201628.0, "ns per ops": 10.0, "Ops per threads": 80168.0, "Ops per procs": 40084012.0, "Ops/sec/procs": 4008401.0, "ns per ops/procs": 251.0}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10016.837824, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 54738237.0, "Total blocks": 54737741.0, "Ops per second": 5464622.46, "ns per ops": 183.0, "Ops per threads": 109476.0, "Ops per procs": 54738237.0, "Ops/sec/procs": 5464622.46, "ns per ops/procs": 183.0}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10099.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 731309408.0, "Ops per second": 73130940.0, "ns per ops": 13.0, "Ops per threads": 91413.0, "Ops per procs": 45706838.0, "Ops/sec/procs": 4570683.0, "ns per ops/procs": 220.0}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 16 -d 10 -r 5 -t 1600", {"Duration 
(ms)": 10100.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 739772688.0, "Ops per second": 73977268.0, "ns per ops": 13.0, "Ops per threads": 92471.0, "Ops per procs": 46235793.0, "Ops/sec/procs": 4623579.0, "ns per ops/procs": 218.0}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10100.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 391449785.0, "Ops per second": 39144978.0, "ns per ops": 25.0, "Ops per threads": 97862.0, "Ops per procs": 48931223.0, "Ops/sec/procs": 4893122.0, "ns per ops/procs": 206.0}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10048.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 57239183.0, "Total blocks": 57239183.0, "Ops per second": 5696211.13, "ns per ops": 175.56, "Ops per threads": 4769.0, "Ops per procs": 2384965.0, "Ops/sec/procs": 237342.13, "ns per ops/procs": 4213.33}],["rdq-cycle-go", "./rdq-cycle-go -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10000.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 55248375.0, "Ops per second": 5524562.87, "ns per ops": 181.01, "Ops per threads": 110496.0, "Ops per procs": 55248375.0, "Ops/sec/procs": 5524562.87, "ns per ops/procs": 181.01}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10021.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 61553053.0, "Total blocks": 61553053.0, "Ops per second": 6142186.88, "ns per ops": 162.81, "Ops per threads": 15388.0, "Ops per procs": 7694131.0, "Ops/sec/procs": 767773.36, "ns per ops/procs": 1302.47}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10008.0, "Number of processors": 
1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 62811642.0, "Total blocks": 62811142.0, "Ops per second": 6275517.47, "ns per ops": 159.35, "Ops per threads": 125623.0, "Ops per procs": 62811642.0, "Ops/sec/procs": 6275517.47, "ns per ops/procs": 159.35}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10018.820873, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 260866706.0, "Total blocks": 260862710.0, "Ops per second": 26037665.44, "ns per ops": 38.41, "Ops per threads": 65216.0, "Ops per procs": 32608338.0, "Ops/sec/procs": 3254708.18, "ns per ops/procs": 307.25}],["rdq-cycle-go", "./rdq-cycle-go -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10000.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 874581175.0, "Ops per second": 87449851.2, "ns per ops": 11.44, "Ops per threads": 109322.0, "Ops per procs": 54661323.0, "Ops/sec/procs": 5465615.7, "ns per ops/procs": 182.96}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10099.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 55228782.0, "Ops per second": 5522878.0, "ns per ops": 182.0, "Ops per threads": 110457.0, "Ops per procs": 55228782.0, "Ops/sec/procs": 5522878.0, "ns per ops/procs": 182.0}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10009.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 62564955.0, "Total blocks": 62564455.0, "Ops per second": 6250797.96, "ns per ops": 159.98, "Ops per threads": 125129.0, "Ops per procs": 62564955.0, "Ops/sec/procs": 6250797.96, "ns per ops/procs": 159.98}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10100.0, "Number of processors": 16.0, 
"Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 738848909.0, "Ops per second": 73884890.0, "ns per ops": 13.0, "Ops per threads": 92356.0, "Ops per procs": 46178056.0, "Ops/sec/procs": 4617805.0, "ns per ops/procs": 218.0}],["rdq-cycle-go", "./rdq-cycle-go -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10001.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 1131221613.0, "Ops per second": 113108175.94, "ns per ops": 8.84, "Ops per threads": 94268.0, "Ops per procs": 47134233.0, "Ops/sec/procs": 4712840.66, "ns per ops/procs": 212.19}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10008.209159, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 729328104.0, "Total blocks": 729328099.0, "Ops per second": 72872987.81, "ns per ops": 13.72, "Ops per threads": 60777.0, "Ops per procs": 30388671.0, "Ops/sec/procs": 3036374.49, "ns per ops/procs": 329.34}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10099.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 961002611.0, "Ops per second": 96100261.0, "ns per ops": 10.0, "Ops per threads": 80083.0, "Ops per procs": 40041775.0, "Ops/sec/procs": 4004177.0, "ns per ops/procs": 252.0}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10099.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 390098231.0, "Ops per second": 39009823.0, "ns per ops": 25.0, "Ops per threads": 97524.0, "Ops per procs": 48762278.0, "Ops/sec/procs": 4876227.0, "ns per ops/procs": 207.0}],["rdq-cycle-tokio", "./rdq-cycle-tokio -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10100.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, 
"Total Operations(ops)": 55237591.0, "Ops per second": 5523759.0, "ns per ops": 182.0, "Ops per threads": 110475.0, "Ops per procs": 55237591.0, "Ops/sec/procs": 5523759.0, "ns per ops/procs": 182.0}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10016.576699, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 54510321.0, "Total blocks": 54509820.0, "Ops per second": 5442011.04, "ns per ops": 183.76, "Ops per threads": 109020.0, "Ops per procs": 54510321.0, "Ops/sec/procs": 5442011.04, "ns per ops/procs": 183.76}],["rdq-cycle-go", "./rdq-cycle-go -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10001.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 1135730371.0, "Ops per second": 113558509.97, "ns per ops": 8.81, "Ops per threads": 94644.0, "Ops per procs": 47322098.0, "Ops/sec/procs": 4731604.58, "ns per ops/procs": 211.34}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10039.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 61004037.0, "Total blocks": 61004037.0, "Ops per second": 6076255.04, "ns per ops": 164.58, "Ops per threads": 7625.0, "Ops per procs": 3812752.0, "Ops/sec/procs": 379765.94, "ns per ops/procs": 2633.2}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10004.891999, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 747946345.0, "Total blocks": 747934349.0, "Ops per second": 74758062.86, "ns per ops": 13.38, "Ops per threads": 62328.0, "Ops per procs": 31164431.0, "Ops/sec/procs": 3114919.29, "ns per ops/procs": 321.04}],["rdq-cycle-go", "./rdq-cycle-go -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10000.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total 
Operations(ops)": 466424792.0, "Ops per second": 46638931.23, "ns per ops": 21.44, "Ops per threads": 116606.0, "Ops per procs": 58303099.0, "Ops/sec/procs": 5829866.4, "ns per ops/procs": 171.53}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10086.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 57343570.0, "Total blocks": 57343570.0, "Ops per second": 5685308.81, "ns per ops": 175.89, "Ops per threads": 4778.0, "Ops per procs": 2389315.0, "Ops/sec/procs": 236887.87, "ns per ops/procs": 4221.41}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10020.39533, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 263517289.0, "Total blocks": 263513293.0, "Ops per second": 26298093.07, "ns per ops": 38.03, "Ops per threads": 65879.0, "Ops per procs": 32939661.0, "Ops/sec/procs": 3287261.63, "ns per ops/procs": 304.2}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10025.357431, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 551670395.0, "Total blocks": 551662399.0, "Ops per second": 55027503.89, "ns per ops": 18.17, "Ops per threads": 68958.0, "Ops per procs": 34479399.0, "Ops/sec/procs": 3439218.99, "ns per ops/procs": 290.76}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 24 -d 10 -r 5 -t 2400", {"Duration (ms)": 10050.0, "Number of processors": 24.0, "Number of threads": 12000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 56162695.0, "Total blocks": 56162695.0, "Ops per second": 5588033.65, "ns per ops": 178.95, "Ops per threads": 4680.0, "Ops per procs": 2340112.0, "Ops/sec/procs": 232834.74, "ns per ops/procs": 4294.89}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10019.690183, "Number of processors": 8.0, "Number of threads": 4000.0, 
"Cycle size (# thrds)": 5.0, "Total Operations(ops)": 271866976.0, "Total blocks": 271862980.0, "Ops per second": 27133271.69, "ns per ops": 36.86, "Ops per threads": 67966.0, "Ops per procs": 33983372.0, "Ops/sec/procs": 3391658.96, "ns per ops/procs": 294.84}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10057.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 62105022.0, "Total blocks": 62105022.0, "Ops per second": 6175186.04, "ns per ops": 161.94, "Ops per threads": 15526.0, "Ops per procs": 7763127.0, "Ops/sec/procs": 771898.25, "ns per ops/procs": 1295.51}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10025.81217, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 537080117.0, "Total blocks": 537072121.0, "Ops per second": 53569736.59, "ns per ops": 18.67, "Ops per threads": 67135.0, "Ops per procs": 33567507.0, "Ops/sec/procs": 3348108.54, "ns per ops/procs": 298.68}],["rdq-cycle-go", "./rdq-cycle-go -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10000.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 55967030.0, "Ops per second": 5596438.25, "ns per ops": 178.69, "Ops per threads": 111934.0, "Ops per procs": 55967030.0, "Ops/sec/procs": 5596438.25, "ns per ops/procs": 178.69}],["rdq-cycle-go", "./rdq-cycle-go -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10000.0, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 55703320.0, "Ops per second": 5570084.72, "ns per ops": 179.53, "Ops per threads": 111406.0, "Ops per procs": 55703320.0, "Ops/sec/procs": 5570084.72, "ns per ops/procs": 179.53}],["rdq-cycle-go", "./rdq-cycle-go -p 8 -d 10 -r 5 -t 800", {"Duration (ms)": 10000.0, "Number of processors": 8.0, "Number of threads": 4000.0, "Cycle size (# 
thrds)": 5.0, "Total Operations(ops)": 469211793.0, "Ops per second": 46918327.16, "ns per ops": 21.31, "Ops per threads": 117302.0, "Ops per procs": 58651474.0, "Ops/sec/procs": 5864790.9, "ns per ops/procs": 170.51}],["rdq-cycle-cfa", "./rdq-cycle-cfa -p 1 -d 10 -r 5 -t 100", {"Duration (ms)": 10016.545208, "Number of processors": 1.0, "Number of threads": 500.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 54925472.0, "Total blocks": 54924976.0, "Ops per second": 5483474.68, "ns per ops": 182.37, "Ops per threads": 109850.0, "Ops per procs": 54925472.0, "Ops/sec/procs": 5483474.68, "ns per ops/procs": 182.37}],["rdq-cycle-fibre", "./rdq-cycle-fibre -p 16 -d 10 -r 5 -t 1600", {"Duration (ms)": 10037.0, "Number of processors": 16.0, "Number of threads": 8000.0, "Cycle size (# thrds)": 5.0, "Total Operations(ops)": 60770550.0, "Total blocks": 60770550.0, "Ops per second": 6054474.7, "ns per ops": 165.17, "Ops per threads": 7596.0, "Ops per procs": 3798159.0, "Ops/sec/procs": 378404.67, "ns per ops/procs": 2642.67}]]

doc/theses/thierry_delisle_PhD/thesis/local.bib

-              r29d8c02
+              r74ec742
   note = "[Online; accessed 12-April-2022]"
+}
+% RMR notes :
+% [05/04, 12:36] Trevor Brown
+%     i don't know where rmr complexity was first introduced, but there are many many many papers that use the term and define it
+% [05/04, 12:37] Trevor Brown
+%     here's one paper that uses the term a lot and links to many others that use it... might trace it to something useful there https://drops.dagstuhl.de/opus/volltexte/2021/14832/pdf/LIPIcs-DISC-2021-30.pdf
+% [05/04, 12:37] Trevor Brown
+%     another option might be to cite a textbook
+% [05/04, 12:42] Trevor Brown
+%     but i checked two textbooks in the area i'm aware of and i don't see a definition of rmr complexity in either
+% [05/04, 12:42] Trevor Brown
+%     this one has a nice statement about the prevelance of rmr complexity, as well as some rough definition
+% [05/04, 12:42] Trevor Brown
+%     https://dl.acm.org/doi/pdf/10.1145/3465084.3467938
+% Race to idle notes :
+% [13/04, 16:56] Martin Karsten
+%       I don't have a citation. Google brings up this one, which might be good:
+%
+% https://doi.org/10.1137/1.9781611973099.100

doc/theses/thierry_delisle_PhD/thesis/text/eval_macro.tex

-              r29d8c02
+              r74ec742
 Networked ZIPF
+Nginx : 5Gb still good, 4Gb starts to suffer
+Cforall : 10Gb too high, 4 Gb too low
 \section{Memcached}
+In Memory
+\subsection{Benchmark Environment}
+These experiments are run on a cluster of homogeneous Supermicro SYS-6017R-TDF compute nodes with the following characteristics:
+The server runs Ubuntu 20.04.3 LTS on top of Linux Kernel 5.11.0-34.
+Each node has 2 Intel(R) Xeon(R) CPU E5-2620 v2 running at 2.10GHz.
+These CPUs have 6 cores per CPU and 2 \glspl{hthrd} per core, for a total of 24 \glspl{hthrd}.
+The CPUs each have 384 KB, 3 MB and 30 MB of L1, L2 and L3 caches respectively.
+Each node is connected to the network through a Mellanox 10 Gigabit Ethernet port.
+The network route uses 1 Mellanox SX1012 10/40 Gigabit Ethernet cluster switch.
+Networked
+\begin{figure}
+        \centering
+        \input{result.memcd.updt.qps.pstex_t}
+        \caption[Churn Benchmark : Throughput on Intel]{Churn Benchmark : Throughput on Intel\smallskip\newline Description}
+        \label{fig:memcd:updt:qps}
+\end{figure}
+\begin{figure}
+        \centering
+        \input{result.memcd.updt.lat.pstex_t}
+        \caption[Churn Benchmark : Throughput on Intel]{Churn Benchmark : Throughput on Intel\smallskip\newline Description}
+        \label{fig:memcd:updt:lat}
+\end{figure}
+\begin{figure}
+        \centering
+        \input{result.memcd.rate.qps.pstex_t}
+        \caption[Churn Benchmark : Throughput on Intel]{Churn Benchmark : Throughput on Intel\smallskip\newline Description}
+        \label{fig:memcd:rate:qps}
+\end{figure}
+\begin{figure}
+        \centering
+        \input{result.memcd.rate.99th.pstex_t}
+        \caption[Churn Benchmark : Throughput on Intel]{Churn Benchmark : Throughput on Intel\smallskip\newline Description}
+        \label{fig:memcd:rate:tail}
+\end{figure}

doc/theses/thierry_delisle_PhD/thesis/text/eval_micro.tex

-              r29d8c02
+              r74ec742
 \section{Benchmark Environment}
 All of these benchmarks are run on two distinct hardware environments, an AMD and an INTEL machine.
+For all benchmarks, \texttt{taskset} is used to limit the experiment to 1 NUMA Node with no hyper threading.
+If more \glspl{hthrd} are needed, then 1 NUMA Node with hyperthreading is used.
+If still more \glspl{hthrd} are needed then the experiment is limited to as few NUMA Nodes as needed.
 \paragraph{AMD} The AMD machine is a server with two AMD EPYC 7662 CPUs and 256GB of DDR4 RAM.
 …
 \section{Cycling latency}
+\begin{figure}
+        \centering
+        \input{cycle.pstex_t}
+        \caption[Cycle benchmark]{Cycle benchmark\smallskip\newline Each \gls{at} unparks the next \gls{at} in the cycle before parking itself.}
+        \label{fig:cycle}
+\end{figure}
 The most basic evaluation of any ready queue is to evaluate the latency needed to push and pop one element from the ready-queue.
 Since these two operations also describe a \texttt{yield} operation, many systems use this as the most basic benchmark.
 …
 Note that this problem is only present on SMP machines and is significantly mitigated by the fact that there are multiple rings in the system.
-\begin{figure}
-        \centering
-        \input{cycle.pstex_t}
-        \caption[Cycle benchmark]{Cycle benchmark\smallskip\newline Each \gls{at} unparks the next \gls{at} in the cycle before parking itself.}
-        \label{fig:cycle}
-\end{figure}
 To avoid this benchmark from being dominated by the idle sleep handling, the number of rings is kept at least as high as the number of \glspl{proc} available.
 Beyond this point, adding more rings serves to mitigate even more the idle sleep handling.
 …
 The actual benchmark is more complicated to handle termination, but that simply requires using a binary semaphore or a channel instead of raw \texttt{park}/\texttt{unpark} and carefully picking the order of the \texttt{P} and \texttt{V} with respect to the loop condition.
+\begin{lstlisting}
+        Thread.main() {
+                count := 0
+                for {
+                        wait()
+                        this.next.wake()
+                        count ++
+                        if must_stop() { break }
+                }
+                global.count += count
+        }
+\end{lstlisting}
+\begin{figure}
+        \centering
+        \input{result.cycle.jax.ops.pstex_t}
+        \vspace*{-10pt}
+        \label{fig:cycle:ns:jax}
+\end{figure}
+Figure~\ref{fig:cycle:code} shows pseudo code for this benchmark.
+\begin{figure}
+        \begin{lstlisting}
+                Thread.main() {
+                        count := 0
+                        for {
+                                wait()
+                                this.next.wake()
+                                count ++
+                                if must_stop() { break }
+                        }
+                        global.count += count
+                }
+        \end{lstlisting}
+        \caption[Cycle Benchmark : Pseudo Code]{Cycle Benchmark : Pseudo Code}
+        \label{fig:cycle:code}
+\end{figure}
+\subsection{Results}
+\begin{figure}
+        \subfloat[][Throughput, 100 \ats per \proc]{
+                \resizebox{0.5\linewidth}{!}{
+                        \input{result.cycle.jax.ops.pstex_t}
+                }
+                \label{fig:cycle:jax:ops}
+        }
+        \subfloat[][Throughput, 1 \ats per \proc]{
+                \resizebox{0.5\linewidth}{!}{
+                        \input{result.cycle.low.jax.ops.pstex_t}
+                }
+                \label{fig:cycle:jax:low:ops}
+        }
+        \subfloat[][Latency, 100 \ats per \proc]{
+                \resizebox{0.5\linewidth}{!}{
+                        \input{result.cycle.jax.ns.pstex_t}
+                }
+        }
+        \subfloat[][Latency, 1 \ats per \proc]{
+                \resizebox{0.5\linewidth}{!}{
+                        \input{result.cycle.low.jax.ns.pstex_t}
+                }
+                \label{fig:cycle:jax:low:ns}
+        }
+        \caption[Cycle Benchmark on Intel]{Cycle Benchmark on Intel\smallskip\newline Throughput as a function of \proc count, using 100 cycles per \proc, 5 \ats per cycle.}
+        \label{fig:cycle:jax}
+\end{figure}
+Figure~\ref{fig:cycle:jax} shows the throughput as a function of \proc count, with the following constants:
+Each run uses 100 cycles per \proc, 5 \ats per cycle.
+\todo{results discussion}
 \section{Yield}
 …
 Its only interesting variable is the number of \glspl{at} per \glspl{proc}, where ratios close to 1 means the ready queue(s) could be empty.
 This sometimes puts more strain on the idle sleep handling, compared to scenarios where there is clearly plenty of work to be done.
+\todo{code, setup, results}
+\begin{lstlisting}
+        Thread.main() {
+                count := 0
+                while !stop {
+                        yield()
+                        count ++
+                }
+                global.count += count
+        }
+\end{lstlisting}
+Figure~\ref{fig:yield:code} shows pseudo code for this benchmark, the ``wait/wake-next'' is simply replaced by a yield.
+\begin{figure}
+        \begin{lstlisting}
+                Thread.main() {
+                        count := 0
+                        for {
+                                yield()
+                                count ++
+                                if must_stop() { break }
+                        }
+                        global.count += count
+                }
+        \end{lstlisting}
+        \caption[Yield Benchmark : Pseudo Code]{Yield Benchmark : Pseudo Code}
+        \label{fig:yield:code}
+\end{figure}
+\subsection{Results}
+\begin{figure}
+        \subfloat[][Throughput, 100 \ats per \proc]{
+                \resizebox{0.5\linewidth}{!}{
+                        \input{result.yield.jax.ops.pstex_t}
+                }
+                \label{fig:yield:jax:ops}
+        }
+        \subfloat[][Throughput, 1 \ats per \proc]{
+                \resizebox{0.5\linewidth}{!}{
+                \input{result.yield.low.jax.ops.pstex_t}
+                }
+                \label{fig:yield:jax:low:ops}
+        }
+        \subfloat[][Latency, 100 \ats per \proc]{
+                \resizebox{0.5\linewidth}{!}{
+                \input{result.yield.jax.ns.pstex_t}
+                }
+                \label{fig:yield:jax:ns}
+        }
+        \subfloat[][Latency, 1 \ats per \proc]{
+                \resizebox{0.5\linewidth}{!}{
+                \input{result.yield.low.jax.ns.pstex_t}
+                }
+                \label{fig:yield:jax:low:ns}
+        }
+        \caption[Yield Benchmark on Intel]{Yield Benchmark on Intel\smallskip\newline Throughput as a function of \proc count, using 1 \ats per \proc.}
+        \label{fig:yield:jax}
+\end{figure}
+Figure~\ref{fig:yield:jax} shows the throughput as a function of \proc count, with the following constants:
+Each run uses 100 \ats per \proc.
+\todo{results discussion}
 …
 In either case, this benchmark aims to highlight how each scheduler handles these cases, since both cases can lead to performance degradation if they are not handled correctly.
+To achieve this the benchmark uses a fixed size array of \newterm{chair}s, where a chair is a data structure that holds a single blocked \gls{at}.
+When a \gls{at} attempts to block on the chair, it must first unblock the \gls{at} currently blocked on said chair, if any.
+This creates a flow where \glspl{at} push each other out of the chairs before being pushed out themselves.
+For this benchmark to work however, the number of \glspl{at} must be greater than or equal to the number of chairs plus the number of \glspl{proc}.
+To achieve this the benchmark uses a fixed size array of semaphores.
+Each \gls{at} picks a random semaphore, \texttt{V}s it to unblock a \at waiting and then \texttt{P}s on the semaphore.
+This creates a flow where \glspl{at} push each other out of the semaphores before being pushed out themselves.
+For this benchmark to work however, the number of \glspl{at} must be greater than or equal to the number of semaphores plus the number of \glspl{proc}.
+Note that the nature of these semaphores means the counter can go beyond 1, which could lead to calls to \texttt{P} not blocking.
 \todo{code, setup, results}
 …
                 for {
                         r := random() % len(spots)
+                        next := xchg(spots[r], this)
+                        if next { next.wake() }
+                        wait()
+                        spots[r].V()
+                        spots[r].P()
                         count ++
                         if must_stop() { break }
 …
+        }
 \end{lstlisting}
+\begin{figure}
+        \subfloat[][Throughput, 100 \ats per \proc]{
+                \resizebox{0.5\linewidth}{!}{
+                        \input{result.churn.jax.ops.pstex_t}
+                }
+                \label{fig:churn:jax:ops}
+        }
+        \subfloat[][Throughput, 1 \ats per \proc]{
+                \resizebox{0.5\linewidth}{!}{
+                        \input{result.churn.low.jax.ops.pstex_t}
+                }
+                \label{fig:churn:jax:low:ops}
+        }
+        \subfloat[][Latency, 100 \ats per \proc]{
+                \resizebox{0.5\linewidth}{!}{
+                        \input{result.churn.jax.ns.pstex_t}
+                }
+        }
+        \subfloat[][Latency, 1 \ats per \proc]{
+                \resizebox{0.5\linewidth}{!}{
+                        \input{result.churn.low.jax.ns.pstex_t}
+                }
+                \label{fig:churn:jax:low:ns}
+        }
+        \caption[Churn Benchmark on Intel]{\centering Churn Benchmark on Intel\smallskip\newline Throughput and latency of the Churn benchmark on the Intel machine. Throughput is the total operations per second across all cores. Latency is the duration of each operation.}
+        \label{fig:churn:jax}
+\end{figure}
 \section{Locality}

doc/theses/thierry_delisle_PhD/thesis/text/practice.tex

-              r29d8c02
+              r74ec742
 More precisely, \CFA supports adding \procs using the RAII object @processor@.
 These objects can be created at any time and can be destroyed at any time.
 They are normally create as automatic stack variables, but this is not a requirement.
+They are normally created as automatic stack variables, but this is not a requirement.
 The consequence is that the scheduler and \io subsystems must support \procs coming in and out of existence.
 \section{Manual Resizing}
+The consequence of dynamically changing the number of \procs is that all internal arrays that are sized based on the number of \procs need to be \texttt{realloc}ed.
+This also means that any references into these arrays, pointers or indexes, may need to be fixed when shrinking\footnote{Indexes may still need fixing because there is no guarantee the \proc causing the shrink had the highest index. Therefore indexes need to be reassigned to preserve contiguous indexes.}.
+There are no performance requirements, within reason, for resizing since this is usually considered as part of setup and teardown.
+Manual resizing is expected to be a rare operation.
+Programmers are mostly expected to resize clusters on startup or teardown.
+Therefore dynamically changing the number of \procs is an appropriate moment to allocate or free resources to match the new state.
+As such all internal arrays that are sized based on the number of \procs need to be \texttt{realloc}ed.
+This also means that any references into these arrays, pointers or indexes, may need to be fixed when shrinking\footnote{Indexes may still need fixing when shrinking because some indexes are expected to refer to dense contiguous resources and there is no guarantee the resource being removed has the highest index.}.
+There are no performance requirements, within reason, for resizing since it is expected to be rare.
 However, this operation has strict correctness requirements since shrinking and idle sleep can easily lead to deadlocks.
 It should also avoid as much as possible any effect on performance when the number of \procs remain constant.
 This later requirement prehibits simple solutions, like simply adding a global lock to these arrays.
+This latter requirement prohibits naive solutions, like simply adding a global lock to the ready-queue arrays.
 \subsection{Read-Copy-Update}
 …
 In this pattern, resizing is done by creating a copy of the internal data structures, updating the copy with the desired changes, and then attempting an Indiana Jones Switch to replace the original with the copy.
 This approach potentially has the advantage that it may not need any synchronization to do the switch.
 The switch definitely implies a race where \procs could still use the previous, original, data structure after the copy was switched in.
 The important question then becomes whether or not this race can be recovered from.
+If the changes that arrived late can be transferred from the original to the copy then this solution works.
 For linked-lists, dequeuing is somewhat of a problem.
+However, there is a race where \procs could still use the previous, original, data structure after the copy was switched in.
+This race not only requires some added memory reclamation scheme, it also requires that operations made on the stale original version be eventually moved to the copy.
+For linked-lists, enqueuing is only somewhat problematic: \ats enqueued to the original queues need to be transferred to the new queues, which might not preserve ordering.
+Dequeuing is more challenging.
 Dequeuing from the original will not necessarily update the copy, which could lead to multiple \procs dequeuing the same \at.
 Fixing this requires making the array contain pointers to subqueues rather than the subqueues themselves.
+Fixing this requires more synchronization or more indirection on the queues.
 Another challenge is that the original must be kept until all \procs have witnessed the change.
 …
 In addition to users manually changing the number of \procs, it is desirable to support ``removing'' \procs when there are not enough \ats for all the \procs to be useful.
 While manual resizing is expected to be rare, the number of \ats is expected to vary much more which means \procs may need to be ``removed'' for only short periods of time.
 Furthermore, race conditions that spuriously lead to the impression no \ats are ready are actually common in practice.
 Therefore \procs should not be actually \emph{removed} but simply put into an idle state where the \gls{kthrd} is blocked until more \ats become ready.
+Furthermore, race conditions that spuriously lead to the impression that no \ats are ready are actually common in practice.
+Therefore resources associated with \procs should not be freed but \procs simply put into an idle state where the \gls{kthrd} is blocked until more \ats become ready.
 This state is referred to as \newterm{Idle-Sleep}.
 …
 The \CFA scheduler simply follows the ``Race-to-Idle''\cit{https://doi.org/10.1137/1.9781611973099.100} approach where a sleeping \proc is woken any time an \at becomes ready and \procs go to idle sleep anytime they run out of work.
+\section{Sleeping}
+As usual, the corner-stone of any feature related to the kernel is the choice of system call.
+In terms of blocking a \gls{kthrd} until some event occurs, the Linux kernel has many available options:
+\paragraph{\texttt{pthread\_mutex}/\texttt{pthread\_cond}}
+The most classic option is to use some combination of \texttt{pthread\_mutex} and \texttt{pthread\_cond}.
+These serve as straight forward mutual exclusion and synchronization tools and allow a \gls{kthrd} to wait on a \texttt{pthread\_cond} until signalled.
+While this approach is generally perfectly appropriate for \glspl{kthrd} waiting after each other, \io operations do not signal \texttt{pthread\_cond}s.
+For \io results to wake a \proc waiting on a \texttt{pthread\_cond} means that a different \gls{kthrd} must be woken up first, and then the \proc can be signalled.
+\subsection{\texttt{io\_uring} and Epoll}
+An alternative is to flip the problem on its head and block waiting for \io, using \texttt{io\_uring} or even \texttt{epoll}.
+This creates the inverse situation, where \io operations directly wake sleeping \procs but waking a \proc from a running \gls{kthrd} must use an indirect scheme.
+This generally takes the form of creating a file descriptor, \eg, a dummy file, a pipe or an event fd, and using that file descriptor when \procs need to wake each other.
+This leads to additional complexity because there can be a race between these artificial \io operations and genuine \io operations.
+If not handled correctly, this can lead to the artificial files going out of sync.
+\subsection{Event FDs}
+Another interesting approach is to use an event file descriptor\cit{eventfd}.
+This is a Linux feature that is a file descriptor that behaves like \io, \ie, uses \texttt{read} and \texttt{write}, but also behaves like a semaphore.
+Indeed, all reads and writes must use 64-bit values\footnote{On 64-bit Linux; a 32-bit Linux would use 32-bit values.}.
+Writes add their values to the buffer, that is arithmetic addition and not buffer append, and reads zero out the buffer and return the buffer values so far\footnote{This is without the \texttt{EFD\_SEMAPHORE} flag. This flag changes the behavior of \texttt{read} but is not needed for this work.}.
+If a read is made while the buffer is already 0, the read blocks until a non-0 value is added.
+What makes this feature particularly interesting is that \texttt{io\_uring} supports the \texttt{IORING\_REGISTER\_EVENTFD} command, to register an event fd to a particular instance.
+Once that instance is registered, any \io completion will result in \texttt{io\_uring} writing to the event FD.
+This means that a \proc waiting on the event FD can be \emph{directly} woken up by either other \procs or incoming \io.
+\begin{figure}
+        \centering
+        \input{idle1.pstex_t}
+        \caption[Basic Idle Sleep Data Structure]{Basic Idle Sleep Data Structure \smallskip\newline Each idle \proc is put onto a doubly-linked stack protected by a lock.
+        Each \proc has a private event FD.}
+        \label{fig:idle1}
+\end{figure}
 \section{Tracking Sleepers}
 Tracking which \procs are in idle sleep requires a data structure holding all the sleeping \procs, but more importantly it requires a concurrent \emph{handshake} so that no \at is stranded on a ready-queue with no active \proc.
 The classic challenge is when a \at is made ready while a \proc is going to sleep, there is a race where the new \at may not see the sleeping \proc and the sleeping \proc may not see the ready \at.
+Furthermore, the ``Race-to-Idle'' approach means that there is some
+\section{Sleeping}
+\subsection{Event FDs}
+\subsection{Epoll}
+\subsection{\texttt{io\_uring}}
+\section{Reducing Latency}
+Since \ats can be made ready by timers, \io operations or other events outside a cluster, this race can occur even if the \proc going to sleep is the only \proc awake.
+As a result, improper handling of this race can lead to all \procs going to sleep and the system deadlocking.
+Furthermore, the ``Race-to-Idle'' approach means that there may be contention on the data structure tracking sleepers.
+Contention slowing down \procs attempting to sleep or wake-up can be tolerated.
+These \procs are not doing useful work and therefore not contributing to overall performance.
+However, notifying, checking if a \proc must be woken-up and doing so if needed, can significantly affect overall performance and must be low cost.
+\subsection{Sleepers List}
+Each cluster maintains a list of idle \procs, organized as a stack.
+This ordering hopefully allows \procs at the tail to stay in idle sleep for extended periods of time.
+Because of these unbalanced performance requirements, the algorithm tracking sleepers is designed to have idle \procs handle as much of the work as possible.
+The idle \procs maintain the list of sleepers among themselves and notifying a sleeping \proc takes as little work as possible.
+This approach means that maintaining the list is fairly straightforward.
+The list can simply use a single lock per cluster and only \procs that are getting in and out of idle state will contend for that lock.
+This approach also simplifies notification.
+Indeed, \procs need to be notified when a new \at is readied, but they also must be notified during resizing, so the \gls{kthrd} can be joined.
+This means that whichever entity removes idle \procs from the sleeper list must be able to do so in any order.
+Using a simple lock over this data structure makes the removal much simpler than using a lock-free data structure.
+The notification process then simply needs to wake-up the desired idle \proc, using \texttt{pthread\_cond\_signal}, \texttt{write} on an fd, etc., and the \proc will handle the rest.
+\subsection{Reducing Latency}
+As mentioned in this section, \procs going idle for extremely short periods of time is likely in certain common scenarios.
+Therefore, the latency of doing a system call to read from and write to the event fd can actually negatively affect overall performance in a notable way.
+It is important to reduce latency and contention of the notification as much as possible.
+Figure~\ref{fig:idle1} shows the basic idle sleep data structure.
+For the notifiers, this data structure can cause contention on the lock and the event fd syscall can cause notable latency.
+\begin{figure}
+        \centering
+        \input{idle2.pstex_t}
+        \caption[Improved Idle Sleep Data Structure]{Improved Idle Sleep Data Structure \smallskip\newline An atomic pointer is added to the list, pointing to the Event FD of the first \proc on the list.}
+        \label{fig:idle2}
+\end{figure}
+The contention is mostly due to the lock on the list needing to be held to get to the head \proc.
+That lock can be contended by \procs attempting to go to sleep, \procs waking or notification attempts.
+The contention from the \procs attempting to go to sleep can be mitigated slightly by using \texttt{try\_acquire} instead, so the \procs simply continue searching for \ats if the lock is held.
+This trick cannot be used for waking \procs since they are not in a state where they can run \ats.
+However, it is worth noting that notification does not strictly require accessing the list or the head \proc.
+Therefore, contention can be reduced notably by having notifiers avoid the lock entirely and adding a pointer to the event fd of the first idle \proc, as in Figure~\ref{fig:idle2}.
+To avoid contention between the notifiers, instead of simply reading the atomic pointer, notifiers atomically exchange it to \texttt{null} so only one notifier will contend on the system call.
+\begin{figure}
+        \centering
+        \input{idle_state.pstex_t}
+        \caption[Idle Sleep State Flag]{Idle Sleep State Flag \smallskip\newline A three-state flag (\texttt{SEARCH}, \texttt{SLEEP}, \texttt{AWAKE}) is added beside the event fd so that unnecessary system calls can be avoided.}
+        \label{fig:idle:state}
+\end{figure}
+The next optimization that can be done is to avoid the latency of the event fd when possible.
+This can be done by adding what is effectively a benaphore\cit{benaphore} in front of the event fd.
+A simple three state flag is added beside the event fd to avoid unnecessary system calls, as shown in Figure~\ref{fig:idle:state}.
+The flag starts in state \texttt{SEARCH}, while the \proc is searching for \ats to run.
+The \proc then confirms the sleep by atomically swapping the state to \texttt{SLEEP}.
+If the previous state was still \texttt{SEARCH}, then the \proc does read the event fd.
+Meanwhile, notifiers atomically exchange the state to \texttt{AWAKE} state.
+If the previous state was \texttt{SLEEP}, then the notifier must write to the event fd.
+However, if the notify arrives almost immediately after the \proc marks itself idle, then both reads and writes on the event fd can be omitted, which reduces latency notably.
+This leads to the final data structure shown in Figure~\ref{fig:idle}.
+\begin{figure}
+        \centering
+        \input{idle.pstex_t}
+        \caption[Low-latency Idle Sleep Data Structure]{Low-latency Idle Sleep Data Structure \smallskip\newline Each idle \proc is put onto a doubly-linked stack protected by a lock.
+        Each \proc has a private event FD with a benaphore in front of it.
+        The list also has an atomic pointer to the event fd and benaphore of the first \proc on the list.}
+        \label{fig:idle}
+\end{figure}

doc/theses/thierry_delisle_PhD/thesis/thesis.tex

-              r29d8c02
+              r74ec742
 \usepackage{xcolor}
 \usepackage{graphicx} % For including graphics
+\usepackage{subcaption}
 % Hyperlinks make it very easy to navigate an electronic document.
 …
 \newcommand\at{\gls{at}\xspace}%
 \newcommand\ats{\glspl{at}\xspace}%
+\newcommand\Proc{\Gls{proc}\xspace}%
 \newcommand\proc{\gls{proc}\xspace}%
 \newcommand\procs{\glspl{proc}\xspace}%

libcfa/src/Makefile.am

-              r29d8c02
+              r74ec742
 # The built sources must not depend on the installed inst_headers_src
 AM_CFAFLAGS = -quiet -cfalib -I$(srcdir)/stdhdr -I$(srcdir)/concurrency $(if $(findstring ${gdbwaittarget}, ${@}), -XCFA --gdb) @CONFIG_CFAFLAGS@
 AM_CFLAGS = -g -Wall -Werror=return-type -Wno-unused-function -fPIC -fexceptions -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
+AM_CFLAGS = -g -Wall -Werror=return-type -Wno-unused-function -fPIC -fexceptions -fvisibility=hidden -pthread @ARCH_FLAGS@ @CONFIG_CFLAGS@
 AM_CCASFLAGS = -g -Wall -Werror=return-type -Wno-unused-function @ARCH_FLAGS@ @CONFIG_CFLAGS@
 CFACC = @CFACC@
 …
 prelude.o : prelude.cfa extras.cf gcc-builtins.cf builtins.cf @LOCAL_CFACC@ @CFACPP@
         ${AM_V_GEN}$(CFACOMPILE) -quiet -XCFA,-l ${<} -c -o ${@}
+        ${AM_V_GEN}$(CFACOMPILE) -quiet -XCFA,-l ${<} -c -fvisibility=default -o ${@}
 prelude.lo: prelude.cfa extras.cf gcc-builtins.cf builtins.cf @LOCAL_CFACC@ @CFACPP@
         ${AM_V_GEN}$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile \
         $(CFACOMPILE) -quiet -XCFA,-l ${<} -c -o ${@}
+        $(CFACOMPILE) -quiet -XCFA,-l ${<} -c -fvisibility=default -o ${@}
 concurrency/io/call.cfa: $(srcdir)/concurrency/io/call.cfa.in

libcfa/src/algorithms/range_iterator.cfa

-              r29d8c02
+              r74ec742
 #include <fstream.hfa>
+void main(RangeIter & this) {
+#include "bits/defs.hfa"
+void main(RangeIter & this) libcfa_public {
         for() {
                 this._start = -1;

libcfa/src/assert.cfa

-              r29d8c02
+              r74ec742
 #include <unistd.h>                                                             // STDERR_FILENO
 #include "bits/debug.hfa"
+#include "bits/defs.hfa"
 extern "C" {
 …
         // called by macro assert in assert.h
+        void __assert_fail( const char assertion[], const char file[], unsigned int line, const char function[] ) {
+        // would be cool to remove libcfa_public but it's needed for libcfathread
+        void __assert_fail( const char assertion[], const char file[], unsigned int line, const char function[] ) libcfa_public {
                 __cfaabi_bits_print_safe( STDERR_FILENO, CFA_ASSERT_FMT ".\n", assertion, __progname, function, line, file );
                 abort();
 …
         // called by macro assertf
+        void __assert_fail_f( const char assertion[], const char file[], unsigned int line, const char function[], const char fmt[], ... ) {
+        // would be cool to remove libcfa_public but it's needed for libcfathread
+        void __assert_fail_f( const char assertion[], const char file[], unsigned int line, const char function[], const char fmt[], ... ) libcfa_public {
                 __cfaabi_bits_acquire();
                 __cfaabi_bits_print_nolock( STDERR_FILENO, CFA_ASSERT_FMT ": ", assertion, __progname, function, line, file );

libcfa/src/bits/align.hfa

-              r29d8c02
+              r74ec742
 // Created On       : Mon Nov 28 12:27:26 2016
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Sat Nov 16 18:58:22 2019
 // Update Count     : 3
+// Last Modified On : Fri Apr 29 19:14:43 2022
+// Update Count     : 4
 //
 // This  library is free  software; you  can redistribute  it and/or  modify it
 …
 //#define libAlign() (sizeof(double))
 // gcc-7 uses xmms instructions, which require 16 byte alignment.
 #define libAlign() (16)
+#define libAlign() (__BIGGEST_ALIGNMENT__)
 // Check for power of 2

libcfa/src/bits/debug.cfa

-              r29d8c02
+              r74ec742
 #include <unistd.h>
+#include "bits/defs.hfa"
 enum { buffer_size = 4096 };
 static char buffer[ buffer_size ];
 extern "C" {
+        void __cfaabi_bits_write( int fd, const char in_buffer[], int len ) {
+        // would be cool to remove libcfa_public but it's needed for libcfathread
+        void __cfaabi_bits_write( int fd, const char in_buffer[], int len ) libcfa_public {
                 // ensure all data is written
                 for ( int count = 0, retcode; count < len; count += retcode ) {
 …
         void __cfaabi_bits_release() __attribute__((__weak__)) {}
+        int __cfaabi_bits_print_safe  ( int fd, const char fmt[], ... ) __attribute__(( format(printf, 2, 3) )) {
+        // would be cool to remove libcfa_public but it's needed for libcfathread
+        int __cfaabi_bits_print_safe  ( int fd, const char fmt[], ... ) __attribute__(( format(printf, 2, 3) )) libcfa_public {
                 va_list args;

libcfa/src/bits/defs.hfa

r29d8c02	r74ec742
36	36	#define __cfa_dlink(x) struct { struct x * next; struct x * back; } __dlink_substitute
37	37	#endif
	38
	39	#define libcfa_public __attribute__((visibility("default")))
38	40
39	41	#ifdef __cforall

libcfa/src/bits/weakso_locks.cfa

r29d8c02	r74ec742
18	18	#include "bits/weakso_locks.hfa"
19	19
	20	#pragma GCC visibility push(default)
	21
20	22	void ?{}( blocking_lock &, bool, bool ) {}
21	23	void ^?{}( blocking_lock & ) {}

libcfa/src/common.cfa

r29d8c02	r74ec742
18	18	#include <stdlib.h> // div_t, *div
19	19
	20	#pragma GCC visibility push(default)
	21
20	22	//---------------------------------------
21	23

libcfa/src/concurrency/alarm.cfa

r29d8c02	r74ec742
141	141	//=============================================================================================
142	142
143		void sleep( Duration duration ) {
	143	void sleep( Duration duration ) libcfa_public {
144	144	alarm_node_t node = { active_thread(), duration, 0`s };
145	145

libcfa/src/concurrency/clib/cfathread.cfa

-              r29d8c02
+              r74ec742
+}
+#pragma GCC visibility push(default)
 //================================================================================
 // Main Api
 extern "C" {
         int cfathread_cluster_create(cfathread_cluster_t * cl) __attribute__((nonnull(1))) {
+        int cfathread_cluster_create(cfathread_cluster_t * cl) __attribute__((nonnull(1))) libcfa_public {
                 *cl = new();
                 return 0;
+        }
         cfathread_cluster_t cfathread_cluster_self(void) {
+        cfathread_cluster_t cfathread_cluster_self(void) libcfa_public {
                 return active_cluster();
+        }
         int cfathread_cluster_print_stats( cfathread_cluster_t cl ) {
+        int cfathread_cluster_print_stats( cfathread_cluster_t cl ) libcfa_public {
                 #if !defined(__CFA_NO_STATISTICS__)
                         print_stats_at_exit( *cl, CFA_STATS_READY_Q | CFA_STATS_IO );

libcfa/src/concurrency/coroutine.cfa

-              r29d8c02
+              r74ec742
 //-----------------------------------------------------------------------------
 forall(T &)
 void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src) {
+void copy(CoroutineCancelled(T) * dst, CoroutineCancelled(T) * src) libcfa_public {
         dst->virtual_table = src->virtual_table;
         dst->the_coroutine = src->the_coroutine;
 …
 forall(T &)
 const char * msg(CoroutineCancelled(T) *) {
+const char * msg(CoroutineCancelled(T) *) libcfa_public {
         return "CoroutineCancelled(...)";
+}
 …
 forall(T & | is_coroutine(T))
 void __cfaehm_cancelled_coroutine(
                 T & cor, coroutine$ * desc, EHM_DEFAULT_VTABLE(CoroutineCancelled, (T)) ) {
+                T & cor, coroutine$ * desc, EHM_DEFAULT_VTABLE(CoroutineCancelled, (T)) ) libcfa_public {
         verify( desc->cancellation );
         desc->state = Cancelled;
 …
 void __stack_prepare( __stack_info_t * this, size_t create_size );
 void __stack_clean  ( __stack_info_t * this );
+static void __stack_clean  ( __stack_info_t * this );
 //-----------------------------------------------------------------------------
 …
+}
 void ?{}( coroutine$ & this, const char name[], void * storage, size_t storageSize ) with( this ) {
+void ?{}( coroutine$ & this, const char name[], void * storage, size_t storageSize ) libcfa_public with( this ) {
         (this.context){0p, 0p};
         (this.stack){storage, storageSize};
 …
+}
 void ^?{}(coroutine$& this) {
+void ^?{}(coroutine$& this) libcfa_public {
         if(this.state != Halted && this.state != Start && this.state != Primed) {
                 coroutine$ * src = active_coroutine();
 …
 // Not inline since only ever called once per coroutine
 forall(T & | is_coroutine(T) | { EHM_DEFAULT_VTABLE(CoroutineCancelled, (T)); })
 void prime(T& cor) {
+void prime(T& cor) libcfa_public {
         coroutine$* this = get_coroutine(cor);
         assert(this->state == Start);
 …
+}
 [void *, size_t] __stack_alloc( size_t storageSize ) {
+static [void *, size_t] __stack_alloc( size_t storageSize ) {
         const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment
         assert(__page_size != 0l);
 …
+}
 void __stack_clean  ( __stack_info_t * this ) {
+static void __stack_clean  ( __stack_info_t * this ) {
         void * storage = this->storage->limit;
 …
+}
 void __stack_prepare( __stack_info_t * this, size_t create_size ) {
+void __stack_prepare( __stack_info_t * this, size_t create_size ) libcfa_public {
         const size_t stack_data_size = libCeiling( sizeof(__stack_t), 16 ); // minimum alignment
         bool userStack;

libcfa/src/concurrency/coroutine.hfa

r29d8c02	r74ec742
113	113
114	114	extern void __stack_prepare( __stack_info_t * this, size_t size /* ignored if storage already allocated */);
115		~~extern void __stack_clean ( __stack_info_t * this );~~
116
117	115
118	116	// Suspend implementation inlined for performance

libcfa/src/concurrency/exception.cfa

-              r29d8c02
+              r74ec742
 extern "C" {
 struct exception_context_t * this_exception_context(void) {
+struct exception_context_t * this_exception_context(void) libcfa_public {
         return &__get_stack( active_coroutine() )->exception_context;
+}
 _Unwind_Reason_Code __cfaehm_cancellation_unwind( struct _Unwind_Exception * unwind_exception ) {
+_Unwind_Reason_Code __cfaehm_cancellation_unwind( struct _Unwind_Exception * unwind_exception ) libcfa_public {
         _Unwind_Stop_Fn stop_func;
         void * stop_param;

libcfa/src/concurrency/invoke.c

-              r29d8c02
+              r74ec742
 extern void enable_interrupts( _Bool poll );
 void __cfactx_invoke_coroutine(
+libcfa_public void __cfactx_invoke_coroutine(
         void (*main)(void *),
         void *this
 …
+}
 void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct coroutine$ * cor) __attribute__ ((__noreturn__));
+libcfa_public void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct coroutine$ * cor) __attribute__ ((__noreturn__));
 void __cfactx_coroutine_unwind(struct _Unwind_Exception * storage, struct coroutine$ * cor) {
         _Unwind_Reason_Code ret = _Unwind_ForcedUnwind( storage, __cfactx_coroutine_unwindstop, cor );
 …
+}
 void __cfactx_invoke_thread(
+libcfa_public void __cfactx_invoke_thread(
         void (*main)(void *),
         void *this
 …
+}
 void __cfactx_start(
+libcfa_public void __cfactx_start(
         void (*main)(void *),
         struct coroutine$ * cor,

libcfa/src/concurrency/io.cfa

-              r29d8c02
+              r74ec742
                                         remote = true;
                                         __STATS__( false, io.calls.helped++; )
+                                        __STATS__( true, io.calls.helped++; )
+                                }
                                 proc->io.target = MAX;
 …
         // for convenience, return both the index and the pointer to the sqe
         // sqe == &sqes[idx]
         struct $io_context * cfa_io_allocate(struct io_uring_sqe * sqes[], __u32 idxs[], __u32 want) {
+        struct $io_context * cfa_io_allocate(struct io_uring_sqe * sqes[], __u32 idxs[], __u32 want) libcfa_public {
                 // __cfadbg_print_safe(io, "Kernel I/O : attempting to allocate %u\n", want);
 …
+        }
         void cfa_io_submit( struct $io_context * inctx, __u32 idxs[], __u32 have, bool lazy ) __attribute__((nonnull (1))) {
+        void cfa_io_submit( struct $io_context * inctx, __u32 idxs[], __u32 have, bool lazy ) __attribute__((nonnull (1))) libcfa_public {
                 // __cfadbg_print_safe(io, "Kernel I/O : attempting to submit %u (%s)\n", have, lazy ? "lazy" : "eager");

libcfa/src/concurrency/io/call.cfa.in

r29d8c02	r74ec742
139	139	// I/O Interface
140	140	//=============================================================================================
	141	#pragma GCC visibility push(default)
141	142	"""
142	143

libcfa/src/concurrency/io/setup.cfa

-              r29d8c02
+              r74ec742
 #if !defined(CFA_HAVE_LINUX_IO_URING_H)
         void ?{}(io_context_params & this) {}
+        void ?{}(io_context_params & this) libcfa_public {}
         void  ?{}($io_context & this, struct cluster & cl) {}
 …
 #pragma GCC diagnostic pop
         void ?{}(io_context_params & this) {
+        void ?{}(io_context_params & this) libcfa_public {
                 this.num_entries = 256;
+        }

libcfa/src/concurrency/kernel.cfa

-              r29d8c02
+              r74ec742
 // KERNEL_ONLY
 void returnToKernel() {
+static void returnToKernel() {
         /* paranoid */ verify( ! __preemption_enabled() );
         coroutine$ * proc_cor = get_coroutine(kernelTLS().this_processor->runner);
 …
+}
 void unpark( thread$ * thrd, unpark_hint hint ) {
+void unpark( thread$ * thrd, unpark_hint hint ) libcfa_public {
         if( !thrd ) return;
 …
+}
 void park( void ) {
+void park( void ) libcfa_public {
         __disable_interrupts_checked();
                 /* paranoid */ verify( kernelTLS().this_thread->preempted == __NO_PREEMPTION );
 …
 // KERNEL ONLY
 bool force_yield( __Preemption_Reason reason ) {
+bool force_yield( __Preemption_Reason reason ) libcfa_public {
         __disable_interrupts_checked();
                 thread$ * thrd = kernelTLS().this_thread;
 …
 //-----------------------------------------------------------------------------
 // Debug
 bool threading_enabled(void) __attribute__((const)) {
+bool threading_enabled(void) __attribute__((const)) libcfa_public {
         return true;
+}
 …
 // Statistics
 #if !defined(__CFA_NO_STATISTICS__)
         void print_halts( processor & this ) {
+        void print_halts( processor & this ) libcfa_public {
                 this.print_halts = true;
+        }
 …
+        }
         void crawl_cluster_stats( cluster & this ) {
+        static void crawl_cluster_stats( cluster & this ) {
                 // Stop the world, otherwise stats could get really messed-up
                 // this doesn't solve all problems but does solve many
 …
         void print_stats_now( cluster & this, int flags ) {
+        void print_stats_now( cluster & this, int flags ) libcfa_public {
                 crawl_cluster_stats( this );
                 __print_stats( this.stats, flags, "Cluster", this.name, (void*)&this );

libcfa/src/concurrency/kernel.hfa

-              r29d8c02
+              r74ec742
 // Coroutine used by processors for the 2-step context switch
+coroutine processorCtx_t {
+struct processorCtx_t {
+        struct coroutine$ self;
         struct processor * proc;
 };

libcfa/src/concurrency/kernel/cluster.cfa

r29d8c02	r74ec742
49	49
50	50	// returns the maximum number of processors the RWLock support
51		__attribute__((weak)) unsigned __max_processors() {
	51	__attribute__((weak)) unsigned __max_processors() libcfa_public {
52	52	const char * max_cores_s = getenv("CFA_MAX_PROCESSORS");
53	53	if(!max_cores_s) {

libcfa/src/concurrency/kernel/private.hfa

-              r29d8c02
+              r74ec742
 //-----------------------------------------------------------------------------
 // Processor
+void main(processorCtx_t *);
+void main(processorCtx_t &);
+static inline coroutine$* get_coroutine(processorCtx_t & this) { return &this.self; }
 void * __create_pthread( pthread_t *, void * (*)(void *), void * );

libcfa/src/concurrency/kernel/startup.cfa

-              r29d8c02
+              r74ec742
 #endif
 cluster              * mainCluster;
+cluster              * mainCluster libcfa_public;
 processor            * mainProcessor;
 thread$              * mainThread;
 …
 };
 void ?{}( current_stack_info_t & this ) {
+static void ?{}( current_stack_info_t & this ) {
         __stack_context_t ctx;
         CtxGet( ctx );
 …
         // Construct the processor context of the main processor
         void ?{}(processorCtx_t & this, processor * proc) {
                 (this.__cor){ "Processor" };
                 this.__cor.starter = 0p;
+                (this.self){ "Processor" };
+                this.self.starter = 0p;
                 this.proc = proc;
+        }
 …
 // Construct the processor context of non-main processors
 static void ?{}(processorCtx_t & this, processor * proc, current_stack_info_t * info) {
         (this.__cor){ info };
+        (this.self){ info };
         this.proc = proc;
+}
 …
+}
 void ?{}(processor & this, const char name[], cluster & _cltr, thread$ * initT) {
+void ?{}(processor & this, const char name[], cluster & _cltr, thread$ * initT) libcfa_public {
         ( this.terminated ){};
         ( this.runner ){};
 …
+}
 void ?{}(processor & this, const char name[], cluster & _cltr) {
+void ?{}(processor & this, const char name[], cluster & _cltr) libcfa_public {
         (this){name, _cltr, 0p};
+}
 extern size_t __page_size;
 void ^?{}(processor & this) with( this ){
+void ^?{}(processor & this) libcfa_public with( this ) {
         /* paranoid */ verify( !__atomic_load_n(&do_terminate, __ATOMIC_ACQUIRE) );
         __cfadbg_print_safe(runtime_core, "Kernel : core %p signaling termination\n", &this);
 …
+}
 void ?{}(cluster & this, const char name[], Duration preemption_rate, unsigned num_io, const io_context_params & io_params) with( this ) {
+void ?{}(cluster & this, const char name[], Duration preemption_rate, unsigned num_io, const io_context_params & io_params) libcfa_public with( this ) {
         this.name = name;
         this.preemption_rate = preemption_rate;
 …
+}
 void ^?{}(cluster & this) {
+void ^?{}(cluster & this) libcfa_public {
         destroy(this.io.arbiter);

libcfa/src/concurrency/locks.cfa

-              r29d8c02
+              r74ec742
 #include <stdlib.hfa>
+#pragma GCC visibility push(default)
 //-----------------------------------------------------------------------------
 // info_thread
 …
+}
 void pop_and_set_new_owner( blocking_lock & this ) with( this ) {
+static void pop_and_set_new_owner( blocking_lock & this ) with( this ) {
         thread$ * t = &try_pop_front( blocked_threads );
         owner = t;
 …
         void ^?{}( alarm_node_wrap(L) & this ) { }
         void timeout_handler ( alarm_node_wrap(L) & this ) with( this ) {
+        static void timeout_handler ( alarm_node_wrap(L) & this ) with( this ) {
                 // This condition_variable member is called from the kernel, and therefore, cannot block, but it can spin.
                 lock( cond->lock __cfaabi_dbg_ctx2 );
 …
         // this casts the alarm node to our wrapped type since we used type erasure
         void alarm_node_wrap_cast( alarm_node_t & a ) { timeout_handler( (alarm_node_wrap(L) &)a ); }
+        static void alarm_node_wrap_cast( alarm_node_t & a ) { timeout_handler( (alarm_node_wrap(L) &)a ); }
+}
 //-----------------------------------------------------------------------------
 // condition variable
+// Synchronization Locks
 forall(L & | is_blocking_lock(L)) {
+        //-----------------------------------------------------------------------------
+        // condition variable
         void ?{}( condition_variable(L) & this ){
                 this.lock{};
 …
         void ^?{}( condition_variable(L) & this ){ }
         void process_popped( condition_variable(L) & this, info_thread(L) & popped ) with( this ) {
+        static void process_popped( condition_variable(L) & this, info_thread(L) & popped ) with( this ) {
                 if(&popped != 0p) {
                         popped.signalled = true;
 …
         int counter( condition_variable(L) & this ) with(this) { return count; }
         size_t queue_and_get_recursion( condition_variable(L) & this, info_thread(L) * i ) with(this) {
+        static size_t queue_and_get_recursion( condition_variable(L) & this, info_thread(L) * i ) with(this) {
                 // add info_thread to waiting queue
                 insert_last( blocked_threads, *i );
 …
         // helper for wait()'s' with no timeout
         void queue_info_thread( condition_variable(L) & this, info_thread(L) & i ) with(this) {
+        static void queue_info_thread( condition_variable(L) & this, info_thread(L) & i ) with(this) {
                 lock( lock __cfaabi_dbg_ctx2 );
                 size_t recursion_count = queue_and_get_recursion(this, &i);
 …
         // helper for wait()'s' with a timeout
         void queue_info_thread_timeout( condition_variable(L) & this, info_thread(L) & info, Duration t, Alarm_Callback callback ) with(this) {
+        static void queue_info_thread_timeout( condition_variable(L) & this, info_thread(L) & info, Duration t, Alarm_Callback callback ) with(this) {
                 lock( lock __cfaabi_dbg_ctx2 );
                 size_t recursion_count = queue_and_get_recursion(this, &info);
 …
         bool wait( condition_variable(L) & this, L & l, Duration duration                 ) with(this) { WAIT_TIME( 0   , &l , duration ) }
         bool wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration ) with(this) { WAIT_TIME( info, &l , duration ) }
+        //-----------------------------------------------------------------------------
+        // fast_cond_var
+        void  ?{}( fast_cond_var(L) & this ){ // constructor: starts with an empty queue of waiters
+                this.blocked_threads{};
+                #ifdef __CFA_DEBUG__
+                this.lock_used = 0p; // debug builds only: no lock has been paired with this cond var yet
+                #endif
+        }
+        void ^?{}( fast_cond_var(L) & this ){ } // destructor: nothing to release
+        bool notify_one( fast_cond_var(L) & this ) with(this) { // wake the waiter at the front of the queue; returns false when nobody was waiting
+                bool ret = ! blocked_threads`isEmpty;
+                if ( ret ) {
+                        info_thread(L) & popped = try_pop_front( blocked_threads );
+                        on_notify(*popped.lock, popped.t); // the waiter's own lock decides how the thread is resumed
+                }
+                return ret;
+        }
+        bool notify_all( fast_cond_var(L) & this ) with(this) { // wake every queued waiter, front to back; returns whether the queue was non-empty on entry
+                bool ret = ! blocked_threads`isEmpty;
+                while( ! blocked_threads`isEmpty ) {
+                        info_thread(L) & popped = try_pop_front( blocked_threads );
+                        on_notify(*popped.lock, popped.t); // same hand-off as notify_one, repeated until the queue drains
+                }
+                return ret;
+        }
+        uintptr_t front( fast_cond_var(L) & this ) with(this) { return blocked_threads`isEmpty ? NULL : blocked_threads`first.info; } // info value of the first waiter, or NULL when the queue is empty
+        bool empty ( fast_cond_var(L) & this ) with(this) { return blocked_threads`isEmpty; } // true when no thread is queued
+        void wait( fast_cond_var(L) & this, L & l ) { // wait without user data: forwards to the overload below with info = 0
+                wait( this, l, 0 );
+        }
+        void wait( fast_cond_var(L) & this, L & l, uintptr_t info ) with(this) { // queue the calling thread with `info` attached, release `l` through on_wait, then park
+                // brand cond lock with lock
+                #ifdef __CFA_DEBUG__
+                        if ( lock_used == 0p ) lock_used = &l; // first wait records which lock is paired with this cond var
+                        else { assert(lock_used == &l); } // later waits must use that same lock
+                #endif
+                info_thread( L ) i = { active_thread(), info, &l }; // queue node is a local of this function
+                insert_last( blocked_threads, i ); // NOTE(review): no internal lock is taken here; presumably the caller must already hold l - confirm
+                size_t recursion_count = on_wait( *i.lock ); // lock-specific hook run before parking; its result is passed back to on_wakeup
+                park( );
+                on_wakeup(*i.lock, recursion_count); // lock-specific hook run after the thread resumes
+        }
+}

libcfa/src/concurrency/locks.hfa

-              r29d8c02
+              r74ec742
 static inline void   on_notify( owner_lock & this, struct thread$ * t ) { on_notify( (blocking_lock &)this, t ); }
+//-----------------------------------------------------------------------------
+// MCS Lock
 struct mcs_node {
         mcs_node * volatile next;
 …
+}
+//-----------------------------------------------------------------------------
+// Linear backoff Spinlock
 struct linear_backoff_then_block_lock {
         // Spin lock used for mutual exclusion
 …
 //-----------------------------------------------------------------------------
+// Fast Block Lock
+// High efficiency minimal blocking lock
+// - No reacquire for cond var
+// - No recursive acquisition
+// - No ownership
+struct fast_block_lock {
+        // Spin lock used for mutual exclusion; lock() and unlock() take it before touching the fields below
+        __spinlock_t lock;
+        // List of blocked threads, appended to by lock() and popped from the front by unlock()
+        dlist( thread$ ) blocked_threads;
+        bool held:1; // true while the lock is held; initialised to false by the constructor
+};
+static inline void  ?{}( fast_block_lock & this ) {
+        // default-construct the spinlock and the waiter list, and start out unheld
+        (this.lock){};
+        (this.blocked_threads){};
+        this.held = false;
+}
+static inline void ^?{}( fast_block_lock & this ) {} // destructor: empty body
+static inline void ?{}( fast_block_lock & this, fast_block_lock this2 ) = void; // copy construction is deleted
+static inline void ?=?( fast_block_lock & this, fast_block_lock this2 ) = void; // assignment is deleted
+// Not recursive: a thread that calls lock() again while already holding this lock WILL DEADLOCK!
+static inline void lock(fast_block_lock & this) with(this) {
+        lock( lock __cfaabi_dbg_ctx2 );
+        if ( ! held ) {
+                // uncontended: mark the lock as held and leave
+                held = true;
+                unlock( lock );
+                return;
+        }
+        // contended: queue the calling thread, release the spinlock, then block;
+        // held is left true here, and unlock() keeps it true when it wakes a waiter
+        insert_last( blocked_threads, *active_thread() );
+        unlock( lock );
+        park( );
+}
+static inline void unlock(fast_block_lock & this) with(this) {
+        lock( lock __cfaabi_dbg_ctx2 );
+        /* paranoid */ verifyf( held != false, "Attempt to release lock %p that isn't held", &this );
+        // take the first blocked thread, if there is one
+        thread$ * waiter = &try_pop_front( blocked_threads );
+        // the lock stays held when a waiter was popped, otherwise it becomes free
+        held = ( waiter != 0p );
+        unpark( waiter );
+        unlock( lock );
+}
+static inline void on_notify(fast_block_lock & this, struct thread$ * t ) { unpark(t); } // wake t directly; the lock itself is not touched
+static inline size_t on_wait(fast_block_lock & this) { unlock(this); return 0; } // release the lock before waiting; always returns 0
+static inline void on_wakeup(fast_block_lock & this, size_t recursion ) { } // no-op: the lock is not reacquired after wakeup
+//-----------------------------------------------------------------------------
 // is_blocking_lock
 trait is_blocking_lock(L & | sized(L)) {
 …
 // Synchronization Locks
 forall(L & | is_blocking_lock(L)) {
+        //-----------------------------------------------------------------------------
+        // condition_variable
+        // The multi-tool condition variable
+        // - can pass timeouts to wait for either a signal or timeout
+        // - can wait without passing a lock
+        // - can have waiters reacquire different locks while waiting on the same cond var
+        // - has shadow queue
+        // - can be signalled outside of critical sections with no locks held
         struct condition_variable {
                 // Spin lock used for mutual exclusion
 …
         bool wait( condition_variable(L) & this, L & l, Duration duration );
         bool wait( condition_variable(L) & this, L & l, uintptr_t info, Duration duration );
+}
+        //-----------------------------------------------------------------------------
+        // fast_cond_var
+        // The trimmed and slim condition variable
+        // - no internal lock so you must hold a lock while using this cond var
+        // - signalling without holding branded lock is UNSAFE!
+        // - only allows usage of one lock, cond var is branded after usage
+        struct fast_cond_var {
+                // List of blocked threads, each entry carrying the waiter's info value and lock
+                dlist( info_thread(L) ) blocked_threads;
+                #ifdef __CFA_DEBUG__
+                L * lock_used; // debug builds only: the lock this condition variable has been branded with
+                #endif
+        };
+        void  ?{}( fast_cond_var(L) & this );
+        void ^?{}( fast_cond_var(L) & this );
+        bool notify_one( fast_cond_var(L) & this );
+        bool notify_all( fast_cond_var(L) & this );
+        uintptr_t front( fast_cond_var(L) & this );
+        bool empty  ( fast_cond_var(L) & this );
+        void wait( fast_cond_var(L) & this, L & l );
+        void wait( fast_cond_var(L) & this, L & l, uintptr_t info );
+}

libcfa/src/concurrency/monitor.cfa

-              r29d8c02
+              r74ec742
 static inline void restore( monitor$ * ctx [], __lock_size_t count, __spinlock_t * locks [], unsigned int /*in */ recursions [], __waitfor_mask_t /*in */ masks [] );
+static inline void ?{}(__condition_node_t & this, thread$ * waiting_thread, __lock_size_t count, uintptr_t user_info );
+static inline void ?{}(__condition_criterion_t & this );
+static inline void ?{}(__condition_criterion_t & this, monitor$ * target, __condition_node_t & owner ); // owner is a reference, matching the definition later in this file
 static inline void init     ( __lock_size_t count, monitor$ * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
 static inline void init_push( __lock_size_t count, monitor$ * monitors [], __condition_node_t & waiter, __condition_criterion_t criteria [] );
 …
 // Leave single monitor
 void __leave( monitor$ * this ) {
+static void __leave( monitor$ * this ) {
         // Lock the monitor spinlock
         lock( this->lock __cfaabi_dbg_ctx2 );
 …
 // Leave single monitor for the last time
 void __dtor_leave( monitor$ * this, bool join ) {
+static void __dtor_leave( monitor$ * this, bool join ) {
         __cfaabi_dbg_debug_do(
                 if( active_thread() != this->owner ) {
 …
 // Ctor for monitor guard
 // Sorts monitors before entering
 void ?{}( monitor_guard_t & this, monitor$ * m [], __lock_size_t count, fptr_t func ) {
+void ?{}( monitor_guard_t & this, monitor$ * m [], __lock_size_t count, fptr_t func ) libcfa_public {
         thread$ * thrd = active_thread();
 …
+}
 void ?{}( monitor_guard_t & this, monitor$ * m [], __lock_size_t count ) {
+void ?{}( monitor_guard_t & this, monitor$ * m [], __lock_size_t count ) libcfa_public {
         this{ m, count, 0p };
+}
 …
 // Dtor for monitor guard
 void ^?{}( monitor_guard_t & this ) {
+void ^?{}( monitor_guard_t & this ) libcfa_public {
         // __cfaabi_dbg_print_safe( "MGUARD : leaving %d\n", this.count);
 …
 // Ctor for monitor guard
 // Sorts monitors before entering
 void ?{}( monitor_dtor_guard_t & this, monitor$ * m [], fptr_t func, bool join ) {
+void ?{}( monitor_dtor_guard_t & this, monitor$ * m [], fptr_t func, bool join ) libcfa_public {
         // optimization
         thread$ * thrd = active_thread();
 …
 // Dtor for monitor guard
 void ^?{}( monitor_dtor_guard_t & this ) {
+void ^?{}( monitor_dtor_guard_t & this ) libcfa_public {
         // Leave the monitors in order
         __dtor_leave( this.m, this.join );
 …
 //-----------------------------------------------------------------------------
 // Internal scheduling types
 void ?{}(__condition_node_t & this, thread$ * waiting_thread, __lock_size_t count, uintptr_t user_info ) {
+static void ?{}(__condition_node_t & this, thread$ * waiting_thread, __lock_size_t count, uintptr_t user_info ) {
         this.waiting_thread = waiting_thread;
         this.count = count;
 …
+}
 void ?{}(__condition_criterion_t & this ) with( this ) {
+static void ?{}(__condition_criterion_t & this ) with( this ) {
         ready  = false;
         target = 0p;
 …
+}
 void ?{}(__condition_criterion_t & this, monitor$ * target, __condition_node_t & owner ) {
+static void ?{}(__condition_criterion_t & this, monitor$ * target, __condition_node_t & owner ) {
         this.ready  = false;
         this.target = target;
 …
 //-----------------------------------------------------------------------------
 // Internal scheduling
 void wait( condition & this, uintptr_t user_info = 0 ) {
+void wait( condition & this, uintptr_t user_info = 0 ) libcfa_public {
         brand_condition( this );
 …
+}
 bool signal( condition & this ) {
+bool signal( condition & this ) libcfa_public {
         if( is_empty( this ) ) { return false; }
 …
+}
 bool signal_block( condition & this ) {
+bool signal_block( condition & this ) libcfa_public {
         if( !this.blocked.head ) { return false; }
 …
 // Access the user_info of the thread waiting at the front of the queue
 uintptr_t front( condition & this ) {
+uintptr_t front( condition & this ) libcfa_public {
         verifyf( !is_empty(this),
                 "Attempt to access user data on an empty condition.\n"
 …
 //              setup mask
 //              block
 void __waitfor_internal( const __waitfor_mask_t & mask, int duration ) {
+void __waitfor_internal( const __waitfor_mask_t & mask, int duration ) libcfa_public {
         // This statement doesn't have a contiguous list of monitors...
         // Create one!
 …
 // Can't be accepted since a mutex stmt is effectively an anonymous routine
 // Thus we do not need a monitor group
 void lock( monitor$ * this ) {
+void lock( monitor$ * this ) libcfa_public {
         thread$ * thrd = active_thread();
 …
 // Leave routine for mutex stmt
 // Is just a wrapper around __leave for the is_lock trait to see
 void unlock( monitor$ * this ) { __leave( this ); }
+void unlock( monitor$ * this ) libcfa_public { __leave( this ); }
 // Local Variables: //

libcfa/src/concurrency/monitor.hfa

-              r29d8c02
+              r74ec742
+}
 void ?{}(__condition_node_t & this, thread$ * waiting_thread, __lock_size_t count, uintptr_t user_info );
 void ?{}(__condition_criterion_t & this );
 void ?{}(__condition_criterion_t & this, monitor$ * target, __condition_node_t * owner );
+// void ?{}(__condition_node_t & this, thread$ * waiting_thread, __lock_size_t count, uintptr_t user_info );
+// void ?{}(__condition_criterion_t & this );
+// void ?{}(__condition_criterion_t & this, monitor$ * target, __condition_node_t * owner );
 struct condition {

libcfa/src/concurrency/preemption.cfa

-              r29d8c02
+              r74ec742
 #endif
 __attribute__((weak)) Duration default_preemption() {
+__attribute__((weak)) Duration default_preemption() libcfa_public {
         const char * preempt_rate_s = getenv("CFA_DEFAULT_PREEMPTION");
         if(!preempt_rate_s) {
 …
 //----------
 // special case for preemption since used often
 __attribute__((optimize("no-reorder-blocks"))) bool __preemption_enabled() {
+__attribute__((optimize("no-reorder-blocks"))) bool __preemption_enabled() libcfa_public {
         // create an assembler label before
         // marked as clobber all to avoid movement
 …
 // Get data from the TLS block
 // struct asm_region __cfaasm_get;
 uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__)); //no inline to avoid problems
+uintptr_t __cfatls_get( unsigned long int offset ) __attribute__((__noinline__, visibility("default"))); //no inline to avoid problems
 uintptr_t __cfatls_get( unsigned long int offset ) {
         // create an assembler label before
 …
 extern "C" {
         // Disable interrupts by incrementing the counter
         void disable_interrupts() {
+        __attribute__((__noinline__, visibility("default"))) void disable_interrupts() libcfa_public {
                 // create an assembler label before
                 // marked as clobber all to avoid movement
 …
         // Enable interrupts by decrementing the counter
         // If counter reaches 0, execute any pending __cfactx_switch
         void enable_interrupts( bool poll ) {
+        void enable_interrupts( bool poll ) libcfa_public {
                 // Cache the processor now since interrupts can start happening after the atomic store
                 processor   * proc = __cfaabi_tls.this_processor;
 …
 //-----------------------------------------------------------------------------
 // Kernel Signal Debug
 void __cfaabi_check_preemption() {
+void __cfaabi_check_preemption() libcfa_public {
         bool ready = __preemption_enabled();
         if(!ready) { abort("Preemption should be ready"); }

libcfa/src/concurrency/ready_subqueue.hfa

-              r29d8c02
+              r74ec742
         /* paranoid */ verify( node->link.ts   != 0  );
         /* paranoid */ verify( this.anchor.ts  != 0  );
+        /* paranoid */ verify( (this.anchor.ts  == MAX) == is_empty );
         return [node, this.anchor.ts];
+}
 …
 // Return the timestamp
 static inline unsigned long long ts(__intrusive_lane_t & this) {
         // Cannot verify here since it may not be locked
+        // Cannot verify 'emptiness' here since it may not be locked
         /* paranoid */ verify(this.anchor.ts != 0);
         return this.anchor.ts;

libcfa/src/concurrency/thread.cfa

r29d8c02	r74ec742
26	26
27	27	extern uint32_t __global_random_seed, __global_random_prime, __global_random_mask;
	28
	29	#pragma GCC visibility push(default)
28	30
29	31	//-----------------------------------------------------------------------------

libcfa/src/containers/maybe.cfa

r29d8c02	r74ec742
17	17	#include <assert.h>
18	18
	19	#pragma GCC visibility push(default)
19	20
20	21	forall(T)

libcfa/src/containers/result.cfa

r29d8c02	r74ec742
17	17	#include <assert.h>
18	18
	19	#pragma GCC visibility push(default)
19	20
20	21	forall(T, E)

libcfa/src/containers/string.cfa

r29d8c02	r74ec742
18	18	#include <stdlib.hfa>
19	19
	20	#pragma GCC visibility push(default)
20	21
21	22	/*

libcfa/src/containers/string_sharectx.hfa

-              r29d8c02
+              r74ec742
 #pragma once
+#pragma GCC visibility push(default)
 //######################### String Sharing Context #########################
 struct VbyteHeap;
 // A string_sharectx
+// A string_sharectx
 //
 // Usage:

libcfa/src/containers/vector.cfa

-              r29d8c02
+              r74ec742
 #include <stdlib.hfa>
+#pragma GCC visibility push(default)
 forall(T, allocator_t | allocator_c(T, allocator_t))
 void copy_internal(vector(T, allocator_t)* this, vector(T, allocator_t)* other);
+static void copy_internal(vector(T, allocator_t)* this, vector(T, allocator_t)* other);
 //------------------------------------------------------------------------------
 …
 forall(T, allocator_t | allocator_c(T, allocator_t))
 void copy_internal(vector(T, allocator_t)* this, vector(T, allocator_t)* other)
+static void copy_internal(vector(T, allocator_t)* this, vector(T, allocator_t)* other)
+{
         this->size = other->size;

libcfa/src/device/cpu.cfa

r29d8c02	r74ec742
31	31	}
32	32
	33	#include "bits/defs.hfa"
33	34	#include "algorithms/range_iterator.hfa"
34	35
…	…
456	457	}
457	458
458		cpu_info_t cpu_info;
	459	libcfa_public cpu_info_t cpu_info;

libcfa/src/exception.c

-              r29d8c02
+              r74ec742
 #include "stdhdr/assert.h"
 #include "virtual.h"
+#pragma GCC visibility push(default)
 #include "lsda.h"
 …
 #else // defined( __ARM_ARCH )
         // The return code from _Unwind_RaiseException seems to be corrupt on ARM at end of stack.
         // This workaround tries to keep default exception handling working.
+        // This workaround tries to keep default exception handling working.
         if ( ret == _URC_FATAL_PHASE1_ERROR || ret == _URC_FATAL_PHASE2_ERROR ) {
 #endif

libcfa/src/fstream.cfa

-              r29d8c02
+              r74ec742
 #include <assert.h>
 #include <errno.h>                                                                              // errno
+#pragma GCC visibility push(default)
 // *********************************** ofstream ***********************************
 …
                 // abort | IO_MSG "open output file \"" | name | "\"" | nl | strerror( errno );
         } // if
         (os){ file };                                                                           // initialize
+        (os){ file };                                                                           // initialize
 } // open
 …
         va_list args;
         va_start( args, format );
         int len;
     for ( cnt; 10 ) {
 …
                 // abort | IO_MSG "open input file \"" | name | "\"" | nl | strerror( errno );
         } // if
         (is){ file };                                                                           // initialize
+        (is){ file };                                                                           // initialize
 } // open

libcfa/src/fstream.hfa

r29d8c02	r74ec742
18	18	#include "bits/weakso_locks.hfa" // mutex_lock
19	19	#include "iostream.hfa"
20		~~#include <exception.hfa>~~
21	20
22	21

libcfa/src/heap.cfa

-              r29d8c02
+              r74ec742
 // Created On       : Tue Dec 19 21:58:35 2017
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Mon Apr 25 18:51:36 2022
 // Update Count     : 1147
+// Last Modified On : Fri Apr 29 19:05:03 2022
+// Update Count     : 1167
 //
 …
 static bool traceHeap = false;
 inline bool traceHeap() { return traceHeap; }
 bool traceHeapOn() {
+inline bool traceHeap() libcfa_public { return traceHeap; }
+bool traceHeapOn() libcfa_public {
         bool temp = traceHeap;
         traceHeap = true;
 …
 } // traceHeapOn
 bool traceHeapOff() {
+bool traceHeapOff() libcfa_public {
         bool temp = traceHeap;
         traceHeap = false;
 …
 } // traceHeapOff
 bool traceHeapTerm() { return false; }
+bool traceHeapTerm() libcfa_public { return false; }
 static bool prtFree = false;
 bool prtFree() {
+static bool prtFree() {
         return prtFree;
 } // prtFree
 bool prtFreeOn() {
+static bool prtFreeOn() {
         bool temp = prtFree;
         prtFree = true;
 …
 } // prtFreeOn
 bool prtFreeOff() {
+static bool prtFreeOff() {
         bool temp = prtFree;
         prtFree = false;
 …
+#ifdef __CFA_DEBUG__
+static size_t allocUnfreed;                                                             // running total of allocations minus frees
+static void prtUnfreed() { // warn on stderr when allocUnfreed is non-zero
+        if ( allocUnfreed != 0 ) {
+                // DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
+                char helpText[512]; // fixed-size buffer for the formatted warning
+                int len = snprintf( helpText, sizeof(helpText), "CFA warning (UNIX pid:%ld) : program terminating with %zu(0x%zx) bytes of storage allocated but not freed.\n"
+                                                        "Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
+                                                        (long int)getpid(), allocUnfreed, allocUnfreed ); // always print the UNIX pid; the byte count is shown in decimal and hexadecimal
+                __cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
+        } // if
+} // prtUnfreed
+extern int cfa_main_returned;                                                   // from interpose.cfa
+extern "C" {
+        void heapAppStart() {                                                           // called by __cfaabi_appready_startup
+                allocUnfreed = 0; // restart the unfreed-storage total at application start
+        } // heapAppStart
+        void heapAppStop() {                                                            // called by __cfaabi_appready_startdown
+                fclose( stdin ); fclose( stdout ); // NOTE(review): presumably closed so stream buffers are not reported as unfreed -- confirm
+                if ( cfa_main_returned ) prtUnfreed();                  // do not check unfreed storage if exit called
+        } // heapAppStop
+} // extern "C"
+#endif // __CFA_DEBUG__
+// statically allocated variables => zero filled.
+static size_t heapExpand;                                                               // sbrk advance
+static size_t mmapStart;                                                                // cross over point for mmap
+static unsigned int maxBucketsUsed;                                             // maximum number of buckets in use
+// extern visibility, used by runtime kernel
+size_t __page_size;                                                                             // architecture pagesize
+int __map_prot;                                                                                 // common mmap/mprotect protection
+#define SPINLOCK 0
+#define LOCKFREE 1
+#define BUCKETLOCK SPINLOCK
+#if BUCKETLOCK == SPINLOCK
+#elif BUCKETLOCK == LOCKFREE
+#include <stackLockFree.hfa>
+#else
+        #error undefined lock type for bucket lock
+#endif // LOCKFREE
+// Recursive definitions: HeapManager needs size of bucket array and bucket area needs sizeof HeapManager storage.
+// Break recursion by hardcoding number of buckets and statically checking number is correct after bucket array defined.
+enum { NoBucketSizes = 91 };                                                    // number of buckets sizes
+struct Heap {
+        struct Storage {
+                struct Header {                                                                 // header
+                        union Kind {
+                                struct RealHeader {
+                                        union {
+                                                struct {                                                // 4-byte word => 8-byte header, 8-byte word => 16-byte header
+                                                        union {
+                                                                // 2nd low-order bit => zero filled, 3rd low-order bit => mmapped
+                                                                // FreeHeader * home;           // allocated block points back to home locations (must overlay alignment)
+                                                                void * home;                    // allocated block points back to home locations (must overlay alignment)
+                                                                size_t blockSize;               // size for munmap (must overlay alignment)
+                                                                #if BUCKETLOCK == SPINLOCK
+                                                                Storage * next;                 // freed block points to next freed block of same size
+                                                                #endif // SPINLOCK
+                                                        };
+                                                        size_t size;                            // allocation size in bytes
+                                                };
+                                                #if BUCKETLOCK == LOCKFREE
+                                                Link(Storage) next;                             // freed block points to next freed block of same size (double-wide)
+                                                #endif // LOCKFREE
+                                        };
+                                } real; // RealHeader
+                                struct FakeHeader {
+                                        uintptr_t alignment;                            // 1st low-order bit => fake header & alignment
+                                        uintptr_t offset; // NOTE(review): presumably the distance back to the real header -- confirm
+                                } fake; // FakeHeader
+                        } kind; // Kind
+                } header; // Header
+                char pad[libAlign() - sizeof( Header )]; // pads the header out to libAlign() bytes
+                char data[0];                                                                   // storage (zero-length array placed after the padded header)
+        }; // Storage
+        static_assert( libAlign() >= sizeof( Storage ), "minimum alignment < sizeof( Storage )" );
+        struct FreeHeader {
+                #if BUCKETLOCK == SPINLOCK
+                __spinlock_t lock;                                                              // must be first field for alignment
+                Storage * freeList; // freed blocks of this bucket, chained through Storage next
+                #else
+                StackLF(Storage) freeList;
+                #endif // BUCKETLOCK
+                size_t blockSize;                                                               // size of allocations on this list
+        }; // FreeHeader
+        // must be first fields for alignment
+        __spinlock_t extlock;                                                           // protects allocation-buffer extension
+        FreeHeader freeLists[NoBucketSizes];                            // buckets for different allocation sizes
+        void * heapBegin;                                                                       // start of heap
+        void * heapEnd;                                                                         // logical end of heap
+        size_t heapRemaining;                                                           // amount of storage not allocated in the current chunk
+}; // Heap
+#if BUCKETLOCK == LOCKFREE
+static inline {
+        Link(Heap.Storage) * ?`next( Heap.Storage * this ) { return &this->header.kind.real.next; }
+        void ?{}( Heap.FreeHeader & ) {}
+        void ^?{}( Heap.FreeHeader & ) {}
+} // distribution
+#endif // LOCKFREE
+static inline size_t getKey( const Heap.FreeHeader & freeheader ) { return freeheader.blockSize; } // key of a bucket is the block size of its free list
+#ifdef FASTLOOKUP
+enum { LookupSizes = 65_536 + sizeof(Heap.Storage) }; // number of fast lookup sizes
+static unsigned char lookup[LookupSizes];                               // O(1) lookup for small sizes
+#endif // FASTLOOKUP
+static const off_t mmapFd = -1;                                                 // fake or actual fd for anonymous file
+#ifdef __CFA_DEBUG__
+static bool heapBoot = 0;                                                               // detect recursion during boot
+#endif // __CFA_DEBUG__
+// Size of array must harmonize with NoBucketSizes and individual bucket sizes must be multiple of 16.
+// Smaller multiples of 16 and powers of 2 are common allocation sizes, so make them generate the minimum required bucket size.
+// malloc(0) returns 0p, so no bucket is necessary for 0 bytes returning an address that can be freed.
+static const unsigned int bucketSizes[] @= {                    // different bucket sizes
+        // Each row is an arithmetic progression ending at a power of 2; the trailing "// N" is the number of
+        // entries on that row, and the rows sum to NoBucketSizes (91), which the static_assert after the array checks.
+        16 + sizeof(Heap.Storage), 32 + sizeof(Heap.Storage), 48 + sizeof(Heap.Storage), 64 + sizeof(Heap.Storage), // 4
+        96 + sizeof(Heap.Storage), 112 + sizeof(Heap.Storage), 128 + sizeof(Heap.Storage), // 3
+        160, 192, 224, 256 + sizeof(Heap.Storage), // 4
+        320, 384, 448, 512 + sizeof(Heap.Storage), // 4
+        640, 768, 896, 1_024 + sizeof(Heap.Storage), // 4
+        1_536, 2_048 + sizeof(Heap.Storage), // 2
+        2_560, 3_072, 3_584, 4_096 + sizeof(Heap.Storage), // 4
+        6_144, 8_192 + sizeof(Heap.Storage), // 2
+        9_216, 10_240, 11_264, 12_288, 13_312, 14_336, 15_360, 16_384 + sizeof(Heap.Storage), // 8
+        18_432, 20_480, 22_528, 24_576, 26_624, 28_672, 30_720, 32_768 + sizeof(Heap.Storage), // 8
+        36_864, 40_960, 45_056, 49_152, 53_248, 57_344, 61_440, 65_536 + sizeof(Heap.Storage), // 8
+        73_728, 81_920, 90_112, 98_304, 106_496, 114_688, 122_880, 131_072 + sizeof(Heap.Storage), // 8
+        147_456, 163_840, 180_224, 196_608, 212_992, 229_376, 245_760, 262_144 + sizeof(Heap.Storage), // 8
+        294_912, 327_680, 360_448, 393_216, 425_984, 458_752, 491_520, 524_288 + sizeof(Heap.Storage), // 8
+        655_360, 786_432, 917_504, 1_048_576 + sizeof(Heap.Storage), // 4
+        1_179_648, 1_310_720, 1_441_792, 1_572_864, 1_703_936, 1_835_008, 1_966_080, 2_097_152 + sizeof(Heap.Storage), // 8
+        2_621_440, 3_145_728, 3_670_016, 4_194_304 + sizeof(Heap.Storage), // 4
+};
+static_assert( NoBucketSizes == sizeof(bucketSizes) / sizeof(bucketSizes[0] ), "size of bucket array wrong" );
+// The constructor for heapManager is called explicitly in memory_startup.
+static Heap heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing
+//####################### Memory Allocation Routines Helpers ####################
+//####################### Heap Statistics ####################
 …
         return lhs;
 } // ?+=?
+#endif // __STATISTICS__
+#define SPINLOCK 0
+#define LOCKFREE 1
+#define BUCKETLOCK SPINLOCK
+#if BUCKETLOCK == SPINLOCK
+#elif BUCKETLOCK == LOCKFREE
+#include <stackLockFree.hfa>
+#else
+        #error undefined lock type for bucket lock
+#endif // LOCKFREE
+// Recursive definitions: HeapManager needs size of bucket array and bucket area needs sizeof HeapManager storage.
+// Break recursion by hardcoding number of buckets and statically checking number is correct after bucket array defined.
+enum { NoBucketSizes = 91 };                                                    // number of buckets sizes
+struct Heap {
+        struct Storage {
+                struct Header {                                                                 // header
+                        union Kind {
+                                struct RealHeader {
+                                        union {
+                                                struct {                                                // 4-byte word => 8-byte header, 8-byte word => 16-byte header
+                                                        union {
+                                                                // 2nd low-order bit => zero filled, 3rd low-order bit => mmapped
+                                                                // FreeHeader * home;           // allocated block points back to home locations (must overlay alignment)
+                                                                void * home;                    // allocated block points back to home locations (must overlay alignment)
+                                                                size_t blockSize;               // size for munmap (must overlay alignment)
+                                                                #if BUCKETLOCK == SPINLOCK
+                                                                Storage * next;                 // freed block points to next freed block of same size
+                                                                #endif // SPINLOCK
+                                                        };
+                                                        size_t size;                            // allocation size in bytes
+                                                };
+                                                #if BUCKETLOCK == LOCKFREE
+                                                Link(Storage) next;                             // freed block points to next freed block of same size (double-wide)
+                                                #endif // LOCKFREE
+                                        };
+                                } real; // RealHeader
+                                struct FakeHeader {
+                                        uintptr_t alignment;                            // 1st low-order bit => fake header & alignment
+                                        uintptr_t offset; // NOTE(review): presumably the distance back to the real header -- confirm
+                                } fake; // FakeHeader
+                        } kind; // Kind
+                } header; // Header
+                char pad[libAlign() - sizeof( Header )]; // pads the header out to libAlign() bytes
+                char data[0];                                                                   // storage (zero-length array placed after the padded header)
+        }; // Storage
+        static_assert( libAlign() >= sizeof( Storage ), "minimum alignment < sizeof( Storage )" );
+        struct FreeHeader {
+                size_t blockSize __attribute__(( aligned (8) )); // size of allocations on this list
+                #if BUCKETLOCK == SPINLOCK
+                __spinlock_t lock; // spinlock declared alongside this bucket's free list
+                Storage * freeList; // freed blocks of this bucket, chained through Storage next
+                #else
+                StackLF(Storage) freeList;
+                #endif // BUCKETLOCK
+        } __attribute__(( aligned (8) )); // FreeHeader
+        FreeHeader freeLists[NoBucketSizes];                            // buckets for different allocation sizes
+        __spinlock_t extlock;                                                           // protects allocation-buffer extension
+        void * heapBegin;                                                                       // start of heap
+        void * heapEnd;                                                                         // logical end of heap
+        size_t heapRemaining;                                                           // amount of storage not allocated in the current chunk
+}; // Heap
+#if BUCKETLOCK == LOCKFREE
+static inline {
+        Link(Heap.Storage) * ?`next( Heap.Storage * this ) { return &this->header.kind.real.next; }
+        void ?{}( Heap.FreeHeader & ) {}
+        void ^?{}( Heap.FreeHeader & ) {}
+} // distribution
+#endif // LOCKFREE
+static inline size_t getKey( const Heap.FreeHeader & freeheader ) { return freeheader.blockSize; } // key of a bucket is the block size of its free list
+#ifdef FASTLOOKUP
+enum { LookupSizes = 65_536 + sizeof(Heap.Storage) }; // number of fast lookup sizes
+static unsigned char lookup[LookupSizes];                               // O(1) lookup for small sizes
+#endif // FASTLOOKUP
+static const off_t mmapFd = -1;                                                 // fake or actual fd for anonymous file
+#ifdef __CFA_DEBUG__
+static bool heapBoot = 0;                                                               // detect recursion during boot
+#endif // __CFA_DEBUG__
+// Size of array must harmonize with NoBucketSizes and individual bucket sizes must be multiple of 16.
+// Smaller multiples of 16 and powers of 2 are common allocation sizes, so make them generate the minimum required bucket size.
+// malloc(0) returns 0p, so no bucket is necessary for 0 bytes returning an address that can be freed.
+static const unsigned int bucketSizes[] @= {                    // different bucket sizes, in strictly increasing order
+        16 + sizeof(Heap.Storage), 32 + sizeof(Heap.Storage), 48 + sizeof(Heap.Storage), 64 + sizeof(Heap.Storage), // 4
+        96 + sizeof(Heap.Storage), 112 + sizeof(Heap.Storage), 128 + sizeof(Heap.Storage), // 3
+        160, 192, 224, 256 + sizeof(Heap.Storage), // 4
+        320, 384, 448, 512 + sizeof(Heap.Storage), // 4
+        640, 768, 896, 1_024 + sizeof(Heap.Storage), // 4
+        1_536, 2_048 + sizeof(Heap.Storage), // 2
+        2_560, 3_072, 3_584, 4_096 + sizeof(Heap.Storage), // 4
+        6_144, 8_192 + sizeof(Heap.Storage), // 2
+        9_216, 10_240, 11_264, 12_288, 13_312, 14_336, 15_360, 16_384 + sizeof(Heap.Storage), // 8
+        18_432, 20_480, 22_528, 24_576, 26_624, 28_672, 30_720, 32_768 + sizeof(Heap.Storage), // 8
+        36_864, 40_960, 45_056, 49_152, 53_248, 57_344, 61_440, 65_536 + sizeof(Heap.Storage), // 8
+        73_728, 81_920, 90_112, 98_304, 106_496, 114_688, 122_880, 131_072 + sizeof(Heap.Storage), // 8
+        147_456, 163_840, 180_224, 196_608, 212_992, 229_376, 245_760, 262_144 + sizeof(Heap.Storage), // 8
+        294_912, 327_680, 360_448, 393_216, 425_984, 458_752, 491_520, 524_288 + sizeof(Heap.Storage), // 8
+        655_360, 786_432, 917_504, 1_048_576 + sizeof(Heap.Storage), // 4
+        1_179_648, 1_310_720, 1_441_792, 1_572_864, 1_703_936, 1_835_008, 1_966_080, 2_097_152 + sizeof(Heap.Storage), // 8
+        2_621_440, 3_145_728, 3_670_016, 4_194_304 + sizeof(Heap.Storage), // 4
+};
+static_assert( NoBucketSizes == sizeof(bucketSizes) / sizeof(bucketSizes[0] ), "size of bucket array wrong" );
+// The constructor for heapManager is called explicitly in memory_startup.
+static Heap heapManager __attribute__(( aligned (128) )) @= {}; // size of cache line to prevent false sharing
+//####################### Memory Allocation Routines Helpers ####################
+#ifdef __CFA_DEBUG__
+static size_t allocUnfreed;                                                             // running total of allocations minus frees
+static void prtUnfreed() { // write a warning to STDERR_FILENO when allocUnfreed is non-zero
+        if ( allocUnfreed != 0 ) {
+                // DO NOT USE STREAMS AS THEY MAY BE UNAVAILABLE AT THIS POINT.
+                char helpText[512]; // local buffer handed to the formatting routine
+                __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),
+                                                                        "CFA warning (UNIX pid:%ld) : program terminating with %zu(0x%zx) bytes of storage allocated but not freed.\n"
+                                                                        "Possible cause is unfreed storage allocated by the program or system/library routines called from the program.\n",
+                                                                        (long int)getpid(), allocUnfreed, allocUnfreed ); // always print the UNIX pid
+        } // if
+} // prtUnfreed
+extern int cfa_main_returned;                                                   // from interpose.cfa
+extern "C" {
+        void heapAppStart() {                                                           // called by __cfaabi_appready_startup
+                allocUnfreed = 0; // reset the running total of unfreed storage
+        } // heapAppStart
+        void heapAppStop() {                                                            // called by __cfaabi_appready_shutdown
+                fclose( stdin ); fclose( stdout ); // close the standard input/output streams before the unfreed-storage check
+                if ( cfa_main_returned ) prtUnfreed();                  // do not check unfreed storage if exit called
+        } // heapAppStop
+} // extern "C"
+#endif // __CFA_DEBUG__
+#ifdef __STATISTICS__
 static HeapStatistics stats;                                                    // zero filled
 static unsigned int sbrk_calls;
 …
+// statically allocated variables => zero filled.
+static size_t heapExpand;                                                               // sbrk advance
+static size_t mmapStart;                                                                // cross over point for mmap
+static unsigned int maxBucketsUsed;                                             // maximum number of buckets in use
+// extern visibility, used by runtime kernel
+// would be cool to remove libcfa_public but it's needed for libcfathread
+libcfa_public size_t __page_size;                                                       // architecture pagesize
+libcfa_public int __map_prot;                                                           // common mmap/mprotect protection
 // thunk problem
 size_t Bsearchl( unsigned int key, const unsigned int * vals, size_t dim ) {
 …
         } else {
                 fakeHeader( header, alignment );
                 if ( unlikely( MmappedBit( header ) ) ) {
                         assert( addr < heapBegin || heapEnd < addr );
+                if ( unlikely( MmappedBit( header ) ) ) {               // mmapped ?
+                        verify( addr < heapBegin || heapEnd < addr );
                         size = ClearStickyBits( header->kind.real.blockSize ); // mmap size
                         return true;
 …
         checkHeader( header < (Heap.Storage.Header *)heapBegin || (Heap.Storage.Header *)heapEnd < header, name, addr ); // bad address ? (offset could be + or -)
+        if ( freeHead < &freeLists[0] || &freeLists[NoBucketSizes] <= freeHead ) {
+                abort( "Attempt to %s storage %p with corrupted header.\n"
+                           "Possible cause is duplicate free on same block or overwriting of header information.",
+                           name, addr );
+        } // if
+        Heap * homeManager;
+        if ( unlikely( freeHead == 0p || // freed and only free-list node => null link
+                                   // freed and link points at another free block not to a bucket in the bucket array.
+                                   freeHead < &freeLists[0] || &freeLists[NoBucketSizes] <= freeHead ) ) {
+                abort( "**** Error **** attempt to %s storage %p with corrupted header.\n"
+                           "Possible cause is duplicate free on same block or overwriting of header information.",
+                           name, addr );
+        } // if
         #endif // __CFA_DEBUG__
 …
                 sbrk_storage += increase;
                 #endif // __STATISTICS__
                 #ifdef __CFA_DEBUG__
                 // Set new memory to garbage so subsequent uninitialized usages might fail.
 …
                 //Memset( (char *)heapEnd + heapRemaining, increase );
                 #endif // __CFA_DEBUG__
                 rem = heapRemaining + increase - size;
         } // if
 …
         __atomic_add_fetch( &allocUnfreed, tsize, __ATOMIC_SEQ_CST );
         if ( traceHeap() ) {
+                enum { BufferSize = 64 };
+                char helpText[BufferSize];
+                int len = snprintf( helpText, BufferSize, "%p = Malloc( %zu ) (allocated %zu)\n", addr, size, tsize );
+                __cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
+                char helpText[64];
+                __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),
+                                                                        "%p = Malloc( %zu ) (allocated %zu)\n", addr, size, tsize ); // print debug/nodebug
         } // if
         #endif // __CFA_DEBUG__
 …
         if ( traceHeap() ) {
                 char helpText[64];
                 int len = snprintf( helpText, sizeof(helpText), "Free( %p ) size:%zu\n", addr, size );
                 __cfaabi_bits_write( STDERR_FILENO, helpText, len ); // print debug/nodebug
+                __cfaabi_bits_print_buffer( STDERR_FILENO, helpText, sizeof(helpText),
+                                                                        "Free( %p ) size:%zu\n", addr, size ); // print debug/nodebug
         } // if
         #endif // __CFA_DEBUG__
 …
 size_t prtFree( Heap & manager ) with( manager ) {
+static size_t prtFree( Heap & manager ) with( manager ) {
         size_t total = 0;
         #ifdef __STATISTICS__
 …
         // Allocates size bytes and returns a pointer to the allocated memory.  The contents are undefined. If size is 0,
         // then malloc() returns a unique pointer value that can later be successfully passed to free().
         void * malloc( size_t size ) {
+        void * malloc( size_t size ) libcfa_public {
                 #ifdef __STATISTICS__
                 if ( likely( size > 0 ) ) {
 …
         // Same as malloc() except size bytes is an array of dim elements each of elemSize bytes.
         void * aalloc( size_t dim, size_t elemSize ) {
+        void * aalloc( size_t dim, size_t elemSize ) libcfa_public {
                 size_t size = dim * elemSize;
                 #ifdef __STATISTICS__
 …
         // Same as aalloc() with memory set to zero.
         void * calloc( size_t dim, size_t elemSize ) {
+        void * calloc( size_t dim, size_t elemSize ) libcfa_public {
                 size_t size = dim * elemSize;
           if ( unlikely( size ) == 0 ) {                        // 0 BYTE ALLOCATION RETURNS NULL POINTER
 …
         // not 0p, then the call is equivalent to free(oaddr). Unless oaddr is 0p, it must have been returned by an earlier
         // call to malloc(), alloc(), calloc() or realloc(). If the area pointed to was moved, a free(oaddr) is done.
         void * resize( void * oaddr, size_t size ) {
+        void * resize( void * oaddr, size_t size ) libcfa_public {
                 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
           if ( unlikely( size == 0 ) ) {                                        // special cases
 …
         // Same as resize() but the contents are unchanged in the range from the start of the region up to the minimum of
         // the old and new sizes.
         void * realloc( void * oaddr, size_t size ) {
+        void * realloc( void * oaddr, size_t size ) libcfa_public {
                 // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
           if ( unlikely( size == 0 ) ) {                                        // special cases
 …
         // Same as realloc() except the new allocation size is large enough for an array of nelem elements of size elsize.
         void * reallocarray( void * oaddr, size_t dim, size_t elemSize ) {
+        void * reallocarray( void * oaddr, size_t dim, size_t elemSize ) libcfa_public {
                 return realloc( oaddr, dim * elemSize );
         } // reallocarray
 …
         // Same as malloc() except the memory address is a multiple of alignment, which must be a power of two. (obsolete)
         void * memalign( size_t alignment, size_t size ) {
+        void * memalign( size_t alignment, size_t size ) libcfa_public {
                 #ifdef __STATISTICS__
                 if ( likely( size > 0 ) ) {
 …
         // Same as aalloc() with memory alignment.
         void * amemalign( size_t alignment, size_t dim, size_t elemSize ) {
+        void * amemalign( size_t alignment, size_t dim, size_t elemSize ) libcfa_public {
                 size_t size = dim * elemSize;
                 #ifdef __STATISTICS__
 …
         // Same as calloc() with memory alignment.
         void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) {
+        void * cmemalign( size_t alignment, size_t dim, size_t elemSize ) libcfa_public {
                 size_t size = dim * elemSize;
           if ( unlikely( size ) == 0 ) {                                        // 0 BYTE ALLOCATION RETURNS NULL POINTER
 …
         // Same as memalign(), but ISO/IEC 2011 C11 Section 7.22.2 states: the value of size shall be an integral multiple
         // of alignment. This requirement is universally ignored.
         void * aligned_alloc( size_t alignment, size_t size ) {
+        void * aligned_alloc( size_t alignment, size_t size ) libcfa_public {
                 return memalign( alignment, size );
         } // aligned_alloc
 …
         // is 0, then posix_memalign() returns either 0p, or a unique pointer value that can later be successfully passed to
         // free(3).
         int posix_memalign( void ** memptr, size_t alignment, size_t size ) {
+        int posix_memalign( void ** memptr, size_t alignment, size_t size ) libcfa_public {
           if ( unlikely( alignment < libAlign() || ! is_pow2( alignment ) ) ) return EINVAL; // check alignment
                 *memptr = memalign( alignment, size );
 …
         // Allocates size bytes and returns a pointer to the allocated memory. The memory address shall be a multiple of the
         // page size.  It is equivalent to memalign(sysconf(_SC_PAGESIZE),size).
         void * valloc( size_t size ) {
+        void * valloc( size_t size ) libcfa_public {
                 return memalign( __page_size, size );
         } // valloc
 …
         // Same as valloc but rounds size to multiple of page size.
         void * pvalloc( size_t size ) {
+        void * pvalloc( size_t size ) libcfa_public {
                 return memalign( __page_size, ceiling2( size, __page_size ) ); // round size to multiple of page size
         } // pvalloc
 …
         // or realloc().  Otherwise, or if free(ptr) has already been called before, undefined behaviour occurs. If ptr is
         // 0p, no operation is performed.
         void free( void * addr ) {
+        void free( void * addr ) libcfa_public {
           if ( unlikely( addr == 0p ) ) {                                       // special case
                         #ifdef __STATISTICS__
 …
         // Returns the alignment of an allocation.
         size_t malloc_alignment( void * addr ) {
+        size_t malloc_alignment( void * addr ) libcfa_public {
           if ( unlikely( addr == 0p ) ) return libAlign();      // minimum alignment
                 Heap.Storage.Header * header = HeaderAddr( addr );
 …
         // Returns true if the allocation is zero filled, e.g., allocated by calloc().
         bool malloc_zero_fill( void * addr ) {
+        bool malloc_zero_fill( void * addr ) libcfa_public {
           if ( unlikely( addr == 0p ) ) return false;           // null allocation is not zero fill
                 Heap.Storage.Header * header = HeaderAddr( addr );
 …
         // Returns original total allocation size (not bucket size) => array size is dimension * sizeof(T).
         size_t malloc_size( void * addr ) {
+        size_t malloc_size( void * addr ) libcfa_public {
           if ( unlikely( addr == 0p ) ) return 0;                       // null allocation has zero size
                 Heap.Storage.Header * header = HeaderAddr( addr );
 …
         // Returns the number of usable bytes in the block pointed to by ptr, a pointer to a block of memory allocated by
         // malloc or a related function.
         size_t malloc_usable_size( void * addr ) {
+        size_t malloc_usable_size( void * addr ) libcfa_public {
           if ( unlikely( addr == 0p ) ) return 0;                       // null allocation has 0 size
                 Heap.Storage.Header * header;
 …
         // Prints (on default standard error) statistics about memory allocated by malloc and related functions.
         void malloc_stats( void ) {
+        void malloc_stats( void ) libcfa_public {
                 #ifdef __STATISTICS__
                 printStats();
 …
         // Changes the file descriptor where malloc_stats() writes statistics.
         int malloc_stats_fd( int fd __attribute__(( unused )) ) {
+        int malloc_stats_fd( int fd __attribute__(( unused )) ) libcfa_public {
                 #ifdef __STATISTICS__
                 int temp = stats_fd;
 …
         // The string is printed on the file stream stream.  The exported string includes information about all arenas (see
         // malloc).
         int malloc_info( int options, FILE * stream __attribute__(( unused )) ) {
+        int malloc_info( int options, FILE * stream __attribute__(( unused )) ) libcfa_public {
           if ( options != 0 ) { errno = EINVAL; return -1; }
                 #ifdef __STATISTICS__
 …
         // Adjusts parameters that control the behaviour of the memory-allocation functions (see malloc). The param argument
         // specifies the parameter to be modified, and value specifies the new value for that parameter.
         int mallopt( int option, int value ) {
+        int mallopt( int option, int value ) libcfa_public {
           if ( value < 0 ) return 0;
                 choose( option ) {
 …
         // Attempt to release free memory at the top of the heap (by calling sbrk with a suitable argument).
         int malloc_trim( size_t ) {
+        int malloc_trim( size_t ) libcfa_public {
                 return 0;                                                                               // => impossible to release memory
         } // malloc_trim
 …
         // structure dynamically allocated via malloc, and a pointer to that data structure is returned as the function
         // result.  (The caller must free this memory.)
         void * malloc_get_state( void ) {
+        void * malloc_get_state( void ) libcfa_public {
                 return 0p;                                                                              // unsupported
         } // malloc_get_state
 …
         // Restores the state of all malloc internal bookkeeping variables to the values recorded in the opaque data
         // structure pointed to by state.
         int malloc_set_state( void * ) {
+        int malloc_set_state( void * ) libcfa_public {
                 return 0;                                                                               // unsupported
         } // malloc_set_state
 …
         // Sets the amount (bytes) to extend the heap when there is insufficient free storage to service an allocation.
         __attribute__((weak)) size_t malloc_expansion() { return __CFA_DEFAULT_HEAP_EXPANSION__; }
+        __attribute__((weak)) size_t malloc_expansion() libcfa_public { return __CFA_DEFAULT_HEAP_EXPANSION__; }
         // Sets the crossover point between allocations occurring in the sbrk area or separately mmapped.
         __attribute__((weak)) size_t malloc_mmap_start() { return __CFA_DEFAULT_MMAP_START__; }
+        __attribute__((weak)) size_t malloc_mmap_start() libcfa_public { return __CFA_DEFAULT_MMAP_START__; }
         // Amount subtracted to adjust for unfreed program storage (debug only).
         __attribute__((weak)) size_t malloc_unfreed() { return __CFA_DEFAULT_HEAP_UNFREED__; }
+        __attribute__((weak)) size_t malloc_unfreed() libcfa_public { return __CFA_DEFAULT_HEAP_UNFREED__; }
 } // extern "C"
 // Must have CFA linkage to overload with C linkage realloc.
 void * resize( void * oaddr, size_t nalign, size_t size ) {
+void * resize( void * oaddr, size_t nalign, size_t size ) libcfa_public {
         // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
   if ( unlikely( size == 0 ) ) {                                                // special cases
 …
 void * realloc( void * oaddr, size_t nalign, size_t size ) {
+void * realloc( void * oaddr, size_t nalign, size_t size ) libcfa_public {
         // If size is equal to 0, either NULL or a pointer suitable to be passed to free() is returned.
   if ( unlikely( size == 0 ) ) {                                                // special cases

libcfa/src/interpose.cfa

-              r29d8c02
+              r74ec742
 //=============================================================================================
 void preload_libgcc(void) {
+static void preload_libgcc(void) {
         dlopen( "libgcc_s.so.1", RTLD_NOW );
         if ( const char * error = dlerror() ) abort( "interpose_symbol : internal error pre-loading libgcc, %s\n", error );
 …
 typedef void (* generic_fptr_t)(void);
 generic_fptr_t interpose_symbol( const char symbol[], const char version[] ) {
+static generic_fptr_t interpose_symbol( const char symbol[], const char version[] ) {
         const char * error;
 …
 //=============================================================================================
 void sigHandler_segv( __CFA_SIGPARMS__ );
 void sigHandler_ill ( __CFA_SIGPARMS__ );
 void sigHandler_fpe ( __CFA_SIGPARMS__ );
 void sigHandler_abrt( __CFA_SIGPARMS__ );
 void sigHandler_term( __CFA_SIGPARMS__ );
 struct {
+static void sigHandler_segv( __CFA_SIGPARMS__ );
+static void sigHandler_ill ( __CFA_SIGPARMS__ );
+static void sigHandler_fpe ( __CFA_SIGPARMS__ );
+static void sigHandler_abrt( __CFA_SIGPARMS__ );
+static void sigHandler_term( __CFA_SIGPARMS__ );
+static struct {
         void (* exit)( int ) __attribute__(( __noreturn__ ));
         void (* abort)( void ) __attribute__(( __noreturn__ ));
 } __cabi_libc;
 int cfa_main_returned;
+libcfa_public int cfa_main_returned;
 extern "C" {
 …
 // Forward declare abort after the __typeof__ call to avoid ambiguities
 void exit( int status, const char fmt[], ... ) __attribute__(( format(printf, 2, 3), __nothrow__, __leaf__, __noreturn__ ));
 void abort( const char fmt[], ... ) __attribute__(( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
 void abort( bool signalAbort, const char fmt[], ... ) __attribute__(( format(printf, 2, 3), __nothrow__, __leaf__, __noreturn__ ));
 void __abort( bool signalAbort, const char fmt[], va_list args ) __attribute__(( __nothrow__, __leaf__, __noreturn__ ));
+libcfa_public void exit( int status, const char fmt[], ... ) __attribute__(( format(printf, 2, 3), __nothrow__, __leaf__, __noreturn__ ));
+libcfa_public void abort( const char fmt[], ... ) __attribute__(( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ ));
+libcfa_public void abort( bool signalAbort, const char fmt[], ... ) __attribute__(( format(printf, 2, 3), __nothrow__, __leaf__, __noreturn__ ));
+libcfa_public void __abort( bool signalAbort, const char fmt[], va_list args ) __attribute__(( __nothrow__, __leaf__, __noreturn__ ));
 extern "C" {
         void abort( void ) __attribute__(( __nothrow__, __leaf__, __noreturn__ )) {
+        libcfa_public void abort( void ) __attribute__(( __nothrow__, __leaf__, __noreturn__ )) {
                 abort( false, "%s", "" );
+        }
         void __cabi_abort( const char fmt[], ... ) __attribute__(( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ )) {
+        libcfa_public void __cabi_abort( const char fmt[], ... ) __attribute__(( format(printf, 1, 2), __nothrow__, __leaf__, __noreturn__ )) {
                 va_list argp;
                 va_start( argp, fmt );
 …
+        }
         void exit( int status ) __attribute__(( __nothrow__, __leaf__, __noreturn__ )) {
+        libcfa_public void exit( int status ) __attribute__(( __nothrow__, __leaf__, __noreturn__ )) {
                 __cabi_libc.exit( status );
+        }

libcfa/src/iostream.cfa

r29d8c02	r74ec742
32	32	#include "bitmanip.hfa" // high1
33	33
	34	#pragma GCC visibility push(default)
34	35
35	36	// ********************************* ostream *********************************

libcfa/src/limits.cfa

r29d8c02	r74ec742
20	20	#include <complex.h>
21	21	#include "limits.hfa"
	22
	23	#pragma GCC visibility push(default)
22	24
23	25	// Integral Constants

libcfa/src/memory.cfa

r29d8c02	r74ec742
16	16	#include "memory.hfa"
17	17	#include "stdlib.hfa"
	18
	19	#pragma GCC visibility push(default)
18	20
19	21	// Internal data object.

libcfa/src/parseargs.cfa

-              r29d8c02
+              r74ec742
 #include "common.hfa"
 #include "limits.hfa"
+#pragma GCC visibility push(default)
 extern int cfa_args_argc __attribute__((weak));
 …
+        }
+        if(strcmp(arg, "Y") == 0) {
+                value = true;
+                return true;
+        }
+        if(strcmp(arg, "y") == 0) {
+                value = true;
+                return true;
+        }
         if(strcmp(arg, "no") == 0) {
+                value = false;
+                return true;
+        }
+        if(strcmp(arg, "N") == 0) {
+                value = false;
+                return true;
+        }
+        if(strcmp(arg, "n") == 0) {
                 value = false;
                 return true;

libcfa/src/parseconfig.cfa

-              r29d8c02
+              r74ec742
+#pragma GCC visibility push(default)
 // *********************************** exceptions ***********************************
 // TODO: Add names of missing config entries to exception (see further below)
 static vtable(Missing_Config_Entries) Missing_Config_Entries_vt;
+vtable(Missing_Config_Entries) Missing_Config_Entries_vt;
 [ void ] ?{}( & Missing_Config_Entries this, unsigned int num_missing ) {
 …
 static vtable(Parse_Failure) Parse_Failure_vt;
+vtable(Parse_Failure) Parse_Failure_vt;
 [ void ] ?{}( & Parse_Failure this, [] char failed_key, [] char failed_value ) {
 …
 static vtable(Validation_Failure) Validation_Failure_vt;
+vtable(Validation_Failure) Validation_Failure_vt;
 [ void ] ?{}( & Validation_Failure this, [] char failed_key, [] char failed_value ) {
 …
 [ bool ] comments( & ifstream in, [] char name ) {
+static [ bool ] comments( & ifstream in, [] char name ) {
         while () {
                 in | name;

libcfa/src/rational.cfa

r29d8c02	r74ec742
17	17	#include "fstream.hfa"
18	18	#include "stdlib.hfa"
	19
	20	#pragma GCC visibility push(default)
19	21
20	22	forall( T \| Arithmetic( T ) ) {

libcfa/src/startup.cfa

-              r29d8c02
+              r74ec742
         } // __cfaabi_appready_shutdown
         void disable_interrupts() __attribute__(( weak )) {}
         void enable_interrupts() __attribute__(( weak )) {}
+        void disable_interrupts() __attribute__(( weak )) libcfa_public {}
+        void enable_interrupts() __attribute__(( weak )) libcfa_public {}
 …
 struct __spinlock_t;
 extern "C" {
         void __cfaabi_dbg_record_lock(struct __spinlock_t & this, const char prev_name[]) __attribute__(( weak )) {}
+        void __cfaabi_dbg_record_lock(struct __spinlock_t & this, const char prev_name[]) __attribute__(( weak )) libcfa_public {}
+}

libcfa/src/stdlib.cfa

-              r29d8c02
+              r74ec742
 #include <complex.h>                                                                    // _Complex_I
 #include <assert.h>
+#pragma GCC visibility push(default)
 //---------------------------------------
 …
 #define GENERATOR LCG
+uint32_t __global_random_seed;                                                  // sequential/concurrent
+uint32_t __global_random_state;                                                 // sequential only
+// would be cool to make hidden but it's needed for libcfathread
+__attribute__((visibility("default"))) uint32_t __global_random_seed;                                                   // sequential/concurrent
+__attribute__((visibility("hidden"))) uint32_t __global_random_state;                                                   // sequential only
 void set_seed( PRNG & prng, uint32_t seed_ ) with( prng ) { state = seed = seed_; GENERATOR( state ); } // set seed

libcfa/src/strstream.cfa

-              r29d8c02
+              r74ec742
 //
+//
 // Cforall Version 1.0.0 Copyright (C) 2021 University of Waterloo
 //
+//
 // The contents of this file are covered under the licence agreement in the
 // file "LICENCE" distributed with Cforall.
 //
 // strstream.cfa --
 //
+// strstream.cfa --
+//
 // Author           : Peter A. Buhr
 // Created On       : Thu Apr 22 22:24:35 2021
 …
 // Last Modified On : Sun Oct 10 16:13:20 2021
 // Update Count     : 101
 //
+//
 #include "strstream.hfa"
 …
 #include <unistd.h>                                                                             // sbrk, sysconf
+#pragma GCC visibility push(default)
 // *********************************** strstream ***********************************

libcfa/src/time.cfa

r29d8c02	r74ec742
18	18	#include <stdio.h> // snprintf
19	19	#include <assert.h>
	20
	21	#pragma GCC visibility push(default)
20	22
21	23	static char * nanomsd( long int ns, char * buf ) { // most significant digits

libcfa/src/virtual.c

r29d8c02	r74ec742
16	16	#include "virtual.h"
17	17	#include "assert.h"
	18
	19	#pragma GCC visibility push(default)
18	20
19	21	int __cfavir_is_parent(

src/AST/Convert.cpp

-              r29d8c02
+              r74ec742
         };
     template<typename T>
     Getter<T> get() {
         return Getter<T>{ *this };
+    }
+        template<typename T>
+        Getter<T> get() {
+                return Getter<T>{ *this };
+        }
         Label makeLabel(Statement * labelled, const ast::Label& label) {
 …
                         // GET_ACCEPT_1(type, FunctionType),
                         std::move(forall),
+                        std::move(assertions),
                         std::move(paramVars),
                         std::move(returnVars),
 …
                 cache.emplace( old, decl );
-                decl->assertions = std::move(assertions);
                 decl->withExprs = GET_ACCEPT_V(withExprs, Expr);
                 decl->stmts = GET_ACCEPT_1(statements, CompoundStmt);
 …
+        }
+        // Convert SynTree::EnumDecl to AST::EnumDecl
         virtual void visit( const EnumDecl * old ) override final {
                 if ( inCache( old ) ) return;
 …
                         ty->forall.emplace_back(new ast::TypeInstType(param));
                         for (auto asst : param->assertions) {
+                                ty->assertions.emplace_back(new ast::VariableExpr({}, asst));
+                                ty->assertions.emplace_back(
+                                        new ast::VariableExpr(param->location, asst));
+                        }
+                }

src/AST/Copy.cpp

-              r29d8c02
+              r74ec742
 // Created On       : Thr Nov 11  9:16:00 2019
 // Last Modified By : Andrew Beach
 // Last Modified On : Thr Nov 11  9:28:00 2021
 // Update Count     : 0
+// Last Modified On : Tue May  3 16:28:00 2022
+// Update Count     : 1
 //
 …
+        }
+        void postvisit( const UniqueExpr * node ) {
+                readonlyInsert( &node->object );
+        }
         void postvisit( const MemberExpr * node ) {
                 readonlyInsert( &node->member );

src/AST/Decl.cpp

-              r29d8c02
+              r74ec742
 // Author           : Aaron B. Moss
 // Created On       : Thu May 9 10:00:00 2019
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Tue Jan 12 16:54:55 2021
 // Update Count     : 23
+// Last Modified By : Andrew Beach
+// Last Modified On : Thu May  5 12:10:00 2022
+// Update Count     : 24
 //
 …
 // --- FunctionDecl
 FunctionDecl::FunctionDecl( const CodeLocation & loc, const std::string & name,
+FunctionDecl::FunctionDecl( const CodeLocation & loc, const std::string & name,
         std::vector<ptr<TypeDecl>>&& forall,
         std::vector<ptr<DeclWithType>>&& params, std::vector<ptr<DeclWithType>>&& returns,
 …
+        }
         this->type = ftype;
+}
+FunctionDecl::FunctionDecl( const CodeLocation & location, const std::string & name, // constructor overload that receives the assertions as their own list
+        std::vector<ptr<TypeDecl>>&& forall, std::vector<ptr<DeclWithType>>&& assertions,
+        std::vector<ptr<DeclWithType>>&& params, std::vector<ptr<DeclWithType>>&& returns,
+        CompoundStmt * stmts, Storage::Classes storage, Linkage::Spec linkage,
+        std::vector<ptr<Attribute>>&& attrs, Function::Specs fs, bool isVarArgs)
+: DeclWithType( location, name, storage, linkage, std::move(attrs), fs ),
+                params( std::move(params) ), returns( std::move(returns) ),
+                type_params( std::move( forall) ), assertions( std::move( assertions ) ),
+                type( nullptr ), stmts( stmts ) { // member 'type' starts null and is assigned at the end of the body
+        FunctionType * type = new FunctionType( (isVarArgs) ? VariableArgs : FixedArgs ); // local 'type' shadows the member of the same name
+        for ( auto & param : this->params ) { // this-> selects the members, since the parameters of the same name were moved from
+                type->params.emplace_back( param->get_type() ); // parameter types, in declaration order
+        }
+        for ( auto & ret : this->returns ) {
+                type->returns.emplace_back( ret->get_type() ); // return types, in declaration order
+        }
+        for ( auto & param : this->type_params ) {
+                type->forall.emplace_back( new TypeInstType( param ) ); // one type-instance per type parameter
+        }
+        for ( auto & assertion : this->assertions ) {
+                type->assertions.emplace_back(
+                        new VariableExpr( assertion->location, assertion ) ); // reference to the assertion declaration, at that declaration's location
+        }
+        this->type = type; // publish the completed function type on the declaration
+}

src/AST/Decl.hpp

-              r29d8c02
+              r74ec742
 // Author           : Aaron B. Moss
 // Created On       : Thu May 9 10:00:00 2019
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Fri Mar 12 18:25:05 2021
 // Update Count     : 32
+// Last Modified By : Andrew Beach
+// Last Modified On : Thu May  5 12:09:00 2022
+// Update Count     : 33
 //
 …
         std::vector< ptr<Expr> > withExprs;
+        // The difference between the two constructors is in how they handle
+        // assertions. The first constructor uses the assertions from the type
+        // parameters, in the style of the old ast, and puts them on the type.
+        // The second takes an explicit list of assertions and builds a list of
+        // references to them on the type.
         FunctionDecl( const CodeLocation & loc, const std::string & name, std::vector<ptr<TypeDecl>>&& forall,
                 std::vector<ptr<DeclWithType>>&& params, std::vector<ptr<DeclWithType>>&& returns,
                 CompoundStmt * stmts, Storage::Classes storage = {}, Linkage::Spec linkage = Linkage::C,
                 std::vector<ptr<Attribute>>&& attrs = {}, Function::Specs fs = {}, bool isVarArgs = false);
+        // : DeclWithType( loc, name, storage, linkage, std::move(attrs), fs ), params(std::move(params)), returns(std::move(returns)),
+        //  stmts( stmts ) {}
+        FunctionDecl( const CodeLocation & location, const std::string & name,
+                std::vector<ptr<TypeDecl>>&& forall, std::vector<ptr<DeclWithType>>&& assertions,
+                std::vector<ptr<DeclWithType>>&& params, std::vector<ptr<DeclWithType>>&& returns,
+                CompoundStmt * stmts, Storage::Classes storage = {}, Linkage::Spec linkage = Linkage::C,
+                std::vector<ptr<Attribute>>&& attrs = {}, Function::Specs fs = {}, bool isVarArgs = false);
         const Type * get_type() const override;

src/AST/Expr.cpp

-              r29d8c02
+              r74ec742
 // Created On       : Wed May 15 17:00:00 2019
 // Last Modified By : Andrew Beach
 // Created On       : Tue Nov 30 14:23:00 2021
 // Update Count     : 7
+// Created On       : Wed May 18 13:56:00 2022
+// Update Count     : 8
 //
 …
 #include "Copy.hpp"                // for shallowCopy
-#include "Eval.hpp"                // for call
 #include "GenericSubstitution.hpp"
 #include "LinkageSpec.hpp"
 …
 // --- UntypedExpr
+bool UntypedExpr::get_lvalue() const {
+        std::string fname = InitTweak::getFunctionName( this );
+        return lvalueFunctionNames.count( fname );
+}
 UntypedExpr * UntypedExpr::createDeref( const CodeLocation & loc, const Expr * arg ) {
         assert( arg );
         UntypedExpr * ret = call( loc, "*?", arg );
+        UntypedExpr * ret = createCall( loc, "*?", { arg } );
         if ( const Type * ty = arg->result ) {
                 const Type * base = InitTweak::getPointerBase( ty );
 …
+}
-bool UntypedExpr::get_lvalue() const {
-        std::string fname = InitTweak::getFunctionName( this );
-        return lvalueFunctionNames.count( fname );
+}
 UntypedExpr * UntypedExpr::createAssign( const CodeLocation & loc, const Expr * lhs, const Expr * rhs ) {
         assert( lhs && rhs );
         UntypedExpr * ret = call( loc, "?=?", lhs, rhs );
+        UntypedExpr * ret = createCall( loc, "?=?", { lhs, rhs } );
         if ( lhs->result && rhs->result ) {
                 // if both expressions are typed, assumes that this assignment is a C bitwise assignment,
 …
+        }
         return ret;
+}
+UntypedExpr * UntypedExpr::createCall( const CodeLocation & loc,
+                const std::string & name, std::vector<ptr<Expr>> && args ) {
+        return new UntypedExpr( loc,
+                        new NameExpr( loc, name ), std::move( args ) );
+}

src/AST/Expr.hpp

-              r29d8c02
+              r74ec742
         /// Creates a new assignment expression
         static UntypedExpr * createAssign( const CodeLocation & loc, const Expr * lhs, const Expr * rhs );
+        /// Creates a new call of a variable.
+        static UntypedExpr * createCall( const CodeLocation & loc,
+                const std::string & name, std::vector<ptr<Expr>> && args );
         const Expr * accept( Visitor & v ) const override { return v.visit( this ); }
 …
 public:
         ptr<Expr> expr;
         ptr<ObjectDecl> object;
+        readonly<ObjectDecl> object;
         ptr<VariableExpr> var;
         unsigned long long id;

src/AST/Label.hpp

r29d8c02	r74ec742
34	34	std::vector< ptr<Attribute> > attributes;
35	35
36		Label( ~~CodeLocation~~ loc, const std::string& name = "",
	36	Label( const CodeLocation& loc, const std::string& name = "",
37	37	std::vector<ptr<Attribute>> && attrs = std::vector<ptr<Attribute>>{} )
38	38	: location( loc ), name( name ), attributes( attrs ) {}

src/AST/Node.hpp

-              r29d8c02
+              r74ec742
 // Created On       : Wed May 8 10:27:04 2019
 // Last Modified By : Andrew Beach
 // Last Modified On : Fri Mar 25 10:33:00 2022
 // Update Count     : 7
+// Last Modified On : Mon May  9 10:20:00 2022
+// Update Count     : 8
 //
 …
         bool unique() const { return strong_count == 1; }
+        bool isManaged() const {return strong_count > 0; }
+        bool isManaged() const { return strong_count > 0; }
+        bool isReferenced() const { return weak_count > 0; }
+        bool isStable() const {
+                return (1 == strong_count || (1 < strong_count && 0 == weak_count));
+        }
 private:

src/AST/Pass.proto.hpp

-              r29d8c02
+              r74ec742
         template< typename node_t >
         struct result1 {
                 bool differs;
                 const node_t * value;
+                bool differs = false;
+                const node_t * value = nullptr;
                 template< typename object_t, typename super_t, typename field_t >
 …
                 };
                 bool differs;
+                bool differs = false;
                 container_t< delta > values;
 …
         template< template<class...> class container_t, typename node_t >
         struct resultN {
                 bool differs;
+                bool differs = false;
                 container_t<ptr<node_t>> values;

src/AST/Stmt.cpp

-              r29d8c02
+              r74ec742
 // Author           : Aaron B. Moss
 // Created On       : Wed May  8 13:00:00 2019
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Wed Feb  2 19:01:20 2022
 // Update Count     : 3
+// Last Modified By : Andrew Beach
+// Last Modified On : Tue May  3 15:18:20 2022
+// Update Count     : 4
 //
 #include "Stmt.hpp"
+#include "Copy.hpp"
 #include "DeclReplacer.hpp"
 #include "Type.hpp"
 …
 // --- CompoundStmt
+CompoundStmt::CompoundStmt( const CompoundStmt& other ) : Stmt(other), kids(other.kids) {
+CompoundStmt::CompoundStmt( const CompoundStmt& other ) : Stmt(other), kids() {
+        // Statements can have weak references to them, if that happens inserting
+        // the original node into the new list will put the original node in a
+        // bad state, where it cannot be mutated. To avoid this, just perform an
+        // additional shallow copy on the statement.
+        for ( const Stmt * kid : other.kids ) {
+                if ( kid->isReferenced() ) {
+                        kids.emplace_back( ast::shallowCopy( kid ) );
+                } else {
+                        kids.emplace_back( kid );
+                }
+        }
         // when cloning a compound statement, we may end up cloning declarations which
         // are referred to by VariableExprs throughout the block. Cloning a VariableExpr

src/AST/Stmt.hpp

-              r29d8c02
+              r74ec742
         // cannot be, they are sub-types of this type, for organization.
     StmtClause( const CodeLocation & loc )
+        StmtClause( const CodeLocation & loc )
                 : ParseNode(loc) {}
 …
 class WaitForClause final : public StmtClause {
   public:
     ptr<Expr> target_func;
     std::vector<ptr<Expr>> target_args;
     ptr<Stmt> stmt;
     ptr<Expr> cond;
     WaitForClause( const CodeLocation & loc )
+        ptr<Expr> target_func;
+        std::vector<ptr<Expr>> target_args;
+        ptr<Stmt> stmt;
+        ptr<Expr> cond;
+        WaitForClause( const CodeLocation & loc )
                 : StmtClause( loc ) {}
         const WaitForClause * accept( Visitor & v ) const override { return v.visit( this ); }
   private:
     WaitForClause * clone() const override { return new WaitForClause{ *this }; }
     MUTATE_FRIEND
+        WaitForClause * clone() const override { return new WaitForClause{ *this }; }
+        MUTATE_FRIEND
 };

src/AST/Util.cpp

-              r29d8c02
+              r74ec742
 // file "LICENCE" distributed with Cforall.
 //
 // Util.hpp -- General utilities for working with the AST.
+// Util.cpp -- General utilities for working with the AST.
 //
 // Author           : Andrew Beach
 // Created On       : Wed Jan 19  9:46:00 2022
 // Last Modified By : Andrew Beach
 // Last Modified On : Fri Mar 11 18:07:00 2022
 // Update Count     : 1
+// Last Modified On : Wed May 11 16:16:00 2022
+// Update Count     : 3
 //
 …
 /// Check that every node that can have a CodeLocation has it set.
+struct SetCodeLocationsCore {
+        void previsit( const ParseNode * node ) {
+                assert( node->location.isSet() );
+void isCodeLocationSet( const ParseNode * node ) {
+        assert( node->location.isSet() );
+}
+void areLabelLocationsSet( const Stmt * stmt ) {
+        for ( const Label& label : stmt->labels ) {
+                assert( label.location.isSet() );
+        }
+};
+}
+/// Make sure the reference counts are in a valid combination.
+void isStable( const Node * node ) {
+        assert( node->isStable() );
+}
+/// Check that a FunctionDecl is synchronized with its FunctionType.
+void functionDeclMatchesType( const FunctionDecl * decl ) {
+        // The type is a cache of sorts, if it is missing that is only a
+        // problem if isTypeFixed is set.
+        if ( decl->isTypeFixed ) {
+                assert( decl->type );
+        } else if ( !decl->type ) {
+                return;
+        }
+        const FunctionType * type = decl->type;
+        // Check that `type->forall` corresponds with `decl->type_params`.
+        assert( type->forall.size() == decl->type_params.size() );
+        // Check that `type->assertions` corresponds with `decl->assertions`.
+        assert( type->assertions.size() == decl->assertions.size() );
+        // Check that `type->params` corresponds with `decl->params`.
+        assert( type->params.size() == decl->params.size() );
+        // Check that `type->returns` corresponds with `decl->returns`.
+        assert( type->returns.size() == decl->returns.size() );
+}
 struct InvariantCore {
 …
         // None of the passes should make changes so ordering doesn't matter.
         NoStrongCyclesCore no_strong_cycles;
-        SetCodeLocationsCore set_code_locations;
         void previsit( const Node * node ) {
                 no_strong_cycles.previsit( node );
+                isStable( node );
+        }
         void previsit( const ParseNode * node ) {
+                no_strong_cycles.previsit( node );
+                set_code_locations.previsit( node );
+                previsit( (const Node *)node );
+                isCodeLocationSet( node );
+        }
+        void previsit( const FunctionDecl * node ) {
+                previsit( (const ParseNode *)node );
+                functionDeclMatchesType( node );
+        }
+        void previsit( const Stmt * node ) {
+                previsit( (const ParseNode *)node );
+                areLabelLocationsSet( node );
+        }

src/AST/module.mk

r29d8c02	r74ec742
29	29	AST/DeclReplacer.cpp \
30	30	AST/DeclReplacer.hpp \
31		~~AST/Eval.hpp \~~
32	31	AST/Expr.cpp \
33	32	AST/Expr.hpp \

src/CodeGen/CodeGenerator.cc

-              r29d8c02
+              r74ec742
 } // namespace CodeGen
-unsigned Indenter::tabsize = 2;
-std::ostream & operator<<( std::ostream & out, const BaseSyntaxNode * node ) {
-        if ( node ) {
-                node->print( out );
-        } else {
-                out << "nullptr";
+        }
-        return out;
+}
 // Local Variables: //
 // tab-width: 4 //

src/CodeGen/FixMain.cc

r29d8c02	r74ec742
49	49
50	50	}
51
52		~~bool FixMain::replace_main = false;~~
53	51
54	52	template<typename container>

src/CodeGen/LinkOnce.cc

-              r29d8c02
+              r74ec742
                                 new ConstantExpr( Constant::from_string( section_name ) )
                         );
+                        // Unconditionally add "visibility(default)" to anything with gnu.linkonce
+                        // visibility is a mess otherwise
+                        attributes.push_back(new Attribute("visibility", {new ConstantExpr( Constant::from_string( "default" ) )}));
+                }
                 visit_children = false;

src/CodeGen/module.mk

-              r29d8c02
+              r74ec742
 ## Author           : Richard C. Bilson
 ## Created On       : Mon Jun  1 17:49:17 2015
 ## Last Modified By : Peter A. Buhr
 ## Last Modified On : Sat Dec 14 07:29:42 2019
 ## Update Count     : 4
+## Last Modified By : Andrew Beach
+## Last Modified On : Tue May 17 14:26:00 2022
+## Update Count     : 5
 ###############################################################################
+#SRC +=  ArgTweak/Rewriter.cc \
+#       ArgTweak/Mutate.cc
+SRC_CODEGEN = \
+        CodeGen/FixMain2.cc \
+        CodeGen/FixMain.h \
+        CodeGen/OperatorTable.cc \
+        CodeGen/OperatorTable.h
 SRC_CODEGEN = \
+SRC += $(SRC_CODEGEN) \
         CodeGen/CodeGenerator.cc \
         CodeGen/CodeGenerator.h \
+        CodeGen/Generate.cc \
+        CodeGen/Generate.h \
         CodeGen/FixMain.cc \
+        CodeGen/FixMain.h \
+        CodeGen/FixNames.cc \
+        CodeGen/FixNames.h \
         CodeGen/GenType.cc \
         CodeGen/GenType.h \
         CodeGen/LinkOnce.cc \
         CodeGen/LinkOnce.h \
-        CodeGen/OperatorTable.cc \
-        CodeGen/OperatorTable.h \
         CodeGen/Options.h
-SRC += $(SRC_CODEGEN) CodeGen/Generate.cc CodeGen/Generate.h CodeGen/FixNames.cc CodeGen/FixNames.h
 SRCDEMANGLE += $(SRC_CODEGEN)

src/Common/CodeLocationTools.cpp

-              r29d8c02
+              r74ec742
 // Created On       : Fri Dec  4 15:42:00 2020
 // Last Modified By : Andrew Beach
 // Last Modified On : Mon Mar 14 15:14:00 2022
 // Update Count     : 4
+// Last Modified On : Wed May 11 16:16:00 2022
+// Update Count     : 5
 //
 …
 namespace {
-// There are a lot of helpers in this file that could be used much more
-// generally if anyone has another use for them.
-// Check if a node type has a code location.
-template<typename node_t>
-struct has_code_location : public std::is_base_of<ast::ParseNode, node_t> {};
-template<typename node_t, bool has_location>
-struct __GetCL;
-template<typename node_t>
-struct __GetCL<node_t, true> {
-        static inline CodeLocation const * get( node_t const * node ) {
-                return &node->location;
+        }
-        static inline CodeLocation * get( node_t * node ) {
-                return &node->location;
+        }
-};
-template<typename node_t>
-struct __GetCL<node_t, false> {
-        static inline CodeLocation * get( node_t const * ) {
-                return nullptr;
+        }
-};
-template<typename node_t>
-CodeLocation const * get_code_location( node_t const * node ) {
-        return __GetCL< node_t, has_code_location< node_t >::value >::get( node );
+}
-template<typename node_t>
-CodeLocation * get_code_location( node_t * node ) {
-        return __GetCL< node_t, has_code_location< node_t >::value >::get( node );
+}
 // Fill every location with a nearby (parent) location.
 class FillCore : public ast::WithGuards {
         CodeLocation const * parent;
+        template<typename node_t>
+        node_t const * parse_visit( node_t const * node ) {
+                if ( node->location.isUnset() ) {
+                        assert( parent );
+                        node_t * newNode = ast::mutate( node );
+                        newNode->location = *parent;
+                        return newNode;
+                }
+                GuardValue( parent ) = &node->location;
+                return node;
+        }
+        bool hasUnsetLabels( const ast::Stmt * stmt ) {
+                for ( const ast::Label& label : stmt->labels ) {
+                        if ( label.location.isUnset() ) {
+                                return true;
+                        }
+                }
+                return false;
+        }
+        template<typename node_t>
+        node_t const * stmt_visit( node_t const * node ) {
+                assert( node->location.isSet() );
+                if ( hasUnsetLabels( node ) ) {
+                        node_t * newNode = ast::mutate( node );
+                        for ( ast::Label& label : newNode->labels ) {
+                                if ( label.location.isUnset() ) {
+                                        label.location = newNode->location;
+                                }
+                        }
+                        return newNode;
+                }
+                return node;
+        }
+        template<typename node_t>
+        auto visit( node_t const * node, long ) {
+                return node;
+        }
+        template<typename node_t>
+        auto visit( node_t const * node, int ) -> typename
+                        std::remove_reference< decltype( node->location, node ) >::type {
+                return parse_visit( node );
+        }
+        template<typename node_t>
+        auto visit( node_t const * node, char ) -> typename
+                        std::remove_reference< decltype( node->labels, node ) >::type {
+                return stmt_visit( parse_visit( node ) );
+        }
 public:
         FillCore() : parent( nullptr ) {}
+        FillCore( const CodeLocation& location ) : parent( &location ) {
+                assert( location.isSet() );
+        }
         template<typename node_t>
         node_t const * previsit( node_t const * node ) {
+                GuardValue( parent );
+                CodeLocation const * location = get_code_location( node );
+                if ( location && location->isUnset() ) {
+                        assert( parent );
+                        node_t * newNode = ast::mutate( node );
+                        CodeLocation * newLocation = get_code_location( newNode );
+                        assert( newLocation );
+                        *newLocation = *parent;
+                        parent = newLocation;
+                        return newNode;
+                } else if ( location ) {
+                        parent = location;
+                }
+                return node;
+                return visit( node, '\0' );
+        }
 };
 …
         template<typename node_t>
+        void previsit( node_t const * node ) {
+                CodeLocation const * location = get_code_location( node );
+                if ( location && location->isUnset() ) {
+        auto previsit( node_t const * node ) -> decltype( node->location, void() ) {
+                if ( node->location.isUnset() ) {
                         unset.push_back( node );
+                }
+        }
-};
-class LocalFillCore : public ast::WithGuards {
-        CodeLocation const * parent;
-public:
-        LocalFillCore( CodeLocation const & location ) : parent( &location ) {
-                assert( location.isSet() );
+        }
-        template<typename node_t>
-        auto previsit( node_t const * node )
-                        -> typename std::enable_if<has_code_location<node_t>::value, node_t const *>::type {
-                if ( node->location.isSet() ) {
-                        GuardValue( parent ) = &node->location;
-                        return node;
-                } else {
-                        node_t * mut = ast::mutate( node );
-                        mut->location = *parent;
-                        return mut;
+                }
+        }
 …
 ast::Node const * localFillCodeLocations(
                 CodeLocation const & location , ast::Node const * node ) {
         ast::Pass<LocalFillCore> visitor( location );
+        ast::Pass<FillCore> visitor( location );
         return node->accept( visitor );
+}

src/Common/Indenter.h

-              r29d8c02
+              r74ec742
 // Created On       : Fri Jun 30 16:55:23 2017
 // Last Modified By : Andrew Beach
 // Last Modified On : Fri Aug 11 11:15:00 2017
 // Update Count     : 1
+// Last Modified On : Fri May 13 14:10:00 2022
+// Update Count     : 2
 //
+#ifndef INDENTER_H
+#define INDENTER_H
+#pragma once
+#include <ostream>
 struct Indenter {
 …
         return out << std::string(indent.indent * indent.amt, ' ');
+}
-#endif // INDENTER_H

src/Common/SemanticError.h

-              r29d8c02
+              r74ec742
 // Created On       : Mon May 18 07:44:20 2015
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Thu Jul 19 10:09:17 2018
 // Update Count     : 31
+// Last Modified On : Wed May  4 14:08:26 2022
+// Update Count     : 35
 //
 …
         {"aggregate-forward-decl" , Severity::Warn    , "forward declaration of nested aggregate: %s"                },
         {"superfluous-decl"       , Severity::Warn    , "declaration does not allocate storage: %s"                  },
+        {"superfluous-else"       , Severity::Warn    , "else clause never executed for empty loop conditional"      },
         {"gcc-attributes"         , Severity::Warn    , "invalid attribute: %s"                                      },
         {"c++-like-copy"          , Severity::Warn    , "Constructor from reference is not a valid copy constructor" },
 …
         AggrForwardDecl,
         SuperfluousDecl,
+        SuperfluousElse,
         GccAttributes,
         CppCopy,
 …
 );
 #define SemanticWarning(loc, id, ...) SemanticWarningImpl(loc, id, WarningFormats[(int)id].message, __VA_ARGS__)
+#define SemanticWarning(loc, id, ...) SemanticWarningImpl(loc, id, WarningFormats[(int)id].message, ##__VA_ARGS__)
 void SemanticWarningImpl (CodeLocation loc, Warning warn, const char * const fmt, ...) __attribute__((format(printf, 3, 4)));

src/Common/module.mk

-              r29d8c02
+              r74ec742
 ## Author           : Richard C. Bilson
 ## Created On       : Mon Jun  1 17:49:17 2015
 ## Last Modified By : Peter A. Buhr
 ## Last Modified On : Tue Sep 27 11:06:38 2016
 ## Update Count     : 4
+## Last Modified By : Andrew Beach
+## Last Modified On : Tue May 17 14:27:00 2022
+## Update Count     : 5
 ###############################################################################
 SRC_COMMON = \
+      Common/Assert.cc \
+      Common/CodeLocation.h \
+      Common/CodeLocationTools.hpp \
+      Common/CodeLocationTools.cpp \
+      Common/CompilerError.h \
+      Common/Debug.h \
+      Common/DeclStats.hpp \
+      Common/DeclStats.cpp \
+      Common/ErrorObjects.h \
+      Common/Eval.cc \
+      Common/Examine.cc \
+      Common/Examine.h \
+      Common/FilterCombos.h \
+      Common/Indenter.h \
+      Common/PassVisitor.cc \
+      Common/PassVisitor.h \
+      Common/PassVisitor.impl.h \
+      Common/PassVisitor.proto.h \
+      Common/PersistentMap.h \
+      Common/ResolvProtoDump.hpp \
+      Common/ResolvProtoDump.cpp \
+      Common/ScopedMap.h \
+      Common/SemanticError.cc \
+      Common/SemanticError.h \
+      Common/Stats.h \
+      Common/Stats/Base.h \
+      Common/Stats/Counter.cc \
+      Common/Stats/Counter.h \
+      Common/Stats/Heap.cc \
+      Common/Stats/Heap.h \
+      Common/Stats/ResolveTime.cc \
+      Common/Stats/ResolveTime.h \
+      Common/Stats/Stats.cc \
+      Common/Stats/Time.cc \
+      Common/Stats/Time.h \
+      Common/UnimplementedError.h \
+      Common/UniqueName.cc \
+      Common/UniqueName.h \
+      Common/utility.h \
+      Common/VectorMap.h
+        Common/Assert.cc \
+        Common/CodeLocation.h \
+        Common/CodeLocationTools.hpp \
+        Common/CodeLocationTools.cpp \
+        Common/CompilerError.h \
+        Common/Debug.h \
+        Common/DeclStats.hpp \
+        Common/DeclStats.cpp \
+        Common/ErrorObjects.h \
+        Common/Eval.cc \
+        Common/Examine.cc \
+        Common/Examine.h \
+        Common/FilterCombos.h \
+        Common/Indenter.h \
+        Common/Indenter.cc \
+        Common/PassVisitor.cc \
+        Common/PassVisitor.h \
+        Common/PassVisitor.impl.h \
+        Common/PassVisitor.proto.h \
+        Common/PersistentMap.h \
+        Common/ResolvProtoDump.hpp \
+        Common/ResolvProtoDump.cpp \
+        Common/ScopedMap.h \
+        Common/SemanticError.cc \
+        Common/SemanticError.h \
+        Common/Stats.h \
+        Common/Stats/Base.h \
+        Common/Stats/Counter.cc \
+        Common/Stats/Counter.h \
+        Common/Stats/Heap.cc \
+        Common/Stats/Heap.h \
+        Common/Stats/ResolveTime.cc \
+        Common/Stats/ResolveTime.h \
+        Common/Stats/Stats.cc \
+        Common/Stats/Time.cc \
+        Common/Stats/Time.h \
+        Common/UnimplementedError.h \
+        Common/UniqueName.cc \
+        Common/UniqueName.h \
+        Common/utility.h \
+        Common/VectorMap.h
+SRC += $(SRC_COMMON) Common/DebugMalloc.cc
+SRC += $(SRC_COMMON) \
+        Common/DebugMalloc.cc
 SRCDEMANGLE += $(SRC_COMMON)

src/Common/utility.h

-              r29d8c02
+              r74ec742
 // Author           : Richard C. Bilson
 // Created On       : Mon May 18 07:44:20 2015
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Tue Feb 11 13:00:36 2020
 // Update Count     : 50
+// Last Modified By : Andrew Beach
+// Last Modified On : Mon Apr 25 14:26:00 2022
+// Update Count     : 51
 //
 …
+}
+template<typename Container, typename Pred>
+void erase_if( Container & cont, Pred && pred ) {
+        auto keep_end = std::remove_if( cont.begin(), cont.end(), pred );
+        cont.erase( keep_end, cont.end() );
+}
 template< typename... Args >
 auto zip(Args&&... args) -> decltype(zipWith(std::forward<Args>(args)..., std::make_pair)) {

src/Concurrency/module.mk

-              r29d8c02
+              r74ec742
 ## Author           : Thierry Delisle
 ## Created On       : Mon Mar 13 12:48:40 2017
 ## Last Modified By :
 ## Last Modified On :
 ## Update Count     : 0
+## Last Modified By : Andrew Beach
+## Last Modified On : Tue May 17 13:28:00 2022
+## Update Count     : 1
 ###############################################################################
 SRC_CONCURRENCY = \
+SRC += \
         Concurrency/KeywordsNew.cpp \
+        Concurrency/Keywords.cc
+SRC += $(SRC_CONCURRENCY) \
+        Concurrency/Keywords.cc \
         Concurrency/Keywords.h \
         Concurrency/Waitfor.cc \
         Concurrency/Waitfor.h
-SRCDEMANGLE += $(SRC_CONCURRENCY)

src/ControlStruct/LabelGeneratorNew.hpp

r29d8c02	r74ec742
18	18	#include <string> // for string
19	19
20		~~class~~ CodeLocation;
	20	struct CodeLocation;
21	21
22	22	namespace ast {

src/ControlStruct/MultiLevelExit.cpp

-              r29d8c02
+              r74ec742
 #include "AST/Pass.hpp"
 #include "AST/Stmt.hpp"
+#include "Common/CodeLocationTools.hpp"
 #include "LabelGeneratorNew.hpp"
 …
         // Labels on different stmts require different approaches to access
         switch ( stmt->kind ) {
           case BranchStmt::Goto:
+        case BranchStmt::Goto:
                 return stmt;
           case BranchStmt::Continue:
           case BranchStmt::Break: {
                   bool isContinue = stmt->kind == BranchStmt::Continue;
                   // Handle unlabeled break and continue.
                   if ( stmt->target.empty() ) {
                           if ( isContinue ) {
                                   targetEntry = findEnclosingControlStructure( isContinueTarget );
                           } else {
                                   if ( enclosing_control_structures.empty() ) {
+        case BranchStmt::Continue:
+        case BranchStmt::Break: {
+                bool isContinue = stmt->kind == BranchStmt::Continue;
+                // Handle unlabeled break and continue.
+                if ( stmt->target.empty() ) {
+                        if ( isContinue ) {
+                                targetEntry = findEnclosingControlStructure( isContinueTarget );
+                        } else {
+                                if ( enclosing_control_structures.empty() ) {
                                           SemanticError( stmt->location,
                                                                          "'break' outside a loop, 'switch', or labelled block" );
+                                  }
                                   targetEntry = findEnclosingControlStructure( isBreakTarget );
+                          }
                           // Handle labeled break and continue.
                   } else {
                           // Lookup label in table to find attached control structure.
                           targetEntry = findEnclosingControlStructure(
                                   [ targetStmt = target_table.at(stmt->target) ](auto entry){
+                                }
+                                targetEntry = findEnclosingControlStructure( isBreakTarget );
+                        }
+                        // Handle labeled break and continue.
+                } else {
+                        // Lookup label in table to find attached control structure.
+                        targetEntry = findEnclosingControlStructure(
+                                [ targetStmt = target_table.at(stmt->target) ](auto entry){
                                           return entry.stmt == targetStmt;
                                   } );
+                  }
                   // Ensure that selected target is valid.
                   if ( targetEntry == enclosing_control_structures.rend() || ( isContinue && ! isContinueTarget( *targetEntry ) ) ) {
                           SemanticError( stmt->location, toString( (isContinue ? "'continue'" : "'break'"),
+                                } );
+                }
+                // Ensure that selected target is valid.
+                if ( targetEntry == enclosing_control_structures.rend() || ( isContinue && ! isContinueTarget( *targetEntry ) ) ) {
+                        SemanticError( stmt->location, toString( (isContinue ? "'continue'" : "'break'"),
                                                         " target must be an enclosing ", (isContinue ? "loop: " : "control structure: "),
                                                         stmt->originalTarget ) );
+                  }
                   break;
+          }
           // handle fallthrough in case/switch stmts
           case BranchStmt::FallThrough: {
                   targetEntry = findEnclosingControlStructure( isFallthroughTarget );
                   // Check that target is valid.
                   if ( targetEntry == enclosing_control_structures.rend() ) {
                           SemanticError( stmt->location, "'fallthrough' must be enclosed in a 'switch' or 'choose'" );
+                  }
                   if ( ! stmt->target.empty() ) {
                           // Labelled fallthrough: target must be a valid fallthrough label.
                           if ( ! fallthrough_labels.count( stmt->target ) ) {
                                   SemanticError( stmt->location, toString( "'fallthrough' target must be a later case statement: ",
+                }
+                break;
+        }
+        // handle fallthrough in case/switch stmts
+        case BranchStmt::FallThrough: {
+                targetEntry = findEnclosingControlStructure( isFallthroughTarget );
+                // Check that target is valid.
+                if ( targetEntry == enclosing_control_structures.rend() ) {
+                        SemanticError( stmt->location, "'fallthrough' must be enclosed in a 'switch' or 'choose'" );
+                }
+                if ( ! stmt->target.empty() ) {
+                        // Labelled fallthrough: target must be a valid fallthrough label.
+                        if ( ! fallthrough_labels.count( stmt->target ) ) {
+                                SemanticError( stmt->location, toString( "'fallthrough' target must be a later case statement: ",
                                                                                                                    stmt->originalTarget ) );
+                          }
                           return new BranchStmt( stmt->location, BranchStmt::Goto, stmt->originalTarget );
+                  }
                   break;
+          }
           case BranchStmt::FallThroughDefault: {
                   targetEntry = findEnclosingControlStructure( isFallthroughDefaultTarget );
                   // Check if in switch or choose statement.
                   if ( targetEntry == enclosing_control_structures.rend() ) {
                           SemanticError( stmt->location, "'fallthrough' must be enclosed in a 'switch' or 'choose'" );
+                  }
                   // Check if switch or choose has default clause.
                   auto switchStmt = strict_dynamic_cast< const SwitchStmt * >( targetEntry->stmt );
                   bool foundDefault = false;
                   for ( auto caseStmt : switchStmt->cases ) {
                           if ( caseStmt->isDefault() ) {
                                   foundDefault = true;
                                   break;
+                          }
+                  }
                   if ( ! foundDefault ) {
                           SemanticError( stmt->location, "'fallthrough default' must be enclosed in a 'switch' or 'choose'"
                                                          "control structure with a 'default' clause" );
+                  }
                   break;
+          }
           default:
+                        }
+                        return new BranchStmt( stmt->location, BranchStmt::Goto, stmt->originalTarget );
+                }
+                break;
+        }
+        case BranchStmt::FallThroughDefault: {
+                targetEntry = findEnclosingControlStructure( isFallthroughDefaultTarget );
+                // Check if in switch or choose statement.
+                if ( targetEntry == enclosing_control_structures.rend() ) {
+                        SemanticError( stmt->location, "'fallthrough' must be enclosed in a 'switch' or 'choose'" );
+                }
+                // Check if switch or choose has default clause.
+                auto switchStmt = strict_dynamic_cast< const SwitchStmt * >( targetEntry->stmt );
+                bool foundDefault = false;
+                for ( auto caseStmt : switchStmt->cases ) {
+                        if ( caseStmt->isDefault() ) {
+                                foundDefault = true;
+                                break;
+                        }
+                }
+                if ( ! foundDefault ) {
+                        SemanticError( stmt->location, "'fallthrough default' must be enclosed in a 'switch' or 'choose'"
+                                                   "control structure with a 'default' clause" );
+                }
+                break;
+        }
+        default:
                 assert( false );
+        }
 …
         Label exitLabel( CodeLocation(), "" );
         switch ( stmt->kind ) {
           case BranchStmt::Break:
+        case BranchStmt::Break:
                 assert( ! targetEntry->useBreakExit().empty() );
                 exitLabel = targetEntry->useBreakExit();
                 break;
           case BranchStmt::Continue:
+        case BranchStmt::Continue:
                 assert( ! targetEntry->useContExit().empty() );
                 exitLabel = targetEntry->useContExit();
                 break;
           case BranchStmt::FallThrough:
+        case BranchStmt::FallThrough:
                 assert( ! targetEntry->useFallExit().empty() );
                 exitLabel = targetEntry->useFallExit();
                 break;
           case BranchStmt::FallThroughDefault:
+        case BranchStmt::FallThroughDefault:
                 assert( ! targetEntry->useFallDefaultExit().empty() );
                 exitLabel = targetEntry->useFallDefaultExit();
 …
+                }
                 break;
           default:
+        default:
                 assert(0);
+        }
 …
+                }
+                ptr<Stmt> else_stmt = nullptr;
+                Stmt * loop_kid = nullptr;
+                // check if loop node and if so add else clause if it exists
+                const WhileDoStmt * whilePtr = dynamic_cast<const WhileDoStmt *>(kid.get());
+                if ( whilePtr && whilePtr->else_) {
+                        else_stmt = whilePtr->else_;
+                        WhileDoStmt * mutate_ptr = mutate(whilePtr);
+                        mutate_ptr->else_ = nullptr;
+                        loop_kid = mutate_ptr;
+                }
+                const ForStmt * forPtr = dynamic_cast<const ForStmt *>(kid.get());
+                if ( forPtr && forPtr->else_) {
+                        else_stmt = forPtr->else_;
+                        ForStmt * mutate_ptr = mutate(forPtr);
+                        mutate_ptr->else_ = nullptr;
+                        loop_kid = mutate_ptr;
+                }
                 try {
+                        ret.push_back( kid->accept( *visitor ) );
+                        if (else_stmt) ret.push_back( loop_kid->accept( *visitor ) );
+                        else ret.push_back( kid->accept( *visitor ) );
                 } catch ( SemanticErrorException & e ) {
                         errors.append( e );
+                }
+                if (else_stmt) ret.push_back(else_stmt);
                 if ( ! break_label.empty() ) {
 …
         Pass<MultiLevelExitCore> visitor( labelTable );
         const CompoundStmt * ret = stmt->accept( visitor );
+        return ret;
+        // There are some unset code locations slipping in, possibly by Labels.
+        const Node * node = localFillCodeLocations( ret->location, ret );
+        return strict_dynamic_cast<const CompoundStmt *>( node );
+}
 } // namespace ControlStruct

src/ControlStruct/module.mk

-              r29d8c02
+              r74ec742
 ## Author           : Richard C. Bilson
 ## Created On       : Mon Jun  1 17:49:17 2015
 ## Last Modified By : Peter A. Buhr
 ## Last Modified On : Sat Jan 29 12:04:19 2022
 ## Update Count     : 7
+## Last Modified By : Andrew Beach
+## Last Modified On : Tue May 17 14:30:00 2022
+## Update Count     : 8
 ###############################################################################
 SRC_CONTROLSTRUCT = \
+SRC += \
         ControlStruct/ExceptDecl.cc \
         ControlStruct/ExceptDecl.h \
+        ControlStruct/ExceptTranslateNew.cpp \
+        ControlStruct/ExceptTranslate.cc \
+        ControlStruct/ExceptTranslate.h \
         ControlStruct/FixLabels.cpp \
         ControlStruct/FixLabels.hpp \
 …
         ControlStruct/Mutate.h
-SRC += $(SRC_CONTROLSTRUCT) \
-        ControlStruct/ExceptTranslateNew.cpp \
-        ControlStruct/ExceptTranslate.cc \
-        ControlStruct/ExceptTranslate.h
-SRCDEMANGLE += $(SRC_CONTROLSTRUCT)

src/GenPoly/Lvalue.cc

-              r29d8c02
+              r74ec742
 // Author           : Richard C. Bilson
 // Created On       : Mon May 18 07:44:20 2015
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Fri Dec 13 23:14:38 2019
 // Update Count     : 7
+// Last Modified By : Andrew Beach
+// Last Modified On : Mon May 16 14:09:00 2022
+// Update Count     : 8
 //
 …
         } // namespace
+        static bool referencesEliminated = false;
+        // used by UntypedExpr::createDeref to determine whether result type of dereference should be ReferenceType or value type.
+        bool referencesPermissable() {
+                return ! referencesEliminated;
+        }
+        // Stored elsewhere (Lvalue2, initially false).
+        extern bool referencesEliminated;
         void convertLvalue( std::list< Declaration* > & translationUnit ) {

src/GenPoly/module.mk

-              r29d8c02
+              r74ec742
 ## Author           : Richard C. Bilson
 ## Created On       : Mon Jun  1 17:49:17 2015
 ## Last Modified By : Peter A. Buhr
 ## Last Modified On : Mon Jun  1 17:52:30 2015
 ## Update Count     : 1
+## Last Modified By : Andrew Beach
+## Last Modified On : Tue May 17 14:31:00 2022
+## Update Count     : 2
 ###############################################################################
+SRC += GenPoly/Box.cc \
+       GenPoly/Box.h \
+       GenPoly/ErasableScopedMap.h \
+       GenPoly/FindFunction.cc \
+       GenPoly/FindFunction.h \
+       GenPoly/GenPoly.cc \
+       GenPoly/GenPoly.h \
+       GenPoly/InstantiateGeneric.cc \
+       GenPoly/InstantiateGeneric.h \
+       GenPoly/Lvalue.cc \
+       GenPoly/Lvalue.h \
+       GenPoly/ScopedSet.h \
+       GenPoly/ScrubTyVars.cc \
+       GenPoly/ScrubTyVars.h \
+       GenPoly/Specialize.cc \
+       GenPoly/Specialize.h
+SRC_GENPOLY = \
+        GenPoly/GenPoly.cc \
+        GenPoly/GenPoly.h \
+        GenPoly/Lvalue2.cc \
+        GenPoly/Lvalue.h
+SRCDEMANGLE += GenPoly/GenPoly.cc GenPoly/GenPoly.h GenPoly/Lvalue.cc GenPoly/Lvalue.h
+SRC += $(SRC_GENPOLY) \
+        GenPoly/Box.cc \
+        GenPoly/Box.h \
+        GenPoly/ErasableScopedMap.h \
+        GenPoly/FindFunction.cc \
+        GenPoly/FindFunction.h \
+        GenPoly/InstantiateGeneric.cc \
+        GenPoly/InstantiateGeneric.h \
+        GenPoly/Lvalue.cc \
+        GenPoly/ScopedSet.h \
+        GenPoly/ScrubTyVars.cc \
+        GenPoly/ScrubTyVars.h \
+        GenPoly/Specialize.cc \
+        GenPoly/Specialize.h
+SRCDEMANGLE += $(SRC_GENPOLY)

src/InitTweak/FixInitNew.cpp

-              r29d8c02
+              r74ec742
                 auto expr = new ast::ImplicitCopyCtorExpr( appExpr->location, mutExpr );
+                // Move the type substitution to the new top-level, if it is attached to the appExpr.
+                // Ensure it is not deleted with the ImplicitCopyCtorExpr by removing it before deletion.
+                // The substitution is needed to obtain the type of temporary variables so that copy constructor
+                // calls can be resolved.
+                // Move the type substitution to the new top-level. The substitution
+                // is needed to obtain the type of temporary variables so that copy
+                // constructor calls can be resolved.
                 assert( typeSubs );
-                // assert (mutExpr->env);
                 expr->env = tmp;
-                // mutExpr->env = nullptr;
-                //std::swap( expr->env, appExpr->env );
                 return expr;
+        }
         void ResolveCopyCtors::previsit(const ast::Expr * expr) {
+                if (expr->env) {
+                        GuardValue(env);
+                        GuardValue(envModified);
+                        env = expr->env->clone();
+                        envModified = false;
+                }
+                if ( nullptr == expr->env ) {
+                        return;
+                }
+                GuardValue( env ) = expr->env->clone();
+                GuardValue( envModified ) = false;
+        }
         const ast::Expr * ResolveCopyCtors::postvisit(const ast::Expr * expr) {
+                if (expr->env) {
+                        if (envModified) {
+                                auto mutExpr = mutate(expr);
+                                mutExpr->env = env;
+                                return mutExpr;
+                        }
+                        else {
+                                // env was not mutated, skip and delete the shallow copy
+                                delete env;
+                                return expr;
+                        }
+                }
+                else {
+                // No local environment, skip.
+                if ( nullptr == expr->env ) {
+                        return expr;
+                // Environment was modified, mutate and replace.
+                } else if ( envModified ) {
+                        auto mutExpr = mutate(expr);
+                        mutExpr->env = env;
+                        return mutExpr;
+                // Environment was not mutated, delete the shallow copy before guard.
+                } else {
+                        delete env;
                         return expr;
+                }
 …
         const ast::Expr * ResolveCopyCtors::makeCtorDtor( const std::string & fname, const ast::ObjectDecl * var, const ast::Expr * cpArg ) {
                 assert( var );
                 assert (var->isManaged());
                 assert (!cpArg || cpArg->isManaged());
+                assert( var->isManaged() );
+                assert( !cpArg || cpArg->isManaged() );
                 // arrays are not copy constructed, so this should always be an ExprStmt
                 ast::ptr< ast::Stmt > stmt = genCtorDtor(var->location, fname, var, cpArg );
 …
                 auto exprStmt = stmt.strict_as<ast::ImplicitCtorDtorStmt>()->callStmt.strict_as<ast::ExprStmt>();
                 ast::ptr<ast::Expr> untyped = exprStmt->expr; // take ownership of expr
-                // exprStmt->expr = nullptr;
                 // resolve copy constructor
 …
                         env->add( *resolved->env );
                         envModified = true;
-                        // delete resolved->env;
                         auto mut = mutate(resolved.get());
                         assertf(mut == resolved.get(), "newly resolved expression must be unique");
                         mut->env = nullptr;
                 } // if
-                // delete stmt;
                 if ( auto assign = resolved.as<ast::TupleAssignExpr>() ) {
                         // fix newly generated StmtExpr

src/InitTweak/module.mk

-              r29d8c02
+              r74ec742
 ## Author           : Richard C. Bilson
 ## Created On       : Mon Jun  1 17:49:17 2015
 ## Last Modified By : Rob Schluntz
 ## Last Modified On : Fri May 13 11:36:24 2016
 ## Update Count     : 3
+## Last Modified By : Andrew Beach
+## Last Modified On : Tue May 17 14:31:00 2022
+## Update Count     : 4
 ###############################################################################
+SRC += \
+        InitTweak/FixGlobalInit.cc \
+        InitTweak/FixGlobalInit.h \
+        InitTweak/FixInit.cc \
+        InitTweak/FixInit.h \
+        InitTweak/GenInit.cc \
+        InitTweak/GenInit.h \
+        InitTweak/InitTweak.cc \
+        InitTweak/InitTweak.h \
+        InitTweak/FixInitNew.cpp
+SRCDEMANGLE += \
+SRC_INITTWEAK = \
         InitTweak/GenInit.cc \
         InitTweak/GenInit.h \
 …
         InitTweak/InitTweak.h
+SRC += $(SRC_INITTWEAK) \
+        InitTweak/FixGlobalInit.cc \
+        InitTweak/FixGlobalInit.h \
+        InitTweak/FixInit.cc \
+        InitTweak/FixInit.h \
+        InitTweak/FixInitNew.cpp
+SRCDEMANGLE += $(SRC_INITTWEAK)

src/Parser/DeclarationNode.cc

-              r29d8c02
+              r74ec742
 } // DeclarationNode::newAggregate
 DeclarationNode * DeclarationNode::newEnum( const string * name, DeclarationNode * constants, bool body) {
+DeclarationNode * DeclarationNode::newEnum( const string * name, DeclarationNode * constants, bool body, DeclarationNode * base) {
         DeclarationNode * newnode = new DeclarationNode;
         newnode->type = new TypeData( TypeData::Enum );
 …
         newnode->type->enumeration.body = body;
         newnode->type->enumeration.anon = name == nullptr;
+        if ( base && base->type)  {
+                newnode->type->base = base->type;
+        } // if
+        // Check: if base has TypeData
         return newnode;
 } // DeclarationNode::newEnum
 …
                 return newName( name ); // Not explicitly inited enum value;
         } // if
 } // DeclarationNode::newEnumGeneric
+} // DeclarationNode::newEnumValueGeneric
 DeclarationNode * DeclarationNode::newFromTypedef( const string * name ) {

src/Parser/ParseNode.h

r29d8c02	r74ec742
235	235	static DeclarationNode * newFunction( const std::string * name, DeclarationNode * ret, DeclarationNode * param, StatementNode * body );
236	236	static DeclarationNode * newAggregate( AggregateDecl::Aggregate kind, const std::string * name, ExpressionNode * actuals, DeclarationNode * fields, bool body );
237		static DeclarationNode * newEnum( const std::string * name, DeclarationNode * constants, bool body );
	237	static DeclarationNode * newEnum( const std::string * name, DeclarationNode * constants, bool body, DeclarationNode * base = nullptr );
238	238	static DeclarationNode * newEnumConstant( const std::string * name, ExpressionNode * constant );
239	239	static DeclarationNode * newEnumValueGeneric( const std::string * name, InitializerNode * init );

src/Parser/TypeData.cc

-              r29d8c02
+              r74ec742
                 if ( enumeration.body ) {
                         os << string( indent + 2, ' ' ) << " with body" << endl;
+                } // if
+                if ( base ) {
+                        os << "for ";
+                        base->print( os, indent + 2 );
                 } // if
                 break;
 …
                         ObjectDecl * member = dynamic_cast< ObjectDecl * >(* members);
                         member->set_init( new SingleInit( maybeMoveBuild< Expression >( cur->consume_enumeratorValue() ) ) );
                 } else {
+                } else if ( !cur->initializer ) {
                         if ( baseType && (!dynamic_cast<BasicType *>(baseType) || !dynamic_cast<BasicType *>(baseType)->isWholeNumber())) {
                                 SemanticError( td->location, "A non whole number enum value decl must be explicitly initialized." );
+                        }
+                } // if
+                }
+                // else cur is a List Initializer and has been set as init in buildList()
+                // if
         } // for
         ret->set_body( td->enumeration.body ); // Boolean; if it has body
+        ret->set_body( td->enumeration.body );
         return ret;
 } // buildEnum

src/Parser/parser.yy

-              r29d8c02
+              r74ec742
 // Created On       : Sat Sep  1 20:22:55 2001
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Mon Mar 14 16:35:29 2022
 // Update Count     : 5276
+// Last Modified On : Sat May 14 09:16:22 2022
+// Update Count     : 5401
 //
 …
 #include "Common/SemanticError.h"                                               // error_str
 #include "Common/utility.h"                                                             // for maybeMoveBuild, maybeBuild, CodeLo...
+#include "SynTree/Attribute.h"     // for Attribute
 extern DeclarationNode * parseTree;
 …
 } // appendStr
+DeclarationNode * distAttr( DeclarationNode * specifier, DeclarationNode * declList ) {
+        // distribute declaration_specifier across all declared variables, e.g., static, const, __attribute__.
+        DeclarationNode * cur = declList, * cl = (new DeclarationNode)->addType( specifier );
+DeclarationNode * distAttr( DeclarationNode * typeSpec, DeclarationNode * declList ) {
+        // distribute declaration_specifier across all declared variables, e.g., static, const, but not __attribute__.
+        assert( declList );
+//      printf( "distAttr1 typeSpec %p\n", typeSpec ); typeSpec->print( std::cout );
+        DeclarationNode * cur = declList, * cl = (new DeclarationNode)->addType( typeSpec );
+//      printf( "distAttr2 cl %p\n", cl ); cl->type->print( std::cout );
+//      cl->type->aggregate.name = cl->type->aggInst.aggregate->aggregate.name;
         for ( cur = dynamic_cast<DeclarationNode *>( cur->get_next() ); cur != nullptr; cur = dynamic_cast<DeclarationNode *>( cur->get_next() ) ) {
                 cl->cloneBaseType( cur );
         } // for
         declList->addType( cl );
+//      printf( "distAttr3 declList %p\n", declList ); declList->print( std::cout, 0 );
         return declList;
 } // distAttr
 …
 void distInl( DeclarationNode * declaration ) {
         // distribute EXTENSION across all declarations
+        // distribute INLINE across all declarations
         for ( DeclarationNode *iter = declaration; iter != nullptr; iter = (DeclarationNode *)iter->get_next() ) {
                 iter->set_inLine( true );
 …
                 if ( ! ( typeSpec->type && (typeSpec->type->kind == TypeData::Aggregate || typeSpec->type->kind == TypeData::Enum) ) ) {
                         stringstream ss;
                         typeSpec->type->print( ss );
+                        // printf( "fieldDecl1 typeSpec %p\n", typeSpec ); typeSpec->type->print( std::cout );
                         SemanticWarning( yylloc, Warning::SuperfluousDecl, ss.str().c_str() );
                         return nullptr;
                 } // if
+                // printf( "fieldDecl2 typeSpec %p\n", typeSpec ); typeSpec->type->print( std::cout );
                 fieldList = DeclarationNode::newName( nullptr );
         } // if
+        return distAttr( typeSpec, fieldList );                         // mark all fields in list
+//      return distAttr( typeSpec, fieldList );                         // mark all fields in list
+        // printf( "fieldDecl3 typeSpec %p\n", typeSpec ); typeSpec->print( std::cout, 0 );
+        DeclarationNode * temp = distAttr( typeSpec, fieldList );                               // mark all fields in list
+        // printf( "fieldDecl4 temp %p\n", temp ); temp->print( std::cout, 0 );
+        return temp;
 } // fieldDecl
 …
 iteration_statement:
         WHILE '(' ')' statement                                                         // CFA => while ( 1 )
+        WHILE '(' ')' statement                                                         %prec THEN // CFA => while ( 1 )
                 { $$ = new StatementNode( build_while( new CondCtl( nullptr, new ExpressionNode( build_constantInteger( *new string( "1" ) ) ) ), maybe_build_compound( $4 ) ) ); }
+        | WHILE '(' ')' statement ELSE statement                        // CFA
+                {
+                        $$ = new StatementNode( build_while( new CondCtl( nullptr, new ExpressionNode( build_constantInteger( *new string( "1" ) ) ) ), maybe_build_compound( $4 ) ) );
+                        SemanticWarning( yylloc, Warning::SuperfluousElse );
+                }
         | WHILE '(' conditional_declaration ')' statement       %prec THEN
                 { $$ = new StatementNode( build_while( $3, maybe_build_compound( $5 ) ) ); }
 …
         | DO statement WHILE '(' ')' ';'                                        // CFA => do while( 1 )
                 { $$ = new StatementNode( build_do_while( new ExpressionNode( build_constantInteger( *new string( "1" ) ) ), maybe_build_compound( $2 ) ) ); }
+        | DO statement WHILE '(' comma_expression ')' ';'       %prec THEN
+        | DO statement WHILE '(' ')' ELSE statement                     // CFA
+                {
+                        $$ = new StatementNode( build_do_while( new ExpressionNode( build_constantInteger( *new string( "1" ) ) ), maybe_build_compound( $2 ) ) );
+                        SemanticWarning( yylloc, Warning::SuperfluousElse );
+                }
+        | DO statement WHILE '(' comma_expression ')' ';'
                 { $$ = new StatementNode( build_do_while( $5, maybe_build_compound( $2 ) ) ); }
         | DO statement WHILE '(' comma_expression ')' ELSE statement // CFA
                 { $$ = new StatementNode( build_do_while( $5, maybe_build_compound( $2 ), $8 ) ); }
         | FOR '(' ')' statement                                                         // CFA => for ( ;; )
+        | FOR '(' ')' statement                                                         %prec THEN // CFA => for ( ;; )
                 { $$ = new StatementNode( build_for( new ForCtrl( (ExpressionNode * )nullptr, (ExpressionNode * )nullptr, (ExpressionNode * )nullptr ), maybe_build_compound( $4 ) ) ); }
+        | FOR '(' ')' statement ELSE statement                          // CFA
+                {
+                        $$ = new StatementNode( build_for( new ForCtrl( (ExpressionNode * )nullptr, (ExpressionNode * )nullptr, (ExpressionNode * )nullptr ), maybe_build_compound( $4 ) ) );
+                        SemanticWarning( yylloc, Warning::SuperfluousElse );
+                }
         | FOR '(' for_control_expression_list ')' statement     %prec THEN
                 { $$ = new StatementNode( build_for( $3, maybe_build_compound( $5 ) ) ); }
 …
 declaration:                                                                                    // old & new style declarations
         c_declaration ';'
+                {
+                        // printf( "C_DECLARATION1 %p %s\n", $$, $$->name ? $$->name->c_str() : "(nil)" );
+                        // for ( Attribute * attr: reverseIterate( $$->attributes ) ) {
+                        //   printf( "\tattr %s\n", attr->name.c_str() );
+                        // } // for
+                }
         | cfa_declaration ';'                                                           // CFA
         | static_assert                                                                         // C11
 …
         basic_type_specifier
         | sue_type_specifier
+                {
+                        // printf( "sue_type_specifier2 %p %s\n", $$, $$->type->aggregate.name ? $$->type->aggregate.name->c_str() : "(nil)" );
+                        // for ( Attribute * attr: reverseIterate( $$->attributes ) ) {
+                        //   printf( "\tattr %s\n", attr->name.c_str() );
+                        // } // for
+                }
         | type_type_specifier
+        ;
 …
 sue_declaration_specifier:                                                              // struct, union, enum + storage class + type specifier
         sue_type_specifier
+                {
+                        // printf( "sue_declaration_specifier %p %s\n", $$, $$->type->aggregate.name ? $$->type->aggregate.name->c_str() : "(nil)" );
+                        // for ( Attribute * attr: reverseIterate( $$->attributes ) ) {
+                        //   printf( "\tattr %s\n", attr->name.c_str() );
+                        // } // for
+                }
         | declaration_qualifier_list sue_type_specifier
                 { $$ = $2->addQualifiers( $1 ); }
 …
 sue_type_specifier:                                                                             // struct, union, enum + type specifier
         elaborated_type
+                {
+                        // printf( "sue_type_specifier %p %s\n", $$, $$->type->aggregate.name ? $$->type->aggregate.name->c_str() : "(nil)" );
+                        // for ( Attribute * attr: reverseIterate( $$->attributes ) ) {
+                        //   printf( "\tattr %s\n", attr->name.c_str() );
+                        // } // for
+                }
         | type_qualifier_list
                 { if ( $1->type != nullptr && $1->type->forall ) forall = true; } // remember generic type
 …
 elaborated_type:                                                                                // struct, union, enum
         aggregate_type
+                {
+                        // printf( "elaborated_type %p %s\n", $$, $$->type->aggregate.name ? $$->type->aggregate.name->c_str() : "(nil)" );
+                        // for ( Attribute * attr: reverseIterate( $$->attributes ) ) {
+                        //   printf( "\tattr %s\n", attr->name.c_str() );
+                        // } // for
+                }
         | enum_type
+        ;
 …
+                }
           '{' field_declaration_list_opt '}' type_parameters_opt
+                { $$ = DeclarationNode::newAggregate( $1, $3, $8, $6, true )->addQualifiers( $2 ); }
+                {
+                        // printf( "aggregate_type1 %s\n", $3.str->c_str() );
+                        // if ( $2 )
+                        //      for ( Attribute * attr: reverseIterate( $2->attributes ) ) {
+                        //              printf( "copySpecifiers12 %s\n", attr->name.c_str() );
+                        //      } // for
+                        $$ = DeclarationNode::newAggregate( $1, $3, $8, $6, true )->addQualifiers( $2 );
+                        // printf( "aggregate_type2 %p %s\n", $$, $$->type->aggregate.name ? $$->type->aggregate.name->c_str() : "(nil)" );
+                        // for ( Attribute * attr: reverseIterate( $$->attributes ) ) {
+                        //      printf( "aggregate_type3 %s\n", attr->name.c_str() );
+                        // } // for
+                }
         | aggregate_key attribute_list_opt TYPEDEFname          // unqualified type name
+                {
 …
           '{' field_declaration_list_opt '}' type_parameters_opt
+                {
+                        // printf( "AGG3\n" );
                         DeclarationNode::newFromTypedef( $3 );
                         $$ = DeclarationNode::newAggregate( $1, $3, $8, $6, true )->addQualifiers( $2 );
 …
           '{' field_declaration_list_opt '}' type_parameters_opt
+                {
+                        // printf( "AGG4\n" );
                         DeclarationNode::newFromTypeGen( $3, nullptr );
                         $$ = DeclarationNode::newAggregate( $1, $3, $8, $6, true )->addQualifiers( $2 );
 …
 field_declaration:
         type_specifier field_declaring_list_opt ';'
+                { $$ = fieldDecl( $1, $2 ); }
+                {
+                        // printf( "type_specifier1 %p %s\n", $$, $$->type->aggregate.name ? $$->type->aggregate.name->c_str() : "(nil)" );
+                        $$ = fieldDecl( $1, $2 );
+                        // printf( "type_specifier2 %p %s\n", $$, $$->type->aggregate.name ? $$->type->aggregate.name->c_str() : "(nil)" );
+                        // for ( Attribute * attr: reverseIterate( $$->attributes ) ) {
+                        //   printf( "\tattr %s\n", attr->name.c_str() );
+                        // } // for
+                }
         | EXTENSION type_specifier field_declaring_list_opt ';' // GCC
                 { $$ = fieldDecl( $2, $3 ); distExt( $$ ); }
 …
+        ;
 enum_type: // static DeclarationNode * newEnum( const std::string * name, DeclarationNode * constants, bool body, bool typed );                                                                                         // enum
+enum_type:
         ENUM attribute_list_opt '{' enumerator_list comma_opt '}'
                 { $$ = DeclarationNode::newEnum( nullptr, $4, true )->addQualifiers( $2 ); }
 …
                         { SemanticError( yylloc, "storage-class and CV qualifiers are not meaningful for enumeration constants, which are const." ); }
                         $$ = DeclarationNode::newEnum( nullptr, $7, true ) ->addQualifiers( $5 )  -> addEnumBase( $3 );
+                }
         | ENUM '(' cfa_abstract_parameter_declaration ')' attribute_list_opt identifier attribute_list_opt // Question: why attributes/qualifier after identifier
+                        $$ = DeclarationNode::newEnum( nullptr, $7, true, $3 )->addQualifiers( $5 );
+                }
+        | ENUM '(' cfa_abstract_parameter_declaration ')' attribute_list_opt identifier attribute_list_opt
+                {
                         if ( $3->storageClasses.val != 0 || $3->type->qualifiers.val != 0 ) { SemanticError( yylloc, "storage-class and CV qualifiers are not meaningful for enumeration constants, which are const." ); }
 …
           '{' enumerator_list comma_opt '}'
+                {
                         $$ = DeclarationNode::newEnum( $6, $10, true ) -> addQualifiers( $5 ) -> addQualifiers( $7 ) -> addEnumBase( $3 );
+                        $$ = DeclarationNode::newEnum( $6, $10, true, $3 )->addQualifiers( $5 )->addQualifiers( $7 );
+                }
         | ENUM '(' cfa_abstract_parameter_declaration ')' attribute_list_opt typedef_name attribute_list_opt '{' enumerator_list comma_opt '}'
 …
                         if ( $3->storageClasses.val != 0 || $3->type->qualifiers.val != 0 ) { SemanticError( yylloc, "storage-class and CV qualifiers are not meaningful for enumeration constants, which are const." ); }
                         typedefTable.makeTypedef( *$6->name );
                         $$ = DeclarationNode::newEnum( $6->name, $9, true ) -> addQualifiers( $5 ) -> addQualifiers( $7 ) -> addEnumBase( $3 );
+                        $$ = DeclarationNode::newEnum( $6->name, $9, true, $3 )->addQualifiers( $5 )->addQualifiers( $7 );
+                }
         | enum_type_nobody
 …
         // empty
                 { $$ = nullptr; forall = false; }
+        | WITH '(' tuple_expression_list ')'
+                { $$ = $3; forall = false; }
+        | WITH '(' tuple_expression_list ')' attribute_list_opt
+                {
+                        $$ = $3; forall = false;
+                        if ( $5 ) {
+                                SemanticError( yylloc, "Attributes cannot be associated with function body. Move attribute(s) before \"with\" clause." );
+                                $$ = nullptr;
+                        } // if
+                }
+        ;

src/ResolvExpr/AlternativeFinder.cc

r29d8c02	r74ec742
42	42	#include "SymTab/Indexer.h" // for Indexer
43	43	#include "SymTab/Mangler.h" // for Mangler
44		#include "SymTab/Validate~~.h"~~ // for validateType
	44	#include "SymTab/ValidateType.h" // for validateType
45	45	#include "SynTree/Constant.h" // for Constant
46	46	#include "SynTree/Declaration.h" // for DeclarationWithType, TypeDecl, Dec...

src/ResolvExpr/Resolver.cc

-              r29d8c02
+              r74ec742
                         // enumerator initializers should not use the enum type to initialize, since
                         // the enum type is still incomplete at this point. Use signed int instead.
+                        // TODO: BasicType::SignedInt may no longer be true
                         currentObject = CurrentObject( new BasicType( Type::Qualifiers(), BasicType::SignedInt ) );
+                }
 …
                         // enum type is still incomplete at this point. Use `int` instead.
+                        if (dynamic_cast< const ast::EnumInstType * >( objectDecl->get_type() )->base->base) { // const ast::PointerType &
+                                // const ast::Type * enumBase =  (dynamic_cast< const ast::EnumInstType * >( objectDecl->get_type() )->base->base.get());
+                                // const ast::PointerType * enumBaseAsPtr = dynamic_cast<const ast::PointerType *>(enumBase);
+                                // if ( enumBaseAsPtr ) {
+                                //      const ast::Type * pointerBase = enumBaseAsPtr->base.get();
+                                //      if ( dynamic_cast<const ast::BasicType *>(pointerBase) ) {
+                                //              objectDecl = fixObjectType(objectDecl, context);
+                                //              if (dynamic_cast<const ast::BasicType *>(pointerBase)->kind == ast::BasicType::Char)
+                                //              currentObject = ast::CurrentObject{
+                                //                      objectDecl->location,  new ast::PointerType{
+                                //                              new ast::BasicType{ ast::BasicType::Char }
+                                //                      } };
+                                //      } else {
+                                //              objectDecl = fixObjectType(objectDecl, context);
+                                //              currentObject = ast::CurrentObject{objectDecl->location, new ast::BasicType{ ast::BasicType::SignedInt } };
+                                //      }
+                                // }
+                        if (dynamic_cast< const ast::EnumInstType * >( objectDecl->get_type() )->base->base) {
                                 objectDecl = fixObjectType( objectDecl, context );
                                 const ast::Type * enumBase =  (dynamic_cast< const ast::EnumInstType * >( objectDecl->get_type() )->base->base.get());

src/SymTab/Autogen.h

-              r29d8c02
+              r74ec742
 #include "AST/Decl.hpp"
-#include "AST/Eval.hpp"
 #include "AST/Expr.hpp"
 #include "AST/Init.hpp"
 …
         template< typename OutIter >
         ast::ptr< ast::Stmt > genCall(
                 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam,
                 const CodeLocation & loc, const std::string & fname, OutIter && out,
+                InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam,
+                const CodeLocation & loc, const std::string & fname, OutIter && out,
                 const ast::Type * type, const ast::Type * addCast, LoopDirection forward = LoopForward );
 …
+        }
         /// inserts into out a generated call expression to function fname with arguments dstParam and
+        /// inserts into out a generated call expression to function fname with arguments dstParam and
         /// srcParam. Should only be called with non-array types.
         /// optionally returns a statement which must be inserted prior to the containing loop, if
+        /// optionally returns a statement which must be inserted prior to the containing loop, if
         /// there is one
         template< typename OutIter >
         ast::ptr< ast::Stmt > genScalarCall(
                 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam,
                 const CodeLocation & loc, std::string fname, OutIter && out, const ast::Type * type,
+        ast::ptr< ast::Stmt > genScalarCall(
+                InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam,
+                const CodeLocation & loc, std::string fname, OutIter && out, const ast::Type * type,
                 const ast::Type * addCast = nullptr
         ) {
 …
                 if ( addCast ) {
                         // cast to T& with qualifiers removed, so that qualified objects can be constructed and
                         // destructed with the same functions as non-qualified objects. Unfortunately, lvalue
                         // is considered a qualifier - for AddressExpr to resolve, its argument must have an
+                        // cast to T& with qualifiers removed, so that qualified objects can be constructed and
+                        // destructed with the same functions as non-qualified objects. Unfortunately, lvalue
+                        // is considered a qualifier - for AddressExpr to resolve, its argument must have an
                         // lvalue-qualified type, so remove all qualifiers except lvalue.
                         // xxx -- old code actually removed lvalue too...
                         ast::ptr< ast::Type > guard = addCast;  // prevent castType from mutating addCast
                         ast::ptr< ast::Type > castType = addCast;
                         ast::remove_qualifiers(
                                 castType,
+                        ast::remove_qualifiers(
+                                castType,
                                 ast::CV::Const | ast::CV::Volatile | ast::CV::Restrict | ast::CV::Atomic );
                         dstParam = new ast::CastExpr{ dstParam, new ast::ReferenceType{ castType } };
 …
                 srcParam.clearArrayIndices();
                 return listInit;
+        }
 …
+        }
         /// Store in out a loop which calls fname on each element of the array with srcParam and
+        /// Store in out a loop which calls fname on each element of the array with srcParam and
         /// dstParam as arguments. If forward is true, loop goes from 0 to N-1, else N-1 to 0
         template< typename OutIter >
         void genArrayCall(
                 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam,
                 const CodeLocation & loc, const std::string & fname, OutIter && out,
                 const ast::ArrayType * array, const ast::Type * addCast = nullptr,
                 LoopDirection forward = LoopForward
+                InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam,
+                const CodeLocation & loc, const std::string & fname, OutIter && out,
+                const ast::ArrayType * array, const ast::Type * addCast = nullptr,
+                LoopDirection forward = LoopForward
         ) {
                 static UniqueName indexName( "_index" );
 …
                 } else {
                         // generate: for ( int i = N-1; i >= 0; --i )
                         begin = ast::call(
                                 loc, "?-?", array->dimension, ast::ConstantExpr::from_int( loc, 1 ) );
+                        begin = ast::UntypedExpr::createCall( loc, "?-?",
+                                { array->dimension, ast::ConstantExpr::from_int( loc, 1 ) } );
                         end = ast::ConstantExpr::from_int( loc, 0 );
                         cmp = "?>=?";
 …
+                }
                 ast::ptr< ast::DeclWithType > index = new ast::ObjectDecl{
                         loc, indexName.newName(), new ast::BasicType{ ast::BasicType::SignedInt },
+                ast::ptr< ast::DeclWithType > index = new ast::ObjectDecl{
+                        loc, indexName.newName(), new ast::BasicType{ ast::BasicType::SignedInt },
                         new ast::SingleInit{ loc, begin } };
                 ast::ptr< ast::Expr > indexVar = new ast::VariableExpr{ loc, index };
+                ast::ptr< ast::Expr > cond = ast::call( loc, cmp, indexVar, end );
+                ast::ptr< ast::Expr > inc = ast::call( loc, update, indexVar );
+                ast::ptr< ast::Expr > dstIndex = ast::call( loc, "?[?]", dstParam, indexVar );
+                // srcParam must keep track of the array indices to build the source parameter and/or
+                ast::ptr< ast::Expr > cond = ast::UntypedExpr::createCall(
+                        loc, cmp, { indexVar, end } );
+                ast::ptr< ast::Expr > inc = ast::UntypedExpr::createCall(
+                        loc, update, { indexVar } );
+                ast::ptr< ast::Expr > dstIndex = ast::UntypedExpr::createCall(
+                        loc, "?[?]", { dstParam, indexVar } );
+                // srcParam must keep track of the array indices to build the source parameter and/or
                 // array list initializer
                 srcParam.addArrayIndex( indexVar, array->dimension );
 …
                 // for stmt's body, eventually containing call
                 ast::CompoundStmt * body = new ast::CompoundStmt{ loc };
                 ast::ptr< ast::Stmt > listInit = genCall(
                         srcParam, dstIndex, loc, fname, std::back_inserter( body->kids ), array->base, addCast,
+                ast::ptr< ast::Stmt > listInit = genCall(
+                        srcParam, dstIndex, loc, fname, std::back_inserter( body->kids ), array->base, addCast,
                         forward );
                 // block containing the stmt and index variable
                 ast::CompoundStmt * block = new ast::CompoundStmt{ loc };
 …
         template< typename OutIter >
         ast::ptr< ast::Stmt > genCall(
                 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam,
                 const CodeLocation & loc, const std::string & fname, OutIter && out,
+                InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam,
+                const CodeLocation & loc, const std::string & fname, OutIter && out,
                 const ast::Type * type, const ast::Type * addCast, LoopDirection forward
         ) {
                 if ( auto at = dynamic_cast< const ast::ArrayType * >( type ) ) {
                         genArrayCall(
                                 srcParam, dstParam, loc, fname, std::forward< OutIter >(out), at, addCast,
+                        genArrayCall(
+                                srcParam, dstParam, loc, fname, std::forward< OutIter >(out), at, addCast,
                                 forward );
                         return {};
                 } else {
                         return genScalarCall(
+                        return genScalarCall(
                                 srcParam, dstParam, loc, fname, std::forward< OutIter >( out ), type, addCast );
+                }
 …
+        }
         static inline ast::ptr< ast::Stmt > genImplicitCall(
                 InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam,
                 const CodeLocation & loc, const std::string & fname, const ast::ObjectDecl * obj,
                 LoopDirection forward = LoopForward
+        static inline ast::ptr< ast::Stmt > genImplicitCall(
+                InitTweak::InitExpander_new & srcParam, const ast::Expr * dstParam,
+                const CodeLocation & loc, const std::string & fname, const ast::ObjectDecl * obj,
+                LoopDirection forward = LoopForward
         ) {
                 // unnamed bit fields are not copied as they cannot be accessed
 …
                 std::vector< ast::ptr< ast::Stmt > > stmts;
                 genCall(
+                genCall(
                         srcParam, dstParam, loc, fname, back_inserter( stmts ), obj->type, addCast, forward );
 …
                         const ast::Stmt * callStmt = stmts.front();
                         if ( addCast ) {
                                 // implicitly generated ctor/dtor calls should be wrapped so that later passes are
+                                // implicitly generated ctor/dtor calls should be wrapped so that later passes are
                                 // aware they were generated.
                                 callStmt = new ast::ImplicitCtorDtorStmt{ callStmt->location, callStmt };
 …
 // compile-command: "make install" //
 // End: //

src/SymTab/Demangle.cc

r29d8c02	r74ec742
5	5	// file "LICENCE" distributed with Cforall.
6	6	//
7		// Demangle~~r.cc --~~
	7	// Demangle.cc -- Convert a mangled name into a human readable name.
8	8	//
9	9	// Author : Rob Schluntz

src/SymTab/Mangler.h

-              r29d8c02
+              r74ec742
+}
-extern "C" {
-        char * cforall_demangle(const char *, int);
+}
 // Local Variables: //
 // tab-width: 4 //

src/SymTab/Validate.cc

-              r29d8c02
+              r74ec742
 // Created On       : Sun May 17 21:50:04 2015
 // Last Modified By : Andrew Beach
 // Last Modified On : Fri Nov 12 11:00:00 2021
 // Update Count     : 364
+// Last Modified On : Tue May 17 14:36:00 2022
+// Update Count     : 366
 //
 …
 #include "ResolvExpr/ResolveTypeof.h"  // for resolveTypeof
 #include "SymTab/Autogen.h"            // for SizeType
+#include "SymTab/ValidateType.h"       // for decayEnumsAndPointers, decayFo...
 #include "SynTree/LinkageSpec.h"       // for C
 #include "SynTree/Attribute.h"         // for noAttributes, Attribute
 …
         };
-        /// Replaces enum types by int, and function or array types in function parameter and return lists by appropriate pointers.
-        struct EnumAndPointerDecay_old {
-                void previsit( EnumDecl * aggregateDecl );
-                void previsit( FunctionType * func );
-        };
-        /// Associates forward declarations of aggregates with their definitions
-        struct LinkReferenceToTypes_old final : public WithIndexer, public WithGuards, public WithVisitorRef<LinkReferenceToTypes_old>, public WithShortCircuiting {
-                LinkReferenceToTypes_old( const Indexer * indexer );
-                void postvisit( TypeInstType * typeInst );
-                void postvisit( EnumInstType * enumInst );
-                void postvisit( StructInstType * structInst );
-                void postvisit( UnionInstType * unionInst );
-                void postvisit( TraitInstType * traitInst );
-                void previsit( QualifiedType * qualType );
-                void postvisit( QualifiedType * qualType );
-                void postvisit( EnumDecl * enumDecl );
-                void postvisit( StructDecl * structDecl );
-                void postvisit( UnionDecl * unionDecl );
-                void postvisit( TraitDecl * traitDecl );
-                void previsit( StructDecl * structDecl );
-                void previsit( UnionDecl * unionDecl );
-                void renameGenericParams( std::list< TypeDecl * > & params );
-          private:
-                const Indexer * local_indexer;
-                typedef std::map< std::string, std::list< EnumInstType * > > ForwardEnumsType;
-                typedef std::map< std::string, std::list< StructInstType * > > ForwardStructsType;
-                typedef std::map< std::string, std::list< UnionInstType * > > ForwardUnionsType;
-                ForwardEnumsType forwardEnums;
-                ForwardStructsType forwardStructs;
-                ForwardUnionsType forwardUnions;
-                /// true if currently in a generic type body, so that type parameter instances can be renamed appropriately
-                bool inGeneric = false;
-        };
         /// Does early resolution on the expressions that give enumeration constants their values
         struct ResolveEnumInitializers final : public WithIndexer, public WithGuards, public WithVisitorRef<ResolveEnumInitializers>, public WithShortCircuiting {
 …
                 void previsit( StructDecl * aggrDecl );
                 void previsit( UnionDecl * aggrDecl );
-        };
-        // These structs are the sub-sub-passes of ForallPointerDecay_old.
-        struct TraitExpander_old final {
-                void previsit( FunctionType * );
-                void previsit( StructDecl * );
-                void previsit( UnionDecl * );
-        };
-        struct AssertionFixer_old final {
-                void previsit( FunctionType * );
-                void previsit( StructDecl * );
-                void previsit( UnionDecl * );
-        };
-        struct CheckOperatorTypes_old final {
-                void previsit( ObjectDecl * );
-        };
-        struct FixUniqueIds_old final {
-                void previsit( DeclarationWithType * );
         };
 …
         void validate_A( std::list< Declaration * > & translationUnit ) {
-                PassVisitor<EnumAndPointerDecay_old> epc;
                 PassVisitor<HoistTypeDecls> hoistDecls;
+                {
 …
                         ReplaceTypedef::replaceTypedef( translationUnit );
                         ReturnTypeFixer::fix( translationUnit ); // must happen before autogen
                         acceptAll( translationUnit, epc ); // must happen before VerifyCtorDtorAssign, because void return objects should not exist; before LinkReferenceToTypes_old because it is an indexer and needs correct types for mangling
+                        decayEnumsAndPointers( translationUnit ); // must happen before VerifyCtorDtorAssign, because void return objects should not exist; before LinkReferenceToTypes_old because it is an indexer and needs correct types for mangling
+                }
+        }
         void validate_B( std::list< Declaration * > & translationUnit ) {
-                PassVisitor<LinkReferenceToTypes_old> lrt( nullptr );
                 PassVisitor<FixQualifiedTypes> fixQual;
+                {
                         Stats::Heap::newPass("validate-B");
                         Stats::Time::BlockGuard guard("validate-B");
                         acceptAll( translationUnit, lrt ); // must happen before autogen, because sized flag needs to propagate to generated functions
+                        //linkReferenceToTypes( translationUnit );
                         mutateAll( translationUnit, fixQual ); // must happen after LinkReferenceToTypes_old, because aggregate members are accessed
                         HoistStruct::hoistStruct( translationUnit );
 …
                         });
+                }
+        }
-        static void decayForallPointers( std::list< Declaration * > & translationUnit ) {
-                PassVisitor<TraitExpander_old> te;
-                acceptAll( translationUnit, te );
-                PassVisitor<AssertionFixer_old> af;
-                acceptAll( translationUnit, af );
-                PassVisitor<CheckOperatorTypes_old> cot;
-                acceptAll( translationUnit, cot );
-                PassVisitor<FixUniqueIds_old> fui;
-                acceptAll( translationUnit, fui );
+        }
 …
+        }
-        void validateType( Type * type, const Indexer * indexer ) {
-                PassVisitor<EnumAndPointerDecay_old> epc;
-                PassVisitor<LinkReferenceToTypes_old> lrt( indexer );
-                PassVisitor<TraitExpander_old> te;
-                PassVisitor<AssertionFixer_old> af;
-                PassVisitor<CheckOperatorTypes_old> cot;
-                PassVisitor<FixUniqueIds_old> fui;
-                type->accept( epc );
-                type->accept( lrt );
-                type->accept( te );
-                type->accept( af );
-                type->accept( cot );
-                type->accept( fui );
+        }
         void HoistTypeDecls::handleType( Type * type ) {
                 // some type declarations are buried in expressions and not easy to hoist during parsing; hoist them here
 …
+        }
-        void EnumAndPointerDecay_old::previsit( EnumDecl * enumDecl ) {
-                // Set the type of each member of the enumeration to be EnumConstant
-                for ( std::list< Declaration * >::iterator i = enumDecl->members.begin(); i != enumDecl->members.end(); ++i ) {
-                        ObjectDecl * obj = dynamic_cast< ObjectDecl * >( * i );
-                        assert( obj );
-                        obj->set_type( new EnumInstType( Type::Qualifiers( Type::Const ), enumDecl->name ) );
-                } // for
+        }
-        namespace {
-                template< typename DWTList >
-                void fixFunctionList( DWTList & dwts, bool isVarArgs, FunctionType * func ) {
-                        auto nvals = dwts.size();
-                        bool containsVoid = false;
-                        for ( auto & dwt : dwts ) {
-                                // fix each DWT and record whether a void was found
-                                containsVoid |= fixFunction( dwt );
+                        }
-                        // the only case in which "void" is valid is where it is the only one in the list
-                        if ( containsVoid && ( nvals > 1 || isVarArgs ) ) {
-                                SemanticError( func, "invalid type void in function type " );
+                        }
-                        // one void is the only thing in the list; remove it.
-                        if ( containsVoid ) {
-                                delete dwts.front();
-                                dwts.clear();
+                        }
+                }
+        }
-        void EnumAndPointerDecay_old::previsit( FunctionType * func ) {
-                // Fix up parameters and return types
-                fixFunctionList( func->parameters, func->isVarArgs, func );
-                fixFunctionList( func->returnVals, false, func );
+        }
-        LinkReferenceToTypes_old::LinkReferenceToTypes_old( const Indexer * other_indexer ) : WithIndexer( false ) {
-                if ( other_indexer ) {
-                        local_indexer = other_indexer;
-                } else {
-                        local_indexer = &indexer;
-                } // if
+        }
-        void LinkReferenceToTypes_old::postvisit( EnumInstType * enumInst ) {
-                const EnumDecl * st = local_indexer->lookupEnum( enumInst->name );
-                // it's not a semantic error if the enum is not found, just an implicit forward declaration
-                if ( st ) {
-                        enumInst->baseEnum = const_cast<EnumDecl *>(st); // Just linking in the node
-                } // if
-                if ( ! st || ! st->body ) {
-                        // use of forward declaration
-                        forwardEnums[ enumInst->name ].push_back( enumInst );
-                } // if
+        }
-        void LinkReferenceToTypes_old::postvisit( StructInstType * structInst ) {
-                const StructDecl * st = local_indexer->lookupStruct( structInst->name );
-                // it's not a semantic error if the struct is not found, just an implicit forward declaration
-                if ( st ) {
-                        structInst->baseStruct = const_cast<StructDecl *>(st); // Just linking in the node
-                } // if
-                if ( ! st || ! st->body ) {
-                        // use of forward declaration
-                        forwardStructs[ structInst->name ].push_back( structInst );
-                } // if
+        }
-        void LinkReferenceToTypes_old::postvisit( UnionInstType * unionInst ) {
-                const UnionDecl * un = local_indexer->lookupUnion( unionInst->name );
-                // it's not a semantic error if the union is not found, just an implicit forward declaration
-                if ( un ) {
-                        unionInst->baseUnion = const_cast<UnionDecl *>(un); // Just linking in the node
-                } // if
-                if ( ! un || ! un->body ) {
-                        // use of forward declaration
-                        forwardUnions[ unionInst->name ].push_back( unionInst );
-                } // if
+        }
-        void LinkReferenceToTypes_old::previsit( QualifiedType * ) {
-                visit_children = false;
+        }
-        void LinkReferenceToTypes_old::postvisit( QualifiedType * qualType ) {
-                // linking only makes sense for the 'oldest ancestor' of the qualified type
-                qualType->parent->accept( * visitor );
+        }
-        template< typename Decl >
-        void normalizeAssertions( std::list< Decl * > & assertions ) {
-                // ensure no duplicate trait members after the clone
-                auto pred = [](Decl * d1, Decl * d2) {
-                        // only care if they're equal
-                        DeclarationWithType * dwt1 = dynamic_cast<DeclarationWithType *>( d1 );
-                        DeclarationWithType * dwt2 = dynamic_cast<DeclarationWithType *>( d2 );
-                        if ( dwt1 && dwt2 ) {
-                                if ( dwt1->name == dwt2->name && ResolvExpr::typesCompatible( dwt1->get_type(), dwt2->get_type(), SymTab::Indexer() ) ) {
-                                        // std::cerr << "=========== equal:" << std::endl;
-                                        // std::cerr << "d1: " << d1 << std::endl;
-                                        // std::cerr << "d2: " << d2 << std::endl;
-                                        return false;
+                                }
+                        }
-                        return d1 < d2;
-                };
-                std::set<Decl *, decltype(pred)> unique_members( assertions.begin(), assertions.end(), pred );
-                // if ( unique_members.size() != assertions.size() ) {
-                //      std::cerr << "============different" << std::endl;
-                //      std::cerr << unique_members.size() << " " << assertions.size() << std::endl;
-                // }
-                std::list< Decl * > order;
-                order.splice( order.end(), assertions );
-                std::copy_if( order.begin(), order.end(), back_inserter( assertions ), [&]( Decl * decl ) {
-                        return unique_members.count( decl );
-                });
+        }
         // expand assertions from trait instance, performing the appropriate type variable substitutions
         template< typename Iterator >
 …
                 // substitute trait decl parameters for instance parameters
                 applySubstitution( inst->baseTrait->parameters.begin(), inst->baseTrait->parameters.end(), inst->parameters.begin(), asserts.begin(), asserts.end(), out );
+        }
-        void LinkReferenceToTypes_old::postvisit( TraitDecl * traitDecl ) {
-                if ( traitDecl->name == "sized" ) {
-                        // "sized" is a special trait - flick the sized status on for the type variable
-                        assertf( traitDecl->parameters.size() == 1, "Built-in trait 'sized' has incorrect number of parameters: %zd", traitDecl->parameters.size() );
-                        TypeDecl * td = traitDecl->parameters.front();
-                        td->set_sized( true );
+                }
-                // move assertions from type parameters into the body of the trait
-                for ( TypeDecl * td : traitDecl->parameters ) {
-                        for ( DeclarationWithType * assert : td->assertions ) {
-                                if ( TraitInstType * inst = dynamic_cast< TraitInstType * >( assert->get_type() ) ) {
-                                        expandAssertions( inst, back_inserter( traitDecl->members ) );
-                                } else {
-                                        traitDecl->members.push_back( assert->clone() );
+                                }
+                        }
-                        deleteAll( td->assertions );
-                        td->assertions.clear();
-                } // for
+        }
-        void LinkReferenceToTypes_old::postvisit( TraitInstType * traitInst ) {
-                // handle other traits
-                const TraitDecl * traitDecl = local_indexer->lookupTrait( traitInst->name );
-                if ( ! traitDecl ) {
-                        SemanticError( traitInst->location, "use of undeclared trait " + traitInst->name );
-                } // if
-                if ( traitDecl->parameters.size() != traitInst->parameters.size() ) {
-                        SemanticError( traitInst, "incorrect number of trait parameters: " );
-                } // if
-                traitInst->baseTrait = const_cast<TraitDecl *>(traitDecl); // Just linking in the node
-                // need to carry over the 'sized' status of each decl in the instance
-                for ( auto p : group_iterate( traitDecl->parameters, traitInst->parameters ) ) {
-                        TypeExpr * expr = dynamic_cast< TypeExpr * >( std::get<1>(p) );
-                        if ( ! expr ) {
-                                SemanticError( std::get<1>(p), "Expression parameters for trait instances are currently unsupported: " );
+                        }
-                        if ( TypeInstType * inst = dynamic_cast< TypeInstType * >( expr->get_type() ) ) {
-                                TypeDecl * formalDecl = std::get<0>(p);
-                                TypeDecl * instDecl = inst->baseType;
-                                if ( formalDecl->get_sized() ) instDecl->set_sized( true );
+                        }
+                }
-                // normalizeAssertions( traitInst->members );
+        }
-        void LinkReferenceToTypes_old::postvisit( EnumDecl * enumDecl ) {
-                // visit enum members first so that the types of self-referencing members are updated properly
-                if ( enumDecl->body ) {
-                        ForwardEnumsType::iterator fwds = forwardEnums.find( enumDecl->name );
-                        if ( fwds != forwardEnums.end() ) {
-                                for ( std::list< EnumInstType * >::iterator inst = fwds->second.begin(); inst != fwds->second.end(); ++inst ) {
-                                        (* inst)->baseEnum = enumDecl;
-                                } // for
-                                forwardEnums.erase( fwds );
-                        } // if
-                } // if
+        }
-        void LinkReferenceToTypes_old::renameGenericParams( std::list< TypeDecl * > & params ) {
-                // rename generic type parameters uniquely so that they do not conflict with user-defined function forall parameters, e.g.
-                //   forall(otype T)
-                //   struct Box {
-                //     T x;
-                //   };
-                //   forall(otype T)
-                //   void f(Box(T) b) {
-                //     ...
-                //   }
-                // The T in Box and the T in f are different, so internally the naming must reflect that.
-                GuardValue( inGeneric );
-                inGeneric = ! params.empty();
-                for ( TypeDecl * td : params ) {
-                        td->name = "__" + td->name + "_generic_";
+                }
+        }
-        void LinkReferenceToTypes_old::previsit( StructDecl * structDecl ) {
-                renameGenericParams( structDecl->parameters );
+        }
-        void LinkReferenceToTypes_old::previsit( UnionDecl * unionDecl ) {
-                renameGenericParams( unionDecl->parameters );
+        }
-        void LinkReferenceToTypes_old::postvisit( StructDecl * structDecl ) {
-                // visit struct members first so that the types of self-referencing members are updated properly
-                // xxx - need to ensure that type parameters match up between forward declarations and definition (most importantly, number of type parameters and their defaults)
-                if ( structDecl->body ) {
-                        ForwardStructsType::iterator fwds = forwardStructs.find( structDecl->name );
-                        if ( fwds != forwardStructs.end() ) {
-                                for ( std::list< StructInstType * >::iterator inst = fwds->second.begin(); inst != fwds->second.end(); ++inst ) {
-                                        (* inst)->baseStruct = structDecl;
-                                } // for
-                                forwardStructs.erase( fwds );
-                        } // if
-                } // if
+        }
-        void LinkReferenceToTypes_old::postvisit( UnionDecl * unionDecl ) {
-                if ( unionDecl->body ) {
-                        ForwardUnionsType::iterator fwds = forwardUnions.find( unionDecl->name );
-                        if ( fwds != forwardUnions.end() ) {
-                                for ( std::list< UnionInstType * >::iterator inst = fwds->second.begin(); inst != fwds->second.end(); ++inst ) {
-                                        (* inst)->baseUnion = unionDecl;
-                                } // for
-                                forwardUnions.erase( fwds );
-                        } // if
-                } // if
+        }
-        void LinkReferenceToTypes_old::postvisit( TypeInstType * typeInst ) {
-                // ensure generic parameter instances are renamed like the base type
-                if ( inGeneric && typeInst->baseType ) typeInst->name = typeInst->baseType->name;
-                if ( const NamedTypeDecl * namedTypeDecl = local_indexer->lookupType( typeInst->name ) ) {
-                        if ( const TypeDecl * typeDecl = dynamic_cast< const TypeDecl * >( namedTypeDecl ) ) {
-                                typeInst->set_isFtype( typeDecl->kind == TypeDecl::Ftype );
-                        } // if
-                } // if
+        }
 …
+                                                }
+                                        }
+                                }
+                        }
 …
         void ForallPointerDecay_old::previsit( UnionDecl * aggrDecl ) {
                 forallFixer( aggrDecl->parameters, aggrDecl );
+        }
-        void TraitExpander_old::previsit( FunctionType * ftype ) {
-                expandTraits( ftype->forall );
+        }
-        void TraitExpander_old::previsit( StructDecl * aggrDecl ) {
-                expandTraits( aggrDecl->parameters );
+        }
-        void TraitExpander_old::previsit( UnionDecl * aggrDecl ) {
-                expandTraits( aggrDecl->parameters );
+        }
-        void AssertionFixer_old::previsit( FunctionType * ftype ) {
-                fixAssertions( ftype->forall, ftype );
+        }
-        void AssertionFixer_old::previsit( StructDecl * aggrDecl ) {
-                fixAssertions( aggrDecl->parameters, aggrDecl );
+        }
-        void AssertionFixer_old::previsit( UnionDecl * aggrDecl ) {
-                fixAssertions( aggrDecl->parameters, aggrDecl );
+        }
-        void CheckOperatorTypes_old::previsit( ObjectDecl * object ) {
-                // ensure that operator names only apply to functions or function pointers
-                if ( CodeGen::isOperator( object->name ) && ! dynamic_cast< FunctionType * >( object->type->stripDeclarator() ) ) {
-                        SemanticError( object->location, toCString( "operator ", object->name.c_str(), " is not a function or function pointer." )  );
+                }
+        }
-        void FixUniqueIds_old::previsit( DeclarationWithType * decl ) {
-                decl->fixUniqueId();
+        }

src/SymTab/Validate.h

-              r29d8c02
+              r74ec742
 // Author           : Richard C. Bilson
 // Created On       : Sun May 17 21:53:34 2015
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Sat Jul 22 09:46:07 2017
 // Update Count     : 4
+// Last Modified By : Andrew Beach
+// Last Modified On : Tue May 17 14:35:00 2022
+// Update Count     : 5
 //
 …
         /// Normalizes struct and function declarations
         void validate( std::list< Declaration * > &translationUnit, bool doDebug = false );
-        void validateType( Type *type, const Indexer *indexer );
         // Sub-passes of validate.
 …
         void validate_E( std::list< Declaration * > &translationUnit );
         void validate_F( std::list< Declaration * > &translationUnit );
-        const ast::Type * validateType(
-                const CodeLocation & loc, const ast::Type * type, const ast::SymbolTable & symtab );
 } // namespace SymTab

src/SymTab/demangler.cc

r29d8c02	r74ec742
1		#include "~~Mangler~~.h"
	1	#include "Demangle.h"
2	2	#include <iostream>
3	3	#include <fstream>

src/SymTab/module.mk

-              r29d8c02
+              r74ec742
 ## Created On       : Mon Jun  1 17:49:17 2015
 ## Last Modified By : Andrew Beach
 ## Last Modified On : Thu Aug 10 16:08:00 2017
 ## Update Count     : 4
+## Last Modified On : Tue May 17 14:46:00 2022
+## Update Count     : 5
 ###############################################################################
 SRC_SYMTAB = \
       SymTab/Autogen.cc \
       SymTab/Autogen.h \
       SymTab/FixFunction.cc \
       SymTab/FixFunction.h \
       SymTab/Indexer.cc \
       SymTab/Indexer.h \
       SymTab/Mangler.cc \
       SymTab/ManglerCommon.cc \
       SymTab/Mangler.h \
       SymTab/Validate.cc \
       SymTab/Validate.h
+        SymTab/Autogen.cc \
+        SymTab/Autogen.h \
+        SymTab/FixFunction.cc \
+        SymTab/FixFunction.h \
+        SymTab/Indexer.cc \
+        SymTab/Indexer.h \
+        SymTab/Mangler.cc \
+        SymTab/ManglerCommon.cc \
+        SymTab/Mangler.h \
+        SymTab/ValidateType.cc \
+        SymTab/ValidateType.h
+SRC += $(SRC_SYMTAB)
+SRCDEMANGLE += $(SRC_SYMTAB) SymTab/Demangle.cc
+SRC += $(SRC_SYMTAB) \
+        SymTab/Validate.cc \
+        SymTab/Validate.h
+SRCDEMANGLE += $(SRC_SYMTAB) \
+        SymTab/Demangle.cc \
+        SymTab/Demangle.h

src/SynTree/module.mk

r29d8c02	r74ec742
24	24	SynTree/AttrType.cc \
25	25	SynTree/BaseSyntaxNode.h \
	26	SynTree/BaseSyntaxNode.cc \
26	27	SynTree/BasicType.cc \
27	28	SynTree/CommaExpr.cc \

src/Tuples/TupleExpansion.cc

-              r29d8c02
+              r74ec742
 // Author           : Rodolfo G. Esteves
 // Created On       : Mon May 18 07:44:20 2015
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Fri Dec 13 23:45:51 2019
 // Update Count     : 24
+// Last Modified By : Andrew Beach
+// Last Modified On : Tue May 17 15:02:00 2022
+// Update Count     : 25
 //
 …
                 return nullptr;
+        }
-        namespace {
-                /// determines if impurity (read: side-effects) may exist in a piece of code. Currently gives a very crude approximation, wherein any function call expression means the code may be impure
-                struct ImpurityDetector : public WithShortCircuiting {
-                        ImpurityDetector( bool ignoreUnique ) : ignoreUnique( ignoreUnique ) {}
-                        void previsit( const ApplicationExpr * appExpr ) {
-                                visit_children = false;
-                                if ( const DeclarationWithType * function = InitTweak::getFunction( appExpr ) ) {
-                                        if ( function->linkage == LinkageSpec::Intrinsic ) {
-                                                if ( function->name == "*?" || function->name == "?[?]" ) {
-                                                        // intrinsic dereference, subscript are pure, but need to recursively look for impurity
-                                                        visit_children = true;
-                                                        return;
+                                                }
+                                        }
+                                }
-                                maybeImpure = true;
+                        }
-                        void previsit( const UntypedExpr * ) { maybeImpure = true; visit_children = false; }
-                        void previsit( const UniqueExpr * ) {
-                                if ( ignoreUnique ) {
-                                        // bottom out at unique expression.
-                                        // The existence of a unique expression doesn't change the purity of an expression.
-                                        // That is, even if the wrapped expression is impure, the wrapper protects the rest of the expression.
-                                        visit_children = false;
-                                        return;
+                                }
+                        }
-                        bool maybeImpure = false;
-                        bool ignoreUnique;
-                };
-        } // namespace
-        bool maybeImpure( const Expression * expr ) {
-                PassVisitor<ImpurityDetector> detector( false );
-                expr->accept( detector );
-                return detector.pass.maybeImpure;
+        }
-        bool maybeImpureIgnoreUnique( const Expression * expr ) {
-                PassVisitor<ImpurityDetector> detector( true );
-                expr->accept( detector );
-                return detector.pass.maybeImpure;
+        }
 } // namespace Tuples

src/Tuples/Tuples.cc

-              r29d8c02
+              r74ec742
 // Created On       : Mon Jun 17 14:41:00 2019
 // Last Modified By : Andrew Beach
 // Last Modified On : Tue Jun 18  9:31:00 2019
 // Update Count     : 1
+// Last Modified On : Mon May 16 16:15:00 2022
+// Update Count     : 2
 //
 …
 #include "AST/Pass.hpp"
 #include "AST/LinkageSpec.hpp"
+#include "Common/PassVisitor.h"
 #include "InitTweak/InitTweak.h"
 …
 namespace {
+        /// Checks if impurity (read: side-effects) may exist in a piece of code (old-AST PassVisitor core).
+        /// Currently gives a very crude approximation, wherein any function
+        /// call expression means the code may be impure; the answer is left in `maybeImpure`.
+        struct ImpurityDetector_old : public WithShortCircuiting {
+                bool const ignoreUnique;  // when true, traversal stops at UniqueExpr nodes (see previsit below)
+                bool maybeImpure;  // result flag: starts false, set once a possibly-impure node is seen
+                ImpurityDetector_old( bool ignoreUnique ) :
+                        ignoreUnique( ignoreUnique ), maybeImpure( false )
+                {}
+                void previsit( const ApplicationExpr * appExpr ) {  // resolved calls: impure unless intrinsic `*?` or `?[?]`
+                        visit_children = false;  // default for this node: do not descend
+                        if ( const DeclarationWithType * function =
+                                        InitTweak::getFunction( appExpr ) ) {
+                                if ( function->linkage == LinkageSpec::Intrinsic ) {
+                                        if ( function->name == "*?" || function->name == "?[?]" ) {
+                                                // intrinsic dereference, subscript are pure,
+                                                // but need to recursively look for impurity
+                                                visit_children = true;
+                                                return;  // leaves maybeImpure untouched for this node
+                                        }
+                                }
+                        }
+                        maybeImpure = true;  // every other application is conservatively treated as impure
+                }
+                void previsit( const UntypedExpr * ) {  // untyped expressions are always flagged and not descended into
+                        maybeImpure = true;
+                        visit_children = false;
+                }
+                void previsit( const UniqueExpr * ) {  // only acts when ignoreUnique is set; otherwise a no-op
+                        if ( ignoreUnique ) {
+                                // bottom out at unique expression.
+                                // The existence of a unique expression doesn't change the purity of an expression.
+                                // That is, even if the wrapped expression is impure, the wrapper protects the rest of the expression.
+                                visit_children = false;
+                                return;
+                        }
+                }
+        };
+        bool detectImpurity( const Expression * expr, bool ignoreUnique ) {  // old-AST helper: run ImpurityDetector_old over expr
+                PassVisitor<ImpurityDetector_old> detector( ignoreUnique );  // ignoreUnique is forwarded to the detector's constructor
+                expr->accept( detector );
+                return detector.pass.maybeImpure;  // true if any visited node set the flag
+        }
         /// Determines if impurity (read: side-effects) may exist in a piece of code. Currently gives
         /// a very crude approximation, wherein any function call expression means the code may be
         /// impure.
     struct ImpurityDetector : public ast::WithShortCircuiting {
                 bool maybeImpure = false;
+                bool result = false;
                 void previsit( ast::ApplicationExpr const * appExpr ) {
 …
+                                }
+                        }
                         maybeImpure = true; visit_children = false;
+                        result = true; visit_children = false;
+                }
                 void previsit( ast::UntypedExpr const * ) {
                         maybeImpure = true; visit_children = false;
+                        result = true; visit_children = false;
+                }
         };
         struct ImpurityDetectorIgnoreUnique : public ImpurityDetector {
                 using ImpurityDetector::previsit;
 …
+                }
         };
-        template<typename Detector>
-        bool detectImpurity( const ast::Expr * expr ) {
-                ast::Pass<Detector> detector;
-                expr->accept( detector );
-                return detector.core.maybeImpure;
+        }
 } // namespace
 bool maybeImpure( const ast::Expr * expr ) {
         return detectImpurity<ImpurityDetector>( expr );
+        return ast::Pass<ImpurityDetector>::read( expr );
+}
 bool maybeImpureIgnoreUnique( const ast::Expr * expr ) {
+        return detectImpurity<ImpurityDetectorIgnoreUnique>( expr );
+        return ast::Pass<ImpurityDetectorIgnoreUnique>::read( expr );
+}
+bool maybeImpure( const Expression * expr ) {  // old-AST overload: detection with ignoreUnique = false
+        return detectImpurity( expr, false );
+}
+bool maybeImpureIgnoreUnique( const Expression * expr ) {  // old-AST overload: detection with ignoreUnique = true
+        return detectImpurity( expr, true );
+}

src/Tuples/module.mk

-              r29d8c02
+              r74ec742
 ## Author           : Richard C. Bilson
 ## Created On       : Mon Jun  1 17:49:17 2015
 ## Last Modified By : Henry Xue
 ## Last Modified On : Mon Aug 23 15:36:09 2021
 ## Update Count     : 2
+## Last Modified By : Andrew Beach
+## Last Modified On : Tue May 17 15:00:00 2022
+## Update Count     : 3
 ###############################################################################
 …
         Tuples/Tuples.h
+SRC += $(SRC_TUPLES)
-SRC += $(SRC_TUPLES)
 SRCDEMANGLE += $(SRC_TUPLES)

src/Validate/Autogen.cpp

-              r29d8c02
+              r74ec742
                 name,
                 std::move( type_params ),
+                std::move( assertions ),
                 std::move( params ),
                 std::move( returns ),
 …
                 // Auto-generated routines are inline to avoid conflicts.
                 ast::Function::Specs( ast::Function::Inline ) );
-        decl->assertions = std::move( assertions );
         decl->fixUniqueId();
         return decl;

src/Validate/ForallPointerDecay.cpp

-              r29d8c02
+              r74ec742
 // Created On       : Tue Dec  7 16:15:00 2021
 // Last Modified By : Andrew Beach
 // Last Modified On : Fri Feb 11 10:59:00 2022
 // Update Count     : 0
+// Last Modified On : Sat Apr 23 13:10:00 2022
+// Update Count     : 1
 //
 …
+}
+std::vector<ast::ptr<ast::DeclWithType>> expandAssertions(  // free-function entry point in namespace Validate
+                std::vector<ast::ptr<ast::DeclWithType>> const & old ) {
+        return TraitExpander::expandAssertions( old );  // delegates unchanged to the TraitExpander static helper
+}
 } // namespace Validate

src/Validate/ForallPointerDecay.hpp

-              r29d8c02
+              r74ec742
 // Created On       : Tue Dec  7 16:15:00 2021
 // Last Modified By : Andrew Beach
 // Last Modified On : Tue Dec  8 11:50:00 2021
 // Update Count     : 0
+// Last Modified On : Sat Apr 23 13:13:00 2022
+// Update Count     : 1
 //
 #pragma once
+#include <vector>
+#include "AST/Node.hpp"
 namespace ast {
+        class DeclWithType;
         class TranslationUnit;
+}
 …
 void decayForallPointers( ast::TranslationUnit & transUnit );
+/// Expand all traits in an assertion list.
+std::vector<ast::ptr<ast::DeclWithType>> expandAssertions(
+        std::vector<ast::ptr<ast::DeclWithType>> const & );
+}

src/Validate/GenericParameter.cpp

-              r29d8c02
+              r74ec742
 // Created On       : Fri Mar 21 10:02:00 2022
 // Last Modified By : Andrew Beach
 // Last Modified On : Wed Apr 13 10:09:00 2022
 // Update Count     : 0
+// Last Modified On : Fri Apr 22 16:43:00 2022
+// Update Count     : 1
 //
 …
 #include "AST/TranslationUnit.hpp"
 #include "AST/Type.hpp"
+#include "Validate/NoIdSymbolTable.hpp"
 namespace Validate {
 …
 // --------------------------------------------------------------------------
+// A SymbolTable that only has the operations used in the Translate Dimension
+// pass. More importantly, it doesn't have some methods that should not be
+// called by the Pass template (lookupId and addId).
+class NoIdSymbolTable {
+        ast::SymbolTable base;  // wrapped table; every public member below forwards to it
+public:
+#       define FORWARD_X( func, types_and_names, just_names ) \
+        inline auto func types_and_names -> decltype( base.func just_names ) { \
+                return base.func just_names ; \
+        }
+#       define FORWARD_0( func )         FORWARD_X( func, (),             () )  // forwarder taking no arguments
+#       define FORWARD_1( func, type )   FORWARD_X( func, (type arg),     (arg) )  // forwarder taking one argument
+#       define FORWARD_2( func, t0, t1 ) FORWARD_X( func, (t0 a0, t1 a1), (a0, a1) )  // forwarder taking two arguments
+        FORWARD_0( enterScope )
+        FORWARD_0( leaveScope )
+        FORWARD_1( lookupType, const std::string &        )
+        FORWARD_1( addType   , const ast::NamedTypeDecl * )
+        FORWARD_1( addStruct , const ast::StructDecl *    )
+        FORWARD_1( addEnum   , const ast::EnumDecl *      )
+        FORWARD_1( addUnion  , const ast::UnionDecl *     )
+        FORWARD_1( addTrait  , const ast::TraitDecl *     )
+        FORWARD_2( addWith   , const std::vector< ast::ptr<ast::Expr> > &, const ast::Decl * )
+};  // note: no lookupId / addId forwarders are generated, by design (see header comment)
+struct TranslateDimensionCore : public ast::WithGuards {
+        NoIdSymbolTable symtab;
+struct TranslateDimensionCore :
+                public WithNoIdSymbolTable, public ast::WithGuards {
         // SUIT: Struct- or Union- InstType

src/Validate/module.mk

-              r29d8c02
+              r74ec742
 ## Author           : Rob Schluntz
 ## Created On       : Fri Jul 27 10:10:10 2018
 ## Last Modified By : Rob Schluntz
 ## Last Modified On : Fri Jul 27 10:10:26 2018
 ## Update Count     : 2
+## Last Modified By : Andrew Beach
+## Last Modified On : Tue May 17 14:59:00 2022
+## Update Count     : 3
 ###############################################################################
 SRC_VALIDATE = \
+        Validate/FindSpecialDecls.cc \
+        Validate/FindSpecialDecls.h
+SRC += $(SRC_VALIDATE) \
         Validate/Autogen.cpp \
         Validate/Autogen.hpp \
         Validate/CompoundLiteral.cpp \
         Validate/CompoundLiteral.hpp \
+        Validate/EliminateTypedef.cpp \
+        Validate/EliminateTypedef.hpp \
+        Validate/FindSpecialDeclsNew.cpp \
+        Validate/FixQualifiedTypes.cpp \
+        Validate/FixQualifiedTypes.hpp \
         Validate/ForallPointerDecay.cpp \
         Validate/ForallPointerDecay.hpp \
 …
         Validate/HandleAttributes.cc \
         Validate/HandleAttributes.h \
+        Validate/HoistStruct.cpp \
+        Validate/HoistStruct.hpp \
         Validate/InitializerLength.cpp \
         Validate/InitializerLength.hpp \
         Validate/LabelAddressFixer.cpp \
         Validate/LabelAddressFixer.hpp \
+        Validate/NoIdSymbolTable.hpp \
         Validate/ReturnCheck.cpp \
+        Validate/ReturnCheck.hpp \
+        Validate/FindSpecialDeclsNew.cpp \
+        Validate/FindSpecialDecls.cc \
+        Validate/FindSpecialDecls.h
+        Validate/ReturnCheck.hpp
-SRC += $(SRC_VALIDATE)
 SRCDEMANGLE += $(SRC_VALIDATE)

src/Virtual/module.mk

-              r29d8c02
+              r74ec742
 ## Created On       : Tue Jul 25 10:18:00 2017
 ## Last Modified By : Andrew Beach
 ## Last Modified On : Tue Jul 25 10:18:00 2017
 ## Update Count     : 0
+## Last Modified On : Tue May 17 14:59:00 2022
+## Update Count     : 1
 ###############################################################################
+SRC += Virtual/ExpandCasts.cc Virtual/ExpandCasts.h \
+        Virtual/Tables.cc Virtual/Tables.h
+SRCDEMANGLE += Virtual/Tables.cc
+SRC += \
+        Virtual/ExpandCasts.cc \
+        Virtual/ExpandCasts.h \
+        Virtual/Tables.cc \
+        Virtual/Tables.h

src/main.cc

-              r29d8c02
+              r74ec742
 // Created On       : Fri May 15 23:12:02 2015
 // Last Modified By : Andrew Beach
 // Last Modified On : Wed Apr 13 11:11:00 2022
 // Update Count     : 672
+// Last Modified On : Fri Apr 29  9:52:00 2022
+// Update Count     : 673
 //
 …
 #include "ResolvExpr/Resolver.h"            // for resolve
 #include "SymTab/Validate.h"                // for validate
+#include "SymTab/ValidateType.h"            // for linkReferenceToTypes
 #include "SynTree/LinkageSpec.h"            // for Spec, Cforall, Intrinsic
 #include "SynTree/Declaration.h"            // for Declaration
 …
 #include "Tuples/Tuples.h"                  // for expandMemberTuples, expan...
 #include "Validate/Autogen.hpp"             // for autogenerateRoutines
+#include "Validate/CompoundLiteral.hpp"     // for handleCompoundLiterals
+#include "Validate/EliminateTypedef.hpp"    // for eliminateTypedef
+#include "Validate/FindSpecialDecls.h"      // for findGlobalDecls
+#include "Validate/FixQualifiedTypes.hpp"   // for fixQualifiedTypes
+#include "Validate/ForallPointerDecay.hpp"  // for decayForallPointers
 #include "Validate/GenericParameter.hpp"    // for fillGenericParameters, tr...
+#include "Validate/FindSpecialDecls.h"      // for findGlobalDecls
+#include "Validate/ForallPointerDecay.hpp"  // for decayForallPointers
+#include "Validate/CompoundLiteral.hpp"     // for handleCompoundLiterals
+#include "Validate/HoistStruct.hpp"         // for hoistStruct
 #include "Validate/InitializerLength.hpp"   // for setLengthFromInitializer
 #include "Validate/LabelAddressFixer.hpp"   // for fixLabelAddresses
 …
                 // add the assignment statement after the initialization of a type parameter
                 PASS( "Validate-A", SymTab::validate_A( translationUnit ) );
+                PASS( "Validate-B", SymTab::validate_B( translationUnit ) );
+                // Must happen before auto-gen, because it uses the sized flag.
+                PASS( "Link Reference To Types", SymTab::linkReferenceToTypes( translationUnit ) );
                 CodeTools::fillLocations( translationUnit );
 …
                         forceFillCodeLocations( transUnit );
+                        // Must happen after Link References To Types,
+                        // because aggregate members are accessed.
+                        PASS( "Fix Qualified Types", Validate::fixQualifiedTypes( transUnit ) );
+                        PASS( "Hoist Struct", Validate::hoistStruct( transUnit ) );
+                        PASS( "Eliminate Typedef", Validate::eliminateTypedef( transUnit ) );
                         // Check as early as possible. Can't happen before
 …
                         translationUnit = convert( move( transUnit ) );
                 } else {
+                        PASS( "Validate-B", SymTab::validate_B( translationUnit ) );
                         PASS( "Validate-C", SymTab::validate_C( translationUnit ) );
                         PASS( "Validate-D", SymTab::validate_D( translationUnit ) );

tests/.expect/forall.txt

r29d8c02	r74ec742
1		forall.cfa:242:25: warning: Compiled
	1	forall.cfa:244:25: warning: Compiled

tests/forall.cfa

-              r29d8c02
+              r74ec742
 //
+//
 // Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
 //
 // The contents of this file are covered under the licence agreement in the
 // file "LICENCE" distributed with Cforall.
 //
 // forall.cfa --
 //
+//
+// forall.cfa --
+//
 // Author           : Peter A. Buhr
 // Created On       : Wed May  9 08:48:15 2018
 …
 // Last Modified On : Sat Jun  5 10:06:08 2021
 // Update Count     : 36
 //
+//
 void g1() {
 …
+}
+typedef forall ( T ) int (* f)( int );
+// commented this out since it is not clearly meaningful
+// and not really representable in the ast
+// typedef forall ( T ) int (* f)( int );
 forall( T )
 …
+}
 forall( T | { T ?+?( T, T ); } ) forall( S | { T ?+?( T, S ); } )
+forall( T | { T ?+?( T, T ); } ) forall( S | { T ?+?( T, S ); } )
 struct XW { T t; };
 XW(int,int) xww;

tests/include/.expect/includes.nast.txt

r29d8c02	r74ec742
1		include/includes.cfa:153:25: warning: Compiled
	1	include/includes.cfa:169:25: warning: Compiled

tests/include/includes.cfa

-              r29d8c02
+              r74ec742
 // Created On       : Wed May 27 17:56:53 2015
 // Last Modified By : Peter A. Buhr
 // Last Modified On : Thu Feb  3 22:06:07 2022
 // Update Count     : 774
+// Last Modified On : Tue May 10 16:36:44 2022
+// Update Count     : 776
 //
 …
 #include <crypt.h>
 #include <ctype.h>
+//#include <curses.h>                                                                           // may not be installed
+#if __has_include( "curses.h" )
+#include <curses.h>                                                                             // may not be installed
+#endif
 #include <dirent.h>
 #include <dlfcn.h>
 …
 #include <errno.h>
 #include <error.h>
+//#include <eti.h>                                                                              // may not be installed, comes with ncurses
+#if __has_include( "eti.h" )
+#include <eti.h>                                                                                // may not be installed, comes with ncurses
+#endif
 #include <execinfo.h>
 #include <expat.h>
 …
 #include <fmtmsg.h>
 #include <fnmatch.h>
+//#include <form.h>                                                                             // may not be installed, comes with ncurses
+#if __has_include( "form.h" )
+#include <form.h>                                                                               // may not be installed, comes with ncurses
+#endif
 #include <fstab.h>
 #include <fts.h>
 …
 #include <mcheck.h>
 #include <memory.h>
+//#include <menu.h>                                                                             // may not be installed, comes with ncurses
+#if __has_include( "menu.h" )
+#include <menu.h>                                                                               // may not be installed, comes with ncurses
+#endif
 #include <mntent.h>
 #include <monetary.h>
 #include <mqueue.h>
+//#include <ncurses_dll.h>                                                              // may not be installed, comes with ncurses
+#if __has_include( "ncurses_dll.h" )
+#include <ncurses_dll.h>                                                                // may not be installed, comes with ncurses
+#endif
 #include <netdb.h>
 #include <nl_types.h>
 #include <nss.h>
 #include <obstack.h>
+//#include <panel.h>                                                                            // may not be installed, comes with ncurses
+#if __has_include( "panel.h" )
+#include <panel.h>                                                                              // may not be installed, comes with ncurses
+#endif
 #include <paths.h>
 #include <poll.h>
 …
 #include <syslog.h>
 #include <tar.h>
+//#include <term.h>                                                                             // may not be installed, comes with ncurses
+//#include <termcap.h>                                                                  // may not be installed, comes with ncurses
+#if __has_include( "term.h" )
+#include <term.h>                                                                               // may not be installed, comes with ncurses
+#include <termcap.h>                                                                    // may not be installed, comes with ncurses
+#endif
 #include <termio.h>
 #include <termios.h>
 …
 #include <ucontext.h>
 #include <ulimit.h>
+//#include <unctrl.h>                                                                           // may not be installed, comes with ncurses
+#if __has_include( "unctrl.h" )
+#include <unctrl.h>                                                                             // may not be installed, comes with ncurses
+#endif
 #include <unistd.h>
 #include <utime.h>

tests/pybin/settings.py

r29d8c02	r74ec742
201	201	global output_width
202	202	output_width = max(map(lambda t: len(t.target()), tests))
	203	# 35 is the maximum width of the name field before we get line wrapping.
	204	output_width = min(output_width, 35)

tests/pybin/test_run.py

-              r29d8c02
+              r74ec742
                 return os.path.normpath( os.path.join(settings.BUILDDIR, self.path, self.name) )
+        def format_target(self, width):  # render self.target() as a string fitted to a column of `width` characters
+                target = self.target()
+                length = len(target)
+                if length < width:  # shorter than the column: pad on the right with spaces up to `width`
+                        return '{0:{width}}'.format(target, width=width)
+                elif length == width:  # exact fit: return unchanged
+                        return target
+                else:  # too long: keep the tail and mark the dropped head with '...'
+                        return '...' + target[3-width:]  # for width > 3 this is exactly `width` chars; NOTE(review): for width <= 3 the index 3-width is >= 0, so the result is longer than `width`
         @staticmethod
         def valid_name(name):

tests/test.py

-              r29d8c02
+              r74ec742
         parser.add_argument('--install', help='Run all tests based on installed binaries or tree binaries', type=comma_separated(yes_no), default='no')
         parser.add_argument('--continue', help='When multiple specifications are passed (debug/install/arch), sets whether or not to continue if the last specification failed', type=yes_no, default='yes', dest='continue_')
         parser.add_argument('--timeout', help='Maximum duration in seconds after a single test is considered to have timed out', type=int, default=120)
+        parser.add_argument('--timeout', help='Maximum duration in seconds after a single test is considered to have timed out', type=int, default=180)
         parser.add_argument('--global-timeout', help='Maximum cumulative duration in seconds after the ALL tests are considered to have timed out', type=int, default=7200)
         parser.add_argument('--timeout-with-gdb', help='Instead of killing the command when it times out, orphan it and print process id to allow gdb to attach', type=yes_no, default="no")
 …
         try :
                 # print formatted name
                 name_txt = '{0:{width}}  '.format(t.target(), width=settings.output_width)
+                name_txt = t.format_target(width=settings.output_width) + '  '
                 retcode, error, duration = run_single_test(t)

tests/unified_locking/.expect/locks.txt

r29d8c02	r74ec742
23	23	Start Test 12: locked condition variable wait/notify with front()
24	24	Done Test 12
	25	Start Test 13: fast block lock and fast cond var single wait/notify
	26	Done Test 13

tests/unified_locking/locks.cfa

-              r29d8c02
+              r74ec742
 condition_variable( linear_backoff_then_block_lock ) c_l;
+fast_block_lock f;
+fast_cond_var( fast_block_lock ) f_c_f;
 thread T_C_M_WS1 {};
 …
+                }
                 unlock(l);
+        }
+}
+thread T_F_C_F_WS1 {};  // thread type used by Test 13 (fast_block_lock f with fast_cond_var f_c_f)
+void main( T_F_C_F_WS1 & this ) {  // thread body: num_times rounds of wait-or-notify on f_c_f under lock f
+        for (unsigned int i = 0; i < num_times; i++) {
+                lock(f);
+                if(empty(f_c_f) && i != num_times - 1) {  // nobody waiting and not the final round: become the waiter
+                        wait(f_c_f,f);  // no unlock(f) on this path; presumably wait() hands off/releases f -- TODO confirm against fast_cond_var
+                }else{  // a waiter exists, or this is the final round: notify instead of waiting
+                        notify_one(f_c_f);
+                        unlock(f);
+                }
+        }
+}
 …
+        }
         printf("Done Test 12\n");
+}
+        printf("Start Test 13: fast block lock and fast cond var single wait/notify\n");
+        {
+                T_F_C_F_WS1 t1[2];
+        }
+        printf("Done Test 13\n");
+}

Context Navigation

Legend:

Download in other formats: