- Timestamp:
- May 20, 2022, 10:36:45 AM (4 years ago)
- Branches:
- ADT, ast-experimental, master, pthread-emulation, qualifiedEnum
- Children:
- 25fa20a
- Parents:
- 29d8c02 (diff), 7831e8fb (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)links above to see all the changes relative to each parent. - Location:
- benchmark
- Files:
-
- 3 added
- 1 deleted
- 16 edited
-
Cargo.toml.in (modified) (1 diff)
-
Makefile.am (modified) (1 diff)
-
c.c (deleted)
-
plot.py (modified) (7 diffs)
-
process-mutilate.py (modified) (8 diffs)
-
readyQ/bench.go (modified) (1 diff)
-
readyQ/churn.cfa (modified) (8 diffs)
-
readyQ/churn.cpp (added)
-
readyQ/churn.go (added)
-
readyQ/churn.rs (added)
-
readyQ/cycle.cpp (modified) (1 diff)
-
readyQ/rq_bench.hpp (modified) (1 diff)
-
readyQ/transfer.cfa (modified) (6 diffs)
-
readyQ/transfer.cpp (modified) (8 diffs)
-
readyQ/transfer.go (modified) (11 diffs)
-
readyQ/transfer.rs (modified) (10 diffs)
-
readyQ/yield.cfa (modified) (1 diff)
-
readyQ/yield.cpp (modified) (1 diff)
-
readyQ/yield.rs (modified) (1 diff)
-
rmit.py (modified) (6 diffs)
Legend:
- Unmodified
- Added
- Removed
-
benchmark/Cargo.toml.in
r29d8c02 r74ec742 4 4 authors = ["Cforall"] 5 5 edition = "2018" 6 7 [[bin]] 8 name = "rdq-churn-tokio" 9 path = "@abs_srcdir@/readyQ/churn.rs" 6 10 7 11 [[bin]] -
benchmark/Makefile.am
r29d8c02 r74ec742 614 614 RDQBENCHES = \ 615 615 rdq-churn-cfa \ 616 rdq-churn-tokio \ 617 rdq-churn-go \ 618 rdq-churn-fibre \ 616 619 rdq-cycle-cfa \ 617 620 rdq-cycle-tokio \ -
benchmark/plot.py
r29d8c02 r74ec742 22 22 23 23 class Field: 24 def __init__(self, unit, _min, _log ):24 def __init__(self, unit, _min, _log, _name=None): 25 25 self.unit = unit 26 26 self.min = _min 27 27 self.log = _log 28 self.name = _name 28 29 29 30 field_names = { … … 32 33 "Ops per procs" : Field('Ops' , 0, False), 33 34 "Ops per threads" : Field('Ops' , 0, False), 34 "ns per ops/procs" : Field(' ns' , 0, False),35 "Number of threads" : Field(' thrd', 1, False),35 "ns per ops/procs" : Field('' , 0, False, _name = "Latency (ns $/$ (Processor $\\times$ Operation))" ), 36 "Number of threads" : Field('' , 1, False), 36 37 "Total Operations(ops)" : Field('Ops' , 0, False), 37 38 "Ops/sec/procs" : Field('Ops' , 0, False), 38 39 "Total blocks" : Field('Blocks', 0, False), 39 "Ops per second" : Field(' Ops' , 0, False),40 "Ops per second" : Field('' , 0, False), 40 41 "Cycle size (# thrds)" : Field('thrd' , 1, False), 41 42 "Duration (ms)" : Field('ms' , 0, False), 42 "Target QPS" : Field('QPS' , 0, False), 43 "Actual QPS" : Field('QPS' , 0, False), 43 "Target QPS" : Field('' , 0, False), 44 "Actual QPS" : Field('' , 0, False), 45 "Average Read Latency" : Field('us' , 0, True), 44 46 "Median Read Latency" : Field('us' , 0, True), 45 47 "Tail Read Latency" : Field('us' , 0, True), 48 "Average Update Latency": Field('us' , 0, True), 46 49 "Median Update Latency" : Field('us' , 0, True), 47 50 "Tail Update Latency" : Field('us' , 0, True), 51 "Update Ratio" : Field('\%' , 0, False), 48 52 } 49 53 50 def plot(in_data, x, y, o ut):54 def plot(in_data, x, y, options): 51 55 fig, ax = plt.subplots() 52 56 colors = itertools.cycle(['#0095e3','#006cb4','#69df00','#0aa000','#fb0300','#e30002','#fd8f00','#ff7f00','#8f00d6','#4b009a','#ffff00','#b13f00']) … … 92 96 print("Making Plots") 93 97 94 for name, data in s eries.items():98 for name, data in sorted(series.items()): 95 99 _col = next(colors) 96 100 plt.scatter(data['x'], data['y'], color=_col, label=name, marker='x') … … 106 110 print("Finishing Plots") 107 111 108 plt.ylabel( y)112 plt.ylabel(field_names[y].name if field_names[y].name else y) 109 113 # plt.xticks(range(1, math.ceil(mx) + 1)) 110 plt.xlabel( x)114 plt.xlabel(field_names[x].name if field_names[x].name else x) 111 115 plt.grid(b = True) 112 116 ax.xaxis.set_major_formatter( EngFormatter(unit=field_names[x].unit) ) 113 if field_names[x].log: 117 if options.logx: 118 ax.set_xscale('log') 119 elif field_names[x].log: 114 120 ax.set_xscale('log') 115 121 else: … … 117 123 118 124 ax.yaxis.set_major_formatter( EngFormatter(unit=field_names[y].unit) ) 119 if field_names[y].log: 125 if options.logy: 126 ax.set_yscale('log') 127 elif field_names[y].log: 120 128 ax.set_yscale('log') 121 129 else: 122 plt.ylim(field_names[y].min, my*1.2)130 plt.ylim(field_names[y].min, options.MaxY if options.MaxY else my*1.2) 123 131 124 132 plt.legend(loc='upper left') 125 133 126 134 print("Results Ready") 127 if o ut:128 plt.savefig(o ut)135 if options.out: 136 plt.savefig(options.out, bbox_inches='tight') 129 137 else: 130 138 plt.show() … … 139 147 parser.add_argument('-y', nargs='?', type=str, default="", help="Which field to use as the Y axis") 140 148 parser.add_argument('-x', nargs='?', type=str, default="", help="Which field to use as the X axis") 149 parser.add_argument('--logx', action='store_true', help="if set, makes the x-axis logscale") 150 parser.add_argument('--logy', action='store_true', help="if set, makes the y-axis logscale") 151 parser.add_argument('--MaxY', nargs='?', type=int, help="maximum value of the y-axis") 141 152 142 153 options = parser.parse_args() … … 182 193 183 194 184 plot(data, wantx, wanty, options .out)195 plot(data, wantx, wanty, options) -
benchmark/process-mutilate.py
r29d8c02 r74ec742 14 14 parser = argparse.ArgumentParser(description='Python Script to convert output from mutilate to rmit like output') 15 15 parser.add_argument('--out', nargs='?', type=argparse.FileType('w'), default=sys.stdout) 16 parser.add_argument('--var', nargs='?', type=str, default='Target QPS') 16 17 try: 17 18 options = parser.parse_args() … … 31 32 32 33 try: 34 latAvs = fields[1] 33 35 lat50s = fields[6] 34 36 lat99s = fields[9] … … 37 39 38 40 try: 41 latAv = locale.atof(latAvs) 39 42 lat50 = locale.atof(lat50s) 40 43 lat99 = locale.atof(lat99s) … … 42 45 raise Warning("Warning: \"{}\" \"{}\"! can't convert to float".format(lat50s, lat99s)) 43 46 44 return lat 50, lat9947 return latAv, lat50, lat99 45 48 46 49 def want0(line): … … 58 61 try: 59 62 if line.startswith("read"): 60 rlat 50, rlat99 = precentile(line)63 rlatAv, rlat50, rlat99 = precentile(line) 61 64 62 65 elif line.startswith("update"): 63 ulat 50, ulat99 = precentile(line)66 ulatAv, ulat50, ulat99 = precentile(line) 64 67 65 68 elif line.startswith("Total QPS"): … … 84 87 85 88 try: 89 out['Average Read Latency'] = rlatAv 86 90 out['Median Read Latency'] = rlat50 87 91 out['Tail Read Latency'] = rlat99 … … 90 94 91 95 try: 96 out['Average Update Latency'] = ulatAv 92 97 out['Median Update Latency'] = ulat50 93 98 out['Tail Update Latency'] = ulat99 … … 112 117 continue 113 118 114 d = { 'Target QPS': int(rate) }119 d = { options.var : int(rate) } 115 120 116 121 w = extract( f, d ) -
benchmark/readyQ/bench.go
r29d8c02 r74ec742 71 71 duration = 5 72 72 clock_mode = true 73 fmt.Printf("Running for %f seconds \n", duration)73 fmt.Printf("Running for %f seconds (default)\n", duration) 74 74 } 75 75 -
benchmark/readyQ/churn.cfa
r29d8c02 r74ec742 1 1 #include "rq_bench.hfa" 2 2 3 #include <locks.hfa> 4 3 5 unsigned spot_cnt = 2; 4 bench_sem * volatile * spots;6 semaphore * spots; 5 7 6 8 thread BThrd { 7 9 unsigned long long count; 8 10 unsigned long long blocks; 9 b ench_sem sem;11 bool skip; 10 12 }; 11 13 … … 14 16 this.count = 0; 15 17 this.blocks = 0; 18 this.skip = false; 16 19 } 17 20 … … 19 22 20 23 void main( BThrd & this ) with( this ) { 21 wait( sem);24 park(); 22 25 for() { 23 uint32_t r = prng(); 24 bench_sem * next = __atomic_exchange_n(&spots[r % spot_cnt], &sem, __ATOMIC_SEQ_CST); 25 if(next) post( *next ); 26 blocks += wait( sem ); 26 uint32_t r = prng(this); 27 semaphore & sem = spots[r % spot_cnt]; 28 if(!skip) V( sem ); 29 blocks += P( sem ); 30 skip = false; 31 27 32 count ++; 28 33 if( clock_mode && stop) break; … … 39 44 { 's', "spots", "Number of spots in the system", spot_cnt } 40 45 }; 41 BENCH_OPT_PARSE("cforall c yclebenchmark");46 BENCH_OPT_PARSE("cforall churn benchmark"); 42 47 43 48 { … … 49 54 spots = aalloc(spot_cnt); 50 55 for(i; spot_cnt) { 51 spots[i] = 0p;56 (spots[i]){ 0 }; 52 57 } 53 58 … … 55 60 BThrd * threads[nthreads]; 56 61 for(i; nthreads ) { 57 threads[i] = malloc(); 58 (*threads[i]){}; 62 BThrd & t = *(threads[i] = malloc()); 63 (t){}; 64 t.skip = i < spot_cnt; 59 65 } 60 66 printf("Starting\n"); … … 64 70 65 71 for(i; nthreads) { 66 post( threads[i]->sem);72 unpark( *threads[i] ); 67 73 } 68 74 wait(start, is_tty); … … 72 78 printf("\nDone\n"); 73 79 80 for(i; spot_cnt) { 81 for(10000) V( spots[i] ); 82 } 83 74 84 for(i; nthreads) { 75 post( threads[i]->sem );76 85 BThrd & thrd = join( *threads[i] ); 77 86 global_counter += thrd.count; -
benchmark/readyQ/cycle.cpp
r29d8c02 r74ec742 46 46 } 47 47 for(unsigned i = 0; i < tthreads; i++) { 48 threads[i] = new Fibre( reinterpret_cast<void (*)(void *)>(partner_main), &thddata[i] ); 48 threads[i] = new Fibre(); 49 threads[i]->run( partner_main, &thddata[i] ); 49 50 } 50 51 printf("Starting\n"); -
benchmark/readyQ/rq_bench.hpp
r29d8c02 r74ec742 143 143 } 144 144 145 if(strcmp(arg, "Y") == 0) { 146 value = true; 147 return true; 148 } 149 150 if(strcmp(arg, "y") == 0) { 151 value = true; 152 return true; 153 } 154 145 155 if(strcmp(arg, "no") == 0) { 156 value = false; 157 return true; 158 } 159 160 if(strcmp(arg, "N") == 0) { 161 value = false; 162 return true; 163 } 164 165 if(strcmp(arg, "n") == 0) { 146 166 value = false; 147 167 return true; -
benchmark/readyQ/transfer.cfa
r29d8c02 r74ec742 14 14 15 15 bool exhaust = false; 16 volatile bool estop = false; 17 16 18 17 19 thread$ * the_main; … … 35 37 static void waitgroup() { 36 38 Time start = timeHiRes(); 37 for(i; nthreads) {39 OUTER: for(i; nthreads) { 38 40 PRINT( sout | "Waiting for :" | i | "(" | threads[i]->idx | ")"; ) 39 41 while( threads[i]->idx != lead_idx ) { … … 42 44 print_stats_now( bench_cluster, CFA_STATS_READY_Q | CFA_STATS_IO ); 43 45 serr | "Programs has been blocked for more than 5 secs"; 44 exit(1); 46 estop = true; 47 unpark( the_main ); 48 break OUTER; 45 49 } 46 50 } … … 59 63 static void lead(MyThread & this) { 60 64 this.idx = ++lead_idx; 61 if(lead_idx > stop_count ) {65 if(lead_idx > stop_count || estop) { 62 66 PRINT( sout | "Leader" | this.id | "done"; ) 63 67 unpark( the_main ); … … 100 104 wait( this ); 101 105 } 102 if(lead_idx > stop_count ) break;106 if(lead_idx > stop_count || estop) break; 103 107 } 104 108 } … … 172 176 sout | "Number of processors : " | nprocs; 173 177 sout | "Number of threads : " | nthreads; 174 sout | "Total Operations(ops) : " | stop_count;178 sout | "Total Operations(ops) : " | lead_idx - 1; 175 179 sout | "Threads parking on wait : " | (exhaust ? "yes" : "no"); 176 180 sout | "Rechecking : " | rechecks; 181 sout | "ns per transfer : " | (end - start)`dms / lead_idx; 177 182 178 183 -
benchmark/readyQ/transfer.cpp
r29d8c02 r74ec742 12 12 13 13 bool exhaust = false; 14 volatile bool estop = false; 14 15 15 16 bench_sem the_main; … … 42 43 if( to_miliseconds(timeHiRes() - start) > 5'000 ) { 43 44 std::cerr << "Programs has been blocked for more than 5 secs" << std::endl; 44 std::exit(1); 45 estop = true; 46 the_main.post(); 47 goto END; 45 48 } 46 49 } 47 50 } 51 END:; 48 52 PRINT( std::cout | "Waiting done"; ) 49 53 } … … 59 63 void lead() { 60 64 this->idx = ++lead_idx; 61 if(lead_idx > stop_count ) {65 if(lead_idx > stop_count || estop) { 62 66 PRINT( std::cout << "Leader " << this->id << " done" << std::endl; ) 63 67 the_main.post(); … … 88 92 } 89 93 90 static void main( void * arg) {91 MyThread & self = * reinterpret_cast<MyThread*>(arg);94 static void main(MyThread * arg) { 95 MyThread & self = *arg; 92 96 self.park(); 93 97 … … 101 105 self.wait(); 102 106 } 103 if(lead_idx > stop_count ) break;107 if(lead_idx > stop_count || estop) break; 104 108 } 105 109 } … … 144 148 for(size_t i = 0; i < nthreads; i++) { 145 149 threads[i] = new MyThread( i ); 146 handles[i] = new Fibre( MyThread::main, threads[i] ); 150 handles[i] = new Fibre(); 151 handles[i]->run( MyThread::main, threads[i] ); 147 152 } 148 153 … … 164 169 PRINT( std::cout << i << " joined" << std::endl; ) 165 170 rechecks += thrd.rechecks; 166 // delete( handles[i] );167 171 delete( threads[i] ); 168 172 } … … 176 180 std::cout << "Number of processors : " << nprocs << std::endl; 177 181 std::cout << "Number of threads : " << nthreads << std::endl; 178 std::cout << "Total Operations(ops) : " << stop_count<< std::endl;182 std::cout << "Total Operations(ops) : " << (lead_idx - 1) << std::endl; 179 183 std::cout << "Threads parking on wait : " << (exhaust ? "yes" : "no") << std::endl; 180 184 std::cout << "Rechecking : " << rechecks << std::endl; 185 std::cout << "ns per transfer : " << std::fixed << (((double)(end - start)) / (lead_idx)) << std::endl; 181 186 182 187 -
benchmark/readyQ/transfer.go
r29d8c02 r74ec742 6 6 "math/rand" 7 7 "os" 8 "regexp" 8 9 "runtime" 9 10 "sync/atomic" … … 16 17 id uint64 17 18 idx uint64 19 estop uint64 18 20 seed uint64 19 21 } … … 34 36 35 37 func NewLeader(size uint64) (*LeaderInfo) { 36 this := &LeaderInfo{0, 0, uint64(os.Getpid())}38 this := &LeaderInfo{0, 0, 0, uint64(os.Getpid())} 37 39 38 40 r := rand.Intn(10) … … 51 53 } 52 54 53 func waitgroup( idx uint64, threads [] MyThread) {55 func waitgroup(leader * LeaderInfo, idx uint64, threads [] MyThread, main_sem chan struct {}) { 54 56 start := time.Now() 57 Outer: 55 58 for i := 0; i < len(threads); i++ { 56 59 // fmt.Fprintf(os.Stderr, "Waiting for :%d (%d)\n", threads[i].id, atomic.LoadUint64(&threads[i].idx) ); … … 61 64 if delta.Seconds() > 5 { 62 65 fmt.Fprintf(os.Stderr, "Programs has been blocked for more than 5 secs") 63 os.Exit(1) 66 atomic.StoreUint64(&leader.estop, 1); 67 main_sem <- (struct {}{}) 68 break Outer 64 69 } 65 70 } … … 74 79 if i != me { 75 80 // debug!( "Leader waking {}", i); 81 defer func() { 82 if err := recover(); err != nil { 83 fmt.Fprintf(os.Stderr, "Panic occurred: %s\n", err) 84 } 85 }() 76 86 threads[i].sem <- (struct {}{}) 77 87 } … … 84 94 atomic.StoreUint64(&leader.idx, nidx); 85 95 86 if nidx > stop_count {96 if nidx > stop_count || atomic.LoadUint64(&leader.estop) != 0 { 87 97 // debug!( "Leader {} done", this.id); 88 98 main_sem <- (struct {}{}) … … 92 102 // debug!( "====================\nLeader no {} : {}", nidx, this.id); 93 103 94 waitgroup( nidx, threads);104 waitgroup(leader, nidx, threads, main_sem); 95 105 96 106 leader.next( uint64(len(threads)) ); … … 146 156 waitleader( exhaust, leader, &threads[me], &r ) 147 157 } 148 if atomic.LoadUint64(&leader.idx) > stop_count { break; }158 if atomic.LoadUint64(&leader.idx) > stop_count || atomic.LoadUint64(&leader.estop) != 0 { break; } 149 159 } 150 160 … … 155 165 func main() { 156 166 // Benchmark specific command line arguments 157 exhaustOpt := flag. Bool("e", false, "Whether or not threads that have seen the new epoch should park instead of yielding.")167 exhaustOpt := flag.String("e", "no", "Whether or not threads that have seen the new epoch should park instead of yielding.") 158 168 159 169 // General benchmark initialization and deinitialization 160 defer bench_init()() 161 162 exhaust := *exhaustOpt; 170 bench_init() 171 172 exhaustVal := *exhaustOpt; 173 174 var exhaust bool 175 re_yes := regexp.MustCompile("[Yy]|[Yy][Ee][Ss]") 176 re_no := regexp.MustCompile("[Nn]|[Nn][Oo]") 177 if re_yes.Match([]byte(exhaustVal)) { 178 exhaust = true 179 } else if re_no.Match([]byte(exhaustVal)) { 180 exhaust = false 181 } else { 182 fmt.Fprintf(os.Stderr, "Unrecognized exhaust(-e) option '%s'\n", exhaustVal) 183 os.Exit(1) 184 } 185 163 186 if clock_mode { 164 fmt.Fprintf(os.Stderr, "Programs does not support fixed duration mode ")187 fmt.Fprintf(os.Stderr, "Programs does not support fixed duration mode\n") 165 188 os.Exit(1) 166 189 } … … 215 238 ws = "no" 216 239 } 217 p.Printf("Duration (ms) : % f\n", delta.Milliseconds() )240 p.Printf("Duration (ms) : %d\n", delta.Milliseconds() ) 218 241 p.Printf("Number of processors : %d\n", nprocs ) 219 242 p.Printf("Number of threads : %d\n", nthreads ) 220 p.Printf("Total Operations(ops) : %15d\n", stop_count)243 p.Printf("Total Operations(ops) : %15d\n", (leader.idx - 1) ) 221 244 p.Printf("Threads parking on wait : %s\n", ws) 222 245 p.Printf("Rechecking : %d\n", rechecks ) 223 } 246 p.Printf("ns per transfer : %f\n", float64(delta.Nanoseconds()) / float64(leader.idx) ) 247 } -
benchmark/readyQ/transfer.rs
r29d8c02 r74ec742 6 6 use std::hint; 7 7 use std::sync::Arc; 8 use std::sync::atomic::{Atomic Usize, Ordering};8 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; 9 9 use std::time::{Instant,Duration}; 10 10 … … 44 44 match val { 45 45 "yes" => true, 46 "Y" => true, 47 "y" => true, 46 48 "no" => false, 49 "N" => false, 50 "n" => false, 47 51 "maybe" | "I don't know" | "Can you repeat the question?" => { 48 52 eprintln!("Lines for 'Malcolm in the Middle' are not acceptable values of parameter 'exhaust'"); … … 64 68 id: AtomicUsize, 65 69 idx: AtomicUsize, 70 estop: AtomicBool, 66 71 seed: u128, 67 72 } … … 72 77 id: AtomicUsize::new(nthreads), 73 78 idx: AtomicUsize::new(0), 79 estop: AtomicBool::new(false), 74 80 seed: process::id() as u128 75 81 }; … … 100 106 } 101 107 102 fn waitgroup( idx: usize, threads: &Vec<Arc<MyThread>>) {108 fn waitgroup(leader: &LeaderInfo, idx: usize, threads: &Vec<Arc<MyThread>>, main_sem: &sync::Semaphore) { 103 109 let start = Instant::now(); 104 for t in threads {110 'outer: for t in threads { 105 111 debug!( "Waiting for :{} ({})", t.id, t.idx.load(Ordering::Relaxed) ); 106 112 while t.idx.load(Ordering::Relaxed) != idx { … … 108 114 if start.elapsed() > Duration::from_secs(5) { 109 115 eprintln!("Programs has been blocked for more than 5 secs"); 110 std::process::exit(1); 116 leader.estop.store(true, Ordering::Relaxed); 117 main_sem.add_permits(1); 118 break 'outer; 111 119 } 112 120 } … … 131 139 leader.idx.store(nidx, Ordering::Relaxed); 132 140 133 if nidx as u64 > exp.stop_count {141 if nidx as u64 > exp.stop_count || leader.estop.load(Ordering::Relaxed) { 134 142 debug!( "Leader {} done", this.id); 135 143 main_sem.add_permits(1); … … 139 147 debug!( "====================\nLeader no {} : {}", nidx, this.id); 140 148 141 waitgroup( nidx, threads);149 waitgroup(leader, nidx, threads, main_sem); 142 150 143 151 leader.next( threads.len() ); … … 192 200 wait( exhaust, &leader, &threads[me], &mut rechecks ).await; 193 201 } 194 if leader.idx.load(Ordering::Relaxed) as u64 > exp.stop_count { break; }202 if leader.idx.load(Ordering::Relaxed) as u64 > exp.stop_count || leader.estop.load(Ordering::Relaxed) { break; } 195 203 } 196 204 … … 273 281 println!("Number of processors : {}", (nprocs).to_formatted_string(&Locale::en)); 274 282 println!("Number of threads : {}", (nthreads).to_formatted_string(&Locale::en)); 275 println!("Total Operations(ops) : {:>15}", ( exp.stop_count).to_formatted_string(&Locale::en));283 println!("Total Operations(ops) : {:>15}", (leader.idx.load(Ordering::Relaxed) - 1).to_formatted_string(&Locale::en)); 276 284 println!("Threads parking on wait : {}", if exhaust { "yes" } else { "no" }); 277 285 println!("Rechecking : {}", rechecks ); 278 } 286 println!("ns per transfer : {}", ((duration.as_nanos() as f64) / leader.idx.load(Ordering::Relaxed) as f64)); 287 288 } -
benchmark/readyQ/yield.cfa
r29d8c02 r74ec742 1 #include <stdlib.h> 2 #include <stdio.h> 3 #include <string.h> 4 #include <limits.h> 5 6 extern "C" { 7 #include <locale.h> 8 #include <getopt.h> 9 } 10 11 #include <unistd.h> 12 13 #include <clock.hfa> 14 #include <time.hfa> 15 #include <stats.hfa> 16 17 #include "../benchcltr.hfa" 18 19 extern bool traceHeapOn(); 20 21 22 volatile bool run = false; 23 volatile unsigned long long global_counter; 1 #include "rq_bench.hfa" 24 2 25 3 thread __attribute__((aligned(128))) Yielder { 26 unsigned long long count er;4 unsigned long long count; 27 5 }; 28 6 void ?{}( Yielder & this ) { 29 this.counter = 0;30 ((thread&)this){ "Yielder Thread", *the_benchmark_cluster };7 ((thread&)this){ "Yielder Thread", bench_cluster }; 8 this.count = 0; 31 9 } 32 10 33 11 void main( Yielder & this ) { 34 12 park(); 35 /* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) ); 13 for() { 14 yield(); 15 this.count++; 16 if( clock_mode && stop) break; 17 if(!clock_mode && this.count >= stop_count) break; 18 } 36 19 37 while(__atomic_load_n(&run, __ATOMIC_RELAXED)) { 38 yield(); 39 this.counter++; 40 } 41 __atomic_fetch_add(&global_counter, this.counter, __ATOMIC_SEQ_CST); 20 __atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST); 42 21 } 43 22 44 23 int main(int argc, char * argv[]) { 45 unsigned num_io = 1;46 io_context_params params;47 48 24 cfa_option opt[] = { 49 BENCH_OPT _CFA25 BENCH_OPT 50 26 }; 51 int opt_cnt = sizeof(opt) / sizeof(cfa_option); 52 53 char **left; 54 parse_args( argc, argv, opt, opt_cnt, "[OPTIONS]...\ncforall yield benchmark", left ); 27 BENCH_OPT_PARSE("cforall yield benchmark"); 55 28 56 29 { 57 printf("Running %d threads on %d processors for %f seconds\n", nthreads, nprocs, duration);30 unsigned long long global_counter = 0; 58 31 59 32 Time start, end; 60 BenchCluster cl = { num_io, params, CFA_STATS_READY_Q};33 BenchCluster bc = { nprocs }; 61 34 { 62 BenchProc procs[nprocs]; 63 { 64 Yielder threads[nthreads]; 65 printf("Starting\n"); 35 threads_left = nthreads; 36 Yielder threads[nthreads]; 37 printf("Starting\n"); 66 38 67 bool is_tty = isatty(STDOUT_FILENO); 68 start = timeHiRes(); 69 run = true; 39 bool is_tty = isatty(STDOUT_FILENO); 40 start = timeHiRes(); 70 41 71 for(i; nthreads) {72 unpark( threads[i] );73 }74 wait(duration, start, end, is_tty);42 for(i; nthreads) { 43 unpark( threads[i] ); 44 } 45 wait(start, is_tty); 75 46 76 run = false; 77 end = timeHiRes(); 78 printf("\nDone\n"); 47 stop = true; 48 end = timeHiRes(); 49 printf("\nDone\n"); 50 51 for(i; nthreads) { 52 Yielder & y = join( threads[i] ); 53 global_counter += y.count; 79 54 } 80 55 } 81 56 82 printf("Duration (ms) : %'ld\n", (end - start)`dms); 83 printf("Number of processors: %'d\n", nprocs); 84 printf("Number of threads : %'d\n", nthreads); 85 printf("Total yields : %'15llu\n", global_counter); 86 printf("Yields per second : %'18.2lf\n", ((double)global_counter) / (end - start)`s); 87 printf("ns per yields : %'18.2lf\n", ((double)(end - start)`ns) / global_counter); 88 printf("Yields per procs : %'15llu\n", global_counter / nprocs); 89 printf("Yields/sec/procs : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`s); 90 printf("ns per yields/procs : %'18.2lf\n", ((double)(end - start)`ns) / (global_counter / nprocs)); 57 printf("Duration (ms) : %'ld\n", (end - start)`dms); 58 printf("Number of processors : %'d\n", nprocs); 59 printf("Number of threads : %'d\n", nthreads); 60 printf("Total Operations(ops): %'15llu\n", global_counter); 61 printf("Ops per second : %'18.2lf\n", ((double)global_counter) / (end - start)`s); 62 printf("ns per ops : %'18.2lf\n", (end - start)`dns / global_counter); 63 printf("Ops per threads : %'15llu\n", global_counter / nthreads); 64 printf("Ops per procs : %'15llu\n", global_counter / nprocs); 65 printf("Ops/sec/procs : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`s); 66 printf("ns per ops/procs : %'18.2lf\n", (end - start)`dns / (global_counter / nprocs)); 91 67 fflush(stdout); 92 68 } -
benchmark/readyQ/yield.cpp
r29d8c02 r74ec742 1 #include <cassert> 2 #include <cstdlib> 3 #include <cstdio> 4 #include <cstring> 5 #include <climits> 6 7 extern "C" { 8 #include <locale.h> 9 #include <getopt.h> 10 } 11 12 #include <unistd.h> 13 14 #include <chrono> 15 16 using Clock = std::chrono::high_resolution_clock; 17 using duration_t = std::chrono::duration<double>; 18 using std::chrono::nanoseconds; 19 20 21 template<typename Ratio, typename T> 22 T duration_cast(T seconds) { 23 return std::chrono::duration_cast<std::chrono::duration<T, Ratio>>(std::chrono::duration<T>(seconds)).count(); 24 } 1 #include "rq_bench.hpp" 2 #include <libfibre/fibre.h> 25 3 26 4 volatile bool run = false; 27 5 volatile unsigned long long global_counter; 28 6 29 #include "libfibre/fibre.h"30 7 31 FredBarrier * barrier; 32 struct __attribute__((aligned(128))) counter_t { 33 int value = 0; 34 }; 8 void fibre_main() { 9 fibre_park(); 10 unsigned long long count = 0; 11 for(;;) { 12 Fibre::yield(); 13 count++; 14 if( clock_mode && stop) break; 15 if(!clock_mode && count >= stop_count) break; 16 } 35 17 36 void fibre_main( counter_t * counter ) { 37 barrier->wait(); 38 // /* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) ); 39 40 while(__atomic_load_n(&run, __ATOMIC_RELAXED)) { 41 Fibre::forceYield(); 42 // fibre_yield(); 43 counter->value++; 44 } 45 __atomic_fetch_add(&global_counter, counter->value, __ATOMIC_SEQ_CST); 18 __atomic_fetch_add(&global_counter, count, __ATOMIC_SEQ_CST); 19 __atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST); 46 20 } 47 21 48 22 int main(int argc, char * argv[]) { 49 double duration = 5; 50 int nprocs = 1; 51 int nthreads = 1; 52 53 std::cout.imbue(std::locale("")); 54 setlocale(LC_ALL, ""); 55 56 for(;;) { 57 static struct option options[] = { 58 {"duration", required_argument, 0, 'd'}, 59 {"nprocs", required_argument, 0, 'p'}, 60 {"nthreads", required_argument, 0, 't'}, 61 {0, 0, 0, 0} 62 }; 63 64 int idx = 0; 65 int opt = getopt_long(argc, argv, "d:p:t:", options, &idx); 66 67 const char * arg = optarg ? optarg : ""; 68 char * end; 69 switch(opt) { 70 case -1: 71 goto run; 72 // Numeric Arguments 73 case 'd': 74 duration = strtod(arg, &end); 75 if(*end != '\0') { 76 fprintf(stderr, "Duration must be a valid double, was %s\n", arg); 77 goto usage; 78 } 79 break; 80 case 't': 81 nthreads = strtoul(arg, &end, 10); 82 if(*end != '\0' || nthreads < 1) { 83 fprintf(stderr, "Number of threads must be a positive integer, was %s\n", arg); 84 goto usage; 85 } 86 break; 87 case 'p': 88 nprocs = strtoul(arg, &end, 10); 89 if(*end != '\0' || nprocs < 1) { 90 fprintf(stderr, "Number of processors must be a positive integer, was %s\n", arg); 91 goto usage; 92 } 93 break; 94 // Other cases 95 default: /* ? */ 96 fprintf( stderr, "Unkown option '%c'\n", opt); 97 usage: 98 fprintf( stderr, "Usage: %s [options]\n", argv[0]); 99 fprintf( stderr, "\n" ); 100 fprintf( stderr, " -d, --duration=DURATION Duration of the experiment, in seconds\n" ); 101 fprintf( stderr, " -t, --nthreads=NTHREADS Number of kernel threads\n" ); 102 fprintf( stderr, " -q, --nqueues=NQUEUES Number of queues per threads\n" ); 103 exit(1); 104 } 105 } 106 run: 23 option_t opt[] = { 24 BENCH_OPT 25 }; 26 BENCH_OPT_PARSE("libfibre yield benchmark"); 107 27 108 28 { 109 29 printf("Running %d threads on %d processors for %lf seconds\n", nthreads, nprocs, duration); 110 30 111 FibreInit( );112 barrier = new FredBarrier(nthreads + 1);31 FibreInit(1, nprocs); 32 uint64_t start, end; 113 33 { 114 Context::CurrCluster().addWorkers(nprocs); 115 { 116 counter_t counters[nthreads]; 117 Fibre threads[nthreads]; 118 for(int i = 0; i < nthreads; i++) { 119 threads[i].run(fibre_main, &counters[i]); 120 } 121 printf("Starting\n"); 122 bool is_tty = isatty(STDOUT_FILENO); 123 auto before = Clock::now(); 124 run = true; 34 threads_left = nthreads; 35 Fibre * threads[nthreads]; 36 for(unsigned i = 0; i < nthreads; i++) { 37 threads[i] = new Fibre(); 38 threads[i]->run(fibre_main); 39 } 40 printf("Starting\n"); 41 bool is_tty = isatty(STDOUT_FILENO); 42 start = timeHiRes(); 125 43 126 barrier->wait(); 127 for(;;) { 128 usleep(500'000); 129 auto now = Clock::now(); 130 duration_t durr = now - before; 131 if( durr.count() > duration ) { 132 break; 133 } 134 if(is_tty) { 135 std::cout << "\r" << std::setprecision(4) << durr.count(); 136 std::cout.flush(); 137 } 138 } 44 for(unsigned i = 0; i < nthreads; i++ ) { 45 fibre_unpark( threads[i] ); 46 } 47 wait<Fibre>(start, is_tty); 139 48 140 auto after = Clock::now(); 141 duration_t durr = after - before; 142 duration = durr.count(); 143 run = false; 144 printf("\nDone\n"); 145 for(auto & thread : threads) { 146 thread.join(); 147 } 148 149 // for(const auto & counter : counters) { 150 // std::cout << counter.value << std::endl; 151 // } 49 stop = true; 50 end = timeHiRes(); 51 for(unsigned i = 0; i < nthreads; i++ ) { 52 fibre_join( threads[i], nullptr ); 152 53 } 153 54 } 154 55 155 auto dur_nano = duration_cast<std::nano>(duration);156 auto dur_dms = duration_cast<std::milli>(duration);157 158 printf(" Duration (ms) : %'.2lf\n", dur_dms);159 printf(" Total yields : %'15llu\n", global_counter);160 printf(" Yields per procs : %'15llu\n", global_counter / nprocs);161 printf(" Yields per second : %'18.2lf\n", ((double)global_counter) / duration);162 printf(" Yields/sec/procs : %'18.2lf\n", (((double)global_counter) / nprocs) / duration);163 printf(" ns per yields : %'18.2lf\n", dur_nano / global_counter);164 printf("ns per yields/procs : %'18.2lf\n", dur_nano / (global_counter / nprocs));165 56 printf("Duration (ms) : %'ld\n", to_miliseconds(end - start)); 57 printf("Number of processors : %'d\n", nprocs); 58 printf("Number of threads : %'d\n", nthreads); 59 printf("Total Operations(ops): %'15llu\n", global_counter); 60 printf("Ops per second : %'18.2lf\n", ((double)global_counter) / to_fseconds(end - start)); 61 printf("ns per ops : %'18.2lf\n", ((double)(end - start)) / global_counter); 62 printf("Ops per threads : %'15llu\n", global_counter / nthreads); 63 printf("Ops per procs : %'15llu\n", global_counter / nprocs); 64 printf("Ops/sec/procs : %'18.2lf\n", (((double)global_counter) / nprocs) / to_fseconds(end - start)); 65 printf("ns per ops/procs : %'18.2lf\n", ((double)(end - start)) / (global_counter / nprocs)); 66 fflush(stdout); 166 67 } 167 68 } -
benchmark/readyQ/yield.rs
r29d8c02 r74ec742 90 90 }); 91 91 92 println!("Duration (ms) : {}", (duration.as_millis()).to_formatted_string(&Locale::en));93 println!("Number of processors : {}", (nprocs).to_formatted_string(&Locale::en));94 println!("Number of threads : {}", (nthreads).to_formatted_string(&Locale::en));95 println!("Total yields: {:>15}", (global_counter).to_formatted_string(&Locale::en));96 println!(" Yields per second: {:>15}", (((global_counter as f64) / duration.as_secs() as f64) as u64).to_formatted_string(&Locale::en));97 println!("ns per yields: {:>15}", ((duration.as_nanos() as f64 / global_counter as f64) as u64).to_formatted_string(&Locale::en));98 println!(" Yields per threads: {:>15}", (global_counter / nthreads as u64).to_formatted_string(&Locale::en));99 println!(" Yields per procs: {:>15}", (global_counter / nprocs as u64).to_formatted_string(&Locale::en));100 println!(" Yields/sec/procs: {:>15}", ((((global_counter as f64) / nprocs as f64) / duration.as_secs() as f64) as u64).to_formatted_string(&Locale::en));101 println!("ns per yields/procs: {:>15}", ((duration.as_nanos() as f64 / (global_counter as f64 / nprocs as f64)) as u64).to_formatted_string(&Locale::en));92 println!("Duration (ms) : {}", (duration.as_millis()).to_formatted_string(&Locale::en)); 93 println!("Number of processors : {}", (nprocs).to_formatted_string(&Locale::en)); 94 println!("Number of threads : {}", (nthreads).to_formatted_string(&Locale::en)); 95 println!("Total Operations(ops): {:>15}", (global_counter).to_formatted_string(&Locale::en)); 96 println!("Ops per second : {:>15}", (((global_counter as f64) / duration.as_secs() as f64) as u64).to_formatted_string(&Locale::en)); 97 println!("ns per ops : {:>15}", ((duration.as_nanos() as f64 / global_counter as f64) as u64).to_formatted_string(&Locale::en)); 98 println!("Ops per threads : {:>15}", (global_counter / nthreads as u64).to_formatted_string(&Locale::en)); 99 println!("Ops per procs : {:>15}", (global_counter / nprocs as u64).to_formatted_string(&Locale::en)); 100 println!("Ops/sec/procs : {:>15}", ((((global_counter as f64) / nprocs as f64) / duration.as_secs() as f64) as u64).to_formatted_string(&Locale::en)); 101 println!("ns per ops/procs : {:>15}", ((duration.as_nanos() as f64 / (global_counter as f64 / nprocs as f64)) as u64).to_formatted_string(&Locale::en)); 102 102 } -
benchmark/rmit.py
r29d8c02 r74ec742 46 46 pass 47 47 48 if re.search("^[0-9-,]+$", values): 48 if values.startswith('\\'): 49 return key, values[1:].split(',') 50 elif re.search("^[0-9-,]+$", values): 49 51 values = parse_range(values) 50 52 return key, [v for v in values] … … 63 65 return eval(fmt) 64 66 67 # Evaluate all the options 68 # options can be of the for key = val or key = some_math(other_key) 69 # produce a list of all the options to replace some_math(other_key) with actual value 65 70 def eval_options(opts): 71 # Find all the options with dependencies 66 72 dependents = [d for d in opts.values() if type(d) is DependentOpt] 73 74 # we need to find all the straglers 67 75 processed = [] 68 nopts = [] 76 77 # extract all the necessary inputs 78 input_keys = {} 69 79 for d in dependents: 80 # Mark the dependent as seen 70 81 processed.append(d.key) 71 lists = [] 82 83 # process each of the dependencies 72 84 for dvar in d.vars: 85 # Check that it depends on something that exists 73 86 if not dvar in opts.keys(): 74 87 print('ERROR: extra pattern option {}:{} uses unknown key {}'.format(d.key,d.value,dvar), file=sys.stderr) 75 88 sys.exit(1) 76 89 77 lists.append([(dvar, o) for o in opts[dvar]]) 90 # Check that it's not nested 91 if type(dvar) is DependentOpt: 92 print('ERROR: dependent options cannot be nested {}:{} uses key {}'.format(d.key,d.value,dvar), file=sys.stderr) 93 sys.exit(1) 94 95 # Add the values to the input keys 96 if dvar not in input_keys: 97 input_keys[dvar] = opts[dvar] 98 else : 99 if input_keys[dvar] != opts[dvar]: 100 print('INTERNAL ERROR: repeat input do not match {}:{} vs {}'.format(dvar,opts[dvar],input_keys[dvar]), file=sys.stderr) 101 sys.exit(1) 102 103 # Mark the input as seen 78 104 processed.append(dvar) 79 105 80 kopt = [] 81 for vals in list(itertools.product(*lists)): 82 res = ['-{}'.format(d.key), "{}".format(eval_one(d.value, vals))] 83 for k, v in vals: 84 res.extend(['-{}'.format(k), "{}".format(v)]) 85 kopt.append(res) 86 nopts.append(kopt) 87 88 89 for k, vals in opts.items(): 90 if k not in processed: 91 kopt = [] 92 for v in vals: 93 kopt.append(['-{}'.format(k), "{}".format(v)]) 94 nopts.append(kopt) 95 96 return nopts 106 # add in all the straglers they should cause too many problems 107 for k, v in opts.items(): 108 if type(v) is DependentOpt: 109 continue 110 111 if k in processed: 112 # consistency check 113 if k not in input_keys: 114 print('INTERNAL ERROR: key \'{}\' marked as processed but not in input_keys'.format(k), file=sys.stderr) 115 sys.exit(1) 116 continue 117 118 # consistency check 119 if k in input_keys: 120 print('INTERNAL ERROR: key \'{}\' in input_keys but not marked as processed'.format(k), file=sys.stderr) 121 sys.exit(1) 122 123 # add the straggler 124 input_keys[k] = v 125 126 # flatten the dict into a list of pairs so it's easier to work with 127 input_list = [] 128 for k, v in input_keys.items(): 129 input_list.append([(k, o) for o in v]) 130 131 # evaluate all the dependents 132 # they are not allowed to produce new values so it's a one-to-one mapping from here 133 evaluated = [] 134 for inputs in list(itertools.product(*input_list)): 135 this_eval = list(inputs) 136 for d in dependents: 137 this_eval.append((d.key, eval_one(d.value, inputs))) 138 139 evaluated.append(this_eval) 140 141 # reformat everything to a list of arguments 142 formated = [] 143 for o in evaluated: 144 inner = [] 145 for k,v in o: 146 inner.append("-{}".format(k)) 147 inner.append("{}".format(v)) 148 149 # print(inner) 150 formated.append(inner) 151 152 return formated 97 153 98 154 # returns the first option with key 'opt' … … 122 178 known_hosts = { 123 179 "jax": { 124 range( 1, 2 4) : "48-71",125 range( 25, 4 8) : "48-71,144-167",126 range( 49, 9 6) : "48-95,144-191",127 range( 97, 14 4) : "24-95,120-191",128 range(145, 19 2) : "0-95,96-191",180 range( 1, 25) : "48-71", 181 range( 25, 49) : "48-71,144-167", 182 range( 49, 97) : "48-95,144-191", 183 range( 97, 145) : "24-95,120-191", 184 range(145, 193) : "0-95,96-191", 129 185 }, 130 186 } … … 184 240 185 241 except: 186 print('ERROR: invalid arguments', file=sys.stderr)187 parser.print_help(sys.stderr)188 242 sys.exit(1) 189 243 … … 215 269 # Figure out all the combinations to run 216 270 actions = [] 217 for p in itertools.product(range(options.trials), commands, *opts):271 for p in itertools.product(range(options.trials), commands, opts): 218 272 act = [p[1]] 219 273 for o in p[2:]: … … 281 335 282 336 if options.file != sys.stdout: 283 print("Done ");")337 print("Done ")
Note:
See TracChangeset
for help on using the changeset viewer.