// Yield micro-benchmark: a set of user-level threads repeatedly call yield()
// for a fixed duration, and the total number of yields performed is reported.

#include "rq_bench.hfa"

extern bool traceHeapOn();

// Start/stop flag: set to true by main() before the workers are unparked and
// set back to false to make them leave their yield loop.
volatile bool run = false;

// Sum of all per-thread yield counts; each Yielder adds its tally exactly once,
// right before its thread main returns.
volatile unsigned long long global_counter;

// Worker thread holding its own private yield counter.
// NOTE(review): the 128-byte alignment presumably keeps the per-thread counters
// on separate cache lines — confirm.
thread __attribute__((aligned(128))) Yielder {
	unsigned long long counter;
};

// Constructor: zero the private counter and construct the underlying thread
// with the name "Yielder Thread".
void ?{}( Yielder & this ) {
	this.counter = 0;
	((thread&)this){ "Yielder Thread" };
}

// Thread body: park until main() unparks this thread, then yield in a loop,
// counting iterations, for as long as `run` stays true.
void main( Yielder & this ) {
	park();
	/* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) );

	while(__atomic_load_n(&run, __ATOMIC_RELAXED)) {
		yield();
		this.counter++;
	}

	// Publish the private tally into the shared total.
	__atomic_fetch_add(&global_counter, this.counter, __ATOMIC_SEQ_CST);
}

// Program entry point: parse the benchmark options, run the yielders for the
// requested duration and print the throughput statistics.
int main(int argc, char * argv[]) {
	// NOTE(review): BENCH_OPT / BENCH_OPT_PARSE come from rq_bench.hfa and
	// presumably provide nthreads, nprocs and duration — confirm there.
	cfa_option opt[] = {
		BENCH_OPT
	};
	BENCH_OPT_PARSE("cforall yield benchmark");

	{
		printf("Running %d threads on %d processors for %f seconds\n", nthreads, nprocs, duration);

		Time start, end;
		BenchCluster bc = { nprocs };
		{
			Yielder threads[nthreads];
			printf("Starting\n");

			bool is_tty = isatty(STDOUT_FILENO);
			start = timeHiRes();
			run = true;

			// Release every worker; each one is parked at the top of its main.
			for(i; nthreads) {
				unpark( threads[i] );
			}

			// NOTE(review): wait() is defined outside this file; presumably it
			// blocks for the configured benchmark duration — confirm.
			wait(start, is_tty);

			run = false;
			end = timeHiRes();
			printf("\nDone\n");
			// `threads` goes out of scope here. global_counter is only read
			// after this point, which relies on every worker having finished
			// (presumably guaranteed by the thread destructors — confirm).
		}

		// Elapsed time as floating-point values. Seconds are derived from the
		// nanosecond reading so that the throughput figures are not computed
		// from a whole-second (truncated) duration, which would be inexact and
		// would be zero for runs shorter than one second.
		const double elapsed_ns = (end - start)`dns;
		const double elapsed_s  = elapsed_ns / 1e9;

		// `dms is a floating-point accessor; convert explicitly so the argument
		// matches the %'ld conversion instead of passing a double to it.
		printf("Duration (ms) : %'ld\n", (long)((end - start)`dms));
		printf("Number of processors : %'d\n", nprocs);
		printf("Number of threads : %'d\n", nthreads);
		printf("Total Operations(ops): %'15llu\n", global_counter);
		printf("Ops per second : %'18.2lf\n", ((double)global_counter) / elapsed_s);
		printf("ns per ops : %'18.2lf\n", elapsed_ns / global_counter);
		printf("Ops per threads : %'15llu\n", global_counter / nthreads);
		printf("Ops per procs : %'15llu\n", global_counter / nprocs);
		printf("Ops/sec/procs : %'18.2lf\n", (((double)global_counter) / nprocs) / elapsed_s);
		printf("ns per ops/procs : %'18.2lf\n", elapsed_ns / (global_counter / nprocs));
		fflush(stdout);
	}
}