| 1 | #include "rq_bench.hfa"
 | 
|---|
| 2 | 
 | 
|---|
// Per-thread benchmark state: one member of a ring of threads that pass a
// wake-up from semaphore to semaphore.
struct Partner {
	// Loop iterations completed by the owning thread (one per wait/post round).
	unsigned long long count;
	// Running sum of the values returned by wait( self ).
	unsigned long long blocks;
	// Semaphore the owning thread waits on.
	bench_sem self;
	// The `self` semaphore of another Partner; posted after each wait returns.
	bench_sem * next;
};
| 9 | 
 | 
|---|
// Default constructor: both statistics counters start at zero.
// `self` and `next` are not assigned in this body.
void ?{}( Partner & this ) {
	this.blocks = 0;
	this.count = 0;
}
| 13 | 
 | 
|---|
// Benchmark thread type. Its only field is a reference to the Partner record
// that the thread's main routine operates on.
thread BThrd {
	Partner & partner;
};
| 17 | 
 | 
|---|
// Construct a BThrd and bind it to the given Partner record.
//   this    - thread object being constructed
//   partner - state record for this thread; it is referenced, not copied
void ?{}( BThrd & this, Partner * partner ) {
	// Construct the underlying thread part, passing bench_cluster
	// (declared outside this file).
	((thread&)this){ bench_cluster };

	// Assigning through `&` sets what the reference field refers to.
	&this.partner = partner;
}
| 22 | 
 | 
|---|
// Destructor with an empty body; the parameter is declared `mutex`.
// NOTE(review): presumably the mutex qualifier is what makes destruction wait
// for the thread's main to finish -- confirm against the Cforall runtime docs.
void ^?{}( BThrd & mutex this ) {
}
| 24 | 
 | 
|---|
// Thread body: repeatedly wait on this thread's own semaphore, then post the
// semaphore stored in `next`, until the run's stop condition is met.
// The `with(thrd.partner)` clause opens the Partner fields
// (count, blocks, self, next) so they can be named directly.
void main( BThrd & thrd ) with(thrd.partner) {
	count = 0;
	for ( ;; ) {
		// Wait on our own semaphore and accumulate whatever wait() returns.
		blocks += wait( self );
		// Pass the wake-up along.
		post( *next );
		count += 1;

		// Clock mode ends on the shared `stop` flag; otherwise the loop ends
		// once stop_count iterations have been completed.
		if ( clock_mode ) {
			if ( stop ) break;
		} else if ( count >= stop_count ) break;
	}

	// Atomically decrement the shared threads_left counter.
	__atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST);
}
| 37 | 
 | 
|---|
// Entry point of the cycle benchmark.
//
// Creates nthreads * ring_size threads. Thread i posts the semaphore of thread
// (i + nthreads) % tthreads, so the threads form `nthreads` separate rings of
// `ring_size` members each. One post per ring starts the run; after the run
// ends every thread is posted once more, joined, and its counters are summed
// into the printed report.
//
// Options: the shared BENCH_OPT set plus -r/--ringsize (default 2).
// Returns 0 on success, 1 if the configuration yields zero threads.
int main(int argc, char * argv[]) {
	unsigned ring_size = 2;
	cfa_option opt[] = {
		BENCH_OPT,
		{ 'r', "ringsize", "Number of threads in a cycle", ring_size }
	};
	BENCH_OPT_PARSE("cforall cycle benchmark");

	{
		unsigned long long global_counter = 0;
		unsigned long long global_blocks  = 0;
		unsigned tthreads = nthreads * ring_size;

		// With zero threads nothing can be measured, and the per-thread
		// average below would be an integer division by zero.
		if ( tthreads == 0 ) {
			fprintf(stderr, "Number of threads and ring size must both be at least 1\n");
			return 1;
		}

		Time start, end;
		BenchCluster bc = { nprocs };
		{
			threads_left = tthreads;
			BThrd **  threads = alloc(tthreads);
			Partner * thddata = alloc(tthreads);

			// Construct every Partner and link it to the member `nthreads`
			// slots further on, wrapping around at the end of the array.
			for(i; tthreads) {
				(thddata[i]){};
				unsigned pi = (i + nthreads) % tthreads;
				thddata[i].next = &thddata[pi].self;
			}

			// Allocate each thread and construct it in place on its Partner.
			for(i; tthreads) {
				threads[i] = malloc();
				(*threads[i]){ &thddata[i] };
			}
			printf("Starting\n");

			bool is_tty = isatty(STDOUT_FILENO);
			start = timeHiRes();

			// Post the first `nthreads` semaphores: one starting post per ring.
			for(i; nthreads) {
				post( thddata[i].self );
			}

			// NOTE(review): wait(Time, bool) is defined outside this file;
			// presumably it returns once the run is over -- confirm in rq_bench.hfa.
			wait(start, is_tty);

			stop = true;
			end = timeHiRes();
			printf("\nDone\n");

			// Post each thread once more so a thread still waiting can leave
			// its loop, then join it and collect its counters.
			for(i; tthreads) {
				post( thddata[i].self );
				Partner & partner = join( *threads[i] ).partner;
				global_counter += partner.count;
				global_blocks  += partner.blocks;
				delete(threads[i]);
			}
			free(threads);
			free(thddata);
		}

		// tthreads and ring_size are unsigned, hence %u.
		// NOTE(review): nprocs is declared outside this file, so its
		// specifier is left as it was -- confirm its type.
		printf("Duration (ms)        : %'lf\n", (end - start)`dms);
		printf("Number of processors : %'d\n", nprocs);
		printf("Number of threads    : %'u\n", tthreads);
		printf("Cycle size (# thrds) : %'u\n", ring_size);
		printf("Total Operations(ops): %'15llu\n", global_counter);
		printf("Total blocks         : %'15llu\n", global_blocks);
		printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / (end - start)`ds);
		printf("ns per ops           : %'18.2lf\n", (end - start)`dns / global_counter);
		printf("Ops per threads      : %'15llu\n", global_counter / tthreads);
		printf("Ops per procs        : %'15llu\n", global_counter / nprocs);
		printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / (end - start)`ds);
		printf("ns per ops/procs     : %'18.2lf\n", (end - start)`dns / (global_counter / nprocs));
		fflush(stdout);
	}

	return 0;
}