#include "rq_bench.hpp"

#include <pthread.h>
#include <sched.h>
#include <semaphore.h>
#include <unistd.h>

#include <vector>
 | 
|---|
| 7 | 
 | 
|---|
// Sleep policy for plain pthreads: exposes the POSIX usleep as a static
// member so it can be supplied as a template argument (see wait<Pthread>
// in main).
struct Pthread {
	// Suspends the calling thread for `usec` microseconds and returns
	// whatever ::usleep reports.
	static int usleep(useconds_t usec) {
		const int status = ::usleep(usec);
		return status;
	}
};
 | 
|---|
| 13 | 
 | 
|---|
// Per-thread state for one participant in a semaphore ring.
struct Partner {
	// Number of wait/post rounds this partner has completed
	// (incremented by partner_main).
	unsigned long long count  = 0;
	// Aggregated into the "Total blocks" figure by main.
	// NOTE(review): nothing visible in this file ever increments it — confirm intent.
	unsigned long long blocks = 0;
	// Semaphore this partner waits on; must be sem_init'ed before use.
	sem_t self;
	// Semaphore of the successor in the ring, posted after every wake-up.
	// Defaults to nullptr so an unlinked partner is detectable instead of
	// carrying an indeterminate pointer.
	sem_t * next = nullptr;
};
 | 
|---|
| 20 | 
 | 
|---|
| 21 | void partner_main( Partner * self ) {
 | 
|---|
| 22 |         self->count = 0;
 | 
|---|
| 23 |         for(;;) {
 | 
|---|
| 24 |                 sem_wait(&self->self);
 | 
|---|
| 25 |                 sem_post(self->next);
 | 
|---|
| 26 |                 self->count ++;
 | 
|---|
| 27 |                 if( clock_mode && stop) break;
 | 
|---|
| 28 |                 if(!clock_mode && self->count >= stop_count) break;
 | 
|---|
| 29 | 
 | 
|---|
| 30 |                 int sval;
 | 
|---|
| 31 |                 sem_getvalue(&self->self, &sval);
 | 
|---|
| 32 |                 if(sval > 1) std::abort();
 | 
|---|
| 33 |                 if(sval < 0) std::abort();
 | 
|---|
| 34 |         }
 | 
|---|
| 35 | 
 | 
|---|
| 36 |         __atomic_fetch_add(&threads_left, -1, __ATOMIC_SEQ_CST);
 | 
|---|
| 37 | }
 | 
|---|
| 38 | 
 | 
|---|
| 39 | int main(int argc, char * argv[]) {
 | 
|---|
| 40 |         unsigned ring_size = 2;
 | 
|---|
| 41 |         option_t opt[] = {
 | 
|---|
| 42 |                 BENCH_OPT,
 | 
|---|
| 43 |                 { 'r', "ringsize", "Number of threads in a cycle", ring_size }
 | 
|---|
| 44 |         };
 | 
|---|
| 45 |         BENCH_OPT_PARSE("cforall cycle benchmark");
 | 
|---|
| 46 | 
 | 
|---|
| 47 |         {
 | 
|---|
| 48 |                 unsigned long long global_counter = 0;
 | 
|---|
| 49 |                 unsigned long long global_blocks  = 0;
 | 
|---|
| 50 |                 unsigned tthreads = nthreads * ring_size;
 | 
|---|
| 51 |                 uint64_t start, end;
 | 
|---|
| 52 | 
 | 
|---|
| 53 |                 {
 | 
|---|
| 54 |                         cpu_set_t cpuset;
 | 
|---|
| 55 |                         int ret = pthread_getaffinity_np( pthread_self(), sizeof(cpuset), &cpuset );
 | 
|---|
| 56 |                         if(ret != 0) std::abort();
 | 
|---|
| 57 | 
 | 
|---|
| 58 |                         unsigned cnt = CPU_COUNT_S(sizeof(cpuset), &cpuset);
 | 
|---|
| 59 |                         if(cnt > nprocs) {
 | 
|---|
| 60 |                                 unsigned extras = cnt - nprocs;
 | 
|---|
| 61 |                                 for(int i = 0; i < CPU_SETSIZE && extras > 0; i++) {
 | 
|---|
| 62 |                                         if(CPU_ISSET_S(i, sizeof(cpuset), &cpuset)) {
 | 
|---|
| 63 |                                                 CPU_CLR_S(i, sizeof(cpuset), &cpuset);
 | 
|---|
| 64 |                                                 extras--;
 | 
|---|
| 65 |                                         }
 | 
|---|
| 66 |                                 }
 | 
|---|
| 67 | 
 | 
|---|
| 68 |                                 ret = pthread_setaffinity_np( pthread_self(), sizeof(cpuset), &cpuset );
 | 
|---|
| 69 |                                 if(ret != 0) std::abort();
 | 
|---|
| 70 |                         }
 | 
|---|
| 71 |                 }
 | 
|---|
| 72 | 
 | 
|---|
| 73 |                 {
 | 
|---|
| 74 |                         threads_left = tthreads;
 | 
|---|
| 75 |                         pthread_t threads[tthreads];
 | 
|---|
| 76 |                         Partner thddata[tthreads];
 | 
|---|
| 77 |                         for(int i = 0; i < tthreads; i++) {
 | 
|---|
| 78 |                                 int ret = sem_init( &thddata[i].self, false, 0 );
 | 
|---|
| 79 |                                 if(ret != 0) std::abort();
 | 
|---|
| 80 | 
 | 
|---|
| 81 |                                 unsigned pi = (i + nthreads) % tthreads;
 | 
|---|
| 82 |                                 thddata[i].next = &thddata[pi].self;
 | 
|---|
| 83 |                         }
 | 
|---|
| 84 |                         for(int i = 0; i < tthreads; i++) {
 | 
|---|
| 85 |                                 int ret = pthread_create( &threads[i], nullptr, reinterpret_cast<void * (*)(void *)>(partner_main), &thddata[i] );
 | 
|---|
| 86 |                                 if(ret != 0) std::abort();
 | 
|---|
| 87 |                         }
 | 
|---|
| 88 |                         printf("Starting\n");
 | 
|---|
| 89 | 
 | 
|---|
| 90 |                         bool is_tty = isatty(STDOUT_FILENO);
 | 
|---|
| 91 |                         start = timeHiRes();
 | 
|---|
| 92 | 
 | 
|---|
| 93 |                         for(int i = 0; i < nthreads; i++) {
 | 
|---|
| 94 |                                 sem_post(&thddata[i].self);
 | 
|---|
| 95 |                         }
 | 
|---|
| 96 |                         wait<Pthread>(start, is_tty);
 | 
|---|
| 97 | 
 | 
|---|
| 98 |                         stop = true;
 | 
|---|
| 99 |                         end = timeHiRes();
 | 
|---|
| 100 |                         printf("\nDone\n");
 | 
|---|
| 101 | 
 | 
|---|
| 102 |                         for(int i = 0; i < tthreads; i++) {
 | 
|---|
| 103 |                                 sem_post(&thddata[i].self);
 | 
|---|
| 104 |                                 int ret = pthread_join( threads[i], nullptr );
 | 
|---|
| 105 |                                 if(ret != 0) std::abort();
 | 
|---|
| 106 |                                 global_counter += thddata[i].count;
 | 
|---|
| 107 |                                 global_blocks  += thddata[i].blocks;
 | 
|---|
| 108 |                         }
 | 
|---|
| 109 | 
 | 
|---|
| 110 |                         for(int i = 0; i < tthreads; i++) {
 | 
|---|
| 111 |                                 int ret = sem_destroy( &thddata[i].self );
 | 
|---|
| 112 |                                 if(ret != 0) std::abort();
 | 
|---|
| 113 |                         }
 | 
|---|
| 114 |                 }
 | 
|---|
| 115 | 
 | 
|---|
| 116 |                 printf("Duration (ms)        : %'ld\n", to_miliseconds(end - start));
 | 
|---|
| 117 |                 printf("Number of processors : %'d\n", nprocs);
 | 
|---|
| 118 |                 printf("Number of threads    : %'d\n", tthreads);
 | 
|---|
| 119 |                 printf("Cycle size (# thrds) : %'d\n", ring_size);
 | 
|---|
| 120 |                 printf("Total Operations(ops): %'15llu\n", global_counter);
 | 
|---|
| 121 |                 printf("Total blocks         : %'15llu\n", global_blocks);
 | 
|---|
| 122 |                 printf("Ops per second       : %'18.2lf\n", ((double)global_counter) / to_fseconds(end - start));
 | 
|---|
| 123 |                 printf("ns per ops           : %'18.2lf\n", ((double)(end - start)) / global_counter);
 | 
|---|
| 124 |                 printf("Ops per threads      : %'15llu\n", global_counter / tthreads);
 | 
|---|
| 125 |                 printf("Ops per procs        : %'15llu\n", global_counter / nprocs);
 | 
|---|
| 126 |                 printf("Ops/sec/procs        : %'18.2lf\n", (((double)global_counter) / nprocs) / to_fseconds(end - start));
 | 
|---|
| 127 |                 printf("ns per ops/procs     : %'18.2lf\n", ((double)(end - start)) / (global_counter / nprocs));
 | 
|---|
| 128 |                 fflush(stdout);
 | 
|---|
| 129 |         }
 | 
|---|
| 130 | 
 | 
|---|
| 131 |         return 0;
 | 
|---|
| 132 | }
 | 
|---|