#include <cassert>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>

extern "C" {
#include <locale.h>
#include <getopt.h>
}

#include <unistd.h>

#include <chrono>
#include <iomanip>
#include <iostream>
#include <locale>
|---|
| 15 |  | 
|---|
| 16 | using Clock = std::chrono::high_resolution_clock; | 
|---|
| 17 | using duration_t = std::chrono::duration<double>; | 
|---|
| 18 | using std::chrono::nanoseconds; | 
|---|
| 19 |  | 
|---|
| 20 |  | 
|---|
// Converts a raw count of seconds into a raw count of `Ratio` units
// (e.g. std::milli, std::nano), keeping the arithmetic type T.
//
//   Ratio   : std::ratio describing the target unit, relative to one second
//   seconds : amount of time expressed in seconds
//   returns : the same amount of time expressed in `Ratio` units
template<typename Ratio, typename T>
T duration_cast(T seconds) {
	using source_t = std::chrono::duration<T>;
	using target_t = std::chrono::duration<T, Ratio>;

	const source_t as_seconds(seconds);
	const target_t converted = std::chrono::duration_cast<target_t>(as_seconds);
	return converted.count();
}
|---|
| 25 |  | 
|---|
// Stop flag for the benchmark: main raises it before releasing the fibres
// and lowers it once the requested duration has elapsed; every fibre polls
// it at the top of its yield loop.
volatile bool run{false};

// Grand total of yields: each fibre adds its private count here exactly
// once, right before it returns.
volatile unsigned long long global_counter{0};
|---|
| 28 |  | 
|---|
| 29 | #include "libfibre/fibre.h" | 
|---|
| 30 |  | 
|---|
| 31 | FredBarrier * barrier; | 
|---|
// Per-fibre yield counter.
//
// The 128-byte alignment gives every counter its own aligned slot when they
// are stored in an array (presumably to keep neighbouring fibres from
// sharing a cache line -- confirm against the target hardware).
//
// `value` is incremented once per yield, so it is 64-bit and unsigned: a
// plain `int` would overflow (undefined behaviour) on a long enough run,
// and this width matches the global total it is eventually added to.
struct __attribute__((aligned(128))) counter_t {
	unsigned long long value = 0;
};
|---|
| 35 |  | 
|---|
| 36 | void fibre_main( counter_t * counter ) { | 
|---|
| 37 | barrier->wait(); | 
|---|
| 38 | // /* paranoid */ assert( true == __atomic_load_n(&run, __ATOMIC_RELAXED) ); | 
|---|
| 39 |  | 
|---|
| 40 | while(__atomic_load_n(&run, __ATOMIC_RELAXED)) { | 
|---|
| 41 | Fibre::forceYield(); | 
|---|
| 42 | // fibre_yield(); | 
|---|
| 43 | counter->value++; | 
|---|
| 44 | } | 
|---|
| 45 | __atomic_fetch_add(&global_counter, counter->value, __ATOMIC_SEQ_CST); | 
|---|
| 46 | } | 
|---|
| 47 |  | 
|---|
| 48 | int main(int argc, char * argv[]) { | 
|---|
| 49 | double duration = 5; | 
|---|
| 50 | int nprocs = 1; | 
|---|
| 51 | int nthreads = 1; | 
|---|
| 52 |  | 
|---|
| 53 | std::cout.imbue(std::locale("")); | 
|---|
| 54 | setlocale(LC_ALL, ""); | 
|---|
| 55 |  | 
|---|
| 56 | for(;;) { | 
|---|
| 57 | static struct option options[] = { | 
|---|
| 58 | {"duration",  required_argument, 0, 'd'}, | 
|---|
| 59 | {"nprocs",    required_argument, 0, 'p'}, | 
|---|
| 60 | {"nthreads",  required_argument, 0, 't'}, | 
|---|
| 61 | {0, 0, 0, 0} | 
|---|
| 62 | }; | 
|---|
| 63 |  | 
|---|
| 64 | int idx = 0; | 
|---|
| 65 | int opt = getopt_long(argc, argv, "d:p:t:", options, &idx); | 
|---|
| 66 |  | 
|---|
| 67 | const char * arg = optarg ? optarg : ""; | 
|---|
| 68 | char * end; | 
|---|
| 69 | switch(opt) { | 
|---|
| 70 | case -1: | 
|---|
| 71 | goto run; | 
|---|
| 72 | // Numeric Arguments | 
|---|
| 73 | case 'd': | 
|---|
| 74 | duration = strtod(arg, &end); | 
|---|
| 75 | if(*end != '\0') { | 
|---|
| 76 | fprintf(stderr, "Duration must be a valid double, was %s\n", arg); | 
|---|
| 77 | goto usage; | 
|---|
| 78 | } | 
|---|
| 79 | break; | 
|---|
| 80 | case 't': | 
|---|
| 81 | nthreads = strtoul(arg, &end, 10); | 
|---|
| 82 | if(*end != '\0' || nthreads < 1) { | 
|---|
| 83 | fprintf(stderr, "Number of threads must be a positive integer, was %s\n", arg); | 
|---|
| 84 | goto usage; | 
|---|
| 85 | } | 
|---|
| 86 | break; | 
|---|
| 87 | case 'p': | 
|---|
| 88 | nprocs = strtoul(arg, &end, 10); | 
|---|
| 89 | if(*end != '\0' || nprocs < 1) { | 
|---|
| 90 | fprintf(stderr, "Number of processors must be a positive integer, was %s\n", arg); | 
|---|
| 91 | goto usage; | 
|---|
| 92 | } | 
|---|
| 93 | break; | 
|---|
| 94 | // Other cases | 
|---|
| 95 | default: /* ? */ | 
|---|
| 96 | fprintf( stderr, "Unkown option '%c'\n", opt); | 
|---|
| 97 | usage: | 
|---|
| 98 | fprintf( stderr, "Usage: %s [options]\n", argv[0]); | 
|---|
| 99 | fprintf( stderr, "\n" ); | 
|---|
| 100 | fprintf( stderr, "  -d, --duration=DURATION  Duration of the experiment, in seconds\n" ); | 
|---|
| 101 | fprintf( stderr, "  -t, --nthreads=NTHREADS  Number of kernel threads\n" ); | 
|---|
| 102 | fprintf( stderr, "  -q, --nqueues=NQUEUES    Number of queues per threads\n" ); | 
|---|
| 103 | exit(1); | 
|---|
| 104 | } | 
|---|
| 105 | } | 
|---|
| 106 | run: | 
|---|
| 107 |  | 
|---|
| 108 | { | 
|---|
| 109 | printf("Running %d threads on %d processors for %lf seconds\n", nthreads, nprocs, duration); | 
|---|
| 110 |  | 
|---|
| 111 | FibreInit(); | 
|---|
| 112 | barrier = new FredBarrier(nthreads + 1); | 
|---|
| 113 | { | 
|---|
| 114 | Context::CurrCluster().addWorkers(nprocs); | 
|---|
| 115 | { | 
|---|
| 116 | counter_t counters[nthreads]; | 
|---|
| 117 | Fibre threads[nthreads]; | 
|---|
| 118 | for(int i = 0; i < nthreads; i++) { | 
|---|
| 119 | threads[i].run(fibre_main, &counters[i]); | 
|---|
| 120 | } | 
|---|
| 121 | printf("Starting\n"); | 
|---|
| 122 | bool is_tty = isatty(STDOUT_FILENO); | 
|---|
| 123 | auto before = Clock::now(); | 
|---|
| 124 | run = true; | 
|---|
| 125 |  | 
|---|
| 126 | barrier->wait(); | 
|---|
| 127 | for(;;) { | 
|---|
| 128 | usleep(500'000); | 
|---|
| 129 | auto now = Clock::now(); | 
|---|
| 130 | duration_t durr = now - before; | 
|---|
| 131 | if( durr.count() > duration ) { | 
|---|
| 132 | break; | 
|---|
| 133 | } | 
|---|
| 134 | if(is_tty) { | 
|---|
| 135 | std::cout << "\r" << std::setprecision(4) << durr.count(); | 
|---|
| 136 | std::cout.flush(); | 
|---|
| 137 | } | 
|---|
| 138 | } | 
|---|
| 139 |  | 
|---|
| 140 | auto after = Clock::now(); | 
|---|
| 141 | duration_t durr = after - before; | 
|---|
| 142 | duration = durr.count(); | 
|---|
| 143 | run = false; | 
|---|
| 144 | printf("\nDone\n"); | 
|---|
| 145 | for(auto & thread : threads) { | 
|---|
| 146 | thread.join(); | 
|---|
| 147 | } | 
|---|
| 148 |  | 
|---|
| 149 | // for(const auto & counter : counters) { | 
|---|
| 150 | //      std::cout << counter.value << std::endl; | 
|---|
| 151 | // } | 
|---|
| 152 | } | 
|---|
| 153 | } | 
|---|
| 154 |  | 
|---|
| 155 | auto dur_nano = duration_cast<std::nano>(duration); | 
|---|
| 156 | auto dur_dms  = duration_cast<std::milli>(duration); | 
|---|
| 157 |  | 
|---|
| 158 | printf("Duration (ms)       : %'.2lf\n", dur_dms ); | 
|---|
| 159 | printf("Total yields        : %'15llu\n", global_counter ); | 
|---|
| 160 | printf("Yields per procs    : %'15llu\n", global_counter / nprocs ); | 
|---|
| 161 | printf("Yields per second   : %'18.2lf\n", ((double)global_counter) / duration ); | 
|---|
| 162 | printf("Yields/sec/procs    : %'18.2lf\n", (((double)global_counter) / nprocs) / duration ); | 
|---|
| 163 | printf("ns per yields       : %'18.2lf\n", dur_nano / global_counter ); | 
|---|
| 164 | printf("ns per yields/procs : %'18.2lf\n", dur_nano / (global_counter / nprocs) ); | 
|---|
| 165 |  | 
|---|
| 166 | } | 
|---|
| 167 | } | 
|---|