| [90ecade] | 1 | package main
 | 
|---|
 | 2 | 
 | 
|---|
 | 3 | import (
 | 
|---|
| [2dd0689] | 4 |         "context"
 | 
|---|
| [90ecade] | 5 |         "flag"
 | 
|---|
 | 6 |         "fmt"
 | 
|---|
 | 7 |         "math/rand"
 | 
|---|
 | 8 |         "os"
 | 
|---|
| [aa1d13c] | 9 |         "syscall"
 | 
|---|
| [90ecade] | 10 |         "sync/atomic"
 | 
|---|
 | 11 |         "time"
 | 
|---|
| [2dd0689] | 12 |         "unsafe"
 | 
|---|
 | 13 |         "golang.org/x/sync/semaphore"
 | 
|---|
| [90ecade] | 14 |         "golang.org/x/text/language"
 | 
|---|
 | 15 |         "golang.org/x/text/message"
 | 
|---|
 | 16 | )
 | 
|---|
 | 17 | 
 | 
|---|
// ==================================================
// MyData is the piece of work data a goroutine operates on: a slice of
// 64-bit cells plus bookkeeping used to detect when it is touched from a
// different OS thread.
//
// Field order matters: the type is built with positional composite literals.
type MyData struct {
	_p1      [16]uint64 // padding (16 words) in front of the live fields
	ttid, id int        // ttid: OS thread id last recorded for this data; id: identifier given at creation
	data     []uint64   // cells incremented by access
	_p2      [16]uint64 // padding (16 words) behind the live fields
}
 | 
|---|
 | 26 | 
 | 
|---|
 | 27 | func NewData(id int, size uint64) (*MyData) {
 | 
|---|
 | 28 |         var data [] uint64
 | 
|---|
 | 29 |         data = make([]uint64, size)
 | 
|---|
 | 30 |         for i := uint64(0); i < size; i++ {
 | 
|---|
 | 31 |                 data[i] = 0
 | 
|---|
 | 32 |         }
 | 
|---|
| [3d19ae6] | 33 |         return &MyData{[16]uint64{0}, syscall.Gettid(), id, data,[16]uint64{0}}
 | 
|---|
| [aa1d13c] | 34 | }
 | 
|---|
 | 35 | 
 | 
|---|
 | 36 | func (this * MyData) moved( ttid int ) (uint64) {
 | 
|---|
 | 37 |         if this.ttid == ttid {
 | 
|---|
 | 38 |                 return 0
 | 
|---|
 | 39 |         }
 | 
|---|
 | 40 |         this.ttid = ttid
 | 
|---|
 | 41 |         return 1
 | 
|---|
 | 42 | }
 | 
|---|
 | 43 | 
 | 
|---|
 | 44 | func (this * MyData) access( idx uint64 ) {
 | 
|---|
 | 45 |         this.data[idx % uint64(len(this.data))] += 1
 | 
|---|
 | 46 | }
 | 
|---|
 | 47 | 
 | 
|---|
 | 48 | // ==================================================
 | 
|---|
 | 49 | type MyCtx struct {
 | 
|---|
| [3d19ae6] | 50 |         _p1 [16]uint64 // padding
 | 
|---|
| [2dd0689] | 51 |         s * semaphore.Weighted
 | 
|---|
| [34687d3] | 52 |         d unsafe.Pointer
 | 
|---|
| [024fa4b] | 53 |         c context.Context
 | 
|---|
| [aa1d13c] | 54 |         ttid int
 | 
|---|
| [34687d3] | 55 |         id int
 | 
|---|
| [3d19ae6] | 56 |         _p2 [16]uint64 // padding
 | 
|---|
| [2dd0689] | 57 | }
 | 
|---|
 | 58 | 
 | 
|---|
| [c5a98f3] | 59 | func NewCtx( data * MyData, id int ) (MyCtx) {
 | 
|---|
| [3d19ae6] | 60 |         r := MyCtx{[16]uint64{0},semaphore.NewWeighted(1), unsafe.Pointer(data), context.Background(), syscall.Gettid(), id,[16]uint64{0}}
 | 
|---|
| [c5a98f3] | 61 |         r.s.Acquire(context.Background(), 1)
 | 
|---|
 | 62 |         return r
 | 
|---|
| [aa1d13c] | 63 | }
 | 
|---|
 | 64 | 
 | 
|---|
 | 65 | func (this * MyCtx) moved( ttid int ) (uint64) {
 | 
|---|
 | 66 |         if this.ttid == ttid {
 | 
|---|
 | 67 |                 return 0
 | 
|---|
 | 68 |         }
 | 
|---|
 | 69 |         this.ttid = ttid
 | 
|---|
 | 70 |         return 1
 | 
|---|
 | 71 | }
 | 
|---|
 | 72 | 
 | 
|---|
 | 73 | // ==================================================
 | 
|---|
// Spot is an atomic seat on which a single goroutine can wait.
// Data may be exchanged between goroutines as they pass through it.
//
// Field order matters: the type is built with positional composite literals.
type Spot struct {
	_p1 [16]uint64 // padding (16 words) in front of the live fields
	ptr uintptr    // atomic seat word: 0 = empty, 1 = closed, otherwise the address of the parked *MyCtx
	id  int        // id for debugging
	_p2 [16]uint64 // padding (16 words) behind the live fields
}
 | 
|---|
 | 82 | 
 | 
|---|
| [024fa4b] | 83 | // Main handshake of the code
 | 
|---|
 | 84 | // Single seat, first thread arriving waits
 | 
|---|
 | 85 | // Next threads unblocks current one and blocks in its place
 | 
|---|
 | 86 | // if share == true, exchange data in the process
 | 
|---|
| [aa1d13c] | 87 | func (this * Spot) put( ctx * MyCtx, data * MyData, share bool) (* MyData, bool) {
 | 
|---|
| [024fa4b] | 88 |         new := uintptr(unsafe.Pointer(ctx))
 | 
|---|
| [8235415] | 89 |         // old_d := ctx.d
 | 
|---|
| [c5a98f3] | 90 | 
 | 
|---|
 | 91 |         // Attempt to CAS our context into the seat
 | 
|---|
| [2dd0689] | 92 |         var raw uintptr
 | 
|---|
 | 93 |         for true {
 | 
|---|
 | 94 |                 raw = this.ptr
 | 
|---|
| [c5a98f3] | 95 |                 if raw == uintptr(1) { // Seat is closed, return
 | 
|---|
| [34687d3] | 96 |                         return nil, true
 | 
|---|
| [2dd0689] | 97 |                 }
 | 
|---|
| [024fa4b] | 98 |                 if atomic.CompareAndSwapUintptr(&this.ptr, raw, new) {
 | 
|---|
| [c5a98f3] | 99 |                         break // We got the seat
 | 
|---|
| [2dd0689] | 100 |                 }
 | 
|---|
| [90ecade] | 101 |         }
 | 
|---|
 | 102 | 
 | 
|---|
| [c5a98f3] | 103 |         // If we aren't the fist in, wake someone
 | 
|---|
| [2dd0689] | 104 |         if raw != uintptr(0) {
 | 
|---|
| [aa1d13c] | 105 |                 var val *MyCtx
 | 
|---|
 | 106 |                 val = (*MyCtx)(unsafe.Pointer(raw))
 | 
|---|
| [c5a98f3] | 107 | 
 | 
|---|
 | 108 |                 // If we are sharing, give them our data
 | 
|---|
| [90ecade] | 109 |                 if share {
 | 
|---|
| [34687d3] | 110 |                         // fmt.Printf("[%d] - %d update %d: %p -> %p\n", this.id, ctx.id, val.id, val.d, data)
 | 
|---|
 | 111 |                         atomic.StorePointer(&val.d, unsafe.Pointer(data))
 | 
|---|
| [90ecade] | 112 |                 }
 | 
|---|
| [2dd0689] | 113 | 
 | 
|---|
| [c5a98f3] | 114 |                 // Wake them up
 | 
|---|
| [34687d3] | 115 |                 // fmt.Printf("[%d] - %d release %d\n", this.id, ctx.id, val.id)
 | 
|---|
| [2dd0689] | 116 |                 val.s.Release(1)
 | 
|---|
 | 117 |         }
 | 
|---|
 | 118 | 
 | 
|---|
| [34687d3] | 119 |         // fmt.Printf("[%d] - %d enter\n", this.id, ctx.id)
 | 
|---|
| [c5a98f3] | 120 | 
 | 
|---|
 | 121 |         // Block once on the seat
 | 
|---|
| [024fa4b] | 122 |         ctx.s.Acquire(ctx.c, 1)
 | 
|---|
| [c5a98f3] | 123 | 
 | 
|---|
 | 124 |         // Someone woke us up, get the new data
 | 
|---|
| [aa1d13c] | 125 |         ret := (* MyData)(atomic.LoadPointer(&ctx.d))
 | 
|---|
| [34687d3] | 126 |         // fmt.Printf("[%d] - %d leave: %p -> %p\n", this.id, ctx.id, ret, old_d)
 | 
|---|
 | 127 | 
 | 
|---|
 | 128 |         return ret, false
 | 
|---|
| [2dd0689] | 129 | }
 | 
|---|
 | 130 | 
 | 
|---|
| [c5a98f3] | 131 | // Shutdown the spot
 | 
|---|
 | 132 | // Wake current thread and mark seat as closed
 | 
|---|
| [2dd0689] | 133 | func (this * Spot) release() {
 | 
|---|
| [aa1d13c] | 134 |         val := (*MyCtx)(unsafe.Pointer(atomic.SwapUintptr(&this.ptr, uintptr(1))))
 | 
|---|
| [2dd0689] | 135 |         if val == nil {
 | 
|---|
 | 136 |                 return
 | 
|---|
| [90ecade] | 137 |         }
 | 
|---|
| [2dd0689] | 138 | 
 | 
|---|
| [c5a98f3] | 139 |         // Someone was there, release them
 | 
|---|
| [2dd0689] | 140 |         val.s.Release(1)
 | 
|---|
| [90ecade] | 141 | }
 | 
|---|
 | 142 | 
 | 
|---|
| [aa1d13c] | 143 | // ==================================================
 | 
|---|
// Result bundles the per-goroutine counters so they can be sent over a
// single channel (Go channels cannot carry tuples).
//
// Field order matters: the type is built with positional composite literals.
type Result struct {
	count, // completed iterations
	gmigs, // times the goroutine was seen on a different OS thread
	dmigs uint64 // times the data was seen on a different OS thread
}
 | 
|---|
 | 150 | 
 | 
|---|
 | 151 | func NewResult() (Result) {
 | 
|---|
 | 152 |         return Result{0, 0, 0}
 | 
|---|
 | 153 | }
 | 
|---|
 | 154 | 
 | 
|---|
 | 155 | // ==================================================
 | 
|---|
// Random number generator, Go's native one is too slow and global.
//
// __xorshift64 advances the 64-bit xorshift state stored in *state (shifts
// 13, 7, 17) and returns the new state. A state of zero stays zero, so
// callers should seed with a non-zero value.
func __xorshift64(state *uint64) uint64 {
	*state ^= *state << 13
	*state ^= *state >> 7
	*state ^= *state << 17
	return *state
}
 | 
|---|
| [2dd0689] | 165 | 
 | 
|---|
| [c5a98f3] | 166 | // ==================================================
 | 
|---|
 | 167 | // Do some work by accessing 'cnt' cells in the array
 | 
|---|
| [aa1d13c] | 168 | func work(data * MyData, cnt uint64, state * uint64) {
 | 
|---|
| [94d93510] | 169 |         for i := uint64(0); i < cnt; i++ {
 | 
|---|
| [aa1d13c] | 170 |                 data.access(__xorshift64(state))
 | 
|---|
| [94d93510] | 171 |         }
 | 
|---|
 | 172 | }
 | 
|---|
 | 173 | 
 | 
|---|
| [c5a98f3] | 174 | // Main body of the threads
 | 
|---|
| [aa1d13c] | 175 | func local(result chan Result, start chan struct{}, size uint64, cnt uint64, channels [] Spot, share bool, id int) {
 | 
|---|
| [c5a98f3] | 176 |         // Initialize some data
 | 
|---|
 | 177 |         state := rand.Uint64()    // RNG state
 | 
|---|
 | 178 |         data := NewData(id, size) // Starting piece of data
 | 
|---|
 | 179 |         ctx := NewCtx(data, id)   // Goroutine local context
 | 
|---|
| [2dd0689] | 180 | 
 | 
|---|
| [c5a98f3] | 181 |         // Prepare results
 | 
|---|
| [aa1d13c] | 182 |         r := NewResult()
 | 
|---|
| [c5a98f3] | 183 | 
 | 
|---|
 | 184 |         // Wait for start
 | 
|---|
| [90ecade] | 185 |         <- start
 | 
|---|
| [c5a98f3] | 186 | 
 | 
|---|
 | 187 |         // Main loop
 | 
|---|
| [90ecade] | 188 |         for true {
 | 
|---|
| [c5a98f3] | 189 |                 // Touch our current data, write to invalidate remote cache lines
 | 
|---|
| [aa1d13c] | 190 |                 work(data, cnt, &state)
 | 
|---|
| [90ecade] | 191 | 
 | 
|---|
| [c5a98f3] | 192 |                 // Wait on a random spot
 | 
|---|
| [fd84538] | 193 |                 i := __xorshift64(&state) % uint64(len(channels))
 | 
|---|
| [34687d3] | 194 |                 var closed bool
 | 
|---|
 | 195 |                 data, closed = channels[i].put(&ctx, data, share)
 | 
|---|
| [90ecade] | 196 | 
 | 
|---|
| [c5a98f3] | 197 |                 // Check if the experiment is over
 | 
|---|
 | 198 |                 if closed { break }                                       // yes, spot was closed
 | 
|---|
 | 199 |                 if  clock_mode && atomic.LoadInt32(&stop) == 1 { break }  // yes, time's up
 | 
|---|
 | 200 |                 if !clock_mode && r.count >= stop_count { break }         // yes, iterations reached
 | 
|---|
| [aa1d13c] | 201 | 
 | 
|---|
| [c5a98f3] | 202 |                 // Check everything is consistent
 | 
|---|
 | 203 |                 if uint64(len(data.data)) != size { panic("Data has weird size") }
 | 
|---|
 | 204 | 
 | 
|---|
 | 205 |                 // write down progress and check migrations
 | 
|---|
| [aa1d13c] | 206 |                 ttid := syscall.Gettid()
 | 
|---|
 | 207 |                 r.count += 1
 | 
|---|
 | 208 |                 r.gmigs += ctx .moved(ttid)
 | 
|---|
 | 209 |                 r.dmigs += data.moved(ttid)
 | 
|---|
| [90ecade] | 210 |         }
 | 
|---|
 | 211 | 
 | 
|---|
| [c5a98f3] | 212 |         // Mark goroutine as done
 | 
|---|
| [90ecade] | 213 |         atomic.AddInt64(&threads_left, -1);
 | 
|---|
| [c5a98f3] | 214 | 
 | 
|---|
 | 215 |         // return result
 | 
|---|
| [aa1d13c] | 216 |         result <- r
 | 
|---|
| [90ecade] | 217 | }
 | 
|---|
 | 218 | 
 | 
|---|
| [c5a98f3] | 219 | // ==================================================
 | 
|---|
 | 220 | // Program main
 | 
|---|
| [90ecade] | 221 | func main() {
 | 
|---|
| [c5a98f3] | 222 |         // Benchmark specific command line arguments
 | 
|---|
| [f03209d3] | 223 |         nspotsOpt    := flag.Int   ("n", 0    , "Number of spots where threads sleep (nthreads - nspots are active at the same time)")
 | 
|---|
| [f4f79dd] | 224 |         work_sizeOpt := flag.Uint64("w", 2    , "Size of the array for each threads, in words (64bit)")
 | 
|---|
 | 225 |         countOpt     := flag.Uint64("c", 2    , "Number of words to touch when working (random pick, cells can be picked more than once)")
 | 
|---|
| [90ecade] | 226 |         shareOpt     := flag.Bool  ("s", false, "Pass the work data to the next thread when blocking")
 | 
|---|
 | 227 | 
 | 
|---|
| [c5a98f3] | 228 |         // General benchmark initialization and deinitialization
 | 
|---|
| [2dd0689] | 229 |         defer bench_init()()
 | 
|---|
| [90ecade] | 230 | 
 | 
|---|
| [c5a98f3] | 231 |         // Eval command line arguments
 | 
|---|
| [f03209d3] | 232 |         nspots:= *nspotsOpt
 | 
|---|
| [90ecade] | 233 |         size  := *work_sizeOpt
 | 
|---|
 | 234 |         cnt   := *countOpt
 | 
|---|
 | 235 |         share := *shareOpt
 | 
|---|
 | 236 | 
 | 
|---|
| [f03209d3] | 237 |         if nspots == 0 { nspots = nthreads - nprocs; }
 | 
|---|
 | 238 | 
 | 
|---|
| [c5a98f3] | 239 |         // Check params
 | 
|---|
| [90ecade] | 240 |         if ! (nthreads > nprocs) {
 | 
|---|
 | 241 |                 fmt.Fprintf(os.Stderr, "Must have more threads than procs\n")
 | 
|---|
 | 242 |                 os.Exit(1)
 | 
|---|
 | 243 |         }
 | 
|---|
 | 244 | 
 | 
|---|
| [c5a98f3] | 245 |         // Make global data
 | 
|---|
 | 246 |         barrierStart := make(chan struct{})         // Barrier used at the start
 | 
|---|
| [f03209d3] | 247 |         threads_left = int64(nthreads - nspots)                // Counter for active threads (not 'nthreads' because at all times 'nthreads - nprocs' are blocked)
 | 
|---|
| [c5a98f3] | 248 |         result  := make(chan Result)                // Channel for results
 | 
|---|
| [f03209d3] | 249 |         channels := make([]Spot, nspots) // Number of spots
 | 
|---|
| [90ecade] | 250 |         for i := range channels {
 | 
|---|
| [3f8baf4] | 251 |                 channels[i] = Spot{[16]uint64{0},uintptr(0), i,[16]uint64{0}}     // init spots
 | 
|---|
| [90ecade] | 252 |         }
 | 
|---|
 | 253 | 
 | 
|---|
| [c5a98f3] | 254 |         // start the goroutines
 | 
|---|
| [90ecade] | 255 |         for i := 0; i < nthreads; i++ {
 | 
|---|
| [34687d3] | 256 |                 go local(result, barrierStart, size, cnt, channels, share, i)
 | 
|---|
| [90ecade] | 257 |         }
 | 
|---|
 | 258 |         fmt.Printf("Starting\n");
 | 
|---|
 | 259 | 
 | 
|---|
| [2dd0689] | 260 |         atomic.StoreInt32(&stop, 0)
 | 
|---|
| [90ecade] | 261 |         start := time.Now()
 | 
|---|
| [c5a98f3] | 262 |         close(barrierStart) // release barrier
 | 
|---|
| [90ecade] | 263 | 
 | 
|---|
| [c5a98f3] | 264 |         wait(start, true);  // general benchmark wait
 | 
|---|
| [90ecade] | 265 | 
 | 
|---|
| [2dd0689] | 266 |         atomic.StoreInt32(&stop, 1)
 | 
|---|
| [90ecade] | 267 |         end := time.Now()
 | 
|---|
 | 268 |         delta := end.Sub(start)
 | 
|---|
 | 269 | 
 | 
|---|
 | 270 |         fmt.Printf("\nDone\n")
 | 
|---|
 | 271 | 
 | 
|---|
| [c5a98f3] | 272 |         // release all the blocked threads
 | 
|---|
| [2dd0689] | 273 |         for i := range channels {
 | 
|---|
 | 274 |                 channels[i].release()
 | 
|---|
 | 275 |         }
 | 
|---|
 | 276 | 
 | 
|---|
| [c5a98f3] | 277 |         // Join and accumulate results
 | 
|---|
| [f4f79dd] | 278 |         results := NewResult()
 | 
|---|
| [90ecade] | 279 |         for i := 0; i < nthreads; i++ {
 | 
|---|
| [024fa4b] | 280 |                 r := <- result
 | 
|---|
| [f4f79dd] | 281 |                 results.count += r.count
 | 
|---|
 | 282 |                 results.gmigs += r.gmigs
 | 
|---|
 | 283 |                 results.dmigs += r.dmigs
 | 
|---|
| [90ecade] | 284 |         }
 | 
|---|
 | 285 | 
 | 
|---|
| [c5a98f3] | 286 |         // Print with nice 's, i.e. 1'000'000 instead of 1000000
 | 
|---|
| [90ecade] | 287 |         p := message.NewPrinter(language.English)
 | 
|---|
| [aec2c022] | 288 |         p.Printf("Duration (ms)          : %d\n", delta.Milliseconds());
 | 
|---|
| [90ecade] | 289 |         p.Printf("Number of processors   : %d\n", nprocs);
 | 
|---|
 | 290 |         p.Printf("Number of threads      : %d\n", nthreads);
 | 
|---|
 | 291 |         p.Printf("Work size (64bit words): %d\n", size);
 | 
|---|
| [aec2c022] | 292 |         if share {
 | 
|---|
 | 293 |                 p.Printf("Data sharing           : On\n");
 | 
|---|
 | 294 |         } else {
 | 
|---|
 | 295 |                 p.Printf("Data sharing           : Off\n");
 | 
|---|
 | 296 |         }
 | 
|---|
| [f4f79dd] | 297 |         p.Printf("Total Operations(ops)  : %15d\n", results.count)
 | 
|---|
 | 298 |         p.Printf("Total G Migrations     : %15d\n", results.gmigs)
 | 
|---|
 | 299 |         p.Printf("Total D Migrations     : %15d\n", results.dmigs)
 | 
|---|
 | 300 |         p.Printf("Ops per second         : %18.2f\n", float64(results.count) / delta.Seconds())
 | 
|---|
 | 301 |         p.Printf("ns per ops             : %18.2f\n", float64(delta.Nanoseconds()) / float64(results.count))
 | 
|---|
 | 302 |         p.Printf("Ops per threads        : %15d\n", results.count / uint64(nthreads))
 | 
|---|
 | 303 |         p.Printf("Ops per procs          : %15d\n", results.count / uint64(nprocs))
 | 
|---|
 | 304 |         p.Printf("Ops/sec/procs          : %18.2f\n", (float64(results.count) / float64(nprocs)) / delta.Seconds())
 | 
|---|
 | 305 |         p.Printf("ns per ops/procs       : %18.2f\n", float64(delta.Nanoseconds()) / (float64(results.count) / float64(nprocs)))
 | 
|---|
| [90ecade] | 306 | }
 | 
|---|