Index: benchmark/readyQ/locality.go
===================================================================
--- benchmark/readyQ/locality.go	(revision aa1d13cda79dced5a40cd4a8bd7adef8a30e614d)
+++ benchmark/readyQ/locality.go	(revision c5a98f343fd19f7600bdc61b555a8be7bfcf318f)
@@ -53,6 +53,8 @@
 }
 
-func NewCtx( sem * semaphore.Weighted, data * MyData, id int ) (MyCtx) {
-	return MyCtx{sem, unsafe.Pointer(data), context.Background(), syscall.Gettid(), id}
+func NewCtx( data * MyData, id int ) (MyCtx) {
+	r := MyCtx{semaphore.NewWeighted(1), unsafe.Pointer(data), context.Background(), syscall.Gettid(), id}
+	r.s.Acquire(context.Background(), 1)
+	return r
 }
 
@@ -66,7 +68,9 @@
 
 // ==================================================
+// Atomic object where a single thread can wait
+// May exchange data
 type Spot struct {
-	ptr uintptr
-	id int
+	ptr uintptr // atomic variable used for MES
+	id int      // id for debugging
 }
 
@@ -78,18 +82,23 @@
 	new := uintptr(unsafe.Pointer(ctx))
 	// old_d := ctx.d
+
+	// Attempt to CAS our context into the seat
 	var raw uintptr
 	for true {
 		raw = this.ptr
-		if raw == uintptr(1) {
+		if raw == uintptr(1) { // Seat is closed, return
 			return nil, true
 		}
 		if atomic.CompareAndSwapUintptr(&this.ptr, raw, new) {
-			break
+			break // We got the seat
 		}
 	}
 
+	// If we aren't the first in, wake someone
 	if raw != uintptr(0) {
 		var val *MyCtx
 		val = (*MyCtx)(unsafe.Pointer(raw))
+
+		// If we are sharing, give them our data
 		if share {
 			// fmt.Printf("[%d] - %d update %d: %p -> %p\n", this.id, ctx.id, val.id, val.d, data)
@@ -97,4 +106,5 @@
 		}
 
+		// Wake them up
 		// fmt.Printf("[%d] - %d release %d\n", this.id, ctx.id, val.id)
 		val.s.Release(1)
@@ -102,5 +112,9 @@
 
 	// fmt.Printf("[%d] - %d enter\n", this.id, ctx.id)
+
+	// Block once on the seat
 	ctx.s.Acquire(ctx.c, 1)
+
+	// Someone woke us up, get the new data
 	ret := (* MyData)(atomic.LoadPointer(&ctx.d))
 	// fmt.Printf("[%d] - %d leave: %p -> %p\n", this.id, ctx.id, ret, old_d)
@@ -109,4 +123,6 @@
 }
 
+// Shut down the spot
+// Wake current thread and mark seat as closed
 func (this * Spot) release() {
 	val := (*MyCtx)(unsafe.Pointer(atomic.SwapUintptr(&this.ptr, uintptr(1))))
@@ -115,8 +131,10 @@
 	}
 
+	// Someone was there, release them
 	val.s.Release(1)
 }
 
 // ==================================================
+// Struct for result, Go doesn't support passing tuple in channels
 type Result struct {
 	count uint64
@@ -130,4 +148,5 @@
 
 // ==================================================
+// Random number generator, Go's native one is too slow and global
 func __xorshift64( state * uint64 ) (uint64) {
 	x := *state
@@ -139,4 +158,6 @@
 }
 
+// ==================================================
+// Do some work by accessing 'cnt' cells in the array
 func work(data * MyData, cnt uint64, state * uint64) {
 	for i := uint64(0); i < cnt; i++ {
@@ -145,28 +166,36 @@
 }
 
+// Main body of the threads
 func local(result chan Result, start chan struct{}, size uint64, cnt uint64, channels [] Spot, share bool, id int) {
-    	state := rand.Uint64()
-
-	data := NewData(id, size)
-	sem := semaphore.NewWeighted(1)
-	sem.Acquire(context.Background(), 1)
-	ctx := NewCtx(sem, data, id)
-
+	// Initialize some data
+    	state := rand.Uint64()    // RNG state
+	data := NewData(id, size) // Starting piece of data
+	ctx := NewCtx(data, id)   // Goroutine local context
+
+	// Prepare results
 	r := NewResult()
+
+	// Wait for start
 	<- start
+
+	// Main loop
 	for true {
+		// Touch our current data, write to invalidate remote cache lines
 		work(data, cnt, &state)
 
+		// Wait on a random spot
 		i := __xorshift64(&state) % uint64(len(channels))
 		var closed bool
 		data, closed = channels[i].put(&ctx, data, share)
 
-		if closed { break }
-		if  clock_mode && atomic.LoadInt32(&stop) == 1 { break }
-		if !clock_mode && r.count >= stop_count { break }
-		if uint64(len(data.data)) != size {
-			panic("Data has weird size")
-		}
-
+		// Check if the experiment is over
+		if closed { break }                                       // yes, spot was closed
+		if  clock_mode && atomic.LoadInt32(&stop) == 1 { break }  // yes, time's up
+		if !clock_mode && r.count >= stop_count { break }         // yes, iterations reached
+
+		// Check everything is consistent
+		if uint64(len(data.data)) != size { panic("Data has weird size") }
+
+		// write down progress and check migrations
 		ttid := syscall.Gettid()
 		r.count += 1
@@ -175,19 +204,28 @@
 	}
 
+	// Mark goroutine as done
 	atomic.AddInt64(&threads_left, -1);
+
+	// return result
 	result <- r
 }
 
+// ==================================================
+// Program main
 func main() {
+	// Benchmark specific command line arguments
 	work_sizeOpt := flag.Uint64("w", 2    , "Number of words (uint64) per threads")
 	countOpt     := flag.Uint64("c", 2    , "Number of words (uint64) to touch")
 	shareOpt     := flag.Bool  ("s", false, "Pass the work data to the next thread when blocking")
 
+	// General benchmark initialization and deinitialization
 	defer bench_init()()
 
+	// Eval command line arguments
 	size  := *work_sizeOpt
 	cnt   := *countOpt
 	share := *shareOpt
 
+	// Check params
 	if ! (nthreads > nprocs) {
 		fmt.Fprintf(os.Stderr, "Must have more threads than procs\n")
@@ -195,12 +233,14 @@
 	}
 
-	barrierStart := make(chan struct{})
-	threads_left = int64(nprocs)
-	result  := make(chan Result)
-	channels := make([]Spot, nthreads - nprocs)
+	// Make global data
+	barrierStart := make(chan struct{})         // Barrier used at the start
+	threads_left = int64(nprocs)                // Counter for active threads (not 'nthreads' because at all times 'nthreads - nprocs' are blocked)
+	result  := make(chan Result)                // Channel for results
+	channels := make([]Spot, nthreads - nprocs) // Number of spots
 	for i := range channels {
-		channels[i] = Spot{uintptr(0), i}
-	}
-
+		channels[i] = Spot{uintptr(0), i}     // init spots
+	}
+
+	// start the goroutines
 	for i := 0; i < nthreads; i++ {
 		go local(result, barrierStart, size, cnt, channels, share, i)
@@ -210,7 +250,7 @@
 	atomic.StoreInt32(&stop, 0)
 	start := time.Now()
-	close(barrierStart)
-
-	wait(start, true);
+	close(barrierStart) // release barrier
+
+	wait(start, true);  // general benchmark wait
 
 	atomic.StoreInt32(&stop, 1)
@@ -220,8 +260,10 @@
 	fmt.Printf("\nDone\n")
 
+	// release all the blocked threads
 	for i := range channels {
 		channels[i].release()
 	}
 
+	// Join and accumulate results
 	global_result := NewResult()
 	for i := 0; i < nthreads; i++ {
@@ -232,4 +274,5 @@
 	}
 
+	// Print with nice ' separators, i.e. 1'000'000 instead of 1000000
 	p := message.NewPrinter(language.English)
 	p.Printf("Duration (ms)          : %f\n", delta.Seconds());
