Index: benchmark/readyQ/locality.go
===================================================================
--- benchmark/readyQ/locality.go	(revision 024fa4b4a110581c37421b7d8ae2b0c4de095704)
+++ benchmark/readyQ/locality.go	(revision 34687d372580cca1968a23a81295c4891a48761b)
@@ -17,10 +17,12 @@
 type GoCtx struct {
 	s * semaphore.Weighted
-	d * [] uint64
+	d unsafe.Pointer
 	c context.Context
+	id int
 }
 
 type Spot struct {
 	ptr uintptr
+	id int
 }
 
@@ -29,12 +31,12 @@
 // Next threads unblocks current one and blocks in its place
 // if share == true, exchange data in the process
-func (this * Spot) put( ctx * GoCtx, data * [] uint64, share bool) (* [] uint64) {
+func (this * Spot) put( ctx * GoCtx, data * [] uint64, share bool) (* [] uint64, bool) {
 	new := uintptr(unsafe.Pointer(ctx))
-	// fmt.Printf("Enter with %p\n", data)
+	old_d := ctx.d
 	var raw uintptr
 	for true {
 		raw = this.ptr
 		if raw == uintptr(1) {
-			return nil
+			return nil, true
 		}
 		if atomic.CompareAndSwapUintptr(&this.ptr, raw, new) {
@@ -47,13 +49,18 @@
 		val = (*GoCtx)(unsafe.Pointer(raw))
 		if share {
-			val.d = data
+			// fmt.Printf("[%d] - %d update %d: %p -> %p\n", this.id, ctx.id, val.id, val.d, data)
+			atomic.StorePointer(&val.d, unsafe.Pointer(data))
 		}
 
+		// fmt.Printf("[%d] - %d release %d\n", this.id, ctx.id, val.id)
 		val.s.Release(1)
 	}
 
+	// fmt.Printf("[%d] - %d enter\n", this.id, ctx.id)
 	ctx.s.Acquire(ctx.c, 1)
-	// fmt.Printf("Leave with %p (was %p)\n", ctx.d, data)
-	return ctx.d
+	ret := (* [] uint64)(atomic.LoadPointer(&ctx.d))
+	// fmt.Printf("[%d] - %d leave: %p -> %p\n", this.id, ctx.id, ret, old_d)
+
+	return ret, false
 }
 
@@ -76,5 +83,5 @@
 }
 
-func local(result chan uint64, start chan struct{}, size uint64, cnt uint64, channels [] Spot, share bool) {
+func local(result chan uint64, start chan struct{}, size uint64, cnt uint64, channels [] Spot, share bool, id int) {
     	state := rand.Uint64()
 	var my_data [] uint64
@@ -87,5 +94,5 @@
 	sem := semaphore.NewWeighted(1)
 	sem.Acquire(context.Background(), 1)
-	ctx := GoCtx{sem, data, context.Background()}
+	ctx := GoCtx{sem, unsafe.Pointer(data), context.Background(), id}
 
 	count := uint64(0)
@@ -97,8 +104,9 @@
 
 		i := __xorshift64(&state) % uint64(len(channels))
-		// data = channels[i].put(sem, data, share)
-		data = channels[i].put(&ctx, data, share)
+		var closed bool
+		data, closed = channels[i].put(&ctx, data, share)
 		count += 1
 
+		if closed { break }
 		if  clock_mode && atomic.LoadInt32(&stop) == 1 { break }
 		if !clock_mode && count >= stop_count { break }
@@ -129,13 +137,13 @@
 
 	barrierStart := make(chan struct{})
-	threads_left = int64(nthreads)
+	threads_left = int64(nprocs)
 	result  := make(chan uint64)
 	channels := make([]Spot, nthreads - nprocs)
 	for i := range channels {
-		channels[i] = Spot{uintptr(0)}
+		channels[i] = Spot{uintptr(0), i}
 	}
 
 	for i := 0; i < nthreads; i++ {
-		go local(result, barrierStart, size, cnt, channels, share)
+		go local(result, barrierStart, size, cnt, channels, share, i)
 	}
 	fmt.Printf("Starting\n");
