Index: benchmark/readyQ/locality.go
===================================================================
--- benchmark/readyQ/locality.go	(revision 94d93510adbab4156d0ad3db925fcf5f1efbcb83)
+++ benchmark/readyQ/locality.go	(revision aa1d13cda79dced5a40cd4a8bd7adef8a30e614d)
@@ -7,4 +7,5 @@
 	"math/rand"
 	"os"
+	"syscall"
 	"sync/atomic"
 	"time"
@@ -15,11 +16,54 @@
 )
 
-type GoCtx struct {
+// ==================================================
+type MyData struct { // work buffer tagged with the OS thread id last recorded for it
+	ttid int // thread id last recorded: set from syscall.Gettid() in NewData, overwritten by moved
+	id int // caller-supplied identifier (NewData's id argument)
+	data [] uint64 // words incremented by access; allocated with the requested size in NewData
+}
+
+// NewData builds a MyData of size words tagged with id and the caller's current thread id.
+func NewData(id int, size uint64) (*MyData) {
+	words := make([]uint64, size)
+	for i := range words {
+		words[i] = 0 // explicit store to each word (make already zeroes); presumably to touch the pages here — confirm
+	}
+	return &MyData{ttid: syscall.Gettid(), id: id, data: words}
+}
+
+// moved reports whether ttid differs from the thread id stored in this MyData:
+// it returns 0 on a match, otherwise stores ttid and returns 1.
+func (this * MyData) moved( ttid int ) (uint64) {
+	if this.ttid == ttid { return 0 }
+	this.ttid = ttid
+	return 1
+}
+
+func (this * MyData) access( idx uint64 ) { // bumps the word selected by idx modulo the buffer length
+	this.data[idx % uint64(len(this.data))]++
+}
+
+// ==================================================
+type MyCtx struct { // context passed to Spot.put: semaphore, data pointer and last recorded thread id
 	s * semaphore.Weighted
 	d unsafe.Pointer
 	c context.Context
+	ttid int // thread id last recorded: set from syscall.Gettid() in NewCtx, overwritten by moved
 	id int
 }
 
+func NewCtx( sem * semaphore.Weighted, data * MyData, id int ) (MyCtx) { // tags the context with the caller's current thread id
+	return MyCtx{s: sem, d: unsafe.Pointer(data), c: context.Background(), ttid: syscall.Gettid(), id: id}
+}
+
+// moved reports whether ttid differs from the thread id stored in this MyCtx:
+// it returns 0 on a match, otherwise stores ttid and returns 1.
+func (this * MyCtx) moved( ttid int ) (uint64) {
+	if this.ttid == ttid { return 0 }
+	this.ttid = ttid
+	return 1
+}
+
+// ==================================================
 type Spot struct {
 	ptr uintptr
@@ -31,5 +75,5 @@
 // Next threads unblocks current one and blocks in its place
 // if share == true, exchange data in the process
-func (this * Spot) put( ctx * GoCtx, data * [] uint64, share bool) (* [] uint64, bool) {
+func (this * Spot) put( ctx * MyCtx, data * MyData, share bool) (* MyData, bool) {
 	new := uintptr(unsafe.Pointer(ctx))
 	// old_d := ctx.d
@@ -46,6 +90,6 @@
 
 	if raw != uintptr(0) {
-		var val *GoCtx
-		val = (*GoCtx)(unsafe.Pointer(raw))
+		var val *MyCtx
+		val = (*MyCtx)(unsafe.Pointer(raw))
 		if share {
 			// fmt.Printf("[%d] - %d update %d: %p -> %p\n", this.id, ctx.id, val.id, val.d, data)
@@ -59,5 +103,5 @@
 	// fmt.Printf("[%d] - %d enter\n", this.id, ctx.id)
 	ctx.s.Acquire(ctx.c, 1)
-	ret := (* [] uint64)(atomic.LoadPointer(&ctx.d))
+	ret := (* MyData)(atomic.LoadPointer(&ctx.d))
 	// fmt.Printf("[%d] - %d leave: %p -> %p\n", this.id, ctx.id, ret, old_d)
 
@@ -66,5 +110,5 @@
 
 func (this * Spot) release() {
-	val := (*GoCtx)(unsafe.Pointer(atomic.SwapUintptr(&this.ptr, uintptr(1))))
+	val := (*MyCtx)(unsafe.Pointer(atomic.SwapUintptr(&this.ptr, uintptr(1))))
 	if val == nil {
 		return
@@ -74,4 +118,16 @@
 }
 
+// ==================================================
+type Result struct { // counters reported by local over its result channel
+	count uint64 // loop iterations counted by local (one per pass that clears its exit checks)
+	gmigs uint64 // sum of MyCtx.moved results: times the context's recorded thread id changed
+	dmigs uint64 // sum of MyData.moved results: times the data's recorded thread id changed
+}
+
+func NewResult() (Result) { // every counter starts at zero
+	return Result{}
+}
+
+// ==================================================
 func __xorshift64( state * uint64 ) (uint64) {
 	x := *state
@@ -83,43 +139,42 @@
 }
 
-func work(data * [] uint64, size uint64, cnt uint64, state * uint64) {
+func work(data * MyData, cnt uint64, state * uint64) { // performs cnt accesses at indices drawn from __xorshift64(state)
 	for i := uint64(0); i < cnt; i++ {
-		(*data)[__xorshift64(state) % size] += 1
-	}
-}
-
-func local(result chan uint64, start chan struct{}, size uint64, cnt uint64, channels [] Spot, share bool, id int) {
+		data.access(__xorshift64(state)) // access reduces the value modulo the buffer length
+	}
+}
+
+func local(result chan Result, start chan struct{}, size uint64, cnt uint64, channels [] Spot, share bool, id int) {
     	state := rand.Uint64()
-	var my_data [] uint64
-	my_data = make([]uint64, size)
-	for i := uint64(0); i < size; i++ {
-		my_data[i] = 0
-	}
-	data := &my_data
-
+
+	data := NewData(id, size) // buffer is tagged with syscall.Gettid() at creation
 	sem := semaphore.NewWeighted(1)
 	sem.Acquire(context.Background(), 1)
-	ctx := GoCtx{sem, unsafe.Pointer(data), context.Background(), id}
-
-	count := uint64(0)
+	ctx := NewCtx(sem, data, id) // context likewise records the thread id at creation
+
+	r := NewResult() // counters sent on result once the loop ends
 	<- start
 	for true {
-		work(data, size, cnt, &state)
+		work(data, cnt, &state)
 
 		i := __xorshift64(&state) % uint64(len(channels))
 		var closed bool
 		data, closed = channels[i].put(&ctx, data, share)
-		count += 1
 
 		if closed { break }
 		if  clock_mode && atomic.LoadInt32(&stop) == 1 { break }
-		if !clock_mode && count >= stop_count { break }
-		if uint64(len(*data)) != size {
+		if !clock_mode && r.count >= stop_count { break }
+		if uint64(len(data.data)) != size {
 			panic("Data has weird size")
 		}
+
+		ttid := syscall.Gettid() // sampled once per iteration, after put has returned
+		r.count += 1 // NOTE(review): incremented after the exit checks, so the pass that breaks is not counted — confirm intended
+		r.gmigs += ctx .moved(ttid)
+		r.dmigs += data.moved(ttid)
 	}
 
 	atomic.AddInt64(&threads_left, -1);
-	result <- count
+	result <- r
 }
 
@@ -142,5 +197,5 @@
 	barrierStart := make(chan struct{})
 	threads_left = int64(nprocs)
-	result  := make(chan uint64)
+	result  := make(chan Result)
 	channels := make([]Spot, nthreads - nprocs)
 	for i := range channels {
@@ -169,9 +224,10 @@
 	}
 
-	global_counter := uint64(0)
+	global_result := NewResult()
 	for i := 0; i < nthreads; i++ {
 		r := <- result
-		global_counter += r
-		fmt.Printf("%d\n", r)
+		global_result.count += r.count
+		global_result.gmigs += r.gmigs
+		global_result.dmigs += r.dmigs
 	}
 
@@ -181,10 +237,12 @@
 	p.Printf("Number of threads      : %d\n", nthreads);
 	p.Printf("Work size (64bit words): %d\n", size);
-	p.Printf("Total Operations(ops)  : %15d\n", global_counter)
-	p.Printf("Ops per second         : %18.2f\n", float64(global_counter) / delta.Seconds())
-	p.Printf("ns per ops             : %18.2f\n", float64(delta.Nanoseconds()) / float64(global_counter))
-	p.Printf("Ops per threads        : %15d\n", global_counter / uint64(nthreads))
-	p.Printf("Ops per procs          : %15d\n", global_counter / uint64(nprocs))
-	p.Printf("Ops/sec/procs          : %18.2f\n", (float64(global_counter) / float64(nprocs)) / delta.Seconds())
-	p.Printf("ns per ops/procs       : %18.2f\n", float64(delta.Nanoseconds()) / (float64(global_counter) / float64(nprocs)))
-}
+	p.Printf("Total Operations(ops)  : %15d\n", global_result.count)
+	p.Printf("Total G Migrations     : %15d\n", global_result.gmigs)
+	p.Printf("Total D Migrations     : %15d\n", global_result.dmigs)
+	p.Printf("Ops per second         : %18.2f\n", float64(global_result.count) / delta.Seconds())
+	p.Printf("ns per ops             : %18.2f\n", float64(delta.Nanoseconds()) / float64(global_result.count))
+	p.Printf("Ops per threads        : %15d\n", global_result.count / uint64(nthreads))
+	p.Printf("Ops per procs          : %15d\n", global_result.count / uint64(nprocs))
+	p.Printf("Ops/sec/procs          : %18.2f\n", (float64(global_result.count) / float64(nprocs)) / delta.Seconds())
+	p.Printf("ns per ops/procs       : %18.2f\n", float64(delta.Nanoseconds()) / (float64(global_result.count) / float64(nprocs)))
+}
