Index: benchmark/readyQ/bench.go
===================================================================
--- benchmark/readyQ/bench.go	(revision c5a98f343fd19f7600bdc61b555a8be7bfcf318f)
+++ benchmark/readyQ/bench.go	(revision f4f79ddd5cc150690ec64cb0721043799106ab58)
@@ -5,6 +5,8 @@
 	"flag"
 	"fmt"
+	"log"
 	"os"
 	"runtime"
+	"runtime/pprof"
 	"sync/atomic"
 	"time"
@@ -43,9 +45,10 @@
 }
 
-func bench_init() {
+func bench_init() func() {
 	nprocsOpt := flag.Int("p", 1, "The number of processors")
 	nthreadsOpt := flag.Int("t", 1, "The number of threads")
 	durationOpt := flag.Float64("d", 0, "Duration of the experiment in seconds")
 	stopOpt := flag.Uint64("i", 0, "Duration of the experiment in iterations")
+	cpuprofile := flag.String("cpuprofile", "", "write cpu profile to file")
 
 	flag.Parse()
@@ -72,3 +75,27 @@
 
 	runtime.GOMAXPROCS(nprocs)
+
+	// Without -cpuprofile there is nothing to start or stop; hand back a no-op.
+	if *cpuprofile == "" {
+		return func() {}
+	}
+
+	f, err := os.Create(*cpuprofile)
+	if err != nil {
+		log.Fatal(err)
+	}
+	// StartCPUProfile returns an error if profiling is already enabled; abort
+	// rather than run the benchmark silently unprofiled.
+	if err := pprof.StartCPUProfile(f); err != nil {
+		log.Fatal(err)
+	}
+
+	// The returned function stops profiling and then closes the profile file,
+	// which the previous version left open.
+	return func() {
+		pprof.StopCPUProfile()
+		if err := f.Close(); err != nil {
+			log.Print(err)
+		}
+	}
 }
Index: benchmark/readyQ/cycle.rs
===================================================================
--- benchmark/readyQ/cycle.rs	(revision c5a98f343fd19f7600bdc61b555a8be7bfcf318f)
+++ benchmark/readyQ/cycle.rs	(revision f4f79ddd5cc150690ec64cb0721043799106ab58)
@@ -14,11 +14,8 @@
 use tokio::time;
 
-extern crate isatty;
 use isatty::stdout_isatty;
 
-extern crate num_format;
 use num_format::{Locale, ToFormattedString};
 
-extern crate clap;
 use clap::{Arg, App};
 
Index: benchmark/readyQ/locality.go
===================================================================
--- benchmark/readyQ/locality.go	(revision c5a98f343fd19f7600bdc61b555a8be7bfcf318f)
+++ benchmark/readyQ/locality.go	(revision f4f79ddd5cc150690ec64cb0721043799106ab58)
@@ -215,6 +215,6 @@
 func main() {
 	// Benchmark specific command line arguments
-	work_sizeOpt := flag.Uint64("w", 2    , "Number of words (uint64) per threads")
-	countOpt     := flag.Uint64("c", 2    , "Number of words (uint64) to touch")
+	work_sizeOpt := flag.Uint64("w", 2    , "Size of the array for each threads, in words (64bit)")
+	countOpt     := flag.Uint64("c", 2    , "Number of words to touch when working (random pick, cells can be picked more than once)")
 	shareOpt     := flag.Bool  ("s", false, "Pass the work data to the next thread when blocking")
 
@@ -266,10 +266,10 @@
 
 	// Join and accumulate results
-	global_result := NewResult()
+	results := NewResult()
 	for i := 0; i < nthreads; i++ {
 		r := <- result
-		global_result.count += r.count
-		global_result.gmigs += r.gmigs
-		global_result.dmigs += r.dmigs
+		results.count += r.count
+		results.gmigs += r.gmigs
+		results.dmigs += r.dmigs
 	}
 
@@ -280,12 +280,12 @@
 	p.Printf("Number of threads      : %d\n", nthreads);
 	p.Printf("Work size (64bit words): %d\n", size);
-	p.Printf("Total Operations(ops)  : %15d\n", global_result.count)
-	p.Printf("Total G Migrations     : %15d\n", global_result.gmigs)
-	p.Printf("Total D Migrations     : %15d\n", global_result.dmigs)
-	p.Printf("Ops per second         : %18.2f\n", float64(global_result.count) / delta.Seconds())
-	p.Printf("ns per ops             : %18.2f\n", float64(delta.Nanoseconds()) / float64(global_result.count))
-	p.Printf("Ops per threads        : %15d\n", global_result.count / uint64(nthreads))
-	p.Printf("Ops per procs          : %15d\n", global_result.count / uint64(nprocs))
-	p.Printf("Ops/sec/procs          : %18.2f\n", (float64(global_result.count) / float64(nprocs)) / delta.Seconds())
-	p.Printf("ns per ops/procs       : %18.2f\n", float64(delta.Nanoseconds()) / (float64(global_result.count) / float64(nprocs)))
-}
+	p.Printf("Total Operations(ops)  : %15d\n", results.count)
+	p.Printf("Total G Migrations     : %15d\n", results.gmigs)
+	p.Printf("Total D Migrations     : %15d\n", results.dmigs)
+	p.Printf("Ops per second         : %18.2f\n", float64(results.count) / delta.Seconds())
+	p.Printf("ns per ops             : %18.2f\n", float64(delta.Nanoseconds()) / float64(results.count))
+	p.Printf("Ops per threads        : %15d\n", results.count / uint64(nthreads))
+	p.Printf("Ops per procs          : %15d\n", results.count / uint64(nprocs))
+	p.Printf("Ops/sec/procs          : %18.2f\n", (float64(results.count) / float64(nprocs)) / delta.Seconds())
+	p.Printf("ns per ops/procs       : %18.2f\n", float64(delta.Nanoseconds()) / (float64(results.count) / float64(nprocs)))
+}