Index: benchmark/readyQ/cycle.rs
===================================================================
--- benchmark/readyQ/cycle.rs	(revision b2e0df3b43fc7a0c8e3a02bf57f4f45e97fbca4e)
+++ benchmark/readyQ/cycle.rs	(revision b2e0df3b43fc7a0c8e3a02bf57f4f45e97fbca4e)
@@ -0,0 +1,204 @@
+#[cfg(any(
+	feature = "sync time rt-threaded",
+  ))]
+
+extern crate tokio;
+
+use std::io::{self, Write};
+use std::sync::Arc;
+use std::sync::atomic::{AtomicU64, AtomicBool,Ordering};
+use std::time::{Instant,Duration};
+
+use tokio::runtime::Builder;
+use tokio::sync;
+use tokio::time;
+
+extern crate isatty;
+use isatty::stdout_isatty;
+
+extern crate num_format;
+use num_format::{Locale, ToFormattedString};
+
+extern crate clap;
+use clap::{Arg, App};
+
+use std::cell::UnsafeCell;
+use std::mem::MaybeUninit;
+use std::ops;
+
+/// Unchecked write-once cell for globals that are filled in at start-up and only read afterwards.
+pub struct InitializeCell<T> {
+    inner: UnsafeCell<MaybeUninit<T>>,
+}
+
+// SAFETY: sharing the cell hands out `&T` (and `init` may store a `T` built on another thread), so `T`
+// must be `Send + Sync`; callers of `init` are responsible for ruling out concurrent access.
+unsafe impl<T: Send + Sync> Sync for InitializeCell<T> {}
+impl<T> InitializeCell<T> {
+    /// Creates an empty cell. Safety: `init` must be called before the cell is first dereferenced.
+    pub const unsafe fn new_uninitialized() -> InitializeCell<T> {
+	  InitializeCell { inner: UnsafeCell::new(MaybeUninit::uninit()) }
+    }
+    /// Creates a cell that already holds `init`.
+    pub const fn new(init: T) -> InitializeCell<T> {
+	  InitializeCell { inner: UnsafeCell::new(MaybeUninit::new(init)) }
+    }
+    /// Stores `init`. Safety: nothing else may access the cell during the call; an old value is not dropped.
+    pub unsafe fn init(&self, init: T) {
+	  (*self.inner.get()) = MaybeUninit::new(init);
+    }
+}
+
+impl<T> ops::Deref for InitializeCell<T> {
+    type Target = T;
+    /// Borrows the stored value; this is undefined behaviour if the cell was never initialized.
+    fn deref(&self) -> &T {
+	  unsafe { &*(*self.inner.get()).as_ptr() }
+    }
+}
+
+static CLOCK_MODE: InitializeCell<bool> = unsafe { InitializeCell::new_uninitialized() }; // true: run until STOP is set; false: run until STOP_COUNT; initialized in main()
+static STOP_COUNT: InitializeCell<u64>  = unsafe { InitializeCell::new_uninitialized() }; // per-task iteration limit; only initialized when --iterations is given
+static DURATION: InitializeCell<f64>    = unsafe { InitializeCell::new_uninitialized() }; // run time in seconds; only initialized when --iterations is absent
+static STOP         : AtomicBool = AtomicBool::new(false); // set by main() once wait() has returned
+static THREADS_LEFT : AtomicU64  = AtomicU64 ::new(10); // placeholder value; main() stores the real task count before spawning
+
+struct Partner { // one slot of the ring
+	sem: sync::Semaphore, // the owning task waits on this; another task adds a permit to release it
+	next: usize, // index in the shared partner table of the slot this task signals
+}
+
+/// Ring worker: waits on its own semaphore, signals `others[next]`, counts each hand-off, and sends the total through `result`.
+async fn partner_main(result: sync::oneshot::Sender<u64>, idx: usize, others: Arc<Vec<Arc<Partner>>> ) {
+	let me = &others[idx];
+	let mut laps: u64 = 0;
+	loop {
+		me.sem.acquire().await.forget(); // the acquired permit is forgotten rather than held
+		others[me.next].sem.add_permits(1);
+		laps += 1;
+		// Only the cell that belongs to the active mode is dereferenced; main() initializes just that one.
+		let finished = if *CLOCK_MODE { STOP.load(Ordering::Relaxed) } else { laps >= *STOP_COUNT };
+		if finished { break; }
+	}
+	THREADS_LEFT.fetch_sub(1, Ordering::SeqCst);
+	result.send( laps ).unwrap(); // panics if the send fails
+}
+
+/// Builds the partner table: `tthreads` entries, each starting with a semaphore holding zero permits.
+/// Entry `i` names entry `(i + nthreads) % tthreads` as the one it signals.
+fn prep(nthreads: usize, tthreads: usize) -> Vec<Arc<Partner>> {
+	(0..tthreads)
+		.map(|slot| {
+			// The closure never runs when `tthreads` is 0, so the modulo cannot divide by zero.
+			let successor = (slot + nthreads) % tthreads;
+			Arc::new(Partner { sem: sync::Semaphore::new(0), next: successor })
+		})
+		.collect()
+}
+
+/// Polls every 100 ms until the run is over, printing the elapsed seconds on one line when `is_tty` is set.
+/// Clock mode ends once `DURATION` seconds have elapsed; otherwise it ends once `THREADS_LEFT` reads zero.
+async fn wait(start: &Instant, is_tty: bool) {
+	let tick = Duration::from_millis(100);
+	loop {
+		time::sleep(tick).await;
+		let elapsed = start.elapsed();
+		if is_tty {
+			print!(" {:.1}\r", elapsed.as_secs_f32());
+			io::stdout().flush().unwrap();
+		}
+		// Only the cell that matches the active mode is dereferenced.
+		let finished = if *CLOCK_MODE { elapsed >= Duration::from_secs_f64(*DURATION) } else { THREADS_LEFT.load(Ordering::Relaxed) == 0 };
+		if finished { break; }
+	}
+}
+
+/// Entry point: parses the options, runs the semaphore ring on a multi-threaded tokio runtime, then prints the statistics.
+fn main() {
+	let options = App::new("Cycle Tokio")
+		.arg(Arg::with_name("duration")  .short("d").long("duration")  .takes_value(true).default_value("5").help("Duration of the experiments in seconds"))
+		.arg(Arg::with_name("iterations").short("i").long("iterations").takes_value(true).conflicts_with("duration").help("Number of iterations of the experiments"))
+		.arg(Arg::with_name("nthreads")  .short("t").long("nthreads")  .takes_value(true).default_value("1").help("Number of threads to use"))
+		.arg(Arg::with_name("nprocs")    .short("p").long("nprocs")    .takes_value(true).default_value("1").help("Number of processors to use"))
+		.arg(Arg::with_name("ringsize")  .short("r").long("ringsize")  .takes_value(true).default_value("1").help("Number of threads in a cycle"))
+		.get_matches();
+
+	let ring_size = options.value_of("ringsize").unwrap().parse::<usize>().unwrap();
+	let nthreads  = options.value_of("nthreads").unwrap().parse::<usize>().unwrap();
+	let nprocs    = options.value_of("nprocs").unwrap().parse::<usize>().unwrap();
+	// SAFETY (both branches): the runtime and its tasks are only created further down, so nothing else reads these cells yet.
+	if options.is_present("iterations") {
+		unsafe{
+			CLOCK_MODE.init( false );
+			STOP_COUNT.init( options.value_of("iterations").unwrap().parse::<u64>().unwrap() );
+		}
+	}
+	else {
+		unsafe{
+			CLOCK_MODE.init(true);
+			DURATION  .init(options.value_of("duration").unwrap().parse::<f64>().unwrap());
+		}
+	}
+
+	let s = (1000000 as u64).to_formatted_string(&Locale::en);
+	assert_eq!(&s, "1,000,000");
+
+	let tthreads = nthreads * ring_size;
+	THREADS_LEFT.store(tthreads as u64, Ordering::SeqCst);
+	let thddata = Arc::new(prep(nthreads, tthreads));
+
+	let mut global_counter :u64 = 0;
+	let mut duration : std::time::Duration = std::time::Duration::from_secs(0);
+	let runtime = Builder::new_multi_thread()
+		.worker_threads(nprocs)
+		.enable_all()
+		.build()
+		.unwrap();
+
+	runtime.block_on(async {
+		let mut result  : Vec<sync::oneshot::Receiver::<u64>> = Vec::with_capacity(tthreads);
+		{
+			let mut threads = Vec::with_capacity(tthreads);
+			for i in 0..tthreads {
+				let (s, r) = sync::oneshot::channel::<u64>();
+				result.push(r);
+				threads.push(tokio::spawn(partner_main(s, i, thddata.clone())));
+			}
+			println!("Starting");
+
+			let is_tty = stdout_isatty();
+			let start = Instant::now();
+			// Start the rings: the first `nthreads` partners each receive one permit.
+			for i in 0..nthreads {
+				thddata[i].sem.add_permits(1);
+			}
+
+			wait(&start, is_tty).await;
+
+			STOP.store(true, Ordering::SeqCst);
+			duration = start.elapsed();
+
+			println!("\nDone");
+			// Give every partner one more permit so a task still waiting in acquire() reaches its exit check.
+			for i in 0..tthreads {
+				thddata[i].sem.add_permits(1);
+			}
+			// Add up the per-task counts; the receivers are popped last-to-first.
+			for _ in 0..tthreads {
+				global_counter += result.pop().unwrap().await.unwrap();
+			}
+		}
+	});
+	// Rates divide by fractional seconds: whole-second truncation skews them and yields a zero divisor on sub-second runs.
+	println!("Duration (ms)        : {}", (duration.as_millis()).to_formatted_string(&Locale::en));
+	println!("Number of processors : {}", (nprocs).to_formatted_string(&Locale::en));
+	println!("Number of threads    : {}", (tthreads).to_formatted_string(&Locale::en));
+	println!("Cycle size (# thrds) : {}", (ring_size).to_formatted_string(&Locale::en));
+	println!("Total Operations(ops): {:>15}", (global_counter).to_formatted_string(&Locale::en));
+	println!("Ops per second       : {:>15}", (((global_counter as f64) / duration.as_secs_f64()) as u64).to_formatted_string(&Locale::en));
+	println!("ns per ops           : {:>15}", ((duration.as_nanos() as f64 / global_counter as f64) as u64).to_formatted_string(&Locale::en));
+	println!("Ops per threads      : {:>15}", (global_counter / tthreads as u64).to_formatted_string(&Locale::en));
+	println!("Ops per procs        : {:>15}", (global_counter / nprocs as u64).to_formatted_string(&Locale::en));
+	println!("Ops/sec/procs        : {:>15}", ((((global_counter as f64) / nprocs as f64) / duration.as_secs_f64()) as u64).to_formatted_string(&Locale::en));
+	println!("ns per ops/procs     : {:>15}", ((duration.as_nanos() as f64 / (global_counter as f64 / nprocs as f64)) as u64).to_formatted_string(&Locale::en));
+}
