package main

import (
	"bufio"
	"flag"
	"fmt"
	"os"
	"runtime"
	"sync/atomic"
	"time"
	"golang.org/x/text/language"
	"golang.org/x/text/message"
)

// Experiment-wide state shared between main, wait and the partner goroutines.
var (
	// clock_mode is true when the run is bounded by elapsed time and false
	// when it is bounded by a per-goroutine iteration count.
	clock_mode bool

	// threads_left is the number of partner goroutines that have not left
	// their loop yet; it is updated and read with sync/atomic.
	threads_left int64

	// stop is stored as 1 by main once wait has returned; partner goroutines
	// poll it (atomically) in clock mode.
	stop int32

	// duration is the requested length of the experiment, in seconds.
	duration float64

	// stop_count is the number of hand-offs after which each partner
	// goroutine leaves its loop when clock_mode is false.
	stop_count uint64
)

// fflush appends a single carriage return to f and then flushes everything
// buffered in f to the underlying writer.
//
// Write and flush errors are not reported to the caller.
func fflush(f *bufio.Writer) {
	f.WriteByte('\r')
	f.Flush()
}

// wait blocks the caller until the experiment is over, polling every 100ms.
//
// In clock mode (clock_mode is true) it returns once at least `duration`
// seconds have elapsed since start. Otherwise it returns once threads_left
// has dropped to zero.
//
// When is_tty is true, the elapsed time in seconds is written to stdout after
// every poll, followed by a carriage return.
func wait(start time.Time, is_tty bool) {
	out := bufio.NewWriter(os.Stdout)

	// Scale to nanoseconds before converting: time.Duration(duration) on its
	// own truncates the fractional part of the seconds (0.5 becomes 0, 2.5
	// becomes 2), which made sub-second precision impossible.
	limit := time.Duration(duration * float64(time.Second))

	for {
		time.Sleep(100 * time.Millisecond)
		elapsed := time.Since(start)

		if is_tty {
			fmt.Fprintf(out, " %.1f", elapsed.Seconds())
			fflush(out)
		}

		if clock_mode {
			if elapsed >= limit {
				return
			}
		} else if atomic.LoadInt64(&threads_left) == 0 {
			return
		}
	}
}

// partner is the body of one goroutine in a ring. Each round it receives a
// value from mine, sends 0 on next, and counts the hand-off.
//
// The loop ends after a hand-off when, in clock mode, stop has been set to 1,
// or, outside clock mode, when the number of hand-offs has reached
// stop_count. On exit the goroutine decrements threads_left and sends its
// hand-off count on result.
func partner(result chan uint64, mine chan int, next chan int) {
	var passes uint64

	for finished := false; !finished; {
		<-mine
		next <- 0
		passes++

		if clock_mode {
			finished = atomic.LoadInt32(&stop) == 1
		} else {
			finished = passes >= stop_count
		}
	}

	atomic.AddInt64(&threads_left, -1)
	result <- passes
}

// main parses the command line, builds the rings of goroutines, runs the
// experiment and prints the resulting statistics.
//
// Flags:
//
//	-p  value handed to runtime.GOMAXPROCS (default 1)
//	-t  number of independent rings, one token circulating in each (default 1)
//	-r  number of goroutines in each ring (default 2)
//	-d  run for this many seconds (clock mode)
//	-i  run until every goroutine has performed this many hand-offs
//
// -d and -i are mutually exclusive; when neither is given the experiment runs
// in clock mode for 5 seconds. Invalid arguments are reported on stderr and
// the process exits with status 2.
func main() {
	nprocsOpt := flag.Int("p", 1, "The number of processors")
	nthreadsOpt := flag.Int("t", 1, "The number of threads")
	ring_sizeOpt := flag.Int("r", 2, "The number of threads per cycles")
	durationOpt := flag.Float64("d", 0, "Duration of the experiment in seconds")
	stopOpt := flag.Uint64("i", 0, "Duration of the experiment in iterations")

	flag.Parse()

	nprocs := *nprocsOpt
	nthreads := *nthreadsOpt
	ring_size := *ring_sizeOpt
	duration = *durationOpt
	stop_count = *stopOpt

	// All three sizes are later used as divisors or slice lengths; zero would
	// divide by zero in the statistics and a negative product would make
	// `make` panic.
	if nprocs < 1 || nthreads < 1 || ring_size < 1 {
		fmt.Fprintln(os.Stderr, "-p, -t and -r must all be at least 1")
		os.Exit(2)
	}

	switch {
	case duration > 0 && stop_count > 0:
		fmt.Fprintln(os.Stderr, "-d and -i cannot be used together")
		os.Exit(2)
	case duration > 0:
		clock_mode = true
		stop_count = 0xFFFFFFFFFFFFFFFF
		fmt.Printf("Running for %f seconds\n", duration)
	case stop_count > 0:
		clock_mode = false
		fmt.Printf("Running for %d iterations\n", stop_count)
	default:
		duration = 5
		clock_mode = true
		fmt.Printf("Running for %f seconds\n", duration)
	}

	runtime.GOMAXPROCS(nprocs)
	tthreads := nthreads * ring_size
	threads_left = int64(tthreads)

	result := make(chan uint64)
	channels := make([]chan int, tthreads)
	for i := range channels {
		// One slot of buffering per channel.
		channels[i] = make(chan int, 1)
	}

	// Goroutine i hands off to goroutine i+nthreads (mod tthreads), which
	// yields nthreads disjoint rings of ring_size goroutines each.
	for i := 0; i < tthreads; i++ {
		pi := (i + nthreads) % tthreads
		go partner(result, channels[i], channels[pi])
	}
	fmt.Printf("Starting\n")

	atomic.StoreInt32(&stop, 0)
	start := time.Now()
	// Inject one token per ring.
	for i := 0; i < nthreads; i++ {
		channels[i] <- 0
	}
	wait(start, true)

	atomic.StoreInt32(&stop, 1)
	delta := time.Since(start)

	fmt.Printf("\nDone\n")

	// Every partner sends exactly one count on result when it leaves its loop.
	global_counter := uint64(0)
	for i := 0; i < tthreads; i++ {
		global_counter += <-result
	}

	// The label of the first line announces milliseconds, so convert
	// accordingly instead of printing seconds.
	durationMs := float64(delta.Nanoseconds()) / 1e6

	p := message.NewPrinter(language.English)
	p.Printf("Duration (ms)       : %f\n", durationMs)
	p.Printf("Number of processors: %d\n", nprocs)
	p.Printf("Number of threads   : %d\n", tthreads)
	p.Printf("Cycle size (# thrds): %d\n", ring_size)
	p.Printf("Yields per second   : %18.2f\n", float64(global_counter)/delta.Seconds())
	p.Printf("ns per yields       : %18.2f\n", float64(delta.Nanoseconds())/float64(global_counter))
	p.Printf("Total yields        : %15d\n", global_counter)
	p.Printf("Yields per threads  : %15d\n", global_counter/uint64(tthreads))
	p.Printf("Yields per procs    : %15d\n", global_counter/uint64(nprocs))
	p.Printf("Yields/sec/procs    : %18.2f\n", (float64(global_counter)/float64(nprocs))/delta.Seconds())
	p.Printf("ns per yields/procs : %18.2f\n", float64(delta.Nanoseconds())/(float64(global_counter)/float64(nprocs)))
}