import pandas as pd
import numpy as np
import os
import sys
import math

sys.path.insert(0, os.path.dirname(__file__))
from ListCommon import *

# The range from 0.9759 to 1.0247 (its top is 1.05 x its bottom) has 1.0 at its geometric centre.
# This is the bucket with key 0.
# Logs, taken to base bucketGrain (1.05), of values in this bucket go from -0.5 to +0.5.
# Rounding such a log value to the nearest integer gives the key.
# Raising bucketGrain to the power of a key gives the centre of its bucket.
# Raising bucketGrain to the power of (key - 0.5) gives the bottom of its bucket.
# Gnuplot expects the latter.

# Limits from which the range of bucket keys is derived, and the ratio
# between the top and bottom edge of every bucket.
bucketMin, bucketMax = 0.25, 4.0
bucketGrain = 1.05

# Integer keys bounding that range: the log (base bucketGrain) of each limit,
# rounded outward -- down for the low limit, up for the high limit.
bktKeyLo = math.floor( math.log(bucketMin, bucketGrain) )
bktKeyHi = math.ceil( math.log(bucketMax, bucketGrain) )

def bktKeyOfVal( relDur ):
    """Return the bucket key of relDur.

    The key is the log of relDur to base bucketGrain, passed through
    Python's round() to get the nearest integer.
    """
    return round( math.log(relDur, bucketGrain) )

def bktIxOfVal( relDur ):
    """Return the bucket index of relDur: its bucket key converted by bktKeyToIx."""
    keyOfVal = bktKeyOfVal( relDur )
    return bktKeyToIx( keyOfVal )

def botValOfBucketK( key ):
    """Return the bottom edge of the bucket with the given key.

    The bucket centre is bucketGrain ** key; the bottom edge sits half a
    step below that on the log scale.
    """
    exponentAtBottom = key - 0.5
    return bucketGrain ** exponentAtBottom

def topValOfBucketBotVal( botVal ):
    """Return the top edge of the bucket whose bottom edge is botVal."""
    # Every bucket's top is bucketGrain times its bottom.
    topVal = bucketGrain * botVal
    return topVal

def bktKeyToIx( key ):
    """Convert a bucket key to an index by subtracting bktKeyLo (key bktKeyLo maps to 0)."""
    ix = key - bktKeyLo
    return ix

def bktIxToKey( ix ):
    """Convert a bucket index back to its key by adding bktKeyLo (index 0 maps to bktKeyLo)."""
    key = ix + bktKeyLo
    return key

def botOfBucketOfVal( relDur ):
    """Return the bottom edge of the bucket that relDur falls into."""
    keyOfVal = bktKeyOfVal( relDur )
    return botValOfBucketK( keyOfVal )

# Bottom edges of the buckets with keys bktKeyLo .. bktKeyHi - 1, in ascending
# key order (range() is half-open, so key bktKeyHi itself is not included).
buckets = list( map( botValOfBucketK, range(bktKeyLo, bktKeyHi) ) )

# printSingleDetail
def printHistos(*,
    tgtMovement = 'all',
    tgtPolarity = 'all',
    tgtAccessor = 'all',
    tgtInterleave = 0.0,
    marginalizeOn = None ):
    """Print, for each marginalizeOn group, a histogram of relative op durations.

    Loads results with getSingleResults (forwarding the tgt* arguments),
    narrows them with getJustCanon, aggregates mean_op_dur_ns over each
    `explanations` group using gMeanNoOutlr, and calls annotateBaseline, which
    is expected to add the column named by c('OpDurRel', marginalizeOn).
    Each value in that column is mapped to the bottom edge of its bucket, and
    one data set per marginalizeOn group is printed to stdout:

        "<group key parts joined by ', '>"
        y_lo <TAB> y_hi <TAB> count      (one row per bucket, ascending y_lo)

    followed by blank lines (presumably the data-set separator the consuming
    gnuplot script relies on).  Every entry of the module-level `buckets`
    list gets a row, with count 0 when no observation fell into it.

    tgtMovement, tgtPolarity, tgtAccessor, tgtInterleave:
        passed unchanged to getSingleResults.
    marginalizeOn:
        list of column names to group the output by; None (the default)
        means ['fx'].
    """
    # A None sentinel gives each call its own list, so the helpers that
    # receive it cannot leak changes into later calls through a shared default.
    if marginalizeOn is None:
        marginalizeOn = ['fx']

    # watch out for filtering too early here; need everything sticking around until baselines are applied
    # ie, maybe I should get rid of all the tgt parms at the pre-benchmark layers
    timings = getSingleResults(
        tgtMovement = tgtMovement,
        tgtPolarity = tgtPolarity,
        tgtAccessor = tgtAccessor,
        tgtInterleave = tgtInterleave)
    timings = getJustCanon( timings,
                  fxInc = ['cfa-cfa', 'lq-tailq', 'upp-upp', 'lq-list'],
                  szInc = ['SM', 'ML'],
                  sExcl = [1] )

    aggregated = timings.groupby(explanations).agg(
        mean_op_dur_ns = ('mean_op_dur_ns', gMeanNoOutlr)
    ).reset_index()

    # if examining "why CFA slow" need both
    # - getVariousCfa in place of getJustCanon
    # - do annotate-then-filter because baseline needs to stay cfa-tailq-upp
    # (filter-then-annotate is fine for general cases (where all three canons are included) and good for build time)
    annotateBaseline(aggregated, marginalizeOn)

    c_measure = c('OpDurRel', marginalizeOn)
    c_measureBkt = 'BUCKET_' + c_measure
    aggregated[ c_measureBkt ] = aggregated[c_measure].apply( botOfBucketOfVal )

    for mkey, mgroup in aggregated.groupby(marginalizeOn):

        histo_raw = mgroup[ c_measureBkt ].value_counts()

        # Give every pre-built bucket a row, so empty buckets are emitted with
        # a zero count.  Membership is an exact float match, which holds
        # because both sides are produced by botValOfBucketK.
        for b in buckets:
            if b not in histo_raw.index:
                histo_raw[b] = 0
        histo_raw = histo_raw.sort_index()

        # Lay the columns out as y_lo, y_hi, count.
        histo = histo_raw.rename("count").reset_index()
        histo = histo.rename(columns={c_measureBkt: "y_lo"})
        y_lo_col_loc = histo.columns.get_loc("y_lo")
        histo.insert(y_lo_col_loc + 1, "y_hi", histo["y_lo"].apply(topValOfBucketBotVal))

        # Depending on the pandas version, grouping by a single column can
        # yield a bare scalar key instead of a 1-tuple; joining a bare string
        # would interleave ', ' between its characters.  Key parts are also
        # stringified so that non-string keys can be joined.
        keyParts = mkey if isinstance(mkey, tuple) else (mkey,)
        header = ', '.join( str(part) for part in keyParts )
        print(f'"{header}"')
        text = histo.to_csv(header=False, index=False, sep='\t')
        print(text)
        print()
        print()
    

# Script entry: print one histogram per (machine, SizeZone) group, passing
# 'all' for movement, polarity and accessor; tgtInterleave keeps its default.
printHistos(
    marginalizeOn = ['machine', 'SizeZone'],
    tgtAccessor = 'all',
    tgtPolarity = 'all',
    tgtMovement = 'all' )