# Read result-append-pbv.csv
# Output for string-graph-peq-cppemu.dat

# Project details
# Filter operation=peq
# Split "series" goups of sut; only those in the "pretty" list
# Assert one row per string-length
# output:
# string-len op-duration
# in chunks, each headed by pretty(sut)

import pandas as pd
import numpy as np
import os

# Absolute path to the raw benchmark CSV, located relative to this script.
scriptDir = os.path.dirname(os.path.abspath(__file__))
infile = scriptDir + '/../benchmarks/string/result-append-pbv.csv'

# Display names (gnuplot enhanced-text markup) for the suts worth plotting;
# suts not listed here are skipped at emit time.
prettyFieldNames = dict([
    ("cfa-ll-noshare-fresh", "{/Helvetica=15 C{/Symbol \\42} +=} noshare fresh"),
    ("cfa-ll-noshare-reuse", "{/Helvetica=15 C{/Symbol \\42} +=} noshare reuse"),
    ("stl-na-na-fresh",      "STL {/Helvetica=15 +=} fresh"),
    ("stl-na-na-reuse",      "STL {/Helvetica=15 +=} reuse"),
])

# Load the raw benchmark results.  The CSV has no header row, so column
# names and dtypes are supplied explicitly; 'xxx' cells are treated as NA.
csvColumns = ['test', 'corpus', 'concatsPerReset', 'corpusItemCount',
              'corpusMeanLenChars', 'concatDoneActualCount', 'execTimeActualSec']
csvDtypes = {
    'test':                  str,
    'corpus':                str,
    'concatsPerReset':       'Int64',  # nullable int, allows missing; https://stackoverflow.com/a/70626154
    'corpusItemCount':       np.int64,
    'corpusMeanLenChars':    np.float64,
    'concatDoneActualCount': np.int64,
    'execTimeActualSec':     np.float64,
}
timings = pd.read_csv(infile, names=csvColumns, dtype=csvDtypes, na_values=['xxx'])
# print(timings.head())


# project: parse executable and corpus names

# Explode the dash-separated 'test' string into its dimension columns.
testParts = ['test-slug', 'sut-platform', 'operation',
             'sut-cfa-level', 'sut-cfa-sharing', 'op-alloc']
timings[testParts] = timings['test'].str.strip().str.split('-', expand=True)

# Re-join the system-under-test identifying parts into a single key.
sutParts = ['sut-platform', 'sut-cfa-level', 'sut-cfa-sharing', 'op-alloc']
timings['sut'] = timings[sutParts].agg('-'.join, axis=1)

# Peel the extension off the corpus filename, then explode the basename.
timings[['corpus-basename', 'corpus-ext']] = (
    timings['corpus'].str.strip().str.split('.', expand=True))
corpusParts = ['corpus-slug', 'corpus-nstrs', 'corpus-meanlen', 'corpus-runid']
timings[corpusParts] = (
    timings['corpus-basename'].str.strip().str.split('-', expand=True))

# The numeric corpus dimensions arrive as strings; convert them.
for numericCol in ('corpus-nstrs', 'corpus-meanlen', 'corpus-runid'):
    timings[numericCol] = pd.to_numeric(timings[numericCol])


# project: calculate fact

# Fact: mean duration of a single concat operation, in seconds and nanoseconds.
perOpSeconds = timings['execTimeActualSec'] / timings['concatDoneActualCount']
timings['op-duration-s'] = perOpSeconds
timings['op-duration-ns'] = perOpSeconds * 1000 * 1000 * 1000


# Filter operation=peq

# Keep only the rows whose operation is 'peq'.
tgtOpTimings = timings.groupby('operation').get_group('peq')


# Emit in groups

groupedSut = tgtOpTimings.groupby('sut')

# Emit one chunk per plotted sut: a quoted pretty-name header line, then
# tab-separated (string-length, per-op-duration-ns) rows sorted by length.
for sut, sgroup in groupedSut:

    # Only suts named in the "pretty" list are plotted; skip the rest.
    if sut in prettyFieldNames:

        sgroup_sorted = sgroup.sort_values(by='corpus-meanlen')

        # Per the header spec: exactly one row per string-length.  Duplicate
        # lengths would silently produce a malformed chunk for gnuplot.
        assert sgroup_sorted['corpus-meanlen'].is_unique, \
            'multiple rows share a string-length for sut ' + sut

        print('"{header}"'.format(header=prettyFieldNames[sut]))
        text = sgroup_sorted[['corpus-meanlen', 'op-duration-ns']].to_csv(header=False, index=False, sep='\t')
        print(text)
        print()
