# Read result-append-pbv.csv
# Output for string-graph-peq-cppemu.dat
# Project details
# Filter operation=peq
# Split "series" groups of sut; only those in the "pretty" list
# Assert one row per string-length
# Output:
#   string-len   op-duration
# in chunks, each headed by pretty(sut)

import pandas as pd
import numpy as np
import os

infile = os.path.dirname(os.path.abspath(__file__)) + '/../benchmarks/string/result-append-pbv.csv'

# map raw sut names to gnuplot enhanced-text series titles
prettyFieldNames = {
    "cfa-ll-noshare-fresh": "{/Helvetica=15 C{/Symbol \\42} +=} noshare fresh",
    "cfa-ll-noshare-reuse": "{/Helvetica=15 C{/Symbol \\42} +=} noshare reuse",
    "stl-na-na-fresh": "STL {/Helvetica=15 +=} fresh",
    "stl-na-na-reuse": "STL {/Helvetica=15 +=} reuse",
}

timings = pd.read_csv(
    infile,
    names=['test', 'corpus', 'concatsPerReset', 'corpusItemCount',
           'corpusMeanLenChars', 'concatDoneActualCount', 'execTimeActualSec'],
    dtype={'test': str,
           'corpus': str,
           'concatsPerReset': 'Int64',  # allows missing; https://stackoverflow.com/a/70626154
           'corpusItemCount': np.int64,
           'corpusMeanLenChars': np.float64,
           'concatDoneActualCount': np.int64,
           'execTimeActualSec': np.float64},
    na_values=['xxx'],
)
# print(timings.head())

# project: parse executable and corpus names
timings[['test-slug', 'sut-platform', 'operation', 'sut-cfa-level', 'sut-cfa-sharing', 'op-alloc']] = timings['test'].str.strip().str.split('-', expand=True)
timings['sut'] = timings[['sut-platform', 'sut-cfa-level', 'sut-cfa-sharing', 'op-alloc']].agg('-'.join, axis=1)
timings[['corpus-basename', 'corpus-ext']] = timings['corpus'].str.strip().str.split('.', expand=True)
timings[['corpus-slug', 'corpus-nstrs', 'corpus-meanlen', 'corpus-runid']] = timings['corpus-basename'].str.strip().str.split('-', expand=True)
timings["corpus-nstrs"] = pd.to_numeric(timings["corpus-nstrs"])
timings["corpus-meanlen"] = pd.to_numeric(timings["corpus-meanlen"])
timings["corpus-runid"] = pd.to_numeric(timings["corpus-runid"])

# project: calculate fact (per-operation duration)
timings['op-duration-s'] = timings['execTimeActualSec'] / timings['concatDoneActualCount']
timings['op-duration-ns'] = timings['op-duration-s'] * 1000 * 1000 * 1000

# Filter operation=peq
groupedOp = timings.groupby('operation')
tgtOpTimings = groupedOp.get_group('peq')

# Emit in groups, one chunk per sut in the "pretty" list
groupedSut = tgtOpTimings.groupby('sut')
for sut, sgroup in groupedSut:
    if sut in prettyFieldNames:
        sgroup_sorted = sgroup.sort_values(by='corpus-meanlen')
        # assert one row per string length, per the header's contract
        assert sgroup_sorted['corpus-meanlen'].is_unique, \
            'expected one row per string length for sut {}'.format(sut)
        print('"{header}"'.format(header=prettyFieldNames[sut]))
        text = sgroup_sorted[['corpus-meanlen', 'op-duration-ns']].to_csv(header=False, index=False, sep='\t')
        print(text)
        print()
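
# For reference, a minimal sketch of the emitted chunk format (the numeric
# values below are illustrative placeholders, not taken from any real run):
# each chunk is a quoted gnuplot-style series title, then tab-separated
# <corpus-meanlen> <op-duration-ns> rows, then blank lines separating chunks,
# which gnuplot can address as separate datasets via its `index` mechanism.
#
#   "STL {/Helvetica=15 +=} fresh"
#   16.0    41.2
#   64.0    55.7
#
#   "STL {/Helvetica=15 +=} reuse"
#   ...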