# Read result-append-pbv.csv
# Output for string-graph-peq-cppemu.dat

# Project details
# Filter operation=peq
# Split "series" groups of sut; only those in the "pretty" list
# Assert one row per string-length
# output:
#   string-len op-duration
# in chunks, each headed by pretty(sut)

import os

import numpy as np
import pandas as pd

# Input CSV of raw benchmark timings, located relative to this script so the
# script works regardless of the current working directory.
# (os.path.join instead of '+'-concatenation keeps the path portable.)
infile = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                      '..', 'benchmarks', 'string', 'result-append-pbv.csv')
# Display labels (gnuplot markup) for each system-under-test slug.
# Only SUTs listed here are emitted in the output.
prettyFieldNames = dict([
    ("cfa-ll-noshare-fresh", "{/Helvetica=15 C{/Symbol \\42} +=} noshare fresh"),
    ("cfa-ll-noshare-reuse", "{/Helvetica=15 C{/Symbol \\42} +=} noshare reuse"),
    ("stl-na-na-fresh", "STL {/Helvetica=15 +=} fresh"),
    ("stl-na-na-reuse", "STL {/Helvetica=15 +=} reuse"),
])

# Load the raw benchmark observations.  The CSV has no header row, so column
# names and dtypes are supplied explicitly.
csvColumns = ['test', 'corpus', 'concatsPerReset', 'corpusItemCount',
              'corpusMeanLenChars', 'concatDoneActualCount', 'execTimeActualSec']
csvDtypes = {
    'test': str,
    'corpus': str,
    'concatsPerReset': 'Int64',  # nullable int: allows missing; https://stackoverflow.com/a/70626154
    'corpusItemCount': np.int64,
    'corpusMeanLenChars': np.float64,
    'concatDoneActualCount': np.int64,
    'execTimeActualSec': np.float64,
}
timings = pd.read_csv(
    infile,
    names=csvColumns,
    dtype=csvDtypes,
    na_values=['xxx'],  # 'xxx' appears to mark not-applicable fields — confirm against benchmark writer
)
# print(timings.head())


# project: parse executable and corpus names

# 'test' splits on '-' into six fields:
#   <slug>-<platform>-<operation>-<cfaLevel>-<cfaSharing>-<alloc>
testFields = ['test-slug',
              'sut-platform',
              'operation',
              'sut-cfa-level',
              'sut-cfa-sharing',
              'op-alloc']
timings[testFields] = timings['test'].str.strip().str.split('-', expand=True)

# The system-under-test identity is everything except the operation,
# re-joined with '-'.
sutFields = ['sut-platform', 'sut-cfa-level', 'sut-cfa-sharing', 'op-alloc']
timings['sut'] = timings[sutFields].agg('-'.join, axis=1)

# 'corpus' is a filename: strip the extension, then split the basename on '-'.
timings[['corpus-basename', 'corpus-ext']] = \
    timings['corpus'].str.strip().str.split('.', expand=True)
timings[['corpus-slug',
         'corpus-nstrs',
         'corpus-meanlen',
         'corpus-runid']] = \
    timings['corpus-basename'].str.strip().str.split('-', expand=True)

# The split yields strings; convert the numeric corpus attributes so they
# sort and plot numerically.
for numericField in ('corpus-nstrs', 'corpus-meanlen', 'corpus-runid'):
    timings[numericField] = pd.to_numeric(timings[numericField])


# project: calculate fact

# Mean wall-clock cost of one concat operation, in seconds and nanoseconds.
timings['op-duration-s'] = (timings['execTimeActualSec']
                            / timings['concatDoneActualCount'])
timings['op-duration-ns'] = timings['op-duration-s'] * (1000 ** 3)


# Filter operation=peq

# Keep only the rows whose parsed operation is 'peq' ("+=").
groupedOp = timings.groupby('operation')
tgtOpTimings = groupedOp.get_group('peq')


# Emit in groups

# One output chunk per system-under-test.
groupedSut = tgtOpTimings.groupby('sut')

# Emit one chunk per SUT: a quoted pretty-name header line, then
# tab-separated (corpus-meanlen, op-duration-ns) rows sorted by mean string
# length, followed by a blank line separating chunks.
for sut, rows in groupedSut:

    # SUTs without a pretty label are deliberately omitted from the output.
    if sut not in prettyFieldNames:
        continue

    bySize = rows.sort_values(by='corpus-meanlen')

    print('"{header}"'.format(header=prettyFieldNames[sut]))
    chunk = bySize[['corpus-meanlen', 'op-duration-ns']].to_csv(
        header=False, index=False, sep='\t')
    print(chunk)
    print()