# Read thesis-append-pbv.csv
# Output for string-graph-peq-cppemu.dat
#
# Pipeline:
#   1. Project details (parse executable and corpus names into columns)
#   2. Filter operation=peq
#   3. Split "series" groups of sut; keep only those in the "pretty" list
#   4. Assert one row per string-length
#   5. Output, in chunks each headed by pretty(sut):
#        string-len <TAB> op-duration

import os

import numpy as np
import pandas as pd

# Input CSV location, resolved relative to this script's own directory.
infile = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                      '..', 'benchmarks', 'string', 'result-append-pbv.csv')

# SUT slug -> gnuplot enhanced-text chunk header.  Only SUTs listed here are
# emitted; everything else in the CSV is skipped.
prettyFieldNames = {
    "cfa-ll-noshare-fresh": "{/Helvetica=15 C{/Symbol \\42} +=} noshare fresh",
    "cfa-ll-noshare-reuse": "{/Helvetica=15 C{/Symbol \\42} +=} noshare reuse",
    "stl-na-na-fresh": "STL {/Helvetica=15 +=} fresh",
    "stl-na-na-reuse": "STL {/Helvetica=15 +=} reuse",
}

# The CSV has no header row; supply column names and dtypes explicitly.
timings = pd.read_csv(
    infile,
    names=['test', 'corpus', 'concatsPerReset', 'corpusItemCount', 'corpusMeanLenChars', 'concatDoneActualCount', 'execTimeActualSec'],
    dtype={'test': str,
           'corpus': str,
           'concatsPerReset': 'Int64',  # nullable int: allows missing; https://stackoverflow.com/a/70626154
           'corpusItemCount': np.int64,
           'corpusMeanLenChars': np.float64,
           'concatDoneActualCount': np.int64,
           'execTimeActualSec': np.float64},
    na_values=['xxx'],
)
# print(timings.head())


# project: parse executable and corpus names

# 'test' is a 6-part dash-separated slug, e.g. slug-platform-op-level-sharing-alloc.
timings[['test-slug',
         'sut-platform',
         'operation',
         'sut-cfa-level',
         'sut-cfa-sharing',
         'op-alloc']] = timings['test'].str.strip().str.split('-', expand=True)
# Recombine the system-under-test identity (everything except slug and operation).
timings['sut'] = timings[['sut-platform',
                          'sut-cfa-level',
                          'sut-cfa-sharing',
                          'op-alloc']].agg('-'.join, axis=1)

# 'corpus' is a filename: basename.ext, with basename = slug-nstrs-meanlen-runid.
timings[['corpus-basename',
         'corpus-ext']] = timings['corpus'].str.strip().str.split('.', expand=True)
timings[['corpus-slug',
         'corpus-nstrs',
         'corpus-meanlen',
         'corpus-runid']] = timings['corpus-basename'].str.strip().str.split('-', expand=True)
# Split produces strings; convert the numeric fields so sorting is numeric.
timings["corpus-nstrs"] = pd.to_numeric(timings["corpus-nstrs"])
timings["corpus-meanlen"] = pd.to_numeric(timings["corpus-meanlen"])
timings["corpus-runid"] = pd.to_numeric(timings["corpus-runid"])


# project: calculate fact

# Mean duration of a single += operation, in seconds and nanoseconds.
timings['op-duration-s'] = timings['execTimeActualSec'] / timings['concatDoneActualCount']
timings['op-duration-ns'] = timings['op-duration-s'] * 1000 * 1000 * 1000


# Filter operation=peq
# (get_group raises KeyError if no 'peq' rows exist, which is the desired
# loud failure for a malformed input file.)

groupedOp = timings.groupby('operation')
tgtOpTimings = groupedOp.get_group('peq')


# Emit in groups: one chunk per SUT in the pretty list, sorted by string length.

groupedSut = tgtOpTimings.groupby('sut')

for sut, sgroup in groupedSut:

    if sut not in prettyFieldNames:
        continue

    sgroup_sorted = sgroup.sort_values(by='corpus-meanlen')

    # Enforce the documented invariant: exactly one row per string-length.
    dup = sgroup_sorted['corpus-meanlen'].duplicated()
    if dup.any():
        raise ValueError(
            'multiple rows per string-length for sut {!r}: {}'.format(
                sut, sorted(sgroup_sorted.loc[dup, 'corpus-meanlen'].unique())))

    print('"{header}"'.format(header=prettyFieldNames[sut]))
    text = sgroup_sorted[['corpus-meanlen', 'op-duration-ns']].to_csv(header=False, index=False, sep='\t')
    print(text)
    print()