1 | import pandas as pd
|
---|
2 | import numpy as np
|
---|
3 | import sys
|
---|
4 | import os
|
---|
5 |
|
---|
def _assignSplitColumns(dt, names, parts, source):
    """Store the columns of `parts` in `dt` under `names`, in order.

    `parts` is the result of a `str.split(..., expand=True)` on `dt[source]`.
    For an empty `dt` such a split has no columns at all, so empty object
    columns are created instead.  For a non-empty `dt`, a column count that
    differs from `len(names)` means the names in `source` do not follow the
    expected pattern.

    Raises:
        ValueError: if `dt` has rows and `parts` does not have exactly
            `len(names)` columns.
    """
    if parts.shape[1] != len(names):
        if len(dt) > 0:
            raise ValueError(
                f"column '{source}' splits into {parts.shape[1]} part(s); "
                f"expected {len(names)}: {names}"
            )
        # Nothing to split: keep the schema stable with empty string columns
        # so the later `.str` / `to_numeric` steps still apply.
        parts = pd.DataFrame(index=dt.index, columns=range(len(names)), dtype=object)
    for name, position in zip(names, parts.columns):
        dt[name] = parts[position]


def parseTestCorpus(dt):
    """Add columns parsed from the 'test' and 'corpus' name columns, in place.

    `dt` must have string columns 'test' and 'corpus'.  They are decomposed as:

    * 'test'   -> 'test-slug', 'sut-platform', 'operation', 'sut-cfa-level',
                  'sut-cfa-sharing', 'op-alloc'   (split on '-')
    * 'sut'    =  'sut-platform', 'sut-cfa-level', 'sut-cfa-sharing' and
                  'op-alloc' joined with '-'
    * 'corpus' -> 'corpus-basename', 'corpus-ext' (split on the last '.')
    * 'corpus-basename' -> 'corpus-slug', 'corpus-nstrs-tgt',
                  'corpus-meanlen-tgt', 'corpus-runid'   (split on '-')
    * 'corpus-runid' -> 'corpus-relscale', 'corpus-seed',
                  'corpus-offset-instr'   (split on '+')

    'corpus-nstrs-tgt', 'corpus-meanlen-tgt' and 'corpus-relscale' are
    converted to numbers.  An empty `dt` gets the same columns, all empty.

    Returns None; `dt` is modified.

    Raises:
        ValueError: if the names in a non-empty `dt` do not split into the
            expected number of parts, or a numeric part is not a number.
    """
    testNames = dt['test'].str.strip()
    _assignSplitColumns(
        dt,
        ['test-slug', 'sut-platform', 'operation',
         'sut-cfa-level', 'sut-cfa-sharing', 'op-alloc'],
        testNames.str.split('-', expand=True),
        'test',
    )
    # Vectorised join; a row with a missing component gets a missing 'sut'.
    dt['sut'] = dt['sut-platform'].str.cat(
        [dt['sut-cfa-level'], dt['sut-cfa-sharing'], dt['op-alloc']],
        sep='-',
    )

    corpusNames = dt['corpus'].str.strip()
    _assignSplitColumns(
        dt,
        ['corpus-basename', 'corpus-ext'],
        # n=1 from the right: only the final '.' separates the extension,
        # so dots inside the basename (e.g. in the run id) are preserved.
        corpusNames.str.rsplit('.', n=1, expand=True),
        'corpus',
    )

    baseNames = dt['corpus-basename'].str.strip()
    _assignSplitColumns(
        dt,
        ['corpus-slug', 'corpus-nstrs-tgt', 'corpus-meanlen-tgt', 'corpus-runid'],
        baseNames.str.split('-', expand=True),
        'corpus-basename',
    )
    dt["corpus-nstrs-tgt"] = pd.to_numeric(dt["corpus-nstrs-tgt"])
    dt["corpus-meanlen-tgt"] = pd.to_numeric(dt["corpus-meanlen-tgt"])

    runIds = dt['corpus-runid'].str.strip()
    _assignSplitColumns(
        dt,
        ['corpus-relscale', 'corpus-seed', 'corpus-offset-instr'],
        runIds.str.split('+', expand=True),
        'corpus-runid',
    )
    dt["corpus-relscale"] = pd.to_numeric(dt["corpus-relscale"])
30 |
|
---|
def loadParseTimingData(infileLocal, xClasNames=None, xClasDtypes=None, xFactNames=None, xFactDtypes=None):
    """Load a timing results file and add parsed and derived columns.

    The file is read from '../benchmarks/string/' relative to the directory
    containing this module.  No `sep` is passed, so pandas' default delimiter
    applies.  Columns are named positionally:
    'test', 'corpus', then `xClasNames`, then the five fixed measurement
    columns, then `xFactNames`.

    Args:
        infileLocal: file name (appended to the benchmarks/string directory).
        xClasNames: names of extra columns that follow 'corpus'.
        xClasDtypes: dtypes for the `xClasNames` columns.
        xFactNames: names of extra columns that follow the fixed ones.
        xFactDtypes: dtypes for the `xFactNames` columns.

    All four optional arguments default to empty.  The dtypes fixed here are
    merged last, so they take precedence over a caller entry for the same
    column name.

    Returns:
        The DataFrame, extended in place by `parseTestCorpus` and with
        'op-duration-s' (execTimeActualSec / concatDoneActualCount) and
        'op-duration-ns' added.
    """
    # None instead of mutable [] / {} defaults; copies keep caller objects untouched.
    xClasNames = [] if xClasNames is None else list(xClasNames)
    xFactNames = [] if xFactNames is None else list(xFactNames)
    xClasDtypes = {} if xClasDtypes is None else dict(xClasDtypes)
    xFactDtypes = {} if xFactDtypes is None else dict(xFactDtypes)

    infile = os.path.dirname(os.path.abspath(__file__)) + '/../benchmarks/string/' + infileLocal

    fixedDtypes = {
        'test': str,
        'corpus': str,
        'concatsPerReset': 'Int64',  # allows missing; https://stackoverflow.com/a/70626154
        'corpusItemCountAct': np.int64,
        'corpusMeanLenCharsAct': np.float64,
        'concatDoneActualCount': np.int64,
        'execTimeActualSec': np.float64,
        # 'Q' is not one of the fixed column names above.
        # NOTE(review): presumably an extra fact column passed via xFactNames - confirm.
        'Q': np.int64,
    }

    timings = pd.read_csv(
        infile,
        names=['test', 'corpus'] + xClasNames + [
            'concatsPerReset', 'corpusItemCountAct', 'corpusMeanLenCharsAct',
            'concatDoneActualCount', 'execTimeActualSec'] + xFactNames,
        dtype={**xClasDtypes, **xFactDtypes, **fixedDtypes},
        na_values=['xxx'],  # 'xxx' in the file marks a missing value
    )

    # project: parse executable and corpus names
    parseTestCorpus(timings)

    # project: calculate fact
    timings['op-duration-s'] = timings['execTimeActualSec'] / timings['concatDoneActualCount']
    timings['op-duration-ns'] = timings['op-duration-s'] * 1000 * 1000 * 1000

    return timings
63 |
|
---|
def loadParseSizingData(infileLocal, xClasNames=None, xClasDtypes=None, xFactNames=None, xFactDtypes=None):
    """Load a space-separated sizing (memory statistics) file and parse its names.

    The file is read from '../benchmarks/string/' relative to the directory
    containing this module.  Columns are named positionally: 'test', 'corpus',
    then `xClasNames`, then the fixed statistics columns listed below, then
    `xFactNames`.  Every fixed statistics column is read as int64.

    Args:
        infileLocal: file name (appended to the benchmarks/string directory).
        xClasNames: names of extra columns that follow 'corpus'.
        xClasDtypes: dtypes for the `xClasNames` columns.
        xFactNames: names of extra columns that follow the fixed ones.
        xFactDtypes: dtypes for the `xFactNames` columns.

    All four optional arguments default to empty.  The dtypes fixed here are
    merged last, so they take precedence over a caller entry for the same
    column name.

    Returns:
        The DataFrame, extended in place by `parseTestCorpus`.
    """
    # None instead of mutable [] / {} defaults; copies keep caller objects untouched.
    xClasNames = [] if xClasNames is None else list(xClasNames)
    xFactNames = [] if xFactNames is None else list(xFactNames)
    xClasDtypes = {} if xClasDtypes is None else dict(xClasDtypes)
    xFactDtypes = {} if xFactDtypes is None else dict(xFactDtypes)

    infile = os.path.dirname(os.path.abspath(__file__)) + '/../benchmarks/string/' + infileLocal

    # Fixed statistics columns, in file order (spellings match the data producer).
    statNames = [
        'ppid', 'pid', 'malloc_count', 'free_count',
        'calloc_count', 'realloc_count',
        'requsted_mem(B)', 'current_req_mem(B)', 'hw_cur_req_mem(B)', 'text', 'heap', 'mmap_so', 'mmap',
        'stack', 'vvar', 'vdso', 'vsyscall', 'unfigured', 'total_dynamic',
        'epoch_timestamp(ms)',
    ]
    fixedDtypes = {'test': str, 'corpus': str}
    fixedDtypes.update({statName: np.int64 for statName in statNames})

    sizings = pd.read_csv(
        infile,
        sep=' ',
        names=['test', 'corpus'] + xClasNames + statNames + xFactNames,
        dtype={**xClasDtypes, **xFactDtypes, **fixedDtypes},
    )

    parseTestCorpus(sizings)

    return sizings
91 |
|
---|
92 |
|
---|
def loadParseAttribData(infileLocal):
    """Load a space-separated attribution file and parse its test/corpus names.

    The file is read from '../benchmarks/string/' relative to the directory
    containing this module.  Its columns are named positionally and typed as
    given in `columnTypes` below.  The frame is then passed to
    `parseTestCorpus`, which adds the columns derived from 'test' and
    'corpus' in place.

    Args:
        infileLocal: file name (appended to the benchmarks/string directory).

    Returns:
        The resulting DataFrame.
    """
    # Column name -> dtype, in file order; drives both `names` and `dtype`.
    columnTypes = {
        "test": str,
        "corpus": str,
        "expansion": np.float64,
        "category": str,
        "samples_in_category": np.int64,
        "total_samples": np.int64,
        "fraction": np.float64,
        "sources": str,
    }

    moduleDir = os.path.dirname(os.path.abspath(__file__))
    attribs = pd.read_csv(
        moduleDir + '/../benchmarks/string/' + infileLocal,
        sep=' ',
        names=list(columnTypes),
        dtype=columnTypes,
    )

    parseTestCorpus(attribs)
    return attribs