# Read thesis-append-pbv.csv
# Output for string-graph-peq-sharing.dat
# NOTE(review): the code below loads result-allocate-{speed,attrib,space}-{cfa,stl}.*
# — confirm the two filenames above are current and not a stale copy-paste.
|
---|
3 |
|
---|
4 | # Project details
|
---|
5 | # Filter operation=peq
|
---|
# Split "series" groups of sut; only those in the "pretty" list
|
---|
7 | # Assert one row per string-length
|
---|
8 | # output:
|
---|
9 | # string-len op-duration
|
---|
# in chunks, each headed by pretty(sut)
|
---|
11 |
|
---|
12 | import pandas as pd
|
---|
13 | import numpy as np
|
---|
14 | import os
|
---|
15 | import sys
|
---|
16 | from pathlib import Path
|
---|
17 |
|
---|
18 | sys.path.insert(0, os.path.dirname(__file__))
|
---|
19 | from common import *
|
---|
20 |
|
---|
# Corpus mean-length targets kept in the final output (full sweep in comment).
sizes_i_want = [50, 200] # [20, 50, 100, 200]
# CFA expansion factor whose measurement series is selected below.
defaultExpansionCfa = 0.2

# The meta file holds the category names as whitespace-separated tokens, in
# the order the output categories should follow.
metaFilename = os.path.dirname(os.path.abspath(__file__)) + '/string-allocn-attrib-meta.dat'
# Fix: call .split() on the string directly instead of the unidiomatic
# unbound-method form str.split(catOrder); behavior (whitespace split) is
# unchanged.
catOrder = Path(metaFilename).read_text().split()
|
---|
27 |
|
---|
# Load the CFA measurement series — timings (CSV), attribution (SSV), and
# sizing (SSV) — via the loaders from common.py.  The CFA runs sweep the
# 'expansion' factor, so each table is immediately restricted to the rows
# taken at the default expansion; get_group raises KeyError if that group
# is missing, which would indicate a broken input file.
cfatimings = loadParseTimingData(
    'result-allocate-speed-cfa.csv',
    xClasNames=['expansion'], xClasDtypes={'expansion': 'Float64'},
    xFactNames=['topIters'], xFactDtypes={'topIters': np.int64},
).groupby('expansion').get_group(defaultExpansionCfa)

cfaattribs = loadParseAttribData(
    'result-allocate-attrib-cfa.ssv',
).groupby('expansion').get_group(defaultExpansionCfa)

cfasizings = loadParseSizingData(
    'result-allocate-space-cfa.ssv',
    xClasNames=['expansion'], xClasDtypes={'expansion': 'Float64'},
).groupby('expansion').get_group(defaultExpansionCfa)

# The STL tables are loaded as-is; no expansion filter is applied to them.
stltimings = loadParseTimingData(
    'result-allocate-speed-stl.csv',
    xClasNames=['expansion'], xClasDtypes={'expansion': 'Float64'},
    xFactNames=['topIters'], xFactDtypes={'topIters': np.int64},
)

stlattribs = loadParseAttribData('result-allocate-attrib-stl.ssv')

stlsizings = loadParseSizingData(
    'result-allocate-space-stl.ssv',
    xClasNames=['expansion'], xClasDtypes={'expansion': 'Float64'},
)

# Stack the CFA and STL rows into one table per measurement kind.
timings = pd.concat([cfatimings, stltimings])
attribs = pd.concat([cfaattribs, stlattribs])
sizings = pd.concat([cfasizings, stlsizings])
|
---|
50 |
|
---|
# Join the three tables on the (sut-platform, corpus, expansion) key.  Each
# timing row fans out to one row per attribution category, and the sizing
# merge then attaches the memory column to every resulting row.
combined = timings[['sut-platform', 'corpus', 'corpus-meanlen-tgt',
                    'expansion', 'op-duration-ns']].merge(
    attribs[['sut-platform', 'corpus', 'expansion', 'category', 'fraction']],
    on=['sut-platform', 'corpus', 'expansion'],
).merge(
    sizings[['sut-platform', 'corpus', 'expansion', 'hw_cur_req_mem(B)']],
    on=['sut-platform', 'corpus', 'expansion'],
)

# Apportion each operation's duration across its categories by fraction,
# then discard the columns not needed downstream.
combined['cat-duration-ns'] = combined['op-duration-ns'] * combined['fraction']
combined = combined.drop(columns=['expansion', 'op-duration-ns', 'fraction', 'corpus'])
|
---|
73 |
|
---|
74 | # print("before summarize", combined.shape[0])
|
---|
75 | # print(combined.to_csv(header=True, index=True, sep='\t', na_rep="0"))
|
---|
76 |
|
---|
# Aggregate over repeated measurements: mean memory per
# (size, platform, category), and mean/min/max of the per-category duration.
summary = combined.pivot_table(
    values=['hw_cur_req_mem(B)', 'cat-duration-ns'],
    index=['corpus-meanlen-tgt', 'sut-platform', 'category'],
    aggfunc={'hw_cur_req_mem(B)': 'mean',
             'cat-duration-ns': ['mean', 'min', 'max']})
summary = summary.reset_index()

# Flatten the (value, aggfunc) column MultiIndex into dash-joined names,
# e.g. ('cat-duration-ns', 'mean') -> 'cat-duration-ns-mean'.  Key columns
# restored by reset_index carry an empty aggfunc slot, which
# filter(None, ...) discards; any embedded spaces become dashes.
# Fix: the original flattened the columns twice (assignment then a second
# to_flat_index() in the comprehension) — one pass suffices.
summary.columns = [
    '-'.join(filter(None, col)).replace(' ', '-')
    for col in summary.columns.to_flat_index()
]

# Fixed output column order: memory first (right after the keys), then the
# per-category duration statistics.
summary = summary[[
    'corpus-meanlen-tgt',
    'sut-platform',
    'hw_cur_req_mem(B)-mean',
    'category',
    'cat-duration-ns-mean',
    'cat-duration-ns-max',
    'cat-duration-ns-min']]
|
---|
103 |
|
---|
104 | # print("after summarize", summary.shape[0])
|
---|
105 | # print(summary.to_csv(header=True, index=True, sep='\t', na_rep="0"))
|
---|
106 |
|
---|
107 |
|
---|
108 |
|
---|
# Impose the category order taken from the meta file so that sorting (and
# hence accumulation) follows it.
summary['category'] = pd.Categorical(summary['category'],
                                     categories=catOrder, ordered=True)

# Sort by the group keys, then by category, so a running total within each
# (size, platform) group accumulates in category order.
summary_sorted = summary.sort_values(
    by=['corpus-meanlen-tgt', 'sut-platform', 'category'])

# Exclusive prefix sum of the mean durations within each group: every row
# receives the total of the categories before it (the first row gets 0).
# shift-then-cumsum is equivalent to cumsum-then-shift with fill_value=0.
summary_sorted['grp-prior-duration-ns'] = (
    summary_sorted
    .groupby(['corpus-meanlen-tgt', 'sut-platform'])['cat-duration-ns-mean']
    .transform(lambda col: col.shift(fill_value=0).cumsum())
)

summary_sorted = summary_sorted.reset_index(drop=True)
|
---|
123 |
|
---|
124 |
|
---|
125 | # print("after accumulation", summary_sorted.shape[0])
|
---|
126 | # print(summary_sorted.to_csv(header=True, index=True, sep='\t', na_rep="0"))
|
---|
127 |
|
---|
128 |
|
---|
# Keep only the corpus sizes selected for the figure, then emit the table
# as tab-separated text on stdout (missing values rendered as "0").
wanted = summary_sorted['corpus-meanlen-tgt'].isin(sizes_i_want)
filtered = summary_sorted.loc[wanted]

print(filtered.to_csv(header=True, index=False, sep='\t', na_rep="0"))
|
---|