source: doc/theses/mike_brooks_MMath/plots/string-allocn-attrib.py@ e0350e0

Last change on this file since e0350e0 was e0350e0, checked in by Michael Brooks <mlbrooks@…>, 4 months ago

Recent rework of string benchmarks

# Read the result-allocate-{speed,attrib,space}-{cfa,stl} measurement files
# Output, on stdout, the data behind the string allocation-attribution plot
#
# Processing outline:
# - keep only the rows at the default CFA expansion factor
# - join timing, attribution, and sizing data on sut-platform, corpus, expansion
# - split each operation's duration across the attribution categories by their fractions
# - summarize memory and per-category duration by corpus mean length, sut-platform, and category
# - accumulate, within each group, a running total of the durations of the preceding categories
# - emit a tab-separated table for the corpus mean lengths of interest
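#
# Typical invocation (an assumption; the output file name is a guess based on the meta-file name):
#   python3 string-allocn-attrib.py > string-allocn-attrib.dat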

import pandas as pd
import numpy as np
import os
import sys
from pathlib import Path
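# The loadParse*Data helpers used below come from common.py in this script's directory.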
sys.path.insert(0, os.path.dirname(__file__))
from common import *

sizes_i_want = [50, 200] # [20, 50, 100, 200]
defaultExpansionCfa = 0.2
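# string-allocn-attrib-meta.dat lists the attribution categories, one token per category,
# in the order they should appear (presumably the plot's stacking order); see catOrder below.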
metaFilename = os.path.dirname(os.path.abspath(__file__)) + '/string-allocn-attrib-meta.dat'
catOrder = Path(metaFilename).read_text()
catOrder = catOrder.split()
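# Load the CFA measurements and keep only the runs at the default expansion factor.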
cfatimings = loadParseTimingData('result-allocate-speed-cfa.csv',
                                 xClasNames=['expansion'], xClasDtypes={'expansion':'Float64'},
                                 xFactNames=['topIters'], xFactDtypes={'topIters':np.int64})
cfatimings = cfatimings.groupby('expansion').get_group(defaultExpansionCfa)

cfaattribs = loadParseAttribData('result-allocate-attrib-cfa.ssv')
cfaattribs = cfaattribs.groupby('expansion').get_group(defaultExpansionCfa)

cfasizings = loadParseSizingData('result-allocate-space-cfa.ssv', xClasNames=['expansion'], xClasDtypes={'expansion':'Float64'})
cfasizings = cfasizings.groupby('expansion').get_group(defaultExpansionCfa)
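# Load the STL measurements; no expansion filter is applied to these.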
stltimings = loadParseTimingData('result-allocate-speed-stl.csv',
                                 xClasNames=['expansion'], xClasDtypes={'expansion':'Float64'},
                                 xFactNames=['topIters'], xFactDtypes={'topIters':np.int64})

stlattribs = loadParseAttribData('result-allocate-attrib-stl.ssv')

stlsizings = loadParseSizingData('result-allocate-space-stl.ssv', xClasNames=['expansion'], xClasDtypes={'expansion':'Float64'})
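# Stack the CFA and STL results into single timing, attribution, and sizing frames.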
timings = pd.concat([cfatimings, stltimings])
attribs = pd.concat([cfaattribs, stlattribs])
sizings = pd.concat([cfasizings, stlsizings])

# print("before join", timings.shape[0], attribs.shape[0])
# print(timings.to_csv(header=True, index=True, sep='\t', na_rep="0"))
# print(attribs.to_csv(header=True, index=True, sep='\t', na_rep="0"))
# print(sizings.to_csv(header=True, index=True, sep='\t', na_rep="0"))
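# Join timing with attribution, then with sizing, keyed on sut-platform, corpus, and expansion.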
combined = pd.merge(
    left=timings[['sut-platform', 'corpus', 'corpus-meanlen-tgt', 'expansion', 'op-duration-ns']],
    right=attribs[['sut-platform', 'corpus', 'expansion', 'category', 'fraction']],
    on=['sut-platform', 'corpus', 'expansion']
)

combined = pd.merge(
    left=combined,
    right=sizings[['sut-platform', 'corpus', 'expansion', 'hw_cur_req_mem(B)']],
    on=['sut-platform', 'corpus', 'expansion']
)

# print("after join", combined.shape[0])
# print(combined.to_csv(header=True, index=True, sep='\t', na_rep="0"))
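# Attribute each operation's duration to its categories: a category's share is the
# operation duration times that category's measured fraction.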
combined['cat-duration-ns'] = combined['op-duration-ns'] * combined['fraction']
combined.drop(columns=['expansion', 'op-duration-ns', 'fraction', 'corpus'], inplace=True)

# print("before summarize", combined.shape[0])
# print(combined.to_csv(header=True, index=True, sep='\t', na_rep="0"))
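# Aggregate the remaining rows per (corpus-meanlen-tgt, sut-platform, category): mean memory and
# mean/min/max category duration; then flatten the MultiIndex column names into dash-joined labels
# such as 'cat-duration-ns-mean'.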
summary = combined.pivot_table(
    values=['hw_cur_req_mem(B)', 'cat-duration-ns'],
    index=['corpus-meanlen-tgt', 'sut-platform', 'category'],
    aggfunc={'hw_cur_req_mem(B)': 'mean', 'cat-duration-ns': ['mean', 'min', 'max']})
summary = summary.reset_index()
summary.columns = summary.columns.to_flat_index()
summary.columns = [
    '-'.join(filter(None, col)).replace(' ', '-') # replaces space with dash if needed
    for col in summary.columns.to_flat_index()
]

# reorder columns with memory as first value (after sut-platform, which is last key)
# cols = summary.columns.tolist()
# cols.remove("hw_cur_req_mem(B)-mean")
# insert_after = cols.index("sut-platform") + 1
# cols.insert(insert_after, "hw_cur_req_mem(B)-mean")
# summary = summary[cols]
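# Select and order the columns that go into the output table.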
summary = summary[[
    'corpus-meanlen-tgt',
    'sut-platform',
    'hw_cur_req_mem(B)-mean',
    'category',
    'cat-duration-ns-mean',
    'cat-duration-ns-max',
    'cat-duration-ns-min']]

# print("after summarize", summary.shape[0])
# print(summary.to_csv(header=True, index=True, sep='\t', na_rep="0"))

# Ensure 'category' follows the specified order
summary['category'] = pd.Categorical(summary['category'], categories=catOrder, ordered=True)

# Sort the DataFrame to prepare for cumulative sum
summary_sorted = summary.sort_values(by=['corpus-meanlen-tgt', 'sut-platform', 'category'])

# Group by the keys and compute exclusive running total
summary_sorted['grp-prior-duration-ns'] = (
    summary_sorted
    .groupby(['corpus-meanlen-tgt', 'sut-platform'])['cat-duration-ns-mean']
    .transform(lambda s: s.cumsum().shift(fill_value=0))
)
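# grp-prior-duration-ns is an exclusive prefix sum: the total duration of the categories that
# come earlier in catOrder, presumably the baseline offset for the plot's stacked bars.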

summary_sorted = summary_sorted.reset_index(drop=True)

# print("after accumulation", summary_sorted.shape[0])
# print(summary_sorted.to_csv(header=True, index=True, sep='\t', na_rep="0"))
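# Keep only the corpus mean lengths selected for the plot (sizes_i_want).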
filtered = summary_sorted[summary_sorted['corpus-meanlen-tgt'].isin(sizes_i_want)]

# print("after filter", filtered.shape[0])
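# Emit the final table on stdout as tab-separated text; missing values print as 0.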
print(filtered.to_csv(header=True, index=False, sep='\t', na_rep="0"))