[2410424] | 1 | # Read thesis-append-pbv.csv
|
---|
| 2 | # Output for string-graph-peq-sharing.dat
|
---|
| 3 |
|
---|
| 4 | # Project details
|
---|
| 5 | # Filter operation=peq
|
---|
| 6 | # Split "series" groups of sut; only those in the "pretty" list
|
---|
| 7 | # Assert one row per string-length
|
---|
| 8 | # output:
|
---|
| 9 | # string-len op-duration
|
---|
| 10 | # in chunks, each headed by pretty(sut)
|
---|
| 11 |
|
---|
| 12 | import pandas as pd
|
---|
| 13 | import numpy as np
|
---|
| 14 | import os
|
---|
| 15 | import sys
|
---|
[e0350e0] | 16 | from pathlib import Path
|
---|
[2410424] | 17 |
|
---|
| 18 | sys.path.insert(0, os.path.dirname(__file__))
|
---|
| 19 | from common import *
|
---|
| 20 |
|
---|
# Values of 'corpus-meanlen-tgt' kept in the printed output.
# Other candidates that have been used: [20, 50, 100, 200]
sizes_i_want = [50, 200]

# 'expansion' value at which the CFA result rows are selected.
defaultExpansionCfa = 0.2

# The category-order file sits in the same directory as this script. Its
# whitespace-separated tokens, in file order, form the category ordering.
_scriptDir = os.path.dirname(os.path.abspath(__file__))
metaFilename = _scriptDir + '/string-allocn-attrib-meta.dat'
catOrder = Path(metaFilename).read_text().split()
[2410424] | 27 |
|
---|
def _expansionKwargs(**extra):
    """Return fresh loader keyword arguments naming the 'expansion' column.

    New list/dict objects are built on every call, so no two loader calls
    share argument objects. Any ``extra`` keywords are added after the
    expansion ones.
    """
    kwargs = {'xClasNames': ['expansion'], 'xClasDtypes': {'expansion': 'Float64'}}
    kwargs.update(extra)
    return kwargs


def _timingKwargs():
    """Return fresh keyword arguments for the timing loader calls."""
    return _expansionKwargs(xFactNames=['topIters'], xFactDtypes={'topIters': np.int64})


def _atDefaultExpansion(frame):
    """Return the rows of ``frame`` whose 'expansion' is defaultExpansionCfa.

    Goes through groupby/get_group, so an absent group raises KeyError
    instead of producing an empty frame.
    """
    return frame.groupby('expansion').get_group(defaultExpansionCfa)


# CFA results: only the rows at the default expansion are kept.
cfatimings = _atDefaultExpansion(
    loadParseTimingData('result-allocate-speed-cfa.csv', **_timingKwargs()))
cfaattribs = _atDefaultExpansion(
    loadParseAttribData('result-allocate-attrib-cfa.ssv'))
cfasizings = _atDefaultExpansion(
    loadParseSizingData('result-allocate-space-cfa.ssv', **_expansionKwargs()))

# STL results: taken whole, with no selection on 'expansion'.
stltimings = loadParseTimingData('result-allocate-speed-stl.csv', **_timingKwargs())
stlattribs = loadParseAttribData('result-allocate-attrib-stl.ssv')
stlsizings = loadParseSizingData('result-allocate-space-stl.ssv', **_expansionKwargs())

# Stack the CFA rows above the STL rows for each kind of measurement.
timings, attribs, sizings = (
    pd.concat([cfaPart, stlPart])
    for cfaPart, stlPart in (
        (cfatimings, stltimings),
        (cfaattribs, stlattribs),
        (cfasizings, stlsizings),
    )
)
| 50 |
|
---|
| 51 | # print("before join", timings.shape[0], attribs.shape[0])
|
---|
| 52 | # print(timings.to_csv(header=True, index=True, sep='\t', na_rep="0"))
|
---|
| 53 | # print(attribs.to_csv(header=True, index=True, sep='\t', na_rep="0"))
|
---|
| 54 | # print(sizings.to_csv(header=True, index=True, sep='\t', na_rep="0"))
|
---|
| 55 |
|
---|
# Join each timing row to its per-category fractions, then attach the
# 'hw_cur_req_mem(B)' column. Both joins use the default merge behaviour on
# the same three key columns.
joinKeys = ['sut-platform', 'corpus', 'expansion']

timingPart = timings[['sut-platform', 'corpus', 'corpus-meanlen-tgt', 'expansion', 'op-duration-ns']]
attribPart = attribs[joinKeys + ['category', 'fraction']]
sizingPart = sizings[joinKeys + ['hw_cur_req_mem(B)']]

combined = (
    timingPart
    .merge(attribPart, on=joinKeys)
    .merge(sizingPart, on=joinKeys)
)
| 67 |
|
---|
[e0350e0] | 68 | # print("after join", combined.shape[0])
|
---|
| 69 | # print(combined.to_csv(header=True, index=True, sep='\t', na_rep="0"))
|
---|
| 70 |
|
---|
# Scale each operation duration by its category fraction, then discard the
# two inputs of that product along with the 'expansion' and 'corpus' columns.
categoryDuration = combined['op-duration-ns'] * combined['fraction']
combined = (
    combined
    .assign(**{'cat-duration-ns': categoryDuration})
    .drop(columns=['expansion', 'op-duration-ns', 'fraction', 'corpus'])
)
| 73 |
|
---|
| 74 | # print("before summarize", combined.shape[0])
|
---|
| 75 | # print(combined.to_csv(header=True, index=True, sep='\t', na_rep="0"))
|
---|
| 76 |
|
---|
# One row per (mean length, platform, category): mean of the memory column,
# and mean/min/max of the per-category duration.
pivotKeys = ['corpus-meanlen-tgt', 'sut-platform', 'category']
aggregations = {
    'hw_cur_req_mem(B)': 'mean',
    'cat-duration-ns': ['mean', 'min', 'max'],
}
summary = combined.pivot_table(
    index=pivotKeys,
    values=list(aggregations),
    aggfunc=aggregations,
).reset_index()

# Collapse the two-level column labels into single strings: drop empty
# levels, join the rest with '-', and turn any space into a dash.
flatNames = []
for levels in summary.columns.to_flat_index():
    parts = [level for level in levels if level]
    flatNames.append('-'.join(parts).replace(' ', '-'))
summary.columns = flatNames
| 87 |
|
---|
| 88 | # reorder columns with memory as first value (after sut-platform, which is last key)
|
---|
| 89 | # cols = summary.columns.tolist()
|
---|
| 90 | # cols.remove("hw_cur_req_mem(B)-mean")
|
---|
| 91 | # insert_after = cols.index("sut-platform") + 1
|
---|
| 92 | # cols.insert(insert_after, "hw_cur_req_mem(B)-mean")
|
---|
| 93 | # summary = summary[cols]
|
---|
| 94 |
|
---|
# Output column layout: the memory mean goes right after 'sut-platform',
# followed by the category and its duration statistics.
outputColumns = [
    'corpus-meanlen-tgt',
    'sut-platform',
    'hw_cur_req_mem(B)-mean',
    'category',
    'cat-duration-ns-mean',
    'cat-duration-ns-max',
    'cat-duration-ns-min',
]
summary = summary[outputColumns]
| 103 |
|
---|
| 104 | # print("after summarize", summary.shape[0])
|
---|
| 105 | # print(summary.to_csv(header=True, index=True, sep='\t', na_rep="0"))
|
---|
| 106 |
|
---|
| 107 |
|
---|
| 108 |
|
---|
def _exclusiveRunningTotal(durations):
    """Return, for each entry, the sum of the entries before it (0 for the first)."""
    return durations.cumsum().shift(fill_value=0)


# Give 'category' the ordering listed in catOrder so that sorting follows it.
summary['category'] = pd.Categorical(
    values=summary['category'],
    ordered=True,
    categories=catOrder,
)

# Sort by group keys and then by category, so the running total below walks
# the categories of each group in catOrder order.
stackKeys = ['corpus-meanlen-tgt', 'sut-platform']
summary_sorted = summary.sort_values(by=stackKeys + ['category'])

# For every (mean length, platform) group: total mean duration of the
# categories that come before the current row.
priorTotals = (
    summary_sorted
    .groupby(stackKeys)['cat-duration-ns-mean']
    .transform(_exclusiveRunningTotal)
)

summary_sorted = (
    summary_sorted
    .assign(**{'grp-prior-duration-ns': priorTotals})
    .reset_index(drop=True)
)
| 123 |
|
---|
[2410424] | 124 |
|
---|
[e0350e0] | 125 | # print("after accumulation", summary_sorted.shape[0])
|
---|
| 126 | # print(summary_sorted.to_csv(header=True, index=True, sep='\t', na_rep="0"))
|
---|
[2410424] | 127 |
|
---|
| 128 |
|
---|
# Keep only the rows whose mean length is one of the requested sizes.
isWantedSize = summary_sorted['corpus-meanlen-tgt'].isin(sizes_i_want)
filtered = summary_sorted[isWantedSize]

# print("after filter", filtered.shape[0])

# Emit the result on stdout as tab-separated text with a header row and no
# index column; missing values are written as "0".
tsvText = filtered.to_csv(sep='\t', index=False, header=True, na_rep="0")
print(tsvText)