# Read result-allocate-{speed,attrib,space}-{cfa,stl} data files
# Output for the string-allocn-attrib graph
# Project details
#   Filter CFA runs to the default expansion factor
#   Join timing, attribution, and sizing on (sut-platform, corpus, expansion)
#   Split each op's duration across categories by its attribution fraction
#   Summarize by (corpus-meanlen-tgt, sut-platform, category)
# output:
#   one row per (corpus-meanlen-tgt, sut-platform, category), with mean memory,
#   duration mean/max/min, and an exclusive per-group running total of duration,
#   filtered to the sizes in sizes_i_want

import pandas as pd
import numpy as np
import os
import sys
from pathlib import Path

sys.path.insert(0, os.path.dirname(__file__))
from common import *

sizes_i_want = [50, 200]  # [20, 50, 100, 200]
defaultExpansionCfa = 0.2

# Category display order comes from the shared meta file, one name per token
metaFilename = os.path.dirname(os.path.abspath(__file__)) + '/string-allocn-attrib-meta.dat'
catOrder = Path(metaFilename).read_text().split()

cfatimings = loadParseTimingData('result-allocate-speed-cfa.csv',
                                 xClasNames=['expansion'], xClasDtypes={'expansion': 'Float64'},
                                 xFactNames=['topIters'], xFactDtypes={'topIters': np.int64})
cfatimings = cfatimings.groupby('expansion').get_group(defaultExpansionCfa)
cfaattribs = loadParseAttribData('result-allocate-attrib-cfa.ssv')
cfaattribs = cfaattribs.groupby('expansion').get_group(defaultExpansionCfa)
cfasizings = loadParseSizingData('result-allocate-space-cfa.ssv',
                                 xClasNames=['expansion'], xClasDtypes={'expansion': 'Float64'})
cfasizings = cfasizings.groupby('expansion').get_group(defaultExpansionCfa)

stltimings = loadParseTimingData('result-allocate-speed-stl.csv',
                                 xClasNames=['expansion'], xClasDtypes={'expansion': 'Float64'},
                                 xFactNames=['topIters'], xFactDtypes={'topIters': np.int64})
stlattribs = loadParseAttribData('result-allocate-attrib-stl.ssv')
stlsizings = loadParseSizingData('result-allocate-space-stl.ssv',
                                 xClasNames=['expansion'], xClasDtypes={'expansion': 'Float64'})

timings = pd.concat([cfatimings, stltimings])
attribs = pd.concat([cfaattribs, stlattribs])
sizings = pd.concat([cfasizings, stlsizings])

# print("before join", timings.shape[0], attribs.shape[0])
# print(timings.to_csv(header=True, index=True, sep='\t', na_rep="0"))
# print(attribs.to_csv(header=True, index=True, sep='\t', na_rep="0"))
# print(sizings.to_csv(header=True, index=True, sep='\t', na_rep="0"))

combined = pd.merge(
    left=timings[['sut-platform', 'corpus', 'corpus-meanlen-tgt', 'expansion', 'op-duration-ns']],
    right=attribs[['sut-platform', 'corpus', 'expansion', 'category', 'fraction']],
    on=['sut-platform', 'corpus', 'expansion']
)
combined = pd.merge(
    left=combined,
    right=sizings[['sut-platform', 'corpus', 'expansion', 'hw_cur_req_mem(B)']],
    on=['sut-platform', 'corpus', 'expansion']
)

# print("after join", combined.shape[0])
# print(combined.to_csv(header=True, index=True, sep='\t', na_rep="0"))

combined['cat-duration-ns'] = combined['op-duration-ns'] * combined['fraction']
combined.drop(columns=['expansion', 'op-duration-ns', 'fraction', 'corpus'], inplace=True)

# print("before summarize", combined.shape[0])
# print(combined.to_csv(header=True, index=True, sep='\t', na_rep="0"))

summary = combined.pivot_table(
    values=['hw_cur_req_mem(B)', 'cat-duration-ns'],
    index=['corpus-meanlen-tgt', 'sut-platform', 'category'],
    aggfunc={'hw_cur_req_mem(B)': 'mean', 'cat-duration-ns': ['mean', 'min', 'max']}
)
summary = summary.reset_index()
summary.columns = [
    '-'.join(filter(None, col)).replace(' ', '-')  # replaces space with dash if needed
    for col in summary.columns.to_flat_index()
]
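# After flattening, the value columns read 'cat-duration-ns-mean',
# 'cat-duration-ns-max', 'cat-duration-ns-min', and 'hw_cur_req_mem(B)-mean';
# the index columns keep their plain names, since filter(None, ...) drops
# their empty second level.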
# reorder columns with memory as first value (after sut-platform, which is last key)
# cols = summary.columns.tolist()
# cols.remove("hw_cur_req_mem(B)-mean")
# insert_after = cols.index("sut-platform") + 1
# cols.insert(insert_after, "hw_cur_req_mem(B)-mean")
# summary = summary[cols]
summary = summary[[
    'corpus-meanlen-tgt', 'sut-platform', 'hw_cur_req_mem(B)-mean', 'category',
    'cat-duration-ns-mean', 'cat-duration-ns-max', 'cat-duration-ns-min']]

# print("after summarize", summary.shape[0])
# print(summary.to_csv(header=True, index=True, sep='\t', na_rep="0"))

# Ensure 'category' follows the specified order
summary['category'] = pd.Categorical(summary['category'], categories=catOrder, ordered=True)

# Sort the DataFrame to prepare for cumulative sum
summary_sorted = summary.sort_values(by=['corpus-meanlen-tgt', 'sut-platform', 'category'])

# Group by the keys and compute exclusive running total
summary_sorted['grp-prior-duration-ns'] = (
    summary_sorted
    .groupby(['corpus-meanlen-tgt', 'sut-platform'])['cat-duration-ns-mean']
    .transform(lambda s: s.cumsum().shift(fill_value=0))
)
summary_sorted = summary_sorted.reset_index(drop=True)

# print("after accumulation", summary_sorted.shape[0])
# print(summary_sorted.to_csv(header=True, index=True, sep='\t', na_rep="0"))

filtered = summary_sorted[summary_sorted['corpus-meanlen-tgt'].isin(sizes_i_want)]
# print("after filter", filtered.shape[0])
print(filtered.to_csv(header=True, index=False, sep='\t', na_rep="0"))
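# Optional sanity check (assumes the output should carry exactly one row per
# (corpus-meanlen-tgt, sut-platform, category) triple; pivot_table guarantees
# this by construction, so the assert below is expected to pass if uncommented):
# dupes = filtered.duplicated(subset=['corpus-meanlen-tgt', 'sut-platform', 'category'])
# assert not dupes.any(), "expected one row per (size, platform, category)"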