Timestamp:
Jun 4, 2025, 1:43:34 PM
Author:
Michael Brooks <mlbrooks@…>
Branches:
master
Children:
f858ca5
Parents:
c8bdbaf
Message:

Recent rework of string benchmarks

File:
1 edited

Legend:

Unmodified lines have no prefix; added lines are prefixed with "+", removed lines with "-"; "…" marks collapsed unchanged lines.
  • doc/theses/mike_brooks_MMath/plots/string-allocn-attrib.py

--- rc8bdbaf
+++ re0350e0
…
 import os
 import sys
+from pathlib import Path

 sys.path.insert(0, os.path.dirname(__file__))
…

 sizes_i_want = [50, 200] # [20, 50, 100, 200]
+defaultExpansionCfa = 0.2

-# assume CFA threshold only run at default value
+metaFilename = os.path.dirname(os.path.abspath(__file__)) + '/string-allocn-attrib-meta.dat'
+catOrder = Path(metaFilename).read_text()
+catOrder = str.split(catOrder)

 cfatimings = loadParseTimingData('result-allocate-speed-cfa.csv',
                 xClasNames=['expansion'], xClasDtypes={'expansion':'Float64'},
                 xFactNames=['topIters'], xFactDtypes={'topIters':np.int64})
+cfatimings = cfatimings.groupby('expansion').get_group(defaultExpansionCfa)

 cfaattribs = loadParseAttribData('result-allocate-attrib-cfa.ssv')
+cfaattribs = cfaattribs.groupby('expansion').get_group(defaultExpansionCfa)
+
+cfasizings = loadParseSizingData('result-allocate-space-cfa.ssv', xClasNames=['expansion'], xClasDtypes={'expansion':'Float64'})
+cfasizings = cfasizings.groupby('expansion').get_group(defaultExpansionCfa)

 stltimings = loadParseTimingData('result-allocate-speed-stl.csv',
…
 stlattribs = loadParseAttribData('result-allocate-attrib-stl.ssv')

+stlsizings = loadParseSizingData('result-allocate-space-stl.ssv', xClasNames=['expansion'], xClasDtypes={'expansion':'Float64'})
+
 timings = pd.concat([cfatimings, stltimings])
 attribs = pd.concat([cfaattribs, stlattribs])
+sizings = pd.concat([cfasizings, stlsizings])
+
+# print("before join", timings.shape[0], attribs.shape[0])
+# print(timings.to_csv(header=True, index=True, sep='\t', na_rep="0"))
+# print(attribs.to_csv(header=True, index=True, sep='\t', na_rep="0"))
+# print(sizings.to_csv(header=True, index=True, sep='\t', na_rep="0"))

 combined = pd.merge(
-    left=timings[['sut-platform', 'corpus-meanlen','expansion', 'op-duration-ns']],
-    right=attribs[['sut-platform', 'corpus-meanlen','expansion', 'category', 'fraction']],
-    on=['sut-platform', 'corpus-meanlen','expansion']
+    left=timings[['sut-platform', 'corpus', 'corpus-meanlen-tgt','expansion', 'op-duration-ns']],
+    right=attribs[['sut-platform', 'corpus', 'expansion', 'category', 'fraction']],
+    on=['sut-platform', 'corpus','expansion']
 )

+combined = pd.merge(
+    left=combined,
+    right=sizings[['sut-platform', 'corpus','expansion','hw_cur_req_mem(B)']],
+    on=['sut-platform', 'corpus','expansion']
+)
+
+# print("after join", combined.shape[0])
+# print(combined.to_csv(header=True, index=True, sep='\t', na_rep="0"))
+
 combined['cat-duration-ns'] = combined['op-duration-ns'] * combined['fraction']
-combined.drop(columns=['expansion', 'op-duration-ns', 'fraction'], inplace=True)
+combined.drop(columns=['expansion', 'op-duration-ns', 'fraction', 'corpus'], inplace=True)

-pvt = combined.pivot( columns='category', values='cat-duration-ns', index=['corpus-meanlen', 'sut-platform'] )
+# print("before summarize", combined.shape[0])
+# print(combined.to_csv(header=True, index=True, sep='\t', na_rep="0"))

-desired_dcol_order = ["ctor-dtor", "gc", "malloc-free", "text-import", "harness-leaf", "other"]
-pvt = pvt[desired_dcol_order]
+summary = combined.pivot_table(
+    values=['hw_cur_req_mem(B)','cat-duration-ns'],
+    index=['corpus-meanlen-tgt', 'sut-platform', 'category'],
+    aggfunc={'hw_cur_req_mem(B)':'mean','cat-duration-ns':['mean', 'min', 'max']} )
+summary = summary.reset_index()
+summary.columns = summary.columns.to_flat_index()
+summary.columns = [
+    '-'.join(filter(None, col)).replace(' ', '-')  # replaces space with dash if needed
+    for col in summary.columns.to_flat_index()
+]

-filtered = pvt.loc[pvt.index.get_level_values('corpus-meanlen').isin(sizes_i_want)]
+# reorder columns with memory as first value (after sut-platform, which is last key)
+# cols = summary.columns.tolist()
+# cols.remove("hw_cur_req_mem(B)-mean")
+# insert_after = cols.index("sut-platform") + 1
+# cols.insert(insert_after, "hw_cur_req_mem(B)-mean")
+# summary = summary[cols]

-print(filtered.to_csv(header=True, index=True, sep='\t', na_rep="0"))
+summary = summary[[
+    'corpus-meanlen-tgt',
+    'sut-platform',
+    'hw_cur_req_mem(B)-mean',
+    'category',
+    'cat-duration-ns-mean',
+    'cat-duration-ns-max',
+    'cat-duration-ns-min']]

+# print("after summarize", summary.shape[0])
+# print(summary.to_csv(header=True, index=True, sep='\t', na_rep="0"))
+
+
+
+# Ensure 'category' follows the specified order
+summary['category'] = pd.Categorical(summary['category'], categories=catOrder, ordered=True)
+
+# Sort the DataFrame to prepare for cumulative sum
+summary_sorted = summary.sort_values(by=['corpus-meanlen-tgt', 'sut-platform',  'category'])
+
+# Group by the keys and compute exclusive running total
+summary_sorted['grp-prior-duration-ns'] = (
+    summary_sorted
+    .groupby(['corpus-meanlen-tgt', 'sut-platform'])['cat-duration-ns-mean']
+    .transform(lambda s: s.cumsum().shift(fill_value=0))
+)
+
+summary_sorted = summary_sorted.reset_index(drop=True)
+
+
+# print("after accumulation", summary_sorted.shape[0])
+# print(summary_sorted.to_csv(header=True, index=True, sep='\t', na_rep="0"))
+
+
+filtered = summary_sorted[summary_sorted['corpus-meanlen-tgt'].isin(sizes_i_want)]
+
+# print("after filter", filtered.shape[0])
+
+print(filtered.to_csv(header=True, index=False, sep='\t', na_rep="0"))
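
The reworked script joins three measurement tables (operation timings, per-category time fractions, and memory sizings) on their shared run keys before splitting each operation's duration across categories. Below is a minimal sketch of that two-stage merge, using hypothetical toy rows in place of the frames that the loadParse*Data helpers load from the result files:

    import pandas as pd

    # Toy stand-ins for the frames returned by loadParseTimingData /
    # loadParseAttribData / loadParseSizingData (all values are made up).
    timings = pd.DataFrame({'sut-platform': ['cfa'], 'corpus': ['c1'],
                            'corpus-meanlen-tgt': [50], 'expansion': [0.2],
                            'op-duration-ns': [100.0]})
    attribs = pd.DataFrame({'sut-platform': ['cfa', 'cfa'], 'corpus': ['c1', 'c1'],
                            'expansion': [0.2, 0.2],
                            'category': ['gc', 'malloc-free'],
                            'fraction': [0.3, 0.7]})
    sizings = pd.DataFrame({'sut-platform': ['cfa'], 'corpus': ['c1'],
                            'expansion': [0.2], 'hw_cur_req_mem(B)': [4096]})

    # First merge fans each timing row out to one row per attribution category;
    # second merge attaches the memory measurement for the same run.
    combined = pd.merge(left=timings, right=attribs,
                        on=['sut-platform', 'corpus', 'expansion'])
    combined = pd.merge(left=combined, right=sizings,
                        on=['sut-platform', 'corpus', 'expansion'])

    # Split the whole-operation duration across categories by their fractions.
    combined['cat-duration-ns'] = combined['op-duration-ns'] * combined['fraction']
    print(combined[['category', 'cat-duration-ns', 'hw_cur_req_mem(B)']])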
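
The summary step replaces the earlier pivot with pivot_table using a per-column aggfunc dict, which produces tuple (MultiIndex) column labels; joining the non-empty parts with '-' is what yields flat names such as 'cat-duration-ns-mean' that the rest of the script selects. A small sketch of just that step, again on made-up toy data:

    import pandas as pd

    # Toy version of the merged frame (hypothetical numbers).
    combined = pd.DataFrame({
        'corpus-meanlen-tgt': [50, 50, 50, 50],
        'sut-platform':       ['cfa', 'cfa', 'stl', 'stl'],
        'category':           ['gc', 'malloc-free', 'gc', 'malloc-free'],
        'cat-duration-ns':    [10.0, 20.0, 12.0, 25.0],
        'hw_cur_req_mem(B)':  [1000, 1000, 1100, 1100],
    })

    # Mean memory, but mean/min/max duration, per (size, platform, category).
    summary = combined.pivot_table(
        values=['hw_cur_req_mem(B)', 'cat-duration-ns'],
        index=['corpus-meanlen-tgt', 'sut-platform', 'category'],
        aggfunc={'hw_cur_req_mem(B)': 'mean',
                 'cat-duration-ns': ['mean', 'min', 'max']})
    summary = summary.reset_index()

    # The mixed aggfunc gives tuple column labels such as
    # ('cat-duration-ns', 'mean') and ('sut-platform', ''); joining the
    # non-empty parts with '-' flattens them to 'cat-duration-ns-mean' etc.
    summary.columns = ['-'.join(filter(None, col))
                       for col in summary.columns.to_flat_index()]

    print(summary[['corpus-meanlen-tgt', 'sut-platform', 'category',
                   'cat-duration-ns-mean', 'hw_cur_req_mem(B)-mean']])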
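
Finally, the per-category stacking baseline is an exclusive running total: categories are forced into the order read from string-allocn-attrib-meta.dat, the rows are sorted, and each row receives the cumulative duration of the categories before it within its (corpus-meanlen-tgt, sut-platform) group. A sketch of the idea on toy data; the category order below is borrowed from the hard-coded desired_dcol_order list that this changeset removes, so the real meta file may differ:

    import pandas as pd

    # Stacking order; the reworked script reads this from the meta file
    # instead of hard-coding it.
    catOrder = ['ctor-dtor', 'gc', 'malloc-free', 'text-import', 'harness-leaf', 'other']

    # Toy per-category mean durations for a single (size, platform) group.
    summary = pd.DataFrame({
        'corpus-meanlen-tgt':   [50, 50, 50],
        'sut-platform':         ['cfa', 'cfa', 'cfa'],
        'category':             ['malloc-free', 'gc', 'ctor-dtor'],
        'cat-duration-ns-mean': [20.0, 10.0, 5.0],
    })

    # Make 'category' an ordered categorical so sorting follows catOrder,
    # not alphabetical order.
    summary['category'] = pd.Categorical(summary['category'],
                                         categories=catOrder, ordered=True)
    summary = summary.sort_values(by=['corpus-meanlen-tgt', 'sut-platform', 'category'])

    # Exclusive running total per group: cumulative sum shifted by one, so each
    # category sees the total of the categories stacked beneath it (0 for the first).
    summary['grp-prior-duration-ns'] = (
        summary
        .groupby(['corpus-meanlen-tgt', 'sut-platform'])['cat-duration-ns-mean']
        .transform(lambda s: s.cumsum().shift(fill_value=0))
    )

    # ctor-dtor starts at 0, gc at 5, malloc-free at 15.
    print(summary[['category', 'cat-duration-ns-mean', 'grp-prior-duration-ns']]
          .to_string(index=False))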