Ignore:
Timestamp:
Apr 10, 2026, 5:15:34 PM (3 weeks ago)
Author:
Michael Brooks <mlbrooks@…>
Branches:
master
Children:
d1ccc57
Parents:
17f2a7f4
Message:

Refactor the list performance analysis, making the histogram view reusable

Location:
doc/theses/mike_brooks_MMath/plots
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • doc/theses/mike_brooks_MMath/plots/ListCommon.py

    r17f2a7f4 r806534c  
    11import pandas as pd
    22import numpy as np
    3 import sys
     3import math
    44import os
    55from subprocess import Popen, PIPE
     
    358358def gMeanNoOutlr(range):
    359359    return ( range.prod() / range.min() / range.max() ) ** ( 1 / ( range.count() - 2 ) )
     360
     361
     362# The range from 0.9759 to 1.0247 (which is 1.05 x wide) has 1.0 in its centre.
     363# This is the bucket with key 0.
     364# Logs of values in this bucket go from -0.5 to +0.5.
     365# Rounding a log value to the nearest integer gives the key.
     366# Exponentiating a key directly gives the centre of its bucket.
     367# Exponentiating a key less 0.5 gives the bottom of its bucket.
     368# Gnuplot expects the latter.
     369
     370bucketMin = 0.25
     371bucketMax = 4.0
     372bucketGrain = 1.05
     373bktKeyLo = math.floor( math.log(bucketMin, bucketGrain) )
     374bktKeyHi = math.ceil( math.log(bucketMax, bucketGrain) )
     375
     376def bktKeyOfVal( relDur ):
     377    distance = math.log(relDur, bucketGrain)
     378    key = round( distance )
     379    return key
     380
     381def bktIxOfVal( relDur ):
     382    return bktKeyToIx( bktKeyOfVal( relDur ) )
     383
     384def botValOfBucketK( key ):
     385    return bucketGrain ** ( key - 0.5 )
     386
     387def topValOfBucketBotVal( botVal ):
     388    return bucketGrain * botVal
     389
     390def bktKeyToIx( key ):
     391    return key - bktKeyLo
     392
     393def bktIxToKey( ix ):
     394    return ix + bktKeyLo
     395
     396def botOfBucketOfVal( relDur ):
     397    return botValOfBucketK( bktKeyOfVal( relDur ) )
     398
     399buckets = [ botValOfBucketK(key) for key in range(bktKeyLo, bktKeyHi) ]
     400
     401# printSingleDetail
     402def printHistos(*,
     403    tgtMovement = 'all',
     404    tgtPolarity = 'all',
     405    tgtAccessor = 'all',
     406    tgtInterleave = 0.0,
     407    marginalizeOn=['fx'] ):
     408
     409    # watch out for filtering too early here; need everything sticking around until baselines are applies
     410    # ie, maybe I should get rid of all the tgt parms at the pre-benchmark layers
     411    timings = getSingleResults(
     412        tgtMovement = tgtMovement,
     413        tgtPolarity = tgtPolarity,
     414        tgtAccessor = tgtAccessor,
     415        tgtInterleave = tgtInterleave)
     416    timings = getJustCanon( timings,
     417                  fxInc = ['cfa-cfa', 'lq-tailq', 'upp-upp', 'lq-list'],
     418                  szInc = ['SM', 'ML'],
     419                  sExcl = [1] )
     420
     421
     422#    annotateBaselines(timings)
     423
     424
     425    options = timings.groupby(explanations)
     426
     427    aggregated = options.agg(
     428        mean_op_dur_ns = ('mean_op_dur_ns', gMeanNoOutlr)
     429    ).reset_index()
     430
     431
     432    annotateBaseline(aggregated, marginalizeOn)
     433#    annotateCommonBaselines(aggregated)
     434
     435
     436    # if examining "why CFA slow" need both
     437    # - getVariousCfa inplace of getJust Canon
     438    # - do annotate-then-filter because baseline needs to stay cfa-tailq-upp
     439    # (filter-then-annotate is fine for general cases (where all three canons are included) and good for build time)
     440
     441
     442    c_measure = c('OpDurRel', marginalizeOn)
     443    # options = timings.groupby(explanations)
     444
     445    # aggregated = options.agg(
     446    #     **{measure:(measure,gMeanNoOutlr)}
     447    # ).reset_index()
     448
     449    c_measureBkt = 'BUCKET_' + c_measure
     450    aggregated[ c_measureBkt ] = aggregated[c_measure].apply( botOfBucketOfVal )
     451
     452    marggrp = aggregated.groupby(marginalizeOn)
     453
     454
     455    # print(f'measure is {measure}')
     456    # print()
     457    # print()
     458
     459    for mkey, mgroup in marggrp:
     460#       print(mgroup, file=sys.stderr)
     461
     462        histo_raw = mgroup[ c_measureBkt ].value_counts()
     463        for b in buckets:
     464            if b not in histo_raw.keys():
     465#                print( f"{b} := 0", file=sys.stderr )
     466                histo_raw[b] = 0
     467        histo_raw = histo_raw.sort_index()
     468
     469        histo = histo_raw.rename("count").reset_index()
     470        histo = histo.rename(columns={c_measureBkt: "y_lo"})
     471        y_lo_col_loc = histo.columns.get_loc("y_lo")
     472        histo.insert(y_lo_col_loc + 1, "y_hi", histo["y_lo"].apply(topValOfBucketBotVal))
     473
     474        header = str.join(', ', mkey)
     475        print(f'"{header}"')
     476        text = histo.to_csv(header=False, index=False, sep='\t')
     477        print(text)
     478        print()
     479        print()
     480
     481        # print(f'"{header}" FULL')
     482        # text = group.to_csv(header=False, index=True, sep='\t')
     483        # print(text)
     484        # print()
     485        # print()
     486
     487    # print(f'"RAW"')
     488    # text = timings.to_csv(header=False, index=True, sep='\t')
     489    # print(text)
  • doc/theses/mike_brooks_MMath/plots/list-mchn-szz.py

    r17f2a7f4 r806534c  
    33import os
    44import sys
    5 import math
    65
    76sys.path.insert(0, os.path.dirname(__file__))
    87from ListCommon import *
    9 
    10 # The range from 0.9759 to 1.0247 (which is 1.05 x wide) has 1.0 in its centre.
    11 # This is the bucket with key 0.
    12 # Logs of values in this bucket go from -0.5 to +0.5.
    13 # Rounding a log value to the nearest integer gives the key.
    14 # Exponentiating a key directly gives the centre of its bucket.
    15 # Exponentiating a key less 0.5 gives the bottom of its bucket.
    16 # Gnuplot expects the latter.
    17 
    18 bucketMin = 0.25
    19 bucketMax = 4.0
    20 bucketGrain = 1.05
    21 bktKeyLo = math.floor( math.log(bucketMin, bucketGrain) )
    22 bktKeyHi = math.ceil( math.log(bucketMax, bucketGrain) )
    23 
    24 def bktKeyOfVal( relDur ):
    25     distance = math.log(relDur, bucketGrain)
    26     key = round( distance )
    27     return key
    28 
    29 def bktIxOfVal( relDur ):
    30     return bktKeyToIx( bktKeyOfVal( relDur ) )
    31 
    32 def botValOfBucketK( key ):
    33     return bucketGrain ** ( key - 0.5 )
    34 
    35 def topValOfBucketBotVal( botVal ):
    36     return bucketGrain * botVal
    37 
    38 def bktKeyToIx( key ):
    39     return key - bktKeyLo
    40 
    41 def bktIxToKey( ix ):
    42     return ix + bktKeyLo
    43 
    44 def botOfBucketOfVal( relDur ):
    45     return botValOfBucketK( bktKeyOfVal( relDur ) )
    46 
    47 buckets = [ botValOfBucketK(key) for key in range(bktKeyLo, bktKeyHi) ]
    48 
    49 # printSingleDetail
    50 def printHistos(*,
    51     tgtMovement = 'all',
    52     tgtPolarity = 'all',
    53     tgtAccessor = 'all',
    54     tgtInterleave = 0.0,
    55     marginalizeOn=['fx'] ):
    56 
    57     # watch out for filtering too early here; need everything sticking around until baselines are applied
    58     # ie, maybe I should get rid of all the tgt parms at the pre-benchmark layers
    59     timings = getSingleResults(
    60         tgtMovement = tgtMovement,
    61         tgtPolarity = tgtPolarity,
    62         tgtAccessor = tgtAccessor,
    63         tgtInterleave = tgtInterleave)
    64     timings = getJustCanon( timings,
    65                   fxInc = ['cfa-cfa', 'lq-tailq', 'upp-upp', 'lq-list'],
    66                   szInc = ['SM', 'ML'],
    67                   sExcl = [1] )
    68 
    69 
    70 #    annotateBaselines(timings)
    71 
    72 
    73     options = timings.groupby(explanations)
    74 
    75     aggregated = options.agg(
    76         mean_op_dur_ns = ('mean_op_dur_ns', gMeanNoOutlr)
    77     ).reset_index()
    78 
    79 
    80     annotateBaseline(aggregated, marginalizeOn)
    81 #    annotateCommonBaselines(aggregated)
    82 
    83 
    84     # if examining "why CFA slow" need both
    85     # - getVariousCfa in place of getJustCanon
    86     # - do annotate-then-filter because baseline needs to stay cfa-tailq-upp
    87     # (filter-then-annotate is fine for general cases (where all three canons are included) and good for build time)
    88 
    89 
    90     c_measure = c('OpDurRel', marginalizeOn)
    91     # options = timings.groupby(explanations)
    92 
    93     # aggregated = options.agg(
    94     #     **{measure:(measure,gMeanNoOutlr)}
    95     # ).reset_index()
    96 
    97     c_measureBkt = 'BUCKET_' + c_measure
    98     aggregated[ c_measureBkt ] = aggregated[c_measure].apply( botOfBucketOfVal )
    99 
    100     marggrp = aggregated.groupby(marginalizeOn)
    101 
    102 
    103     # print(f'measure is {measure}')
    104     # print()
    105     # print()
    106 
    107     for mkey, mgroup in marggrp:
    108 #       print(mgroup, file=sys.stderr)
    109 
    110         histo_raw = mgroup[ c_measureBkt ].value_counts()
    111         for b in buckets:
    112             if b not in histo_raw.keys():
    113 #                print( f"{b} := 0", file=sys.stderr )
    114                 histo_raw[b] = 0
    115         histo_raw = histo_raw.sort_index()
    116 
    117         histo = histo_raw.rename("count").reset_index()
    118         histo = histo.rename(columns={c_measureBkt: "y_lo"})
    119         y_lo_col_loc = histo.columns.get_loc("y_lo")
    120         histo.insert(y_lo_col_loc + 1, "y_hi", histo["y_lo"].apply(topValOfBucketBotVal))
    121 
    122         header = str.join(', ', mkey)
    123         print(f'"{header}"')
    124         text = histo.to_csv(header=False, index=False, sep='\t')
    125         print(text)
    126         print()
    127         print()
    128 
    129         # print(f'"{header}" FULL')
    130         # text = group.to_csv(header=False, index=True, sep='\t')
    131         # print(text)
    132         # print()
    133         # print()
    134 
    135     # print(f'"RAW"')
    136     # text = timings.to_csv(header=False, index=True, sep='\t')
    137     # print(text)
    138    
    1398
    1409printHistos(
     
    14312    tgtAccessor = 'all',
    14413    marginalizeOn=['machine', 'SizeZone'] )
    145 
Note: See TracChangeset for help on using the changeset viewer.