Changeset d1ccc57


Ignore:
Timestamp:
Apr 10, 2026, 5:16:25 PM (26 hours ago)
Author:
Michael Brooks <mlbrooks@…>
Branches:
master
Children:
e2e927e
Parents:
806534c
Message:

adjust list plot analysis making drilling independent from marginalizing/conditioning

Fix the physical-effect histograms (by passing the right arguments to the knobs above) so that they are completely unconditional, which they always claimed to be. The difference is noticeable but not story-affecting.

Location:
doc/theses/mike_brooks_MMath/plots
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • doc/theses/mike_brooks_MMath/plots/ListCommon.py

    r806534c rd1ccc57  
    136136    #   print( "marginalizing on", marginalizeOn, "conditioning on", conditionOn, file=sys.stderr )
    137137
    138         stats = canonSrc.groupby(conditionOn)['mean_op_dur_ns'].agg(**{
    139             c_tgtPeers: 'count',
    140             c_tgtBl: gmean
    141         })
    142         group_lookup = timings.set_index(conditionOn).index
    143         timings[c_tgtPeers] = stats[c_tgtPeers].reindex(group_lookup).values
    144         timings[c_tgtBl] = stats[c_tgtBl].reindex(group_lookup).values
     138        if conditionOn:
     139            stats = canonSrc.groupby(conditionOn)['mean_op_dur_ns'].agg(**{
     140                c_tgtPeers: 'count',
     141                c_tgtBl: gmean
     142            })
     143            group_lookup = timings.set_index(conditionOn).index
     144            timings[c_tgtPeers] = stats[c_tgtPeers].reindex(group_lookup).values
     145            timings[c_tgtBl] = stats[c_tgtBl].reindex(group_lookup).values
     146        else:
     147            stats = canonSrc.groupby((lambda _: 0))['mean_op_dur_ns'].agg(**{
     148                c_tgtPeers: 'count',
     149                c_tgtBl: gmean
     150            })
     151            # Extract the single row
     152            row = stats.iloc[0]
     153            # Broadcast to all rows
     154            timings[c_tgtPeers] = row[c_tgtPeers]
     155            timings[c_tgtBl] = row[c_tgtBl]
     156
    145157
    146158        # everywhere := itself / [preferred-subset derived]
     
    405417    tgtAccessor = 'all',
    406418    tgtInterleave = 0.0,
    407     marginalizeOn=['fx'] ):
     419    drillOn = ['fx'],
     420    marginalizeOn = None ):  # None means match drill-on
     421
     422    if marginalizeOn == None:
     423        marginalizeOn = drillOn
    408424
    409425    # watch out for filtering too early here; need everything sticking around until baselines are applied
     
    450466    aggregated[ c_measureBkt ] = aggregated[c_measure].apply( botOfBucketOfVal )
    451467
    452     marggrp = aggregated.groupby(marginalizeOn)
     468    drillgrp = aggregated.groupby(drillOn)
    453469
    454470
     
    457473    # print()
    458474
    459     for mkey, mgroup in marggrp:
     475    for dkey, dgroup in drillgrp:
    460476#       print(mgroup, file=sys.stderr)
    461477
    462         histo_raw = mgroup[ c_measureBkt ].value_counts()
     478        histo_raw = dgroup[ c_measureBkt ].value_counts()
    463479        for b in buckets:
    464480            if b not in histo_raw.keys():
     
    472488        histo.insert(y_lo_col_loc + 1, "y_hi", histo["y_lo"].apply(topValOfBucketBotVal))
    473489
    474         header = str.join(', ', mkey)
     490        header = str.join(', ', dkey)
    475491        print(f'"{header}"')
    476492        text = histo.to_csv(header=False, index=False, sep='\t')
  • doc/theses/mike_brooks_MMath/plots/list-mchn-szz.py

    r806534c rd1ccc57  
    1111    tgtPolarity = 'all',
    1212    tgtAccessor = 'all',
    13     marginalizeOn=['machine', 'SizeZone'] )
     13    drillOn=['machine', 'SizeZone'],
     14    marginalizeOn=explanations )
Note: See TracChangeset for help on using the changeset viewer.