source: doc/theses/mike_brooks_MMath/plots/ListCommon.py@ 81e1984b

Last change on this file since 81e1984b was 29c6a7d, checked in by Michael Brooks <mlbrooks@…>, 2 months ago

Use plots-based data crunching for quick analysis of ad-hoc testing, supporting tailq data not being present

# Based on crunch1
# updated for run-scenario columns not seen back then
# results eyeballed okay

import pandas as pd
import numpy as np
import os


def getSingleResults(infileLocal, *,
                     tgtMovement = 'all',
                     tgtPolarity = 'all',
                     tgtAccessor = 'all',
                     tgtInterleave = 0.0):
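    """
    Load one benchmark-result CSV from ../benchmarks/list/, split out the
    run-scenario columns, attach the derived relative measures OpDurRelFx and
    OpDurRelIntrl, and return the rows filtered to the requested movement,
    polarity, accessor, and interleave ('all' leaves a dimension unfiltered).
    """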

    infile = os.path.dirname(os.path.abspath(__file__)) + '/../benchmarks/list/' + infileLocal

    timings = pd.read_csv(
        infile,
        names=['RunMoment', 'RunIdx', 'Args', 'Program', 'expt_ops_completed', 'expt_elapsed_sec', 'mean_op_dur_ns'],
        dtype={'RunMoment': str,
               'RunIdx': np.int64,
               'Args': str,
               'Program': str,
               'expt_ops_completed': np.int64,
               'expt_elapsed_sec': np.float64,
               'mean_op_dur_ns': np.float64},
        parse_dates=['RunMoment']
    )
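    # Each row records one timed run: 'Args' packs the six run-scenario
    # parameters split out below, and 'Program' names the executable variant
    # as <prefix>--<fx>--<op>.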
    # print(timings.head())

    ## parse executable name and args

    timings[['ExperimentDurSec',
             'CheckDonePeriod',
             'NumNodes',
             'ExperimentDurOpCount',
             'Seed',
             'InterleaveFrac']] = timings['Args'].str.strip().str.split(expand=True)
    timings['NumNodes'] = pd.to_numeric(timings['NumNodes'])
    timings['InterleaveFrac'] = pd.to_numeric(timings['InterleaveFrac']).round(3)

    timings[['__ProgramPrefix',
             'fx',
             'op']] = timings['Program'].str.split('--', expand=True)

    timings[['movement',
             'polarity',
             'accessor']] = timings['op'].str.split('-', expand=True)
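    # For example, a (hypothetical) Program value
    # 'listbench--lq-tailq--insert-first-here' gives fx='lq-tailq' and
    # op='insert-first-here', i.e. movement='insert', polarity='first',
    # accessor='here'; splitting on '--' first keeps hyphenated fx names
    # like 'lq-tailq' intact.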

    ## calculate relative to baselines
    baseline_fx = 'lq-tailq'
    baseline_intrl = 0.0
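    # Two relative measures are derived below:
    #  - OpDurRelFx: mean op duration over the outlier-trimmed mean of the
    #    lq-tailq runs in the same (NumNodes, op, InterleaveFrac) peer group;
    #    with no lq-tailq data present, the divisor falls back to 1.0 and the
    #    raw nanosecond durations pass through unscaled.
    #  - OpDurRelIntrl: mean op duration over the same fx's outlier-trimmed
    #    mean at zero interleave, within the same (NumNodes, op) peer group.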

    # chose calc "FineCrossRun" from labpc:crunch3
    byPeer = timings.groupby(['NumNodes', 'op', 'InterleaveFrac'])
    for (NumNodes, op, intrlFrac), peerGroup in byPeer:
        grpfx = peerGroup.groupby('fx')
        if baseline_fx in grpfx.groups:
            baselineRows = grpfx.get_group(baseline_fx)
            baselineDur = meanNoOutlr(baselineRows['mean_op_dur_ns'])
        else:
            # no lq-tailq rows in this peer group; leave durations unscaled
            baselineDur = 1.0
        timings.loc[peerGroup.index, 'BaselineFxOpDurNs'] = baselineDur
    timings['OpDurRelFx'] = timings['mean_op_dur_ns'] / timings['BaselineFxOpDurNs']

    # relative to the same fx with no interleave
    byPeer = timings.groupby(['NumNodes', 'op', 'fx'])
    for (NumNodes, op, fx), peerGroup in byPeer:
        baselineRows = peerGroup.groupby('InterleaveFrac').get_group(baseline_intrl)
        baselineDur = meanNoOutlr(baselineRows['mean_op_dur_ns'])
        timings.loc[peerGroup.index, 'BaselineIntrlOpDurNs'] = baselineDur
    timings['OpDurRelIntrl'] = timings['mean_op_dur_ns'] / timings['BaselineIntrlOpDurNs']

    movements = timings['movement'].unique()
    polarities = timings['polarity'].unique()
    accessors = timings['accessor'].unique()
    interleaves = timings['InterleaveFrac'].unique()

    if movements.size > 1:
        movements = np.append(movements, 'all')
    if polarities.size > 1:
        polarities = np.append(polarities, 'all')
    if accessors.size > 1:
        accessors = np.append(accessors, 'all')

    if tgtMovement != 'all':
        timings = timings.groupby('movement').get_group(tgtMovement)
    if tgtPolarity != 'all':
        timings = timings.groupby('polarity').get_group(tgtPolarity)
    if tgtAccessor != 'all':
        timings = timings.groupby('accessor').get_group(tgtAccessor)
    if tgtInterleave != 'all':
        timings = timings[timings['InterleaveFrac'] == float(tgtInterleave)]

    return timings

def getSummaryMeta(metaFileCore):
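    """
    Load the '<metaFileCore>-meta.dat' plot-metadata file: one tab-separated
    (OpIx, Op) row per plotted operation, where Op joins movement, polarity,
    and accessor with literal '\\n' sequences (gnuplot newline style) and
    uses '*' for 'all'.
    """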
    metafile = os.path.dirname(os.path.abspath(__file__)) + '/' + metaFileCore + '-meta.dat'
    metadata = pd.read_csv(
        metafile,
        names=['OpIx', 'Op'],
        delimiter='\t'
    )
    # the data holds a literal backslash-n, so the split pattern is the
    # regex-escaped form of that two-character sequence
    metadata[['movement',
              'polarity',
              'accessor']] = metadata['Op'].str.split('\\\\n', expand=True)
    metadata.replace('*', 'all', inplace=True)
    return metadata

def printManySummary(*,
                     infileLocal,
                     metafileCore,
                     fxs,
                     sizeQual = (lambda x: x < 150),  # x < 8
                     tgtInterleave = 0.0,
                     measure = 'OpDurRelFx'):
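    """
    Print one summary row per (op, fx) pair: mean, stdev, min, max, count,
    then the 2.5th/16th/50th/84th/97.5th percentiles of the chosen measure
    (the pl95/pl68/p50/ph68/ph95 columns: central 95% and 68% intervals
    around the median). Rows are limited to the listed fxs and to NumNodes
    values accepted by sizeQual.
    """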

    metadata = getSummaryMeta(metafileCore)

    print("# op_num\tfx_num\tfx\tmean\tstdev\tmin\tmax\tcount\tpl95\tpl68\tp50\tph68\tph95")

    for op in metadata.itertuples():
        timings = getSingleResults(infileLocal,
                                   tgtMovement = op.movement,
                                   tgtPolarity = op.polarity,
                                   tgtAccessor = op.accessor,
                                   tgtInterleave = tgtInterleave)

        timings = timings[timings['fx'].isin(fxs)]
        timings = timings[timings['NumNodes'].apply(sizeQual)].copy()

        fxnums = timings['fx'].apply(lambda fx: fxs.index(fx) + 1)
        timings.insert(loc=0, column='fx_num', value=fxnums)
        timings.insert(loc=0, column='op_num', value=op.OpIx)

        grouped = timings.groupby(['op_num', 'fx_num', 'fx'])

        aggregated = grouped[measure].agg(
            ["mean", "std", "min", "max", "count",
             lambda x: x.quantile(0.025),
             lambda x: x.quantile(0.16),
             lambda x: x.quantile(0.5),
             lambda x: x.quantile(0.84),
             lambda x: x.quantile(0.975)]
        )

        text = aggregated.to_csv(header=False, index=True, sep='\t')
        print(text, end='')

def printSingleDetail(infileLocal, *,
                      tgtMovement = 'all',
                      tgtPolarity = 'all',
                      tgtAccessor = 'all',
                      tgtInterleave = 0.0,
                      measure = 'mean_op_dur_ns'):
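    """
    For each fx, print per-NumNodes aggregates of the chosen measure (mean,
    stdev, min, max, count, sum, and an outlier-trimmed mean) as a block
    headed by the quoted fx name; blocks are separated by two blank lines
    (gnuplot dataset style).
    """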

    timings = getSingleResults(infileLocal,
                               tgtMovement = tgtMovement,
                               tgtPolarity = tgtPolarity,
                               tgtAccessor = tgtAccessor,
                               tgtInterleave = tgtInterleave)
    groupedFx = timings.groupby('fx')

    for fx, fgroup in groupedFx:
        # print(fgroup.head())
        groupedRun = fgroup.groupby('NumNodes')  # , 'fx', 'op'
        aggregated = groupedRun[measure].agg(
            ["mean", "std", "min", "max", "count", "sum"]
        )
        # same trimmed mean as meanNoOutlr, rebuilt from the aggregates
        aggregated['mean_no_outlr'] = (
            (aggregated['sum'] - aggregated['min'] - aggregated['max'])
            /
            (aggregated['count'] - 2)
        )

        # print(aggregated.head())

        print('"{header}"'.format(header=fx))
        text = aggregated.to_csv(header=False, index=True, sep='\t')
        print(text)
        print()
        print()

def meanNoOutlr(values):
    # trimmed mean: drop the single lowest and highest values
    # (assumes at least 3 samples; fewer makes the divisor non-positive)
    return (values.sum() - values.min() - values.max()) / (values.count() - 2)
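

# Example ad-hoc usage; the file names and fx list below are hypothetical
# placeholders, not shipped data.
if __name__ == '__main__':
    printSingleDetail('results.csv', tgtMovement='all', tgtInterleave=0.0)
    printManySummary(infileLocal='results.csv', metafileCore='ops',
                     fxs=['lq-tailq'], tgtInterleave=0.0)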