source: doc/theses/mike_brooks_MMath/plots/ListCommon.py @ c824afd

Last change on this file since c824afd was f648875, checked in by Michael Brooks <mlbrooks@…>, 3 months ago

Refresh data to harness of 9d3dc40802c6. Latest harness and data now correspond. Plots in built thesis show latest data.

# Based on crunch1
# Updated for run-scenario columns that did not exist back then
# Results eyeballed OK

import pandas as pd
import numpy as np
import os
from subprocess import Popen, PIPE

def getDataset( infile ):
    # grep to remove lines that end in a comma; these were error runs
    with Popen("grep '[^,]$' " + infile, shell=True, stdout=PIPE) as process:
        timings = pd.read_csv(
            process.stdout,
            names=['RunMoment', 'RunIdx', 'Args', 'Program', 'Width',
                   'expt_ops_completed', 'expt_elapsed_sec', 'mean_op_dur_ns'],
            dtype={'RunMoment': str,
                   'RunIdx': np.int64,
                   'Args': str,
                   'Program': str,
                   'Width': np.int64,
                   'expt_ops_completed': np.int64,
                   'expt_elapsed_sec': np.float64,
                   'mean_op_dur_ns': np.float64},
            parse_dates=['RunMoment']
        )
    # print(timings.head())

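    # Hypothetical example of an accepted input row (real rows come from the
    # benchmark harness; error runs end in a comma and are removed by the
    # grep above; the field values here are illustrative only):
    #   2024-05-01T12:00:00,3,"5 100 1000 4000000 42 0.5",run--lq-tailq--queue-insfirst-allhead,1,4000000,1.9,475.0
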
    ## parse executable name and args

    timings[['ExperimentDurSec',
             'CheckDonePeriod',
             'NumNodes',
             'ExperimentDurOpCount',
             'Seed',
             'InterleaveFrac']] = timings['Args'].str.strip().str.split(expand=True)
    timings["NumNodes"] = pd.to_numeric(timings["NumNodes"])
    timings["InterleaveFrac"] = pd.to_numeric(timings["InterleaveFrac"]).round(3)

    timings[['__ProgramPrefix',
             'fx',
             'op']] = timings['Program'].str.split('--', expand=True)

    timings[['movement',
             'polarity',
             'accessor']] = timings['op'].str.split('-', expand=True)

    ## calculate relative to baselines
    baseline_fx = 'lq-tailq'
    baseline_intrl = 0.0

    # chose calc "FineCrossRun" from labpc:crunch3
    # relative to the baseline fx within the same scenario
    byPeer = timings.groupby(['NumNodes', 'op', 'InterleaveFrac'])
    for [NumNodes, op, intrlFrac], peerGroup in byPeer:
        grpfx = peerGroup.groupby(['fx'])
        if baseline_fx in grpfx.groups:
            baselineRows = grpfx.get_group(baseline_fx)
            baselineDur = meanNoOutlr( baselineRows['mean_op_dur_ns'] )
        else:
            baselineDur = 1.0
        timings.loc[peerGroup.index, 'BaselineFxOpDurNs'] = baselineDur
    timings['OpDurRelFx'] = timings['mean_op_dur_ns'] / timings['BaselineFxOpDurNs']

    # relative to same fx, no interleave
    byPeer = timings.groupby(['NumNodes', 'op', 'fx'])
    for [NumNodes, op, fx], peerGroup in byPeer:
        baselineRows = peerGroup.groupby(['InterleaveFrac']).get_group(baseline_intrl)
        baselineDur = meanNoOutlr( baselineRows['mean_op_dur_ns'] )
        timings.loc[peerGroup.index, 'BaselineIntrlOpDurNs'] = baselineDur
    timings['OpDurRelIntrl'] = timings['mean_op_dur_ns'] / timings['BaselineIntrlOpDurNs']

    return timings

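# getDataset returns the raw harness columns plus those derived above: the
# parsed Args fields (ExperimentDurSec .. InterleaveFrac), the parsed Program
# fields (fx, op, movement, polarity, accessor), and the relative measures
# OpDurRelFx (vs. the lq-tailq baseline) and OpDurRelIntrl (vs. the same fx
# at zero interleave).
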
def getSingleResults(infileLocal, *,
                     tgtMovement = 'all',
                     tgtPolarity = 'all',
                     tgtAccessor = 'all',
                     tgtInterleave = 0.0 ):

    infile = os.path.dirname(os.path.abspath(__file__)) + '/../benchmarks/list/' + infileLocal

    timings = getDataset( infile )

    movements = timings['movement'].unique()
    polarities = timings['polarity'].unique()
    accessors = timings['accessor'].unique()
    interleaves = timings['InterleaveFrac'].unique()

    if movements.size > 1:
        movements = np.append(movements, 'all')
    if polarities.size > 1:
        polarities = np.append(polarities, 'all')
    if accessors.size > 1:
        accessors = np.append(accessors, 'all')

    if (tgtMovement != 'all'):
        grp = timings.groupby('movement')
        timings = grp.get_group(tgtMovement)
    if (tgtPolarity != 'all'):
        grp = timings.groupby('polarity')
        timings = grp.get_group(tgtPolarity)
    if (tgtAccessor != 'all'):
        grp = timings.groupby('accessor')
        timings = grp.get_group(tgtAccessor)
    if (tgtInterleave != 'all'):
        timings = timings[ timings['InterleaveFrac'] == float(tgtInterleave) ]

    return timings

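# A minimal usage sketch; 'results.csv' is a hypothetical file name under
# ../benchmarks/list/, not one shipped with the thesis:
#   stacks = getSingleResults('results.csv', tgtMovement = 'stack')
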
def stripMachine(pyCore):
    # drop the trailing '-'-separated component (the machine name)
    parts = pyCore.split('-')
    exceptLast = parts[ 0 : -1 ]
    return '-'.join(exceptLast)

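# Hypothetical example: stripMachine('ops-mixed-labpc') returns 'ops-mixed';
# the argument here is illustrative, not a real data-file core name.
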
def getSummaryMeta(metaFileCore):
    metafile = os.path.dirname(os.path.abspath(__file__)) + "/" + metaFileCore + '-meta.dat'
    metadata = pd.read_csv(
        metafile,
        names=['OpIx', 'Op'],
        delimiter='\t'
    )
    # split the label on its literal "\n" separators
    metadata[['movement',
              'polarity',
              'accessor']] = metadata['Op'].str.split('\\\\n', expand=True)
    # expand the abbreviated operation codes
    metadata.replace({'*': 'all',
                      'S': 'stack', 'Q': 'queue',
                      'iF': 'insfirst', 'iL': 'inslast',
                      'H': 'allhead', 'Ie': 'inselem', 'Re': 'remelem'},
                     inplace=True)
    return metadata

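# The meta file is tab-separated with one operation per line; a hypothetical
# line (with <TAB> standing for a tab character):
#   1<TAB>S\niF\nH
# expands to movement='stack', polarity='insfirst', accessor='allhead'.
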
# NumNodes predicates, used as the sizeQual argument of printManySummary
swiftSweetspot = (lambda x: x > 16 and x < 150)
# swiftSweetspot = (lambda x: x > 4 and x < 32)
javaSweetspot = (lambda x: x >= 24 and x <= 256)

def printManySummary(*,
                     infileLocal,
                     metafileCore,
                     fxs,
                     sizeQual,
                     tgtInterleave = 0.0,
                     measure = 'OpDurRelFx') :

    metadata = getSummaryMeta(metafileCore)

    print("# op_num\tfx_num\tfx\tmean\tstdev\tmin\tmax\tcount\tpl95\tpl68\tp50\tph68\tph95")

    for op in metadata.itertuples():
        timings = getSingleResults(infileLocal,
                                   tgtMovement = op.movement,
                                   tgtPolarity = op.polarity,
                                   tgtAccessor = op.accessor,
                                   tgtInterleave = tgtInterleave )

        timings = timings[ timings['fx'].isin(fxs) ]
        timings = timings[ timings['NumNodes'].apply(sizeQual) ]

        fxnums = timings['fx'].apply(
            lambda fx: fxs.index(fx) + 1
        )
        timings.insert(loc=0, column='fx_num', value=fxnums)
        timings.insert(loc=0, column='op_num', value=op.OpIx)

        grouped = timings.groupby(['op_num', 'fx_num', 'fx'])

        # percentile columns bracket the median at roughly the two-sigma
        # (95%) and one-sigma (68%) levels
        aggregated = grouped[measure].agg(
            ["mean", "std", "min", "max", "count",
             lambda x: x.quantile(0.025),
             lambda x: x.quantile(0.16),
             lambda x: x.quantile(0.5),
             lambda x: x.quantile(0.84),
             lambda x: x.quantile(0.975)]
        )

        text = aggregated.to_csv(header=False, index=True, sep='\t')
        print(text, end='')

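# A minimal usage sketch; the file-name cores and the second fx value are
# hypothetical placeholders:
#   printManySummary(infileLocal = 'results.csv', metafileCore = 'list-ops',
#                    fxs = ['lq-tailq', 'cfa-list'], sizeQual = javaSweetspot)
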
def printSingleDetail(infileLocal, *,
                      tgtMovement = 'all',
                      tgtPolarity = 'all',
                      tgtAccessor = 'all',
                      tgtInterleave = 0.0,
                      measure = 'mean_op_dur_ns' ):

    timings = getSingleResults(infileLocal,
                               tgtMovement = tgtMovement,
                               tgtPolarity = tgtPolarity,
                               tgtAccessor = tgtAccessor,
                               tgtInterleave = tgtInterleave)
    groupedFx = timings.groupby('fx')

    for fx, fgroup in groupedFx:
        # print(fgroup.head())
        groupedRun = fgroup.groupby(['NumNodes']) # , 'fx', 'op'
        aggregated = groupedRun[measure].agg(
            ["mean", "std", "min", "max", "count", "sum"]
        )
        aggregated['mean_no_outlr'] = (
            ( aggregated['sum'] - aggregated['min'] - aggregated['max'] )
            /
            ( aggregated['count'] - 2 )
        )

        # print(aggregated.head())

        print('"{header}"'.format(header=fx))
        text = aggregated.to_csv(header=False, index=True, sep='\t')
        print(text)
        print()
        print()

def meanNoOutlr(series):
    # trimmed mean: discard the single smallest and largest observations
    return ( series.sum() - series.min() - series.max() ) / ( series.count() - 2 )
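
# A minimal sketch of driving these helpers directly; the data-file name is a
# hypothetical placeholder, so this block is illustrative rather than part of
# the thesis build.
if __name__ == '__main__':
    printSingleDetail('results.csv',            # hypothetical harness output
                      tgtMovement = 'stack',
                      tgtPolarity = 'insfirst',
                      tgtAccessor = 'allhead')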