Index: doc/theses/mike_brooks_MMath/plots/common.py
===================================================================
--- doc/theses/mike_brooks_MMath/plots/common.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/common.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,113 @@
+import pandas as pd
+import numpy as np
+import sys
+import os
+# Split the 'test' and 'corpus' name columns of dt into component columns, added to dt in place; returns nothing.
+def parseTestCorpus(dt):
+    dt[['test-slug',
+        'sut-platform',
+        'operation',
+        'sut-cfa-level',
+        'sut-cfa-sharing',
+        'op-alloc']] = dt['test'].str.strip().str.split('-', expand=True)
+    dt['sut'] = dt[['sut-platform',
+                        'sut-cfa-level',
+                        'sut-cfa-sharing',
+                        'op-alloc']].agg('-'.join, axis=1)  # sut = these four parts rejoined with '-'; test-slug and operation are left out
+    # 'corpus' is split at '.' into basename and extension; the basename is split at '-' and its last three parts are made numeric
+    dt[['corpus-basename',
+        'corpus-ext']] = dt['corpus'].str.strip().str.split('.', expand=True)
+    dt[['corpus-slug',
+        'corpus-nstrs',
+        'corpus-meanlen',
+        'corpus-runid']] = dt['corpus-basename'].str.strip().str.split('-', expand=True)
+    dt["corpus-nstrs"] = pd.to_numeric(dt["corpus-nstrs"])
+    dt["corpus-meanlen"] = pd.to_numeric(dt["corpus-meanlen"])
+    dt["corpus-runid"] = pd.to_numeric(dt["corpus-runid"])
+
+def loadParseTimingData( infileLocal, xClasNames=[], xClasDtypes={}, xFactNames=[], xFactDtypes={} ):
+    # Read the headerless timing CSV infileLocal from ../benchmarks/string/ (relative to this file); return it with parsed-name and duration columns added.
+    infile = os.path.dirname(os.path.abspath(__file__)) + '/../benchmarks/string/' + infileLocal
+    # xClasNames / xFactNames: extra column names placed after 'corpus' / after 'execTimeActualSec'; x*Dtypes: their dtypes. The fixed dtypes are merged last, so they override same-named caller entries.
+    timings = pd.read_csv(
+        infile,
+        names=['test', 'corpus'] + xClasNames + [ 'concatsPerReset', 'corpusItemCount', 'corpusMeanLenChars', 'concatDoneActualCount', 'execTimeActualSec'] + xFactNames,
+        dtype={**xClasDtypes, **xFactDtypes, **{
+            'test':                  str,
+            'corpus':                str,
+            'concatsPerReset':       'Int64', # allows missing; https://stackoverflow.com/a/70626154
+            'corpusItemCount':       np.int64,
+            'corpusMeanLenChars':    np.float64,
+            'concatDoneActualCount': np.int64,
+            'execTimeActualSec':     np.float64,
+            'Q': np.int64}},
+        # NOTE(review): 'Q' is not one of the fixed column names above; it only matters if a caller supplies a column named 'Q' - confirm it is still needed
+        na_values=['xxx'],
+    )
+    # print(timings.head())
+
+
+    # project: parse executable and corpus names
+
+    parseTestCorpus(timings)
+
+    # project: calculate fact
+
+    timings['op-duration-s'] = timings['execTimeActualSec'] / timings['concatDoneActualCount']
+    timings['op-duration-ns'] = timings['op-duration-s'] * 1000 * 1000 * 1000  # seconds -> nanoseconds
+
+    return timings
+
+def loadParseSizingData( infileLocal, xClasNames=[], xClasDtypes={}, xFactNames=[], xFactDtypes={} ):
+    # Read the headerless, space-separated sizing file infileLocal from ../benchmarks/string/ (relative to this file); return it with parsed-name columns added.
+    infile = os.path.dirname(os.path.abspath(__file__)) + '/../benchmarks/string/' + infileLocal
+    # xClasNames are placed after 'corpus' and xFactNames after 'epoch_timestamp(ms)'; the fixed dtypes are merged last, so they override same-named caller entries.
+    sizings = pd.read_csv(
+        infile,
+        sep=' ',
+        names=['test', 'corpus'] + xClasNames + ['ppid', 'pid', 'malloc_count', 'free_count',
+            'calloc_count', 'realloc_count',
+            'requsted_mem(B)', 'current_req_mem(B)', 'hw_cur_req_mem(B)', 'text', 'heap', 'mmap_so', 'mmap',
+            'stack', 'vvar', 'vdso', 'vsyscall', 'unfigured', 'total_dynamic',
+            'epoch_timestamp(ms)'] + xFactNames,
+        dtype={**xClasDtypes, **xFactDtypes, **{
+            'test':                  str,
+            'corpus':                str,
+            'ppid': np.int64, 'pid': np.int64, 'malloc_count': np.int64, 'free_count': np.int64,
+            'calloc_count': np.int64, 'realloc_count': np.int64,
+            'requsted_mem(B)': np.int64, 'current_req_mem(B)': np.int64,
+            'hw_cur_req_mem(B)': np.int64, 'text': np.int64, 'heap': np.int64,
+            'mmap_so': np.int64, 'mmap': np.int64,
+            'stack': np.int64, 'vvar': np.int64, 'vdso': np.int64, 'vsyscall': np.int64, 'unfigured': np.int64, 'total_dynamic': np.int64,
+            'epoch_timestamp(ms)': np.int64}}
+    )
+
+    parseTestCorpus(sizings)  # adds the parsed 'test' / 'corpus' component columns in place
+
+    return sizings
+
+
+def loadParseAttribData( infileLocal ):
+    # Read the headerless, space-separated attribution file infileLocal from ../benchmarks/string/ (relative to this file); return it with parsed-name columns added.
+    infile = os.path.dirname(os.path.abspath(__file__)) + '/../benchmarks/string/' + infileLocal
+    # The column list is fixed here; this loader takes no caller-supplied extra columns.
+    attribs = pd.read_csv(
+        infile,
+        sep=' ',
+        names=[
+            "test", "corpus", "expansion", "category", "samples_in_category", "total_samples",
+            "fraction", "sources"],
+        dtype={
+            "test": str,
+            "corpus": str,
+            "expansion": np.float64,
+            "category": str,
+            "samples_in_category": np.int64,
+            "total_samples": np.int64,
+            "fraction": np.float64,
+            "sources": str}
+    )
+
+    parseTestCorpus(attribs)  # adds the parsed 'test' / 'corpus' component columns in place
+
+    return attribs
Index: doc/theses/mike_brooks_MMath/plots/string-allocn-attrib.py
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-allocn-attrib.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-allocn-attrib.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,56 @@
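+# Read result-allocate-speed-{cfa,stl}.csv and result-allocate-attrib-{cfa,stl}.ssv
+# Output (stdout) has the layout string-allocn.gp reads from plot-string-allocn-attrib.dat
+
+# Project details
+# Join timings to attributions on sut-platform, corpus-meanlen, expansion
+# Scale each category's fraction by op-duration-ns to get a per-category duration
+# Pivot categories into columns, one row per (corpus-meanlen, sut-platform)
+# Keep only the string lengths listed in sizes_i_want
+# output (tab-separated, with a header line):
+# corpus-meanlen sut-platform <one duration column per category>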
+
+import pandas as pd
+import numpy as np
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(__file__))
+from common import *
+
+sizes_i_want = [50, 200] # corpus-meanlen values to emit; wider alternative: [20, 50, 100, 200]
+
+# assumes the CFA threshold was only run at its default value
+
+cfatimings = loadParseTimingData('result-allocate-speed-cfa.csv',
+                xClasNames=['expansion'], xClasDtypes={'expansion':'Float64'},
+                xFactNames=['topIters'], xFactDtypes={'topIters':np.int64})
+
+cfaattribs = loadParseAttribData('result-allocate-attrib-cfa.ssv')
+
+stltimings = loadParseTimingData('result-allocate-speed-stl.csv',
+                xClasNames=['expansion'], xClasDtypes={'expansion':'Float64'},
+                xFactNames=['topIters'], xFactDtypes={'topIters':np.int64})
+
+stlattribs = loadParseAttribData('result-allocate-attrib-stl.ssv')
+# Stack the CFA and STL rows into one frame of each kind
+timings = pd.concat([cfatimings, stltimings])
+attribs = pd.concat([cfaattribs, stlattribs])
+# Inner join (the pd.merge default): rows lacking a partner on all three keys are dropped
+combined = pd.merge(
+    left=timings[['sut-platform', 'corpus-meanlen','expansion', 'op-duration-ns']],
+    right=attribs[['sut-platform', 'corpus-meanlen','expansion', 'category', 'fraction']],
+    on=['sut-platform', 'corpus-meanlen','expansion']
+)
+# Apportion each operation duration to a category by that category's fraction
+combined['cat-duration-ns'] = combined['op-duration-ns'] * combined['fraction']
+combined.drop(columns=['expansion', 'op-duration-ns', 'fraction'], inplace=True)
+# One row per (corpus-meanlen, sut-platform), one column per category; pivot raises if an entry is duplicated
+pvt = combined.pivot( columns='category', values='cat-duration-ns', index=['corpus-meanlen', 'sut-platform'] )
+# Fix the order of the category columns in the output
+desired_dcol_order = ["ctor-dtor", "gc", "malloc-free", "text-import", "harness-leaf", "other"]
+pvt = pvt[desired_dcol_order]
+# Keep only the string lengths of interest
+filtered = pvt.loc[pvt.index.get_level_values('corpus-meanlen').isin(sizes_i_want)]
+# Tab-separated, with header and index columns; missing cells are written as 0
+print(filtered.to_csv(header=True, index=True, sep='\t', na_rep="0"))
+
Index: doc/theses/mike_brooks_MMath/plots/string-allocn.d
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-allocn.d	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-allocn.d	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,8 @@
+plots/string-allocn.gp.INPUTS: build/plot-string-allocn.dat
+plots/string-allocn.gp.INPUTS: build/plot-string-allocn-attrib.dat
+plots/string-allocn.py.INPUTS: benchmarks/string/result-allocate-space-cfa.ssv
+plots/string-allocn.py.INPUTS: benchmarks/string/result-allocate-space-stl.ssv
+plots/string-allocn.py.INPUTS: benchmarks/string/result-allocate-speed-cfa.csv
+plots/string-allocn.py.INPUTS: benchmarks/string/result-allocate-speed-stl.csv
+plots/string-allocn-attrib.py.INPUTS: benchmarks/string/result-allocate-attrib-stl.ssv
+plots/string-allocn-attrib.py.INPUTS: benchmarks/string/result-allocate-attrib-cfa.ssv
Index: doc/theses/mike_brooks_MMath/plots/string-allocn.gp
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-allocn.gp	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-allocn.gp	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,108 @@
+set terminal pdf color enhanced size 6.0in,4.0in font "Times,17"
+#set terminal postscript portrait enhanced size 7.5, 10. color solid 9.5;
+#set terminal wxt size 950,1250
+# Three panels in one PDF, laid out in a single row
+INDIR="build"
+OUTDIR="build"
+# x values in the first two panels are column 3 divided by SCALE
+SCALE=1024
+
+# common to all
+set macros
+set output OUTDIR."/plot-string-allocn.pdf"
+set multiplot layout 1, 3 ;
+set key outside top center horizontal
+set grid
+
+# common to only first two graphs
+set logscale x 2
+#set mxtics 3                      # 3 steps within each doubling (e.g. 3 steps of 32 between 32 and 128 => tick on 32s in there)
+set xlabel "Heap Used (B)"
+set logscale y 10
+
+#
+# CFA multisize
+#
+
+set ylabel "Duration (ns)" offset 2,0
+set yrange[35:125]
+set ytics add (40, 50, 60, 70, 80, 90, 110, 120)
+
+set xrange[32:4096]
+set xtics rotate by -90
+set xtics ("" 32, "64 k" 64, "" 128, "256 k" 256, "" 512, "1 M" 1024, "" 2048, "4 M" 4096)
+# Data-file indexes 0..4 are the five curves; rows whose column 4 equals 1 get an extra, larger marker
+# First each curve, then each default-expansion point
+plot INDIR."/plot-string-allocn.dat" \
+	   i 0 using ($3/SCALE):2 title columnheader(1) with linespoints lt rgb "blue"    pt  2   ps 1 lw 1, \
+	'' i 1 using ($3/SCALE):2 title columnheader(1) with linespoints lt rgb "red"     pt  3   ps 1 lw 1, \
+	'' i 2 using ($3/SCALE):2 title columnheader(1) with linespoints lt rgb "brown"   pt  8   ps 1 lw 1, \
+	'' i 3 using ($3/SCALE):2 title columnheader(1) with linespoints lt rgb "black"   pt  10  ps 1 lw 1, \
+	'' i 4 using ($3/SCALE):2 title columnheader(1) with linespoints lt rgb "magenta" pt  12  ps 1 lw 1, \
+	'' i 0 using ( ($4 == 1) ? ($3/SCALE) : 1/0 ):2 notitle with points lt rgb "blue"    pt  66 ps 2, \
+	'' i 1 using ( ($4 == 1) ? ($3/SCALE) : 1/0 ):2 notitle with points lt rgb "red"     pt  66 ps 2, \
+	'' i 2 using ( ($4 == 1) ? ($3/SCALE) : 1/0 ):2 notitle with points lt rgb "brown"   pt  66 ps 2, \
+	'' i 3 using ( ($4 == 1) ? ($3/SCALE) : 1/0 ):2 notitle with points lt rgb "black"   pt  66 ps 2, \
+	'' i 4 using ( ($4 == 1) ? ($3/SCALE) : 1/0 ):2 notitle with points lt rgb "magenta" pt  66 ps 2
+# Clear this panel's axis settings before the next panel
+unset ylabel
+
+unset xtics
+unset ytics
+unset yrange
+unset xrange
+
+
+#
+# STL comparison
+#
+
+# This panel re-plots data-file indexes 1 and 3 and adds indexes 6 and 8, split by the platform name in column 4
+set yrange[40:85]
+set ytics add (40, 45, 50, 55, 60, 65, 70, 75, 80, 85)
+set mytics 90                     # 90 steps within each decade (e.g. 90 steps of 1 between 10 and 100 => tick on 1s in there)
+
+set xrange[64:4096]
+set xtics rotate by -90
+set xtics ("64 k" 64, "128 k" 128, "256 k" 256, "512 k" 512, "1 M" 1024, "2 M" 2048, "4 M" 4096)
+
+# skullduggeries:
+# hardcoding chunk index and assuming data in the chunk (by hardcoding only the stl series title)
+# series order is meaningless but important: achieves z-order readability and legend order acceptability
+
+plot INDIR."/plot-string-allocn.dat" \
+	   i 8 using                          ($3/SCALE)        :2 title "tradeoff"      with lines       lt rgb "#77000000"  dt (2,2)       lw 8, \
+	'' i 1 using                          ($3/SCALE)        :2 title columnheader(1) with linespoints lt rgb "red"        pt  3    ps 1  lw 1, \
+	'' i 6 using ( (strcol(4) eq "cfa") ? ($3/SCALE) : 1/0 ):2 notitle               with points      lt rgb "red"        pt 66    ps 2,       \
+	'' i 6 using                          ($3/SCALE)        :2 notitle               with lines       lt rgb "#77000000"  dt (2,2)       lw 8, \
+	'' i 6 using ( (strcol(4) eq "stl") ? ($3/SCALE) : 1/0 ):2 title "stl, len=50"   with points      lt rgb "red"        pt 5     ps 1,       \
+	'' i 3 using                          ($3/SCALE)        :2 title columnheader(1) with linespoints lt rgb "black"      pt  10   ps 1  lw 1, \
+	'' i 8 using ( (strcol(4) eq "stl") ? ($3/SCALE) : 1/0 ):2 title "stl, len=200"  with points      lt rgb "black"      pt 5     ps 1,       \
+	'' i 8 using ( (strcol(4) eq "cfa") ? ($3/SCALE) : 1/0 ):2 notitle               with points      lt rgb "black"      pt 66    ps 2
+
+
+unset mytics
+unset ytics
+unset yrange
+unset xrange
+
+
+
+# undo the settings common to the first two graphs
+unset logscale
+unset xlabel
+unset mxtics
+
+#
+# Attribution
+#
+# NOTE(review): both 'clustered' and 'rowstacked' are named in the histogram style below - confirm which one is meant to take effect
+set style data histogram
+set ytics auto
+set style histogram clustered gap 1 rowstacked
+set style fill solid border -1
+set boxwidth 0.8
+set xtics rotate by -45
+# Columns 3..8 are plotted, each titled by its header; x tick labels read "<column 2>, len=<column 1>"
+plot for [col=3:8] \
+    INDIR.'/plot-string-allocn-attrib.dat' using col:xticlabels(stringcolumn(2).", len=".stringcolumn(1)) index 0 title columnheader(col)
Index: doc/theses/mike_brooks_MMath/plots/string-allocn.py
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-allocn.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-allocn.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,97 @@
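+# Read result-allocate-speed-{cfa,stl}.csv and result-allocate-space-{cfa,stl}.ssv
+# Output (stdout) presumably becomes plot-string-allocn.dat, which string-allocn.gp plots
+
+# Project details
+# Join timings to sizings on sut, corpus, expansion
+# Flag rows whose expansion is one of defaultExpansions
+# First, one chunk per favoured string length for cfa only, headed by "cfa, len=<n>"
+# Then, one chunk per favoured string length across platforms, default expansions only
+# output columns:
+# expansion op-duration-ns hw_cur_req_mem(B), then is-default (first part) or sut-platform (second part)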
+
+import pandas as pd
+import numpy as np
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(__file__))
+from common import *
+
+# re: apparent cherrypicking
+# The system's response to the liveness threshold is not smooth.
+# The system only uses the threshold to decide whether it will double the text heap again or not.
+# The system's speed for a given string size in a given amount of memory is not affected by the specific value of the liveness threshold.
+# Goals with this selection are
+#  - showing one speed result per <string size, memory usage amount>
+#  - cropping diminishing or negative returns for large memory sizes
+#    - diminishing is obvious, already shown past chosen sweet spot in this selection
+#    - negative caused by overflowing llc, not relevant to string impl
+favSizes = {20:[-1.0, 0.05, 0.1, 0.2, 0.5, 0.9],  # corpus-meanlen -> expansion values kept for that length's CFA curve
+            50:[-1.0, 0.05, 0.1, 0.2, 0.5, 0.9],
+            100:[-1.0, 0.1, 0.2, 0.5, 0.9],
+            200:[-1.0, 0.1, 0.2, 0.5, 0.9],
+            500:[-1.0, 0.4, 0.9, 0.98]}
+# expansion values that get is-default = 1
+defaultExpansions = [-1, 0.2]
+
+cfatimings = loadParseTimingData('result-allocate-speed-cfa.csv',
+                xClasNames=['expansion'], xClasDtypes={'expansion':'Float64'},
+                xFactNames=['topIters'], xFactDtypes={'topIters':np.int64})
+
+cfasizings = loadParseSizingData('result-allocate-space-cfa.ssv', xClasNames=['expansion'], xClasDtypes={'expansion':'Float64'})
+
+stltimings = loadParseTimingData('result-allocate-speed-stl.csv',
+                xClasNames=['expansion'], xClasDtypes={'expansion':'Float64'},
+                xFactNames=['topIters'], xFactDtypes={'topIters':np.int64})
+
+stlsizings = loadParseSizingData('result-allocate-space-stl.ssv', xClasNames=['expansion'], xClasDtypes={'expansion':'Float64'})
+
+timings = pd.concat([cfatimings, stltimings])
+sizings = pd.concat([cfasizings, stlsizings])
+# Attach hw_cur_req_mem(B) to each timing row; inner join (the pd.merge default) on the three keys
+combined = pd.merge(
+    left=timings,
+    right=sizings[['sut', 'corpus','expansion','hw_cur_req_mem(B)']],
+    on=['sut', 'corpus','expansion']
+)
+# 1 where the row's expansion is one of defaultExpansions, otherwise 0
+combined['is-default'] = np.isin(combined['expansion'], defaultExpansions).astype(int)
+
+# print ('!!')
+# print(combined)
+
+
+# Emit
+
+# First, for the CFA curves
+sut = "cfa"
+sutGroup = combined.groupby('sut-platform').get_group(sut)
+
+groupedSize = sutGroup.groupby('corpus-meanlen')
+# One chunk per favoured length: quoted title line, tab-separated rows, then blank lines
+for sz, szgroup in groupedSize:
+    # lengths that are not keys of favSizes are skipped
+    if sz in favSizes.keys():
+            szgroup_sorted = szgroup.sort_values(by='expansion')
+
+            print('"{sut}, len={len}"'.format(sut=sut, len=sz))
+            # print(szgroup_sorted)  ##
+            # print(szgroup_sorted['expansion'], 'isin', favSizes[sz]) ##
+            favoured = szgroup_sorted.loc[szgroup_sorted['expansion'].isin(favSizes[sz])]
+            # print('!') ##
+            # print(favoured) ##
+            text = favoured[['expansion', 'op-duration-ns', 'hw_cur_req_mem(B)', 'is-default']].to_csv(header=False, index=False, sep='\t')
+            print(text)
+            print()
+
+# Again, for the STL-comparisons, default expansion only
+# (rows of every platform whose is-default flag is 1)
+atDefaults = combined.groupby('is-default').get_group(1)
+
+for sz, szgroup in atDefaults.groupby('corpus-meanlen'):
+    # here the chunk's first line is the bare length, and the last column is sut-platform rather than is-default
+    if sz in favSizes.keys():
+            print(sz)
+            text = szgroup[['expansion', 'op-duration-ns', 'hw_cur_req_mem(B)', 'sut-platform']].to_csv(header=False, index=False, sep='\t')
+            print(text)
+            print()
Index: doc/theses/mike_brooks_MMath/plots/string-pbv-fixcorp.py
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-pbv-fixcorp.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-pbv-fixcorp.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,48 @@
+# Read result-append-pbv.csv
+# Output (stdout) presumably becomes plot-string-pbv-fixcorp.dat
+
+# Project details
+# Filter operation=pbv, corpus-nstrs=1, corpus-runid=1
+# Split "series" groups of sut; only those in the "pretty" list
+# Sort each series by string-length; one row per string-length is expected but not checked
+# output:
+# string-len op-duration
+# in chunks, each headed by pretty(sut)
+
+import pandas as pd
+import numpy as np
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(__file__))
+from common import *
+
+prettyFieldNames = {  # sut -> series title; suts not listed here are not emitted
+    "cfa-ll-share-na": "{/Helvetica=15 C{/Symbol \\42}} share",
+    "stl-na-na-na": "STL",
+}
+
+timings = loadParseTimingData('result-append-pbv.csv')
+
+
+# Filter operation=pbv, corpus=1-*-1
+# (get_group raises KeyError when no row has the requested value)
+timings = timings.groupby('operation').get_group('pbv')
+timings = timings.groupby('corpus-nstrs').get_group(1)
+timings = timings.groupby('corpus-runid').get_group(1)
+
+
+# Emit in groups
+# groupby iterates suts in sorted key order, which fixes the order of the output chunks
+groupedSut = timings.groupby('sut')
+
+for sut, sgroup in groupedSut:
+
+    if sut in prettyFieldNames:
+
+        sgroup_sorted = sgroup.sort_values(by='corpus-meanlen')
+        # chunk = quoted title line, tab-separated <corpus-meanlen, op-duration-ns> rows, then blank lines
+        print('"{header}"'.format(header=prettyFieldNames[sut]))
+        text = sgroup_sorted[['corpus-meanlen', 'op-duration-ns']].to_csv(header=False, index=False, sep='\t')
+        print(text)
+        print()
Index: doc/theses/mike_brooks_MMath/plots/string-pbv-varcorp.py
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-pbv-varcorp.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-pbv-varcorp.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,48 @@
+# Read result-append-pbv.csv
+# Output (stdout) presumably becomes plot-string-pbv-varcorp.dat
+
+# Project details
+# Filter operation=pbv, corpus-nstrs=100, corpus-runid=1
+# Split "series" groups of sut; only those in the "pretty" list
+# Sort each series by string-length; one row per string-length is expected but not checked
+# output:
+# string-len op-duration
+# in chunks, each headed by pretty(sut)
+
+import pandas as pd
+import numpy as np
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(__file__))
+from common import *
+
+prettyFieldNames = {  # sut -> series title; suts not listed here are not emitted
+    "cfa-ll-share-na": "{/Helvetica=15 C{/Symbol \\42}} share",
+    "stl-na-na-na": "STL",
+}
+
+timings = loadParseTimingData('result-append-pbv.csv')
+
+
+# Filter operation=pbv, corpus=100-*-1
+# (get_group raises KeyError when no row has the requested value)
+timings = timings.groupby('operation').get_group('pbv')
+timings = timings.groupby('corpus-nstrs').get_group(100)
+timings = timings.groupby('corpus-runid').get_group(1)
+
+
+# Emit in groups
+# groupby iterates suts in sorted key order, which fixes the order of the output chunks
+groupedSut = timings.groupby('sut')
+
+for sut, sgroup in groupedSut:
+
+    if sut in prettyFieldNames:
+
+        sgroup_sorted = sgroup.sort_values(by='corpus-meanlen')
+        # chunk = quoted title line, tab-separated <corpus-meanlen, op-duration-ns> rows, then blank lines
+        print('"{header}"'.format(header=prettyFieldNames[sut]))
+        text = sgroup_sorted[['corpus-meanlen', 'op-duration-ns']].to_csv(header=False, index=False, sep='\t')
+        print(text)
+        print()
Index: doc/theses/mike_brooks_MMath/plots/string-pbv.d
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-pbv.d	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-pbv.d	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,3 @@
+plots/string-pbv.gp.INPUTS: build/plot-string-pbv-varcorp.dat build/plot-string-pbv-fixcorp.dat | build
+plots/string-pbv-varcorp.py.INPUTS: benchmarks/string/result-append-pbv.csv
+plots/string-pbv-fixcorp.py.INPUTS: benchmarks/string/result-append-pbv.csv
Index: doc/theses/mike_brooks_MMath/plots/string-pbv.gp
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-pbv.gp	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-pbv.gp	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,36 @@
+set terminal pdf color enhanced size 6.0in,3.0in font "Times,17"
+#set terminal postscript portrait enhanced size 7.5, 10. color solid 9.5;
+#set terminal wxt size 950,1250
+
+INDIR="build"
+OUTDIR="build"
+
+set macros
+set output OUTDIR."/plot-string-pbv.pdf"
+# Two panels in one PDF: the varcorp data is plotted first, the fixcorp data second
+set multiplot layout 1, 2 ;
+
+# Settings below stay in force for both panels unless changed before the second plot
+#set pointsize 2.0
+set grid
+set key bottom right
+#set key at screen 0.45, screen 0.8
+set xtics (1,2,5,10,20,50,100,200,500)
+set logscale x
+set logscale y 2
+set xlabel "String length passed, varying (mean)"
+set ylabel "Time per pass (ns, mean), log_{2} scale"
+set yrange [4:64]
+set linetype 3 dashtype 2
+set linetype 4 dashtype 2
+plot INDIR."/plot-string-pbv-varcorp.dat" \
+	   i 0 using 1:2 title columnheader(1) with linespoints lt rgb "red"	pt  3  ps 1 lw 1, \
+	'' i 1 using 1:2 title columnheader(1) with linespoints lt rgb "blue"	pt  6  ps 1 lw 1
+# Second panel: new x label; 'set ylabel' with no text clears the y label
+set xlabel "String length passed, fixed"
+set ylabel
+plot INDIR."/plot-string-pbv-fixcorp.dat"  \
+	   i 0 using 1:2 title columnheader(1) with linespoints lt rgb "red"	pt  3  ps 1 lw 1, \
+	'' i 1 using 1:2 title columnheader(1) with linespoints lt rgb "blue"	pt  6  ps 1 lw 1
+
+unset multiplot
Index: doc/theses/mike_brooks_MMath/plots/string-peq-cppemu.py
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-peq-cppemu.py	(revision 411aa65a1e6399ae182c88528a23be46ed2f8dd6)
+++ doc/theses/mike_brooks_MMath/plots/string-peq-cppemu.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -12,7 +12,9 @@
 import pandas as pd
 import numpy as np
+import sys
 import os
 
-infile = os.path.dirname(os.path.abspath(__file__)) + '/../benchmarks/string/result-append-pbv.csv'
+sys.path.insert(0, os.path.dirname(__file__))
+from common import *
 
 prettyFieldNames = {
@@ -23,58 +25,15 @@
 }
 
-timings = pd.read_csv(
-    infile,
-    names=['test', 'corpus', 'concatsPerReset', 'corpusItemCount', 'corpusMeanLenChars', 'concatDoneActualCount', 'execTimeActualSec'],
-    dtype={'test':                  str,
-           'corpus':                str,
-           'concatsPerReset':       'Int64', # allows missing; https://stackoverflow.com/a/70626154
-           'corpusItemCount':       np.int64,
-           'corpusMeanLenChars':    np.float64,
-           'concatDoneActualCount': np.int64,
-           'execTimeActualSec':     np.float64},
-    na_values=['xxx'],
-)
-# print(timings.head())
+timings = loadParseTimingData('result-append-pbv.csv')
 
+# Filter operation=peq, corpus=100-*-1
 
-# project: parse executable and corpus names
-
-timings[['test-slug',
-     'sut-platform',
-     'operation',
-     'sut-cfa-level',
-     'sut-cfa-sharing',
-     'op-alloc']] = timings['test'].str.strip().str.split('-', expand=True)
-timings['sut'] = timings[['sut-platform',
-                    'sut-cfa-level',
-                    'sut-cfa-sharing',
-                    'op-alloc']].agg('-'.join, axis=1)
-
-timings[['corpus-basename',
-     'corpus-ext']] = timings['corpus'].str.strip().str.split('.', expand=True)
-timings[['corpus-slug',
-     'corpus-nstrs',
-     'corpus-meanlen',
-     'corpus-runid']] = timings['corpus-basename'].str.strip().str.split('-', expand=True)
-timings["corpus-nstrs"] = pd.to_numeric(timings["corpus-nstrs"])
-timings["corpus-meanlen"] = pd.to_numeric(timings["corpus-meanlen"])
-timings["corpus-runid"] = pd.to_numeric(timings["corpus-runid"])
-
-
-# project: calculate fact
-
-timings['op-duration-s'] = timings['execTimeActualSec'] / timings['concatDoneActualCount']
-timings['op-duration-ns'] = timings['op-duration-s'] * 1000 * 1000 * 1000
-
-
-# Filter operation=peq
-
-groupedOp = timings.groupby('operation')
-tgtOpTimings = groupedOp.get_group('peq')
-
+timings = timings.groupby('operation').get_group('peq')
+timings = timings.groupby('corpus-nstrs').get_group(100)
+timings = timings.groupby('corpus-runid').get_group(1)
 
 # Emit in groups
 
-groupedSut = tgtOpTimings.groupby('sut')
+groupedSut = timings.groupby('sut')
 
 for sut, sgroup in groupedSut:
Index: doc/theses/mike_brooks_MMath/plots/string-peq-sharing.d
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-peq-sharing.d	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-peq-sharing.d	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,2 @@
+plots/string-peq-sharing.gp.INPUTS: build/plot-string-peq-sharing.dat | build
+plots/string-peq-sharing.py.INPUTS: benchmarks/string/result-append-pbv.csv
Index: doc/theses/mike_brooks_MMath/plots/string-peq-sharing.gp
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-peq-sharing.gp	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-peq-sharing.gp	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,25 @@
+set terminal pdf color enhanced size 6.0in,3.0in font "Times,17"
+#set terminal postscript portrait enhanced size 7.5, 10. color solid 9.5;
+#set terminal wxt size 950,1250
+
+INDIR="build"
+OUTDIR="build"
+# Single panel: data-file indexes 0..3 are the four series, each titled from the first line of its chunk
+set macros
+set output OUTDIR."/plot-string-peq-sharing.pdf"
+#set pointsize 2.0
+set grid
+set key top left
+set xtics (1,2,5,10,20,50,100,200,500)
+set logscale x
+#set logscale y
+set yrange [10:115]
+set xlabel "String Length being appended (mean, geo. dist.), log scale" offset 2,0
+set ylabel "Time per append (ns, mean)"
+set linetype 2 dashtype 2
+set linetype 4 dashtype 2
+plot INDIR."/plot-string-peq-sharing.dat" \
+	   i 0 using 1:2 title columnheader(1) with linespoints lt rgb "red"	pt  2  ps 1 lw 1, \
+	'' i 1 using 1:2 title columnheader(1) with linespoints lt rgb "red"	pt  3  ps 1 lw 1, \
+	'' i 2 using 1:2 title columnheader(1) with linespoints lt rgb "blue"	pt  6  ps 1 lw 1, \
+	'' i 3  using 1:2 title columnheader(1) with linespoints lt rgb "blue"	pt  8  ps 1 lw 1
Index: doc/theses/mike_brooks_MMath/plots/string-peq-sharing.py
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-peq-sharing.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-peq-sharing.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,50 @@
+# Read result-append-pbv.csv
+# Output (stdout) presumably becomes plot-string-peq-sharing.dat
+
+# Project details
+# Filter operation=peq, corpus-nstrs=100, corpus-runid=1
+# Split "series" groups of sut; only those in the "pretty" list
+# Sort each series by string-length; one row per string-length is expected but not checked
+# output:
+# string-len op-duration
+# in chunks, each headed by pretty(sut)
+
+import pandas as pd
+import numpy as np
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(__file__))
+from common import *
+
+prettyFieldNames = {  # sut -> series title; suts not listed here are not emitted
+    "cfa-ll-share-fresh": "{/Helvetica=15 C{/Symbol \\42} +=} share fresh",
+    "cfa-ll-share-reuse": "{/Helvetica=15 C{/Symbol \\42} +=} share reuse",
+    "stl-na-na-fresh": "STL {/Helvetica=15 +=} fresh",
+    "stl-na-na-reuse": "STL {/Helvetica=15 +=} reuse",
+}
+
+timings = loadParseTimingData('result-append-pbv.csv')
+
+
+# Filter operation=peq, corpus=100-*-1
+# (get_group raises KeyError when no row has the requested value)
+timings = timings.groupby('operation').get_group('peq')
+timings = timings.groupby('corpus-nstrs').get_group(100)
+timings = timings.groupby('corpus-runid').get_group(1)
+
+
+# Emit in groups
+# groupby iterates suts in sorted key order, which fixes the order of the output chunks
+groupedSut = timings.groupby('sut')
+
+for sut, sgroup in groupedSut:
+
+    if sut in prettyFieldNames:
+
+        sgroup_sorted = sgroup.sort_values(by='corpus-meanlen')
+        # chunk = quoted title line, tab-separated <corpus-meanlen, op-duration-ns> rows, then blank lines
+        print('"{header}"'.format(header=prettyFieldNames[sut]))
+        text = sgroup_sorted[['corpus-meanlen', 'op-duration-ns']].to_csv(header=False, index=False, sep='\t')
+        print(text)
+        print()
Index: doc/theses/mike_brooks_MMath/plots/string-pta-sharing.d
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-pta-sharing.d	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-pta-sharing.d	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,2 @@
+plots/string-pta-sharing.gp.INPUTS: build/plot-string-pta-sharing.dat | build
+plots/string-pta-sharing.py.INPUTS: benchmarks/string/result-append-pbv.csv
Index: doc/theses/mike_brooks_MMath/plots/string-pta-sharing.gp
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-pta-sharing.gp	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-pta-sharing.gp	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,24 @@
+set terminal pdf color enhanced size 6.0in,3.0in font "Times,17"
+#set terminal postscript portrait enhanced size 7.5, 10. color solid 9.5;
+#set terminal wxt size 950,1250
+
+INDIR="build"
+OUTDIR="build"
+# Single panel: data-file indexes 0..3 are the four series, each titled from the first line of its chunk
+set macros
+set output OUTDIR."/plot-string-pta-sharing.pdf"
+#set pointsize 2.0
+set grid
+set key top left
+set xtics (1,2,5,10,20,50,100,200,500)
+set logscale x
+set yrange [8:4096]
+set logscale y 2
+set xlabel "String Length being appended (mean, geo. dist.), log scale" offset 2,0
+set ylabel "Time per append (ns, mean), log_{2} scale"
+#show colornames
+plot INDIR."/plot-string-pta-sharing.dat" \
+	   i 0 using 1:2 title columnheader(1) with linespoints lt rgb "red"	pt  2  ps 1 lw 1, \
+	'' i 1 using 1:2 title columnheader(1) with linespoints lt rgb "dark-green" pt  4  ps 1 lw 1, \
+	'' i 2 using 1:2 title columnheader(1) with linespoints lt rgb "blue"	pt  6  ps 1 lw 1, \
+	'' i 3  using 1:2 title columnheader(1) with linespoints lt rgb "dark-green" pt  12  ps 1 lw 1
Index: doc/theses/mike_brooks_MMath/plots/string-pta-sharing.py
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-pta-sharing.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
+++ doc/theses/mike_brooks_MMath/plots/string-pta-sharing.py	(revision 241042418538c37dbb8067200a54e09c370946d2)
@@ -0,0 +1,57 @@
+# Read result-append-pbv.csv
+# Output (stdout) presumably becomes plot-string-pta-sharing.dat
+
+# Project details
+# Filter corpus-nstrs=100, corpus-runid=1
+# Split "series" groups of sut, then of operation (peq, pta); only those in the "pretty" list
+# Sort each series by string-length; one row per string-length is expected but not checked
+# output:
+# string-len op-duration
+# in chunks, each headed by pretty(operation, sut)
+
+import pandas as pd
+import numpy as np
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(__file__))
+from common import *
+
+prettyFieldNames = {  # operation -> (sut -> series title); combinations not listed here are not emitted
+    "peq": {
+        "cfa-ll-share-fresh": "{/Helvetica=15 C{/Symbol \\42} x += y} share fresh",
+        "stl-na-na-fresh": "STL {/Helvetica=15 x += y} fresh",
+    },
+    "pta": {
+        "cfa-ll-share-fresh": "{/Helvetica=15 C{/Symbol \\42} x = x + y} share fresh",
+        "stl-na-na-fresh": "STL {/Helvetica=15  x = x + y} fresh",
+    }
+}
+
+timings = loadParseTimingData('result-append-pbv.csv')
+
+
+# Filter corpus=100-*-1
+# (get_group raises KeyError when no row has the requested value)
+timings = timings.groupby('corpus-nstrs').get_group(100)
+timings = timings.groupby('corpus-runid').get_group(1)
+
+# Emit in groups
+# Chunk order: suts in groupby's sorted key order, and within each sut the operations in the dict order above
+groupedSut = timings.groupby('sut')
+
+for sut, sgroup in groupedSut:
+    groupedOp = sgroup.groupby('operation')
+    for op,opPretty in prettyFieldNames.items():
+
+        if op in groupedOp.groups:
+            tgtOpTimings = groupedOp.get_group(op)
+
+            if sut in opPretty:
+
+                sgroup_sorted = tgtOpTimings.sort_values(by='corpus-meanlen')
+                # chunk = quoted title line, tab-separated <corpus-meanlen, op-duration-ns> rows, then blank lines
+                print('"{header}"'.format(header=opPretty[sut]))
+                text = sgroup_sorted[['corpus-meanlen', 'op-duration-ns']].to_csv(header=False, index=False, sep='\t')
+                print(text)
+                print()
