Index: doc/theses/mike_brooks_MMath/pictures/string-graph-pta-sharing.dat
===================================================================
--- doc/theses/mike_brooks_MMath/pictures/string-graph-pta-sharing.dat	(revision 873e96c50fdbcd9c570704baf2d35ee75af83a05)
+++ doc/theses/mike_brooks_MMath/pictures/string-graph-pta-sharing.dat	(revision b22b28e256792b1873d88fb1a1a17b8c6a086ce8)
@@ -9,4 +9,16 @@
 200	50.3
 500	79.2
+
+
+"{/Helvetica=15 C{/Symbol \42} +=} share reuse"
+1	17.4
+2	21.5
+5	23.2
+10	23.5
+20	26.7
+50	30.9
+100	40.9
+200	50.3
+500	80.5
 
 
Index: doc/theses/mike_brooks_MMath/plot-peq-sharing.gp
===================================================================
--- doc/theses/mike_brooks_MMath/plot-peq-sharing.gp	(revision 873e96c50fdbcd9c570704baf2d35ee75af83a05)
+++ doc/theses/mike_brooks_MMath/plot-peq-sharing.gp	(revision b22b28e256792b1873d88fb1a1a17b8c6a086ce8)
@@ -13,4 +13,5 @@
 set logscale x
 #set logscale y 2
+set yrange [0:115]
 set xlabel "String Length being appended (mean, geo. dist.), log scale" offset 2,0
 set ylabel "Time per append (ns, mean)"
Index: doc/theses/mike_brooks_MMath/plot-pta-sharing.gp
===================================================================
--- doc/theses/mike_brooks_MMath/plot-pta-sharing.gp	(revision 873e96c50fdbcd9c570704baf2d35ee75af83a05)
+++ doc/theses/mike_brooks_MMath/plot-pta-sharing.gp	(revision b22b28e256792b1873d88fb1a1a17b8c6a086ce8)
@@ -12,13 +12,16 @@
 set xtics (1,2,5,10,20,50,100,200,500)
 set logscale x
+set yrange [8:4096]
 set logscale y 2
 set xlabel "String Length being appended (mean, geo. dist.), log scale" offset 2,0
 set ylabel "Time per append (ns, mean), log_{2} scale"
-set linetype 5 dashtype 2
+set linetype 2 dashtype 2
+set linetype 6 dashtype 2
 #show colornames
 plot DIR."/string-graph-pta-sharing.dat" \
 	   i 0 using 1:2 title columnheader(1) with linespoints lt rgb "red"	pt  2  ps 1 lw 1, \
-	'' i 1 using 1:2 title columnheader(1) with linespoints lt rgb "dark-green" pt  4  ps 1 lw 1, \
-	'' i 2 using 1:2 title columnheader(1) with linespoints lt rgb "blue"	pt  6  ps 1 lw 1, \
-	'' i 3  using 1:2 title columnheader(1) with linespoints lt rgb "dark-green" pt  12  ps 1 lw 1, \
-	'' i 4  using 1:2 title columnheader(1) with linespoints lt rgb "blue"	pt  8  ps 1 lw 1
+	'' i 1 using 1:2 title columnheader(1) with linespoints lt rgb "red"	pt  3  ps 1 lw 1, \
+	'' i 2 using 1:2 title columnheader(1) with linespoints lt rgb "dark-green" pt  4  ps 1 lw 1, \
+	'' i 3 using 1:2 title columnheader(1) with linespoints lt rgb "blue"	pt  6  ps 1 lw 1, \
+	'' i 4  using 1:2 title columnheader(1) with linespoints lt rgb "dark-green" pt  12  ps 1 lw 1, \
+	'' i 5  using 1:2 title columnheader(1) with linespoints lt rgb "blue"	pt  8  ps 1 lw 1
Index: doc/theses/mike_brooks_MMath/plots/string-peq-cppemu.gp
===================================================================
--- doc/theses/mike_brooks_MMath/plots/string-peq-cppemu.gp	(revision 873e96c50fdbcd9c570704baf2d35ee75af83a05)
+++ doc/theses/mike_brooks_MMath/plots/string-peq-cppemu.gp	(revision b22b28e256792b1873d88fb1a1a17b8c6a086ce8)
@@ -13,6 +13,6 @@
 set xtics (1,2,5,10,20,50,100,200,500)
 set logscale x
-set logscale y
-set yrange [10:200]
+#set logscale y
+set yrange [0:115]
 set xlabel "String Length being appended (mean, geo. dist.), log scale" offset 2,0
 set ylabel "Time per append (ns, mean)"
Index: doc/theses/mike_brooks_MMath/string.tex
===================================================================
--- doc/theses/mike_brooks_MMath/string.tex	(revision 873e96c50fdbcd9c570704baf2d35ee75af83a05)
+++ doc/theses/mike_brooks_MMath/string.tex	(revision b22b28e256792b1873d88fb1a1a17b8c6a086ce8)
@@ -17,19 +17,19 @@
 \begin{cquote}
 \begin{tabular}{@{}l|l|l|l@{}}
-C @char [ ]@			&  \CC @string@			& Java @String@     & \CFA @string@	\\
+C @char [ ]@			&  \CC @string@			& Java @String@	& \CFA @string@	\\
 \hline
-@strcpy@, @strncpy@		& @=@					& @=@               & @=@	\\
-@strcat@, @strncat@		& @+@, @+=@				& @+@, @+=@         & @+@, @+=@	\\
+@strcpy@, @strncpy@		& @=@					& @=@			& @=@	\\
+@strcat@, @strncat@		& @+@, @+=@				& @+@, @+=@		& @+@, @+=@	\\
 @strcmp@, @strncmp@		& @==@, @!=@, @<@, @<=@, @>@, @>=@
-                                                & @equals@, @compareTo@
-																	& @==@, @!=@, @<@, @<=@, @>@, @>=@ \\
-@strlen@				& @length@, @size@	 	& @length@			& @size@ 	\\
-@[ ]@					& @[ ]@				 	& @charAt@          & @[ ]@	\\
-@strncpy@				& @substr@			 	& @substring@       & @( )@, on RHS of @=@	\\
-@strncpy@				& @replace@				& @replace@         & @( )@, on LHS of @=@ \\
-@strstr@				& @find@				& @indexOf@         & @find@ \\
-@strcspn@				& @find_first_of@		& @matches@         & @include@ \\
-@strspn@				& @find_first_not_of@	& @matches@         & @exclude@ \\
-n/a						& @c_str@, @data@		& n/a               & @strcpy@, @strncpy@ \\
+												& @equals@, @compareTo@
+																& @==@, @!=@, @<@, @<=@, @>@, @>=@ \\
+@strlen@				& @length@, @size@	 	& @length@		& @size@ 	\\
+@[ ]@					& @[ ]@				 	& @charAt@		& @[ ]@	\\
+@strncpy@				& @substr@			 	& @substring@	& @( )@, on RHS of @=@	\\
+@strncpy@				& @replace@				& @replace@		& @( )@, on LHS of @=@ \\
+@strstr@				& @find@				& @indexOf@		& @find@ \\
+@strcspn@				& @find_first_of@		& @matches@		& @include@ \\
+@strspn@				& @find_first_not_of@	& @matches@		& @exclude@ \\
+N/A						& @c_str@, @data@		& N/A			& @strcpy@, @strncpy@ \\
 \end{tabular}
 \end{cquote}
@@ -53,5 +53,5 @@
 
 
-\section{\CFA \lstinline{string} type}
+\section{\CFA \lstinline{string} Type}
 \label{s:stringType}
 
@@ -272,6 +272,6 @@
 ch = ch + 'b'; $\C[2in]{// LHS disambiguate, add character values}$
 s = 'a' + 'b'; $\C{// LHS disambiguate, concatenate characters}$
-printf( "%c\n", @'a' + 'b'@ ); $\C[2in]{// no LHS information, ambiguous}$
-printf( "%c\n", @(return char)@('a' + 'b') ); $\C{// disambiguate with ascription cast}$
+printf( "%c\n", @'a' + 'b'@ ); $\C{// no LHS information, ambiguous}$
+printf( "%c\n", @(return char)@('a' + 'b') ); $\C{// disambiguate with ascription cast}\CRT$
 \end{cfa}
 The ascription cast, @(return T)@, disambiguates by stating a (LHS) type to use during expression resolution (not a conversion).
@@ -319,6 +319,6 @@
 ch = ch * 3; $\C[2in]{// LHS disambiguate, multiply character values}$
 s = 'a' * 3; $\C{// LHS disambiguate, concatenate characters}$
-printf( "%c\n", @'a' * 3@ ); $\C[2in]{// no LHS information, ambiguous}$
-printf( "%c\n", @(return char)@('a' * 3) ); $\C{// disambiguate with ascription cast}$
+printf( "%c\n", @'a' * 3@ ); $\C{// no LHS information, ambiguous}$
+printf( "%c\n", @(return char)@('a' * 3) ); $\C{// disambiguate with ascription cast}\CRT$
 \end{cfa}
 Fortunately, character multiplication without LHS information is even rarer than addition, so repurposing the operator @*@ for @string@ types is not a problem.
@@ -600,5 +600,5 @@
 &
 \begin{cfa}
-for ( ;; ) {
+for () {
 	size_t posn = exclude( line, alpha );
   if ( posn == len( line ) ) break;
@@ -722,5 +722,5 @@
 \end{tabular}
 \end{cquote}
-Input text can be gulped, including whitespace, from the current point to an arbitrary delimiter character using @getline@.
+Input text can be \emph{gulped}, including whitespace, from the current point to an arbitrary delimiter character using @getline@.
 
 The \CFA philosophy for input is that, for every constant type in C, these constants should be usable as input.
@@ -760,5 +760,5 @@
 \end{tabular}
 \end{cquote}
-Note, the ability to read in quoted strings to match with program string constants.
+Note, the ability to read in quoted strings with whitespace to match with program string constants.
 The @nl@ at the end of an input ignores the rest of the line.
 
@@ -845,5 +845,5 @@
 					& Laxed: The target's type is anything string-like; it may have a different status concerning ownership.
 								& Strict: The target's type is the same as the source; both strings are equivalent peers concerning ownership.
-											& n/a		& no	& yes	& yes \\
+											& N/A		& no	& yes	& yes \\
 \hline
 Referent
@@ -863,5 +863,5 @@
 	The C ``string'' is @char *@, under the conventions of @<string.h>@. Because this type does not manage a text allocation, symmetry does not apply.
 \item
-	The Java @String@ class is analyzed; its @StringBuffer@ class behaves similarly to @C++@.
+	The Java @String@ class is analyzed; its @StringBuffer@ class behaves similarly to \CC.
 \end{itemize}
 \caption{Comparison of languages' strings, storage management perspective.}
@@ -869,20 +869,19 @@
 \end{figure}
 
-In C, these declarations give very different things.
+In C, these declarations are very different.
 \begin{cfa}
 char x[$\,$] = "abcde";
 char * y = "abcde";
 \end{cfa}
-Both associate the declared name with fixed-six contiguous bytes, filled as @{'a', 'b', 'c', 'd', 'e', 0}@.
-But @x@ gets them allocated in the active stack frame (with values filled in as control passes the declaration), while @y@ refers into the executable's read-only data section.
+Both associate the declared name with the fixed, six contiguous bytes: @{'a', 'b', 'c', 'd', 'e', 0}@.
+But @x@ is allocated on the stack (with values filled at the declaration), while @y@ refers to the executable's read-only data-section.
 With @x@ representing an allocation, it offers information in @sizeof(x)@ that @y@ does not.
-But this extra information is second-class, as it can only be used in the immediate lexical context, \ie it cannot be passed on to string operations or user functions.
+But this extra information is second-class, as it can only be used in the immediate lexical context, \ie it cannot be passed to string operations or user functions.
 Only pointers to text buffers are first-class, and discussed further.
-
 \begin{cfa}
 char * s = "abcde";
-char * s1 = s;  $\C{// alias state, n/a symmetry, variable-constrained referent}$
-char * s2 = &s[1];  $\C{// alias state, n/a symmetry, variable-constrained referent}\CRT$
-char * s3 = &s2[1];  $\C{// alias state, n/a symmetry, variable-constrained referent}
+char * s1 = s;  $\C[2.25in]{// alias state, N/A symmetry, variable-constrained referent}$
+char * s2 = &s[1];  $\C{// alias state, N/A symmetry, variable-constrained referent}$
+char * s3 = &s2[1];  $\C{// alias state, N/A symmetry, variable-constrained referent}\CRT$
 printf( "%s %s %s %s\n", s, s1, s2, s3 );
 $\texttt{\small abcde abcde bcde cde}$
@@ -904,10 +903,10 @@
 string & s5 = s.substr(2,4);  $\C{// error: cannot point to temporary}\CRT$
 \end{cfa}
-The @s1@ lax symmetry reflects how its validity of depends on the lifetime of @s@.
+The @s1@ lax symmetry reflects how its validity depends on the lifetime of @s@.
 It is common practice in \CC to use the @s1@-style for a by-reference function parameter.
 Doing so assumes that the callee only uses the referenced string for the duration of the call, \ie no storing the parameter (as a reference) for later.
 So, when the called function is a constructor, its definition typically uses an @s2@-style copy-initialization.
 Exceptions to this pattern are possible, but require the programmer to assure safety where the type system does not.
-The @s3@ initialization must copy the substring because it must support a subsequent @c_str@ call, which provides a null-termination, generally at a different position than the source string's.
+The @s3@ initialization must copy the substring to support a subsequent @c_str@ call, which provides null-termination, generally at a different position than the source string's.
 @s2@ assignment could be made fast, by reference-counting the text area and using copy-on-write, but would require an implementation upgrade.
 
@@ -929,5 +928,5 @@
 With @s2@, the case for fast-copy is more subtle.
 Certainly, its value is not pointer-equal to @s@, implying at least a further allocation.
-But because Java is not constrained to use a null-terminated representation, a standard-library implementation is free to refer to the source characters in-place.
+But because Java is \emph{not} constrained to use a null-terminated representation, a standard-library implementation is free to refer to the source characters in-place.
 Java does not meet the aliasing requirement because immutability makes it impossible to modify.
 Java's @StringBuffer@ provides aliasing (see @replace@ example on \VPageref{p:JavaReplace}), though without supporting symmetric treatment of a fragment referent, \eg @substring@ of a @StringBuffer@ is a @String@;
@@ -960,36 +959,50 @@
 
 
-
-\subsection{Logical overlap}
+\subsection{Logical Overlap}
 
 It may be unfamiliar to combine \VRef[Figure]{f:StrSemanticCompare}'s alias state and fragment referent in one API, or at the same time.
 This section shows the capability in action.
 
-In summary, the metaphor of a GUI text editor is intended.
-Selecting a consecutive block of text using the mouse defines an aliased substring within the file.
-Typing in this state overwrites what was there before, replacing the originally selected text with more or less text.
-But the \emph{whole file} grows or shrinks as a result, not just the selection.
-This action models assigning to an aliased substring when the two strings overlap by total containment: one string is the selection, the other is the whole file.
-
-Now extend the metaphor to a multi-user online editor.
-If Alice selects a range of text at the bottom of the file, wile Bob is rewriting a paragraph at the top, Alice's selection holds onto the logical characters initially selected, unaffected by Bob making the total file grow/shrink, and unaffectd by Bob causing the start index of Alice's selction to vary.
-This action models assigning to an aliased substring when the two strings do not overlap at all: one string is Alice's selection, the other is Bob's.
-
-If a third office worker were also watching Alice's and Bob's actions on the whole file (a string with ``all the text'' is kept around), then two further single-user-edit cases give the semantics of the individual edits flowing into the whole.
-But, departing from the document analogy, it is not necessary to keep a such a third string:
-no one has to resource-manage ``the document.''
-When an original string, from which both the Alice- and Bob-parts came, ceases to exist, Alice and Bob are left with two independent strings.
-They are independent because Alice and Bob have no API for growing the bounds of a string to subsume text that may once have been around it.
-
-Edge cases, notably ``Venn-diagram overlap,'' had to have handlings chosen.
-The intent in fleshing out these details was to achieve the above story, with a single API, while keeping the rest as simple as possible.
-The remainder of this section shows the resulting decisions, played out at the API level.
-
-\CFA uses the marker @`share@ as a dynamic mechanism to indicate alias (mutations shared) \vs snapshot (not quite an immutable result, but one with subsequent mutations isolated).
+\begin{comment}
+The metaphor of a GUI text-editor is used to illustrate combining these features.
+Most editors allow selecting a consecutive block of text (highlighted) to define an aliased substring within a document.
+Typing in this area overwrites the prior text, replacing the selected text with less, same, or more text.
+Importantly, the document also changes size, not just the selection.
+%This alias model is assigning to an aliased substring for two strings overlapping by total containment: one is the selected string, the other is the document.
+Extend the metaphor to two selected areas, where one area can be drag-and-dropped into another, changing the text in the drop area and correspondingly changing the document.
+When the selected areas are independent, the semantics of the drag-and-drop are straightforward.
+However, for overlapping selections, either partial or full, there are multiple useful semantics.
+For example, two areas overlap at the top, or bottom, or a block at a corner, where one area is dropped into the other.
+For selecting a smaller area within a larger, and dropping the smaller area into the larger to replace it.
+In both cases, meaningful semantics must be constructed or the operation precluded.
+However, without this advanced capability, certain operations become multi-step, possibly requiring explicit temporaries.
+\end{comment}
+
+A GUI text-editor provides a metaphor.
+Selecting a block of text using the mouse defines an aliased substring within a document.
+Typing in this area overwrites what was there, replacing the originally selected text with more or less text.
+But the \emph{containing document} also grows or shrinks, not just the selection.
+This action models assigning to an aliased substring when one string is completely contained in the other.
+
+Extend the metaphor to a multi-user editor.
+If Alice selects a range of text at the bottom, while Bob is rewriting a paragraph at the top, Alice's selection holds onto the characters initially selected, unaffected by Bob making the document grow/shrink even though Alice's start index in the document is changing.
+This action models assigning to an aliased substring when the two strings do not overlap.
+
+Logically, Alice's and Bob's actions on the whole document are like two single-user-edit cases, giving the semantics of the individual edits flowing into a whole.
+But, there is no need to have two separate document strings.
+Even if a third selection removes all the text, both Alice's and Bob's strings remain.
+The independence of their selections assumes that the editor API does not allow the selection to be enlarged, \ie adding text from the containing environment, which may have disappeared.
+
+This leaves the ``Venn-diagram overlap'' cases, where Alice's and Bob's selections overlap at the top, bottom, or corner.
+In this case, the selection areas are dependent, and so, changes in content and size in one may have an effect in the other.
+There are multiple possible semantics for this case.
+The remainder of this section shows the chosen semantics for all of the cases.
+
+String sharing is expressed using the @`share@ marker to indicate aliasing (mutations shared) \vs snapshot (not quite an immutable result, but one with subsequent mutations isolated).
 This aliasing relationship is a sticky property established at initialization.
 For example, here strings @s1@ and @s1a@ are in an aliasing relationship, while @s2@ is in a copy relationship.
 \input{sharing1.tex}
 Here, the aliasing (@`share@) causes partial changes (subscripting) to flow in both directions.
-(In the following examples, watch how @s1@ and @s1a@ change together, and @s2@ is independent.)
+(In the following examples, note how @s1@ and @s1a@ change together, and @s2@ is independent.)
 \input{sharing2.tex}
 Similarly for complete changes.
@@ -999,5 +1012,5 @@
 \input{sharing4.tex}
 
-Now, consider string @s1_mid@ being an alias in the middle of @s1@, along with @s2@, made by a simple copy from the middle of @s1@.
+Now, consider string @s1_mid@ being an alias in the middle of @s1@, along with @s2@, made by a copy from the middle of @s1@.
 \input{sharing5.tex}
 Again, @`share@ passes changes in both directions; copy does not.
@@ -1020,7 +1033,7 @@
 When @s1_bgn@'s size increases by 3, @s1_mid@'s starting location moves from 1 to 4 and @s1_end@'s from 3 to 6,
 
-When changes happens on an aliasing substring that overlap.
+When changes happen on aliasing substrings that overlap.
 \input{sharing10.tex}
-Strings @s1_crs@ and @s1_mid@ overlap at character 4, @j@ because the substrings are 3,2 and 4,2.
+Strings @s1_crs@ and @s1_mid@ overlap at character 4, @j@, because the substrings are 3,2 and 4,2.
 When @s1_crs@'s size increases by 1, @s1_mid@'s starting location moves from 4 to 5, but the overlapping character remains, changing to @'+'@.
 
@@ -1079,6 +1092,5 @@
 
 
-
-\section{Storage management}
+\section{Storage Management}
 
 This section discusses issues related to storage management of strings.
@@ -1099,10 +1111,9 @@
 const string s1 = "abc";
 \end{cfa}
-the @const@ applies to the @s1@ pointer to @"abc"@, and @"abc"@ is an immutable constant that is \emph{copied} into the string's storage.
-Hence, @s1@ is not pointing at an immutable constant, meaning its underlying string can be mutable, unless some other designation is specified, such as Java's global immutable rule.
-
-
-
-\subsection{General implementation}
+@const@ applies to the @s1@ pointer to @"abc"@, and @"abc"@ is an immutable constant that is \emph{copied} into the string's storage.
+Hence, @s1@ is not pointing at an immutable constant and its underlying string is mutable, unless some other designation is specified, such as Java's global immutable rule.
+
+
+\subsection{General Implementation}
 \label{string-general-impl}
 
@@ -1113,10 +1124,10 @@
 A string is a smart pointer into this buffer.
 
-This cycle of frequent cheap allocations, interspersed with infrequent expensive compactions, has obvious similarities to a general-purpose memory manager based on garbage collection (GC).
+This cycle of frequent cheap allocations, interspersed with infrequent expensive compactions, has obvious similarities to a general-purpose memory-manager based on garbage collection (GC).
 A few differences are noteworthy.
 First, in a general purpose manager, the allocated objects may contain pointers to other objects, making the transitive reachability of these objects a crucial property.
 Here, the allocations are text, so one allocation never keeps another alive.
 Second, in a general purpose manager, the handle that keeps an allocation alive is a bare pointer.
-For strings, a fatter representation is acceptable because this pseudo-pointer is only used for enty into the string-heap, not for general data-sub-structure linking around the general heap.
+For strings, a fatter representation is acceptable because this pseudo-pointer is only used for entry into the string-heap, not for general data-substructure linking around the general heap.
 
 \begin{figure}
@@ -1128,10 +1139,10 @@
 \VRef[Figure]{f:memmgr-basic} shows the representation.
 The heap header and text buffer define a sharing context.
-Normally, one global sharing context is appropriate for an entire program;
-concurrent exceptions are discussed in \VRef{s:ControllingImplicitSharing}.
-A string is a handle into the buffer and node within a linked list.
+Normally, one global context is appropriate for an entire program;
+concurrency is discussed in \VRef{s:ControllingImplicitSharing}.
+A string is a handle to a node in a linked list containing information about a string text in the buffer.
 The list is doubly linked for $O(1)$ insertion and removal at any location.
 Strings are ordered in the list by text start address.
-The header maintains a next-allocation pointer, @alloc@, pointing to the last live allocation in the buffer.
+The heap header maintains a next-allocation pointer, @alloc@, pointing to the last live allocation in the buffer.
 No external references point into the buffer and the management procedure relocates the text allocations as needed.
 A string handle references a containing string, while its string is contiguous and not null terminated.
@@ -1139,11 +1150,11 @@
 String handles can be allocated in the stack or heap, and represent the string variables in a program.
 Normal C life-time rules apply to guarantee correctness of the string linked-list.
-The text buffer is large enough with good management so that often only one dynamic allocation is necessary during program execution.
+The text buffer is large enough with good management so that often only one dynamic allocation is necessary during program execution, but not so large as to cause program bloat.
 % During this period, strings can vary in size dynamically.
 
 When the text buffer fills, \ie the next new string allocation causes @alloc@ to point beyond the end of the buffer, the strings are compacted.
 The linked handles define all live strings in the buffer, which indirectly defines the allocated and free space in the buffer.
-Since the string handles are in sorted order, the handle list can be traversed, copying the first live text to the start of the buffer, and subsequent strings after each other.
-If, upon compaction, the amount of free storage would still be less than the new string allocation, a larger text buffer is heap-allocated, the current buffer is copied into the new buffer, and the original buffer is freed.
+The string handles are maintained in sorted order, so the handle list can be traversed, copying the first live text to the start of the buffer, and subsequent strings after each other.
+After compaction, if free storage is still less than the new string allocation, a larger text buffer is heap-allocated, the current buffer is copied into the new buffer, and the original buffer is freed.
 Note, the list of string handles is structurally unaffected during a compaction;
 only the text pointers in the handles are modified to new buffer locations.
@@ -1157,8 +1168,8 @@
 Both string initialization styles preserve the string module's internal invariant that the linked-list order matches the buffer order.
 For string destruction, handles are removed from the list.
-As a result, once a last handle using a run of buffer characters is destroyed, that buffer space gets excluded from the next compaction, making its character-count available in the compacted buffer.
-
-Certain string operations can result in a substring of another string.
-The resulting handle is then placed in the correct sorted position in the list, possible with a short linear search to locate the position.
+Once the last handle using a run of buffer characters is destroyed, that buffer space is excluded from use until the next compaction.
+
+Certain string operations result in a substring of another string.
+The resulting handle is then placed in the correct sorted position in the list, possibly requiring a short linear search to locate the position.
 For string operations resulting in a new string, that string is allocated at the end of the buffer.
 For shared-edit strings, handles that originally referenced containing locations need to see the new value at the new buffer location.
@@ -1175,9 +1186,9 @@
 
 
-\subsection{RAII limitations}
+\subsection{RAII Limitations}
 \label{string-raii-limit}
 
 Earlier work on \CFA~\cite[ch.~2]{Schluntz17} implemented object constructors and destructors for all types (basic and user defined).
-A constructor is a user-defined function run implicitly \emph{after} an object's storage is allocated, and a destructor is a user-defined function run \emph{before} an object's storage is deallcated.
+A constructor is a user-defined function run implicitly \emph{after} an object's storage is allocated, and a destructor is a user-defined function run \emph{before} an object's storage is deallocated.
 This feature, called Resource Acquisition Is Initialization (RAII)~\cite[p.~389]{Stroustrup94}, helps guarantee invariants for users before accessing an object and for the programming environment after an object terminates.
 
@@ -1213,19 +1224,20 @@
 \end{cfa}
 A module providing the @T@ type can traverse @all_T@ at relevant times, to keep the objects ``good.''
-Hence, declaring a @T@ not only ensures that it begins with an initially ``good'' value, but it also provides an implicit subscription to a service that keeps the value ``good'' in the future.
+Hence, declaring a @T@ not only ensures that it begins with an initially ``good'' value, but it also provides an implicit subscription to a service that keeps the value ``good'' during its lifetime.
 Again, both \CFA and \CC support this usage style.
 
 A third capability concerns \emph{implicitly} requested copies.
 When stack-allocated objects are used as parameter and return values, a sender's version exists in one stack frame and a receiver's version exists in another.
-In the parameter direction, the language's function-call handling must arrange for a copy-constructor call to happen\footnote{
-	\CC also offers move constructors and return-value optimization~\cite{RVO20}.
-	These features help reduce unhelpful copy-constructor calls, which, for types like the example \lstinline{S}, would lead to extra memory allocations.
-	\CFA does not currently have these features; adding similarly-intended features to \CFA is desirable.
-	However, this section is about a problem in the realization of features that \CFA already supports.
-	To understand the problem presented, the appropriate comparison is with classic versions of \CC that treated such copy-constructor calls as necessary.}
-at a time near the control transfer into the callee, with the source as the caller's (sender's) version and the target as the callee's (receiver's) version.
-(In the return direction, the roles are reversed and the copy-constructor call happens near the return of control.)
-\CC supports this capability without qualification.
-\CFA offers limited support here; simple examples work, but implicit copying does not combine successfully with the other RAII capabilities discussed.
+In the parameter direction, the language's function-call handling must arrange for a copy-constructor call to happen, at a time near the control transfer into the callee. %, with the source as the caller's (sender's) version and the target as the callee's (receiver's) version.
+In the return direction, the roles are reversed and the copy-constructor call happens near the return of control.
+\CC supports this capability.% without qualification.
+\CFA offers limited support;
+simple examples work, but implicit copying does not combine successfully with the other RAII capabilities discussed.
+
+\CC also offers move constructors and return-value optimization~\cite{RVO20}.
+These features help reduce unhelpful copy-constructor calls, which, for types like the @S@ example, would lead to extra memory allocations.
+\CFA does not currently have these features; adding similarly-intended features to \CFA is desirable.
+However, this section is about a problem in the realization of features that \CFA already supports.
+Hence, the comparison continues with the classic version of \CC that treated such copy-constructor calls as necessary.
 
 To summarize the unsupported combinations, the relevant features are:
@@ -1243,5 +1255,5 @@
 At that time, adhering to a principal of minimal intervention, this code could always be treated as passthrough:
 \begin{cfa}
-struct U {...};
+struct U { ... };
 // RAII to go here
 void f( U u ) { F_BODY(u) }
@@ -1249,29 +1261,35 @@
 f( x );
 \end{cfa}
-But adding custom RAII (at ``...here'') changes things.
-The common C++ lowering~\cite[Sec. 3.1.2.3]{cxx:raii-abi} proceeds differently than the present CFA lowering.
-
-\noindent
-\begin{tabular}{l|l}
-\begin{cfa}
-// C++, likely CFA to be
+But adding custom RAII (at ``...go here'') changes things.
+The common \CC lowering~\cite[Sec. 3.1.2.3]{cxx:raii-abi} proceeds differently than the present \CFA lowering.
+\begin{cquote}
+\setlength{\tabcolsep}{15pt}
+\begin{tabular}{@{}l|l@{}}
+\begin{cfa}
+$\C[0.0in]{// \CC, \CFA future}\CRT$
 struct U {...};
 // RAII elided
 void f( U * __u_orig ) {
 	U u = * __u_orig;  // call copy ctor
-	F_BODY(u)
+	F_BODY( u );
 	// call dtor, u
 }
 U x; // call default ctor
-f( & x ) ;
+
+
+f( &x ) ;
+
+
 // call dtor, x
 \end{cfa}
 &
 \begin{cfa}
-// CFA today
+$\C[0.0in]{// \CFA today}\CRT$
 struct U {...};
 // RAII elided
 void f( U u ) {
-	F_BODY(u)
+
+	F_BODY( u );
+
 }
 U x; // call default ctor
@@ -1284,12 +1302,13 @@
 \end{cfa}
 \end{tabular}
-
-In the CFA-today scheme, the lowered form is still using a by-value C call.
-C does a @memcpy@ on structs passed by value.
-And so, @F_BDY@ sees the bits of @__u_for_f@ occurring at an address that has never been presented to the @U@ lifecycle functions.
-If @U@ is trying to have a style-\#2 invariant, it shows up broken in @F_BDY@: references that are supposed to be to @u@ are actually to the different location @__u_for_f@.
-The \CC scheme does not have this problem because it constructs the for-@f@ copy in the correct location.
-
-Yet, the \CFA-today scheme is sufficient to deliver style-\#1 invariants (in this style-\#3 use case) because this scheme still does the correct number of lifecycle calls, using correct values, at correct times.  So, reference-counting or simple ownership applications get their invariants respected under call/return-by-value.
+\end{cquote}
+The current \CFA scheme is still using a by-value C call.
+C does a @memcpy@ on structures passed by value.
+And so, @F_BODY@ sees the bits of @__u_for_f@ occurring at an address that has never been presented to the @U@ lifecycle functions.
+If @U@ is trying to have a style-\#2 invariant, it shows up broken in @F_BODY@: references supposedly to @u@ are actually to @__u_for_f@.
+The \CC scheme does not have this problem because it constructs the copy for @f@ in the correct location within @f@.
+
+Yet, the current \CFA scheme is sufficient to deliver style-\#1 invariants (in this style-\#3 use case) because this scheme still does the correct number of lifecycle calls, using correct values, at correct times.
+So, reference-counting or simple ownership applications get their invariants respected under call/return-by-value.
 
 % [Mike is not currently seeing how distinguishing initialization from assignment is relevant]
@@ -1325,30 +1344,31 @@
 % The following discusses the consequences of this semantics with respect to lifetime management of \CFA strings.
 
-
-The string API offers style \#3's pass-by-value in, for example, in the return of @"a" + "b"@.
+The string API offers style \#3's pass-by-value, \eg in the return of @"a" + "b"@.
 Its implementation uses the style-\#2 invariant of the string handles being linked to each other, helping to achieve high performance.
-Since these two RAII styles cannont coexist, a workaround splits the API into two layers: one that provides pass-by-value, built upon the other with inter-linked handles.
+Since these two RAII styles cannot coexist, a workaround splits the API into two layers: one that provides pass-by-value, built upon the other with inter-linked handles.
 The layer with pass-by-value incurs a performance penalty, while the layer without delivers the desired runtime performance.
-The slower, friendlier High Level API (HL, type @string@) wrapps the faster, more primitive Low Level API (LL, type @string_res@, abbreviating ``resource'').
+The slower, friendlier High Level API (HL, type @string@) wraps the faster, more primitive Low Level API (LL, type @string_res@, abbreviating ``resource'').
 Both APIs present the same features, up to return-by-value operations being unavailable in LL and implemented via the workaround in HL.
 The intention is for most future code to target HL.
-When the RAII issue is fixed, the full HL feature set will be acheivable using the LL-style lifetime management.
-So then, there will be no need for two API levels; HL will be removed; LL's type will be renamed to @string@; programs written for current HL will run faster.
+When the RAII issue is fixed, the full HL feature set will be achievable using the LL-style lifetime management.
+Then, HL will be removed;
+LL's type will be renamed @string@ and programs written for current HL will run faster.
 In the meantime, performance-critical sections of applications must use LL.
 Subsequent performance experiments \see{\VRef{s:PerformanceAssessment}} use the LL API when comparing \CFA to other languages.
 This measurement gives a fair estimate of the goal state for \CFA.
 A separate measure of the HL overhead is also included.
-
-\VRef[Section]{string-general-impl} described the goal state for \CFA.  In present state, the type @string_res@ replaces its mention of @string@ as inter-linked handle.
-
-To use LL, a programmer rewrites invocations that used pass-by-value APIs into invocations where the resourcing is more explicit.
-Many invocations are unaffected, notably including assignment and comparison.
-Of the capabilities listed in \VRef[Figure]{f:StrApiCompare}, only the following three cases have revisions.
-
-\noindent
+Hence, \VRef[Section]{string-general-impl} is describing the goal state for \CFA.
+In the present state, the type @string_res@ replaces its mention of @string@ as inter-linked handle.
+
+To use LL, a programmer rewrites invocations using pass-by-value APIs into invocations where resourcing is more explicit.
+Many invocations are unaffected, notably assignment and comparison.
+Of the capabilities listed in \VRef[Figure]{f:StrApiCompare}, only the following three cases need revisions.
+\begin{cquote}
+\setlength{\tabcolsep}{15pt}
 \begin{tabular}{ll}
 HL & LL \\
 \hline
 \begin{cfa}
+
 string s = "a" + "b";
 \end{cfa}
@@ -1363,4 +1383,5 @@
 string s = "abcde";
 string s2 = s(2, 3); // s2 == "cde"
+
 s(2,3) = "x"; // s == "abx" && s2 == "cde"
 \end{cfa}
@@ -1376,4 +1397,5 @@
 \begin{cfa}
 string s = "abcde";
+
 s[2] = "xxx";  // s == "abxxxde"
 \end{cfa}
@@ -1385,14 +1407,12 @@
 \end{cfa}
 \end{tabular}
-
+\end{cquote}
 The actual HL workaround is having @string@ wrap a pointer to a uniquely owned, heap-allocated @string_res@.  This arrangement has @string@ being style-\#1 RAII, which is compatible with pass-by-value.
 
 
-
-\subsection{Sharing implementation}
+\subsection{Sharing Implementation}
 \label{sharing-impl}
 
-The \CFA string module has two mechanisms to handle the case when string handles share a run of text.
-
+The \CFA string module has two mechanisms to deal with string handles sharing text.
 In the first type of sharing, the user requests that both string handles be views of the same logical, modifiable string.
 This state is typically produced by the substring operation.
@@ -1404,6 +1424,6 @@
 $\texttt{\small axcde xc}$
 \end{cfa}
-In a typical substring call, the source string-handle is referencing an entire string, and the resulting, newly made, string handle is referencing a portion of the original.
-In this state, a subsequent modification made by either is visible in both.
+Here, the source string-handle is referencing an entire string, and the resulting, newly made, string handle is referencing a contained portion of the original.
+In this state, a modification made in the overlapping area is visible in both strings.
 
 The second type of sharing happens when the system implicitly delays the physical execution of a logical \emph{copy} operation, as part of its copy-on-write optimization.
@@ -1418,5 +1438,5 @@
 In this state, a subsequent modification done on one handle triggers the deferred copy action, leaving the handles referencing different text within the buffer, holding distinct values.
 
-A further abstraction, in the string module's implementation, helps distinguish the two senses of sharing.
+A further abstraction helps distinguish the two senses of sharing.
 A share-edit set (SES) is an equivalence class over string handles, being the reflexive, symmetric and transitive closure of the relationship of one string being constructed from another, with the ``share'' option given.
 The SES is represented by a second linked list among the handles.
@@ -1427,5 +1447,5 @@
 
 
-\subsection{Controlling implicit sharing}
+\subsection{Controlling Implicit Sharing}
 \label{s:ControllingImplicitSharing}
 
@@ -1434,14 +1454,4 @@
 In detail, string sharing has inter-linked string handles, so managing one string is also managing the neighbouring strings, and from there, a data structure of the ``set of all strings.''
 Therefore, it is useful to toggle this capability on or off when it is not providing any application benefit.
-
-\begin{figure}
-    \begin{tabular}{ll}
-        \lstinputlisting[language=CFA, firstline=10, lastline=55]{sharectx.run.cfa}
-        &
-        \raisebox{-0.17\totalheight}{\includegraphics{string-sharectx.pdf}} % lower
-    \end{tabular}
-	\caption{Controlling copying vs sharing of strings using \lstinline{string_sharectx}.}
-	\label{fig:string-sharectx}
-\end{figure}
 
 The \CFA string library provides the type @string_sharectx@ to control an ambient sharing context.
@@ -1456,14 +1466,22 @@
 Executing the example does not produce an interesting outcome, but the comments in the picture indicate when the logical copy operation runs with
 \begin{description}
-    \item[share:] the copy being deferred, as described through the rest of this section (fast), or
-    \item[copy:] the copy performed eagerly (slow).
+	\item[share:] the copy being deferred, as described through the rest of this section (fast), or
+	\item[copy:] the copy performed eagerly (slow).
 \end{description}
 Only eager copies can cross @string_sharectx@ boundaries.
 The intended use is with stack-managed lifetimes, in which the established context lasts until the current function returns, and affects all functions called that do not create their own contexts.
 
-[ TODO: true up with ``is thread local'' (implement that and expand this discussion to give a concurrent example, or adjust this wording) ]
-
-
-\subsection{Sharing and threading}
+\begin{figure}
+	\begin{tabular}{ll}
+		\lstinputlisting[language=CFA, firstline=10, lastline=55]{sharectx.run.cfa}
+		&
+		\raisebox{-0.17\totalheight}{\includegraphics{string-sharectx.pdf}} % lower
+	\end{tabular}
+	\caption{Controlling copying vs sharing of strings using \lstinline{string_sharectx}.}
+	\label{fig:string-sharectx}
+\end{figure}
+
+
+\subsection{Sharing and Threading}
 
 The \CFA string library provides no thread safety, the same as \CC string, providing similar performance goals.
@@ -1474,50 +1492,8 @@
 
 
-\subsection{Future work}
-
-Implementing the small-string optimization is straightforward, as a string header contains a pointer to the string text in the buffer.
-This pointer could be marked with a flag and contain a small string.
-However, there is now a conditional check required on the fast-path to switch between small and large string operations.
-
-It might be possible to pack 16- or 32-bit Unicode characters within the same string buffer as 8-bit characters.
-Again, locations for identification flags must be found and checked along the fast path to select the correct actions.
-Handling utf8 (variable length), is more problematic because simple pointer arithmetic cannot be used to stride through the variable-length characters.
-Trying to use a secondary array of fixed-sized pointers/offsets to the characters is possible, but raises the question of storage management for the utf8 characters themselves.
-
-
-\section{Performance assessment}
-\label{s:PerformanceAssessment}
-
-I assessed the \CFA string library's speed and memory usage against strings in \CC STL.
-
-Overall, this analysis shows that adding support for the features shown earlier in the chapter comes at no substantial cost in the performance of featrues common to both APIs.
-
-Moreover, the results support the \CFA string's position as a high-level enabler of simplified text processing.
-STL makes its user think about memory management.
-When the user does, and is successful, STL's performance can be very good.
-But when the user fails to think through the consequences of the STL representation, performance becomes poor.
-The \CFA string lets the user work at the level of just putting the right text into right variables, with corresponding performance degradations reduced or eliminated.
-
-% The final test shows the overall win of the \CFA text-sharing mechanism.
-% It exercises several operations together, showing \CFA enabling clean user code to achieve performance that STL requires less-clean user code to achieve.
-
-
-\subsection{Methodology}
-
-These tests use a \emph{corpus} of strings.
-Their lengths are important; the specific characters occurring in them are immaterial.
-In a result graph, a corpus's mean string length is often the independent variable shown on the X axis.
-
-When a corpus contains strings of different lenghths, the lengths are drawn from a geometric distribution.
-Therefore, strings much longer than the mean occur nontrivially and strings slightly shorter than the mean occur most often.
-A corpus's string sizes are one of:
-\begin{description}
-	\item [Fixed-size] all string lengths are of the stated size.
-	\item [Varying 1 and up] the string lengths are drawn from the geometric distribution with a stated mean and all lengths occur.
-	\item [Varying 16 and up] string lengths are drawn from the geometric distribution with the stated mean, but only lengths 16 and above occur; thus, the stated mean is above 16.  \PAB{Is this one unused?  May have just been for ``normalize.''}
-\end{description}
-The special treatment of length 16 deals with the short-string optimization (SSO) in STL @string@, currently not implemented in \CFA, though a fine future improvement to \CFA.
-In the general case, an STL string handle is a pointer (to separately allocated text) and a length.
-But when the text is shorter than this representation, the optimization repurposes the handle's storage to eliminate using the heap.
+\subsection{Short-String Optimization}
+
+\CC implements a short-string ($\le$16) optimization (SSO).
+As a string header contains a pointer to the string text, this pointer can be tagged and used to contain a short string, removing a dynamic memory allocation/deallocation.
 \begin{c++}
 class string {
@@ -1529,67 +1505,95 @@
 		char sstr[sizeof(lstr)]; $\C{// short string <16 characters, text in situ}$
 	};
-	$\C{// tagging for kind (short or long) elided}$
+	// some tagging for short or long strings
 };
 \end{c++}
-
+However, there is now a conditional check required on the fast-path to switch between short and long string operations.
+
+It might be possible to pack 16- or 32-bit Unicode characters within the same string buffer as 8-bit characters.
+Again, locations for identification flags must be found and checked along the fast path to select the correct actions.
+Handling utf8 (variable length) is more problematic because simple pointer arithmetic cannot be used to stride through the variable-length characters.
+Trying to use a secondary array of fixed-sized pointers/offsets to the characters is possible, but raises the question of storage management for the utf8 characters themselves.
+
+
+\section{Performance Assessment}
+\label{s:PerformanceAssessment}
+
+I assessed the \CFA string library's speed and memory usage against strings in \CC STL.
+Overall, this analysis shows that adding support for the features shown earlier in the chapter comes at no substantial cost in the performance of features common to both APIs.
+
+Moreover, the results support the \CFA string's position as a high-level enabler of simplified text processing.
+STL makes its user think about memory management.
+When the user does, and is successful, STL's performance can be very good.
+But when the user fails to think through the consequences of the STL representation, performance becomes poor.
+The \CFA string lets the user work at the level of just putting the right text into the right variables, with corresponding performance degradations reduced or eliminated.
+
+% The final test shows the overall win of the \CFA text-sharing mechanism.
+% It exercises several operations together, showing \CFA enabling clean user code to achieve performance that STL requires less-clean user code to achieve.
+
+
+\subsection{Methodology}
+
+These tests use a \emph{corpus} of strings.
+Their lengths are important; the specific characters occurring in them are immaterial.
+In a result graph, a corpus's mean string length is often the independent variable shown on the X axis.
+
+When a corpus contains strings of different lengths, the lengths are drawn from a geometric distribution.
+Therefore, strings much longer than the mean occur less often and strings slightly shorter than the mean occur most often.
+A corpus's string sizes are one of:
+\begin{description}
+	\item [Fixed-size] all string lengths are of the stated size.
+	\item [Varying 1 and up] the string lengths are drawn from the geometric distribution with a stated mean and all lengths occur.
+	\item [Varying 16 and up] string lengths are drawn from the geometric distribution with the stated mean, but only lengths 16 and above occur; thus, the stated mean is above 16.
+\end{description}
+The special treatment of length 16 deals with the SSO in STL @string@, currently not implemented in \CFA.
 A fixed-size or from-16 distribution ensures that \CC's extra-optimized cases are isolated within, or removed from, the comparison.
-
 In all experiments that use a corpus, its text is generated and loaded into the system under test before the timed phase begins.
-
 To ensure comparable results, a common memory allocator is used for \CFA and \CC.
-\CFA runs the llheap allocator~\cite{Zulfiqar22}; the test rig plugs this same allocator into \CC.
+\CFA runs the llheap allocator~\cite{Zulfiqar22}, which is also plugged into \CC.
 
 The operations being measured take dozens of nanoseconds, so a succession of many invocations is run and timed as a group.
-The experiments run with fixed duration (targeting approximately 5 seconds), stopping upon passing a goal time, as determined by re-checking @clock()@ every 10,000 invocations, which is never more often than once per 80 ms.
-Timing outcomes reprt mean nanoseconds per invocation, which includes harness overhead and the targeted string API execution.
+The experiments run for a fixed duration (5 seconds), as determined by re-checking @clock()@ every 10,000 invocations, which is never more often than once per 80 ms.
+Timing outcomes report mean nanoseconds per invocation, which includes harness overhead and the targeted string API execution.
 
 \PAB{To discuss: hardware and such}
 
-As discussed in \VRef[Section]{string-raii-limit}, general performance comparisons are made using \CFA's faster, low-level string API, whose string type is named @string_res@.
-
-\VRef{s:ControllingImplicitSharing} presents an operational mode where \CFA string sharing is turned off.  In this mode, the \CFA string operates similarly to \CC's, by using a distinct heap allocation for each string's text.
-Some experiments include measurements in this mode for baselining purposes.
-It is called ``\CC emulation mode'' or ``nosharing'' here.
-
+As discussed in \VRef[Section]{string-raii-limit}, general performance comparisons are made using \CFA's faster, low-level string API, named @string_res@.
+\VRef{s:ControllingImplicitSharing} presents an operational mode where \CFA string sharing is turned off.
+In this mode, the \CFA string operates similarly to \CC's, by using a heap allocation for string text.
+Some experiments include measurements in this mode for baselining purposes, called ``\CC emulation mode'' or ``nosharing''.
 
 
 \subsection{Test: Append}
 
-These tests measure the speed of appending strings from the corpus onto a larger, growing string.  They show \CFA performing comparably to \CC overall, though with reduced penalties for simple API misuses for which \CC programmers may not know to watch out.
-
+These tests measure the speed of appending strings from the corpus onto a larger, growing string.
+They show \CFA performing comparably to \CC overall, though with reduced penalties for simple API misuses.
 The basic harness is:
-\begin{cquote}
-\setlength{\tabcolsep}{20pt}
-\begin{cfa}
-START_TIMER
-for ( ... ) {
-	string_res accum;
-	for ( i; 100 ) {
-		accum += corpus[ f(i) ]; // importing from char * here
-		COUNT_ONE_OP_DONE
+\begin{cfa}
+// set alarm duration
+for ( ... ) { $\C[1.5in]{// loop for duration}$
+	for ( i; N ) { $\C{// perform multiple appends (concatenations)}$
+		accum += corpus[ f( i ) ];
 	}
+	count += N; $\C{// count number of appends}\CRT$
 }
-STOP_TIMER
-\end{cfa}
-\end{cquote}
-The harness's outer loop executes until a sample-worthy amount of execution has happened.
-The inner loop builds up the desired-length string with successive appends, before the outer makes it start over from a blank accumulator.
-Each harness run targets a specific (mean) corpus string length and produces one data point on the result graph.
-
+\end{cfa}
+The harness's outer loop executes for the experiment duration.
+The string is reset to empty before appending (not shown).
+The inner loop builds up a growing-length string with successive appends.
+Each run targets a specific (mean) corpus string length and produces one data point on the result graph.
 Three specific comparisons are made with this harness.
 Each picks its own independent-variable basis of comparison.
-
-All three comparisons use the varying-from-1 corpus construction, \ie they allow the STL to show its advantage from small-string optimization.
+All three comparisons use the varying-from-1 corpus construction, \ie they allow the STL to show its advantage for SSO.
 
 
 \subsubsection{Fresh vs Reuse in \CC, Emulation Baseline}
 
-The first experiment compares \CFA with \CC, with \CFA operating in nosharing mode (and \CC having no other mode).
-This experiment simply baselines how \CFA modestly lags \CC's optimization/tuning level generally, yet reproduces a coarser phenomenon.
-
-This experiment also introduces the first \CC coding pitfall, which the next experiment will show is helped by turning on \CFA sharing.  By this pitfall, a \CC programmer must pay attention to string variable reuse.
-
-\begin{cquote}
-\setlength{\tabcolsep}{20pt}
+The first experiment compares \CFA with \CC, with \CFA operating in nosharing mode and \CC having no other mode, hence both string packages are using @malloc@/@free@.
+% This experiment establishes a baseline for other experiments.
+This experiment also introduces the first \CC coding pitfall, which the next experiment shows is helped by turning on \CFA sharing.
+% This pitfall shows, a \CC programmer must pay attention to string variable reuse.
+In the following, both programs are doing the same thing: start with @accum@ empty and build it up by appending @N@ strings (type @string@ in \CC and the faster @string_res@ in \CFA).
+\begin{cquote}
+\setlength{\tabcolsep}{40pt}
 \begin{tabular}{@{}ll@{}}
 % \multicolumn{1}{c}{\textbf{fresh}} & \multicolumn{1}{c}{\textbf{reuse}} \\
@@ -1597,7 +1601,7 @@
 
 for ( ... ) {
-	@string_res accum;@       // fresh
-	for ( ... )
-		accum @+=@ ...
+	@string_res accum;@	$\C[1.5in]{// fresh}$
+	for ( N )
+		accum @+=@ ...  $\C{// append}\CRT$
 }
 \end{cfa}
@@ -1606,19 +1610,16 @@
 string_res accum;
 for ( ... ) {
-	@accum = "";@  $\C[1in]{// reuse\CRT}$
-	for ( ... )
-		accum @+=@ ...
+	@accum = "";@  $\C[1.5in]{// reuse}$
+	for ( N )
+		accum @+=@ ...  $\C{// append}\CRT$
 }
 \end{cfa}
 \end{tabular}
 \end{cquote}
-
-Both programs are doing the same thing: start with @x@ empty and build it up by appending the same chunks.
-A programmer should not have to consider this difference.
-But from under the covers, each string being an individual allocation leaks through.
-While the inner loop is appending text to an @x@ that had not yet grown to have a large capacity, the program is, naturally, paying to extend the variable-length allocation, occasionally.
-This capacity stretching is a sticky property that survives assigning a (short, empty-string) value into an existing initialization.
-So, the ``reuse'' version benefits from not growing the allocation on subsequent runs of the inner loop.
-Yet, the ``fresh'' version is constantly restarting from a small buffer.
+The difference is creating a new or reusing an existing string variable.
+The pitfall is that most programmers do not consider this difference.
+However, creating a new variable implies deallocating the previous string storage and allocating new empty storage.
+As the string grows, further deallocations/allocations are required to release the previous and extend the current string storage.
+So, the fresh version is constantly restarting with zero string storage, while the reuse version benefits from having its prior large storage from the last append sequence.
 
 \begin{figure}
@@ -1626,39 +1627,72 @@
 	\includegraphics{plot-string-peq-cppemu.pdf}
 %	\includegraphics[width=\textwidth]{string-graph-peq-cppemu.png}
-	\caption{Fresh vs Reuse in \CC, Emulation Baseline.  Average time per iteration with one \lstinline{x += y} invocation (lower is better).  Comparing \CFA's STL emulation mode with STL implementations, and comparing the ``fresh'' with ``reused'' reset styles.}
+	\caption{Fresh vs Reuse in \CC, Emulation Baseline.
+	Average time per iteration with one \lstinline{x += y} invocation (lower is better).
+	Comparing \CFA's STL emulation mode with STL implementations, and comparing the fresh with reused reset styles.}
 	\label{fig:string-graph-peq-cppemu}
-\end{figure}
-
-\VRef[Figure]{fig:string-graph-peq-cppemu} shows the resulting performance.
-The fresh \vs reuse penalty is the dominant difference.
-The cost is 40\% averaged over the cases shown and minimally 24\%.
-It shows up consistently on both the \CFA and STL implementations, and this cost is more prominent with larger strings.
-
-The lesser \CFA \vs STL difference shows \CFA reproducing STL's performance, up to a 15\% penalty averaged over the cases shown, diminishing with larger strings, and 50\% in the worst case.
-This penalty characterizes implementation fine tuning done with STL and not done yet done with \CFA.
-
-
-\subsubsection{\CFA's Fresh-Reuse Compromise}
-
-This comparison has the same setup as the last one, except that the \CFA implementation is switched to use its sharing mode.  The outcome is that the fresh/reuse difference vanishes in \CFA, with \CFA consistently delivering performance that compromises between the two \CC cases.
-
-\begin{figure}
-\centering
+	\bigskip
+	\bigskip
 	\includegraphics{string-graph-peq-sharing.pdf}
 %	\includegraphics[width=\textwidth]{string-graph-peq-sharing.png}
-	\caption{\CFA Compromise for Fresh \vs Reuse.  Average time per iteration with one \lstinline{x += y} invocation (lower is better).  Comparing \CFA's sharing mode with STL, and comparing the ``fresh'' with ``reused'' reset styles.  The \CC results are repeated from \ref{fig:string-graph-peq-cppemu}.}
+	\caption{\CFA Compromise for Fresh \vs Reuse.
+	Average time per iteration with one \lstinline{x += y} invocation (lower is better).
+	Comparing \CFA's sharing mode with STL, and comparing the fresh with reused reset styles.
+	The \CC results are repeated from \VRef[Figure]{fig:string-graph-peq-cppemu}.}
 	\label{fig:string-graph-peq-sharing}
 \end{figure}
 
-\VRef[Figure]{fig:string-graph-peq-sharing} has the result.
-At append lengths 5 and above, \CFA not only splits the two STL cases, but its slowdown of 16\% over STL with user-managed reuse is close to the baseline \CFA-v-STL implementation difference seen with \CFA in STL-emulation mode.
-
-
-\subsubsection{\CFA's low overhead for misusing \lstinline{+}}
-
-A further pitfall occurs when the user writes @x = x + y@, rather than @x += y@.  Again, they are logically equivalent.
+\VRef[Figure]{fig:string-graph-peq-cppemu} shows the resulting performance.
+The two fresh (solid) lines and the two reuse (dash) lines are identical, except for lengths $\le$10, where the \CC SSO has a 40\% average and minimally 24\% advantage.
+The gap between the fresh and reuse lines is the removal of the dynamic memory allocations and reuse of prior storage, \eg 100M allocations for fresh \vs 100 allocations for reuse across all experiments.
+While allocation reduction is huge, data copying dominates the cost, so the lines are still reasonably close together.
+
+
+\subsubsection{\CFA's Sharing Mode}
+
+This comparison is the same as the last one, except the \CFA implementation is using sharing mode.
+Hence, both \CFA's fresh and reuse versions have no memory allocations, and as before, only for reuse does \CC have no memory allocations.
+\VRef[Figure]{fig:string-graph-peq-sharing} shows the resulting performance.
+For fresh at append lengths 5 and above, \CFA is now closer to the \CC reuse performance, because of removing the dynamic allocations.
+However, for reuse, \CFA has slowed down slightly, to performance matching the new fresh version, as the two versions are now implemented virtually the same.
+The reason for the \CFA reuse slow-down is the overhead of managing the sharing scheme (primarily maintaining the list of handles), without gaining any benefit.
+
+\begin{comment}
+FIND A HOME!!!
+The potential benefits of the sharing scheme do not give \CFA an edge over \CC when appending onto a reused string, though the first one helps \CFA win at going onto a fresh string.  These abilities are:
+\begin{itemize}
+\item
+To grow a text allocation repeatedly without copying it elsewhere.
+This ability is enabled by \CFA's most-recently modified string being located immediately before the text buffer's \emph{shared} bump-pointer area, \ie often a very large greenfield, relative to the \emph{individual} string being grown.
+With \CC-reuse, this benefit is already reaped by the user's reuse of a pre-stretched allocation.
+Yet \CC-fresh pays the higher cost because its room to grow for free is at most a constant times the original string's length.
+\item
+To share an individual text allocation across multiple related strings.
+This ability is not applicable to appending with @+=@.
+It is in play in [xref sub-experiment pta] and [xref experiment pbv].
+\item
+To share a text arena across unrelated strings, sourcing disparate allocations from a common place.
+That is, always allocating from a bump pointer, and never maintaining free lists.
+This ability is not relevant to running any append scenario on \CFA with sharing, because appending modifies an existing allocation and is not driving several allocations.
+This ability is assessed in [xref experiment allocn].
+\end{itemize}
+This cost, of slowing down append-with-reuse, is \CFA paying the piper for other scenarios done well.
+\CFA prioritizes the fresh use case because it is more natural.
+The \emph{user-invoked} reuse scheme is an unnatural programming act because it deliberately misuses lexical scope: a variable (@accum@) gets its lifetime extended beyond the scope in which it is used.
+
+A \CFA user needing the best performance on an append scenario can still access the \CC-like speed by invoking noshare.
+This (indirect) resource management is memory-safe, as compared to that required in \CC to use @string&@, where knowledge of another string's lifetime comes into play.
+This abstraction opt-out is also different from invoking the LL API-level option.
+In fact, these considerations are orthogonal.
+But the key difference is that invoking the LL API would be a temporary measure, to use a workaround of a known \CFA language issue; choosing to exempt a string from sharing is a permanent act of program tuning.
+Beyond these comparisons, opting for noshare actually provides program ``eye candy,'' indicating that under-the-hood thinking is becoming relevant here.
+\end{comment}
+
+
+\subsubsection{Misusing Concatenation}
+
+A further pitfall occurs when writing the apparently equivalent @x = x + y@ \vs @x += y@.
 For numeric types, the generated code is equivalent, giving identical performance.
 However, for string types there can be a significant difference.
-This pitfall is a particularly likely hazard for beginners.
+This pitfall is particularly likely for beginners.
 
 In earlier experiments, the choice of \CFA API among HL and LL had no impact on the functionality being tested.
@@ -1708,5 +1742,5 @@
 \end{tabular}
 \end{cquote}
-Note that this ``Goal'' code functions today in HL.
+Note, the goal code functions today in HL but with slower performance.
 
 \begin{figure}
@@ -1714,21 +1748,23 @@
 	\includegraphics{string-graph-pta-sharing.pdf}
 %	\includegraphics[width=\textwidth]{string-graph-pta-sharing.png}
-	\caption{CFA's low overhead for misusing \lstinline{+}.  Average time per iteration with one \lstinline{x += y} invocation (lower is better). Comparing \CFA (having implicit sharing activated) with STL, and comparing the \lstinline{+}-then-\lstinline{=} with the \lstinline{+=} append styles.  The \lstinline{+=} results are repeated from \VRef[Figure]{fig:string-graph-peq-sharing}.}
+	\caption{CFA's low overhead for misusing concatenation.  Average time per iteration with one \lstinline{x += y} invocation (lower is better). Comparing \CFA (having implicit sharing activated) with STL, and comparing the \lstinline{+}-then-\lstinline{=} with the \lstinline{+=} append styles.  The \lstinline{+=} results are repeated from \VRef[Figure]{fig:string-graph-peq-sharing}.}
 	\label{fig:string-graph-pta-sharing}
 \end{figure}
 
-\VRef[Figure]{fig:string-graph-pta-sharing} gives the outcome.  The STL's penalty is $8 \times$ while \CFA's is only $2 \times$, averaged across the cases shown here.
+\VRef[Figure]{fig:string-graph-pta-sharing} gives the outcome, where the Y-axis is log scale because of the large differences.
+The STL's penalty is $8 \times$ while \CFA's is only $2 \times$, averaged across the cases shown here.
 Moreover, the STL's gap increases with string size, while \CFA's converges.
 So again, \CFA helps users who just want to treat strings as values, and not think about the resource management under the covers.
 
-While not a design goal, and not graphed out, \CFA in STL-emulation mode heppened to outperform STL in this case.  User-managed allocation reuse did not affect either implementation in this case; only ``fresh'' results are shown.
-
-
-\subsection{Test: Pass argument}
+While not a design goal, and not graphed, \CFA in STL-emulation mode outperformed STL in this case.
+User-managed allocation reuse did not affect either implementation in this case; only ``fresh'' results are shown.
+
+
+\subsection{Test: Pass Argument}
 
 STL has a penalty for passing a string by value, which forces users to think about memory management when communicating values with a function.
-The key \CFA value-add is that a user can think of a string simply as a value; this test shows that \CC charges a stiff penalty for thining this way, while \CFA does not.
+The key \CFA value-add is that a user can think of a string simply as a value; this test shows that \CC charges a stiff penalty for thinking this way, while \CFA does not.
 This test illustrates a main advantage of the \CFA sharing algorithm (in one case).
-It shows STL's small-string optimization providing a successful mitigation (in the other case).
+It shows STL's SSO providing a successful mitigation (in the other case).
 
 The basic operation considered is:
@@ -1749,10 +1785,8 @@
 
 }
-START_TIMER
-for ( i; ... ) {
-	helper( corpus[ f(i) ] ); // imported from char * previously
-	COUNT_ONE_OP_DONE
+for ( ... ) { // loop for duration
+	helper( corpus[ f( i ) ] );
+	count += 1;
 }
-STOP_TIMER
 \end{cfa}
 &
@@ -1761,18 +1795,14 @@
 	string_res q = { qref, COPY_VALUE };
 }
-// rest same, elided
-
-
-
-
-
-\end{cfa}
-\end{tabular}
-\end{cquote}
-The Goal (HL) version gives the simplest sketch of the test harness.
-It uses a single level of looping.
-Each iteration uses a corpus item as the argument to a function call.
+
+
+
+
+\end{cfa}
+\end{tabular}
+\end{cquote}
+The goal (HL) version gives the modified test harness, with a single loop.
+Each iteration uses a corpus item as the argument to the function call.
 These corpus items were imported to the string heap before beginning the timed run.
-
 
 \begin{figure}
@@ -1781,5 +1811,5 @@
 %	\includegraphics[width=\textwidth]{string-graph-pbv.png}
 	\caption{Average time per iteration (lower is better) with one call to a function that takes a by-value string argument, comparing \CFA (having implicit sharing activated) with STL.
-(a) With \emph{Varying-from-1} corpus construction, in which the STL-only benefit of small-string optimization occurs, in varying degrees, at all string sizes.
+(a) With \emph{Varying-from-1} corpus construction, in which the STL-only benefit of SSO occurs, in varying degrees, at all string sizes.
 (b) With \emph{Fixed-size} corpus construction, in which this benefit applies exactly to strings with length below 16.
 [TODO: show version (b)]}
@@ -1787,13 +1817,12 @@
 \end{figure}
 
-
 \VRef[Figure]{fig:string-graph-pbv} shows the costs for calling a function that receives a string argument by value.
-STL's performance worsens as string length increases, while \CFA has the same performance at all sizes.
-
+STL's performance worsens uniformly as string length increases, while \CFA has the same performance at all sizes.
+However, the STL is better than \CFA until string length 10 because of the SSO.
 While improved, the \CFA cost to pass a string is still nontrivial.
 The contributor is adding and removing the callee's string handle from the global list.
-This cost is $1.5 \times$ to $2 \times$ over STL's when small-string optimization applies, though this cost should be avoidable in the same case, upon a \CFA realization of this optimization.
-At the larger sizes, when STL has to manage storage for the string, STL runs more than $3 \times$ slower, mainly due to time spent in the general-purpose memory allocator.
-\PAB{Need to check that.  Expecting copying to dominate.}
+This cost is $1.5 \times$ to $2 \times$ over STL's when SSO applies, but is avoidable once \CFA realizes this optimization.
+At the larger sizes, the STL runs more than $3 \times$ slower, because it has to allocate/deallocate storage for the parameter and copy the argument string to the parameter.
+If the \CC string is passed by reference, the results are better and flat across string lengths like \CFA.
 
 
@@ -1805,6 +1834,7 @@
 
 A garbage collector, afforded the freedom of managed memory (where it knows about all the pointers and is allowed to modify them), often runs faster than malloc-free in an amortized analysis, even though it must occasionally stop to collect.
-The sppedup happens because GC is able to use its collection time to move objects.
-(In the case of the mini-allocator powering the \CFA string library, objects are runs of text.)  Moving objects lets fresh allocations consume from a large contiguous store of available memory; the ``bump pointer'' bookkeeping for such a scheme is very light.
+The speedup happens because GC is able to use its collection time to move objects.
+(In the case of the mini-allocator powering the \CFA string library, objects are runs of text.)
+Moving objects lets fresh allocations consume from a large contiguous store of available memory; the ``bump pointer'' bookkeeping for such a scheme is very light.
 A malloc-free implementation without the freedom to move objects must, in the general case, allocate in the spaces between existing objects; doing so entails the heavier bookkeeping of maintaining a linked structure of freed allocations and/or coalescing freed allocations.