Context Navigation

-              rd914750
+              r931f1b4
 \section{String Operations}
+% https://en.wikipedia.org/wiki/Comparison_of_programming_languages_(string_functions)
 \VRef[Figure]{f:StrApiCompare} shows a general comparison of string APIs for C, \CC, Java and \CFA.
 …
 @strlen@                                & @length@, @size@              & @length@                      & @size@        \\
 @[ ]@                                   & @[ ]@                                 & @charAt@          & @[ ]@     \\
 @strncpy@                               & @substr@                              & @substring@       & @( )@     \\
 @strncpy@                               & @replace@                             & @replace@         & @=@ \emph{(on a substring)}\\
+@strncpy@                               & @substr@                              & @substring@       & @( )@ RHS @=@     \\
+@strncpy@                               & @replace@                             & @replace@         & @( )@ LHS @=@ \\
 @strstr@                                & @find@                                & @indexOf@         & @find@ \\
 @strcspn@                               & @find_first_of@               & @matches@         & @include@ \\
 …
 \begin{cquote}
 \sf
 \begin{tabular}{@{}rrrrrl@{}}
 \small\tt a & \small\tt b & \small\tt c & \small\tt d & \small\tt e \\
+\begin{tabular}{@{}rrrrll@{}}
+\small\tt "a & \small\tt b & \small\tt c & \small\tt d & \small\tt e" \\
 & 1 & 2 & 3 & 4 & left to right index \\
 -5 & -4 & -3 & -2 & -1 & right to left index
 …
 \begin{cfa}
 #include @<string.hfa>@
 @string@ s = "abcde", name = "MIKE", digit, alpha, punctuation, ifstmt;
+@string@ s = "abcde", name = "MIKE", digit = "0123456789";
 const char cs[] = "abc";
 int i;
+digit  = "0123456789";
+punctuation = "().,";
+ifstmt = "IF (A > B) {";
+\end{cfa}
+Note the include file @string.hfa@, which is required to access type @string@.
+\end{cfa}
+Note the include file @<string.hfa>@, which is required to access type @string@.
 …
 Extending the pattern to a regular expression is a possible extension.
+\subsection{Searching}
+The @index@ operation
+\begin{cfa}
+int index( const string & key, int start = 1, occurrence occ = first );
+\end{cfa}
+returns the position of the first or last occurrence of the @key@ (depending on the occurrence indicator @occ@ that is either @first@ or @last@) in the current string starting the search at position @start@.
+If the @key@ does not appear in the current string, the length of the current string plus one is returned.
+%If the @key@ has zero length, the value 1 is returned regardless of what the current string contains.
+A negative starting position is a specification from the right end of the string.
+The replace operation returns a string in which all occurrences of a substring are replaced by another string.
 \begin{cquote}
 \setlength{\tabcolsep}{15pt}
 \begin{tabular}{@{}l|l@{}}
 \begin{cfa}
+i = find( digit, "567" );
+i = find( digit, "567", 7 );
+i = digit.index( "567", -1, last );
+i = name.index( "E", 5, last );
+\end{cfa}
+&
+\begin{cfa}
+s = replace( "PETER", "E", "XX" );
+s = replace( "PETER", "ET", "XX" );
+s = replace( "PETER", "W", "XX" );
+\end{cfa}
+&
+\begin{cfa}
+"PXXTXXR"
+"PXXER"
+"PETER"
+\end{cfa}
+\end{tabular}
+\end{cquote}
+The replacement is done left-to-right and substituted text is not examined for replacement.
+\subsection{Searching}
+The find operation returns the position of the first occurrence of a key string in a string.
+If the key does not appear in the current string, the length of the current string plus one is returned.
+\begin{cquote}
+\setlength{\tabcolsep}{15pt}
+\begin{tabular}{@{}l|l@{}}
+\begin{cfa}
+i = find( digit, '3' );
+i = "45" ^ digit; // python style "45" in digit
+string x = "567";
+i = find( digit, x );
+\end{cfa}
+&
+\begin{cfa}
+\end{cfa}
+\end{tabular}
+\end{cquote}
+The next two string operations test a string to see if it is or is not composed completely of a particular class of characters.
+For example, are the characters of a string all alphabetic or all numeric?
+Using these operations is a two-step process.
+First, it is necessary to create an instance of type @strmask@ and initialize it to a string containing the characters of the particular character class, as in:
+\begin{cfa}
+strmask digitmask = digit;
+strmask alphamask = string( "abcdefghijklmnopqrstuvwxyz" );
+\end{cfa}
+Second, the character mask is used in the functions @include@ and @exclude@ to check a string for compliance of its characters with the characters indicated by the mask.
+The @include@ operation
+\begin{cfa}
+int include( const strmask &, int = 1, occurrence occ = first );
+\end{cfa}
+returns the position of the first or last character (depending on the occurrence indicator, which is either @first@ or @last@) in the current string that does not appear in the @mask@ starting the search at position @start@;
+hence it skips over characters in the current string that are included (in) the @mask@.
+The characters in the current string do not have to be in the same order as the @mask@.
+If all the characters in the current string appear in the @mask@, the length of the current string plus one is returned, regardless of which occurrence is being searched for.
+A negative starting position is a specification from the right end of the string.
+\begin{cfa}
+i = name.include( digitmask );          $\C{// i is assigned 1}$
+i = name.include( alphamask );          $\C{// i is assigned 6}$
+\end{cfa}
+The @exclude@ operation
+\begin{cfa}
+int exclude( string &mask, int start = 1, occurrence occ = first )
+\end{cfa}
+returns the position of the first or last character (depending on the occurrence indicator, which is either @first@ or @last@) in the current string that does appear in the @mask@ string starting the search at position @start@;
+hence it skips over characters in the current string that are excluded from (not in) the @mask@ string.
+The characters in the current string do not have to be in the same order as the @mask@ string.
+If all the characters in the current string do NOT appear in the @mask@ string, the length of the current string plus one is returned, regardless of which occurrence is being searched for.
+A negative starting position is a specification from the right end of the string.
+\begin{cfa}
+i = name.exclude( digitmask );          $\C{// i is assigned 5}$
+i = ifstmt.exclude( strmask( punctuation ) ); $\C{// i is assigned 4}$
+\end{cfa}
+The @includeStr@ operation:
+\begin{cfa}
+string includeStr( strmask &mask, int start = 1, occurrence occ = first )
+\end{cfa}
+returns the longest substring of leading or trailing characters (depending on the occurrence indicator, which is either @first@ or @last@) of the current string that ARE included in the @mask@ string starting the search at position @start@.
+A negative starting position is a specification from the right end of the string.
+\begin{cfa}
+s = name.includeStr( alphamask );       $\C{// s is assigned "MIKE"}$
+s = ifstmt.includeStr( alphamask );     $\C{// s is assigned "IF"}$
+s = name.includeStr( digitmask );       $\C{// s is assigned ""}$
+\end{cfa}
+The @excludeStr@ operation:
+\begin{cfa}
+string excludeStr( strmask &mask, int start = 1, occurrence = first )
+\end{cfa}
+returns the longest substring of leading or trailing characters (depending on the occurrence indicator, which is either @first@ or @last@) of the current string that are excluded from (NOT in) the @mask@ string starting the search at position @start@.
+A negative starting position is a specification from the right end of the string.
+\begin{cfa}
+s = name.excludeStr( digitmask);        $\C{// s is assigned "MIKE"}$
+s = ifstmt.excludeStr( strmask( punctuation ) ); $\C{// s is assigned "IF "}$
+s = name.excludeStr( alphamask);        $\C{// s is assigned ""}$
+\end{cfa}
+\subsection{Miscellaneous}
+The @trim@ operation
+\begin{cfa}
+string trim( string &mask, occurrence occ = first )
+\end{cfa}
+returns a string in which the longest substring of leading or trailing characters (depending on the occurrence indicator, which is either @first@ or @last@) that ARE included in the @mask@ is removed.
+\begin{cfa}
+// remove leading blanks
+s = string( "   ABC" ).trim( " " );     $\C{// s is assigned "ABC",}$
+// remove trailing blanks
+s = string( "ABC   " ).trim( " ", last ); $\C{// s is assigned "ABC",}$
+\end{cfa}
+The @translate@ operation
+\begin{cfa}
+string translate( string &from, string &to )
+\end{cfa}
+returns a string that is the same length as the original string in which all occurrences of the characters that appear in the @from@ string have been translated into the corresponding characters in the @to@ string.
+Translation is done on a character-by-character basis between the @from@ and @to@ strings; hence these two strings must be the same length.
+If a character in the original string does not appear in the @from@ string, then it simply appears as is in the resulting string.
+\begin{cfa}
+// upper to lower case
+name = name.translate( "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz" );
+                        // name is assigned "mike"
+s = ifstmt.translate( "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz" );
+                        // s is assigned "if (a > b) {"
+// lower to upper case
+name = name.translate( "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );
+                        // name is assigned "MIKE"
+\end{cfa}
+The @replace@ operation
+\begin{cfa}
+string replace( string &from, string &to )
+\end{cfa}
+returns a string in which all occurrences of the @from@ string in the current string have been replaced by the @to@ string.
+\begin{cfa}
+s = name.replace( "E", "XX" );          $\C{// s is assigned "MIKXX"}$
+\end{cfa}
+The replacement is done left-to-right.
+When an instance of the @from@ string is found and changed to the @to@ string, it is NOT examined again for further replacement.
+\end{cfa}
+\end{tabular}
+\end{cquote}
+The character-class operations indicate whether a string is composed completely of a particular class of characters, \eg, alphabetic, numeric, vowels, \etc.
+\begin{cquote}
+\setlength{\tabcolsep}{15pt}
+\begin{tabular}{@{}l|l@{}}
+\begin{cfa}
+charclass vowels{ "aeiouy" };
+i = include( "aaeiuyoo", vowels );
+i = include( "aabiuyoo", vowels );
+\end{cfa}
+&
+\begin{cfa}
+  // compliant
+  // b non-compliant
+\end{cfa}
+\end{tabular}
+\end{cquote}
+@vowels@ defines a character class and function @include@ checks if all characters in the string are included in the class (compliance).
+The position of the last character plus 1 is returned if the string is compliant; otherwise, the position of the first non-compliant character is returned.
+There is no relationship between the order of characters in the two strings.
+Function @exclude@ is the reverse of @include@, checking if all characters in the string are excluded from the class (compliance).
+\begin{cquote}
+\setlength{\tabcolsep}{15pt}
+\begin{tabular}{@{}l|l@{}}
+\begin{cfa}
+i = exclude( "cdbfghmk", vowels );
+i = exclude( "cdyfghmk", vowels );
+\end{cfa}
+&
+\begin{cfa}
+  // compliant
+  // y non-compliant
+\end{cfa}
+\end{tabular}
+\end{cquote}
+Both forms can return the longest substring of compliant characters.
+\begin{cquote}
+\setlength{\tabcolsep}{15pt}
+\begin{tabular}{@{}l|l@{}}
+\begin{cfa}
+s = include( "aaeiuyoo", vowels );
+s = include( "aabiuyoo", vowels );
+s = exclude( "cdbfghmk", vowels );
+s = exclude( "cdyfghmk", vowels );
+\end{cfa}
+&
+\begin{cfa}
+"aaeiuyoo"
+"aa"
+"cdbfghmk"
+"cd"
+\end{cfa}
+\end{tabular}
+\end{cquote}
+The test operation checks if each character in a string is in one of the C character classes.
+\begin{cquote}
+\setlength{\tabcolsep}{15pt}
+\begin{tabular}{@{}l|l@{}}
+\begin{cfa}
+i = test( "1FeC34aB", @isxdigit@ );
+i = test( ".,;'!\"", @ispunct@ );
+i = test( "XXXx", @isupper@ );
+\end{cfa}
+&
+\begin{cfa}
+   // compliant
+   // compliant
+   // non-compliant
+\end{cfa}
+\end{tabular}
+\end{cquote}
+The position of the last character plus 1 is returned if the string is compliant; otherwise, the position of the first non-compliant character is returned.
+Combining substring and search allows actions like trimming whitespace from the start of a line.
+\begin{cquote}
+\setlength{\tabcolsep}{15pt}
+\begin{tabular}{@{}l|l@{}}
+\begin{cfa}
+string line = "  \t  xxx yyy zzz";
+string trim = line( test( line, isspace ) );
+\end{cfa}
+&
+\begin{cfa}
+"xxx yyy zzz"
+\end{cfa}
+\end{tabular}
+\end{cquote}
+The translate operation returns a string with each character transformed by one of the C character transformation functions.
+\begin{cquote}
+\setlength{\tabcolsep}{15pt}
+\begin{tabular}{@{}l|l@{}}
+\begin{cfa}
+s = translate( "abc", @toupper@ );
+s = translate( "ABC", @tolower@ );
+int tospace( int c ) { return isspace( c ) ? ' ' : c; }
+s = translate( "X X\tX\nX", @tospace@ );
+\end{cfa}
+&
+\begin{cfa}
+"ABC"
+"abc"
+"X X X X"
+\end{cfa}
+\end{tabular}
+\end{cquote}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 931f1b4 for doc/theses/mike_brooks_MMath/string.tex

Legend:

doc/theses/mike_brooks_MMath/string.tex

Download in other formats: