//
// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
//
// The contents of this file are covered under the licence agreement in the
// file "LICENCE" distributed with Cforall.
//
// string_res -- variable-length, mutable run of text, with resource semantics
//
// Author           : Michael L. Brooks
// Created On       : Fri Sep 03 11:00:00 2021
// Last Modified By : Peter A. Buhr
// Last Modified On : Wed Feb  7 21:24:40 2024
// Update Count     : 59
//
 | 
|---|
| 15 | 
 | 
|---|
| 16 | #pragma once
 | 
|---|
| 17 | 
 | 
|---|
| 18 | #include <fstream.hfa>
 | 
|---|
| 19 | #include <string.h>    // e.g. strlen
 | 
|---|
| 20 | 
 | 
|---|
| 21 |     
 | 
|---|
//######################### HandleNode #########################
// private

struct VbyteHeap;                       // defined elsewhere; referenced here only through pointers

// One handle onto a run of bytes: the bytes themselves (s, lnth) plus three links
// (two to other HandleNodes, one to a VbyteHeap).
struct HandleNode {
    HandleNode * flink;                 // forward link
    HandleNode * blink;                 // backward link
    VbyteHeap * ulink;                  // upward link

    char * s;                           // pointer to byte string
    unsigned int lnth;                  // length of byte string
}; // HandleNode
 | 
|---|
| 35 | 
 | 
|---|
// Debugging hooks over the string heap.
// NOTE(review): the definitions are not visible in this header; the exact quantities
// reported should be confirmed against the implementation.
VbyteHeap * DEBUG_string_heap();
size_t DEBUG_string_bytes_in_heap( VbyteHeap * heap );
size_t DEBUG_string_bytes_avail_until_gc( VbyteHeap * heap );
const char * DEBUG_string_heap_start( VbyteHeap * heap );

// Tuning hook taking a double threshold; units and valid range are not stated here -- TODO confirm.
void TUNING_set_string_heap_liveness_threshold( double val );
 | 
|---|
| 42 | 
 | 
|---|
//######################### String #########################

// A dynamically-sized string
struct string_res {
    HandleNode Handle;                  // this string's byte run (s, lnth) and its flink/blink/ulink links
    bool shareEditSet_owns_ulink;       // presumably: whether this member owns the heap behind Handle.ulink -- confirm
    string_res * shareEditSet_prev;     // previous string_res in the share-edit set
    string_res * shareEditSet_next;     // next string_res in the share-edit set
};
 | 
|---|
| 52 | 
 | 
|---|
| 53 | 
 | 
|---|
//######################### charclass_res #########################

// A character class, stored as a string_res of its characters.
struct charclass_res {
    string_res chars;
};

// Deleted (= void): no default construction, no by-value copy construction, no assignment.
void ?{}( charclass_res & ) = void;
void ?{}( charclass_res &, charclass_res ) = void;
charclass_res ?=?( charclass_res &, charclass_res ) = void;

// Construct from a string_res, from a C string, or from a buffer with an explicit size.
void ?{}( charclass_res &, const string_res & chars );
void ?{}( charclass_res &, const char * chars );
void ?{}( charclass_res &, const char * chars, size_t charssize );

// Destructor.
void ^?{}( charclass_res & );
 | 
|---|
| 67 | 
 | 
|---|
| 68 | 
 | 
|---|
//######################### String #########################

// Getters
size_t size( const string_res & s );

// Constructors, Assignment Operators, Destructor
void ?{}( string_res & s );                                         // empty string
void ?{}( string_res & s, const char * buffer, size_t bsize );      // copy specific length from buffer
 | 
|---|
// Copy from a NUL-terminated C string (e.g. a string literal).  The length is measured
// with strlen and construction is delegated to the (buffer, bsize) constructor, so rhs
// must be a valid, terminated string.
static inline void ?{}( string_res & s, const char * rhs ) {
    ?{}( s, rhs, strlen( rhs ) );
}
 | 
|---|
// Construct a one-character string: delegates with the address of the by-value
// parameter c and a length of 1.
static inline void ?{}( string_res & s, char c ) {
    (s){ &c, 1 };
}
 | 
|---|
| 83 | 
 | 
|---|
// Deleting the copy constructors makes the compiler reject an attempt to call/return by value
void ?{}( string_res & s, const string_res & s2 ) = void;
void ?{}( string_res & s, string_res & s2 ) = void;

// Copying from another string_res must name a mode explicitly.
// NOTE(review): the precise semantics of each mode are defined by the implementation, not this header.
enum StrResInitMode { COPY_VALUE, SHARE_EDITS };

// Construct s from the portion of src given by start and len, using the requested mode.
void ?{}( string_res & s, const string_res & src, StrResInitMode, size_t start, size_t len );
 | 
|---|
// Construct s from all of src: delegates with start 0 and length size( src ).
static inline void ?{}( string_res & s, const string_res & src, StrResInitMode mode ) {
    (s){ src, mode, 0, size( src ) };
}
 | 
|---|
// Construct s from the leading part of src, at most maxlen characters:
// delegates with start 0 and length min( size( src ), maxlen ).
static inline void ?{}( string_res & s, const string_res & src, StrResInitMode mode, size_t maxlen ) {
    size_t srclen = size( src );
    (s){ src, mode, 0, srclen < maxlen ? srclen : maxlen };
}
 | 
|---|
// Construct from a numeric value.
// NOTE(review): the textual format produced is decided by the implementation -- not visible here.
void ?{}( string_res & s, ssize_t rhs );
void ?{}( string_res & s, size_t rhs );
void ?{}( string_res & s, double rhs );
void ?{}( string_res & s, long double rhs );
void ?{}( string_res & s, double _Complex rhs );
void ?{}( string_res & s, long double _Complex rhs );
 | 
|---|
| 102 | 
 | 
|---|
// Assign a bounded amount of text to s; a string_res & is returned.
string_res & assign( string_res & s, const string_res & src, size_t maxlen );   // copy specific length from other string
string_res & assign( string_res & s, const char * buffer, size_t bsize );       // copy specific length from buffer
 | 
|---|
// Assign from a NUL-terminated C string (e.g. a string literal): measures c with strlen
// and forwards to assign( s, buffer, bsize ).  c must be a valid, terminated string.
static inline string_res & ?=?( string_res & s, const char * c ) {
    size_t len = strlen( c );
    return assign( s, c, len );
}
 | 
|---|
// Assignment from another string_res (const and non-const source) and from a single character.
string_res & ?=?( string_res & s, const string_res & c );
string_res & ?=?( string_res & s, string_res & c );
string_res & ?=?( string_res & s, char c );

// Assignment from a numeric value; mirrors the numeric constructors.
string_res & ?=?( string_res & s, ssize_t rhs );
string_res & ?=?( string_res & s, size_t rhs );
string_res & ?=?( string_res & s, double rhs );
string_res & ?=?( string_res & s, long double rhs );
string_res & ?=?( string_res & s, double _Complex rhs );
string_res & ?=?( string_res & s, long double _Complex rhs );

// Destructor.
void ^?{}( string_res & s );
 | 
|---|
| 120 | 
 | 
|---|
// IO Operator
// Output comes in two forms: one returning the stream, and one returning void.
ofstream & ?|?( ofstream & out, const string_res & s );
void ?|?( ofstream & out, const string_res & s );
// Input into s; returns the stream.
ifstream & ?|?( ifstream & in, string_res & s );
 | 
|---|
| 125 | 
 | 
|---|
// Input-manipulator record: the target string plus the inlined members of _Istream_str_base.
// The manipulators in this header reinterpret an _Istream_Rwidth as _Istream_Rstr or
// _Istream_Rquoted by casting, so the member layout here must stay in step with those types.
struct _Istream_Rwidth {
    string_res * s;                     // string that receives the input
    inline _Istream_str_base;
}; // _Istream_Rwidth
 | 
|---|
| 130 | 
 | 
|---|
// Input-manipulator record for quoted input.  It holds an _Istream_Rwidth as its only member
// rather than repeating "string_res * s; inline _Istream_str_base;" directly.
// quoted( _Istream_Rwidth & ) casts an _Istream_Rwidth to this type, so rstr must remain
// the sole leading member.
struct _Istream_Rquoted {
    _Istream_Rwidth rstr;
}; // _Istream_Rquoted
 | 
|---|
| 136 | 
 | 
|---|
// Input-manipulator record with the same members as _Istream_Rwidth (spelled out here rather
// than held as a nested _Istream_Rwidth).  Code in this header casts _Istream_Rwidth to this
// type, so the two member lists must be kept identical.
struct _Istream_Rstr {
    string_res * s;                     // string that receives the input
    inline _Istream_str_base;
}; // _Istream_Rstr
 | 
|---|
| 142 | 
 | 
|---|
// Input manipulators for string_res.  Overloads taking a string_res & build a fresh record that
// points at s; overloads taking an existing record update it in place and return a reference
// to that same record (reinterpreted where the return type differs).
static inline {
    // Fixed read width rwd into s; read width does not include null terminator.
    _Istream_Rwidth wdi( unsigned int rwd, string_res & s ) {
        return (_Istream_Rwidth)@{ .s : &s, { {.scanset : 0p}, .wd : rwd, {.flags.rwd : true} } };
    }

    // Delimited read into s (default delimiter newline); wd is -1 and the delimiter flag is set.
    _Istream_Rstr getline( string_res & s, const char delimiter = '\n' ) {
        return (_Istream_Rstr)@{ .s : &s, { {.delimiters : { delimiter, '\0' } }, .wd : -1, {.flags.delimiter : true} } };
    }
    // Add a delimiter to an existing width record.
    _Istream_Rstr & getline( _Istream_Rwidth & f, const char delimiter = '\n' ) {
        f.delimiters[0] = delimiter;
        f.delimiters[1] = '\0';
        f.flags.delimiter = true;
        return (_Istream_Rstr &)f;
    }

    // Quoted read into s with left/right delimiter characters (defaults '"' and '\0').
    _Istream_Rquoted quoted( string_res & s, const char Ldelimiter = '"', const char Rdelimiter = '\0' ) {
        return (_Istream_Rquoted)@{ { .s : &s, { {.delimiters : { Ldelimiter, Rdelimiter, '\0' }}, .wd : -1, {.flags.rwd : true} } } };
    }
    // Set the quote delimiters on an existing width record; no flags are changed.
    _Istream_Rquoted & quoted( _Istream_Rwidth & f, const char Ldelimiter = '"', const char Rdelimiter = '\0' ) {
        f.delimiters[0] = Ldelimiter;
        f.delimiters[1] = Rdelimiter;
        f.delimiters[2] = '\0';
        return (_Istream_Rquoted &)f;
    }

    // Scanset reads: incl stores the scanset with flags.inex false, excl with flags.inex true.
    _Istream_Rstr incl( const char scanset[], string_res & s ) {
        return (_Istream_Rstr)@{ .s : &s, { {.scanset : scanset}, .wd : -1, {.flags.inex : false} } };
    }
    _Istream_Rstr & incl( const char scanset[], _Istream_Rwidth & f ) {
        f.scanset = scanset;
        f.flags.inex = false;
        return (_Istream_Rstr &)f;
    }
    _Istream_Rstr excl( const char scanset[], string_res & s ) {
        return (_Istream_Rstr)@{ .s : &s, { {.scanset : scanset}, .wd : -1, {.flags.inex : true} } };
    }
    _Istream_Rstr & excl( const char scanset[], _Istream_Rwidth & f ) {
        f.scanset = scanset;
        f.flags.inex = true;
        return (_Istream_Rstr &)f;
    }

    // Set the ignore flag, either on a fresh record for s or on an existing record.
    _Istream_Rstr ignore( string_res & s ) {
        return (_Istream_Rstr)@{ .s : &s, { {.scanset : 0p}, .wd : -1, {.flags.ignore : true} } };
    }
    _Istream_Rstr & ignore( _Istream_Rwidth & f ) {
        f.flags.ignore = true;
        return (_Istream_Rstr &)f;
    }
    _Istream_Rquoted & ignore( _Istream_Rquoted & f ) {
        f.rstr.flags.ignore = true;
        return f;
    }
    _Istream_Rstr & ignore( _Istream_Rstr & f ) {
        f.flags.ignore = true;
        return f;
    }
} // distribution
 | 
|---|
// Stream input driven by a manipulator record (taken by value); returns the stream.
ifstream & ?|?( ifstream & is, _Istream_Rquoted f );
ifstream & ?|?( ifstream & is, _Istream_Rstr f );
 | 
|---|
// Input through a width record: the local copy f is viewed as an _Istream_Rstr and passed to
// the _Istream_Rstr overload.  This depends on the two structs having the same member layout.
static inline ifstream & ?|?( ifstream & is, _Istream_Rwidth f ) {
    _Istream_Rstr * asRstr = (_Istream_Rstr *)&f;
    return is | *asRstr;
}
 | 
|---|
| 172 | 
 | 
|---|
// Concatenation
// From another string_res: whole (?+=?) or bounded by maxlen (append).
void ?+=?( string_res & s, const string_res & s2 );
void append( string_res & s, const string_res & s2, size_t maxlen );
// From a single character.
void ?+=?( string_res & s, char c );
// From C text: a C string (?+=?) or a buffer with an explicit size (append).
void ?+=?( string_res & s, const char * c );
void append( string_res & s, const char * buffer, size_t bsize );
 | 
|---|
| 179 | 
 | 
|---|
// C-library-style spellings of concatenation: apply ?+=? to s and return s.
static inline string_res & strcat( string_res & s, const string_res & s2 ) {
    ?+=?( s, s2 );
    return s;
}
static inline string_res & strcat( string_res & s, const char * c ) {
    ?+=?( s, c );
    return s;
}
 | 
|---|
// C-library-style spellings of bounded concatenation: forward to append and return s.
static inline string_res & strncat( string_res & s, const string_res & s2, size_t maxlen ) {
    append( s, s2, maxlen );
    return s;
}
static inline string_res & strncat( string_res & s, const char * buffer, size_t bsize ) {
    append( s, buffer, bsize );
    return s;
}
 | 
|---|
| 184 | 
 | 
|---|
// Repetition
void ?*=?( string_res & s, size_t factor );

// Character access
// NOTE(review): assignAt takes s by const reference although its name suggests it writes a
// character -- confirm against the implementation before changing the signature.
void assignAt( const string_res & s, size_t index, char val );
char ?[?]( const string_res & s, size_t index );    // returns the character by value, not by reference
//char codePointAt(const string_res & s, size_t index); // revisit under Unicode
 | 
|---|
| 192 | 
 | 
|---|
// Comparisons
// Three parallel families: string_res/string_res, string_res/C string, C string/string_res.
// NOTE(review): strcmp returns int; presumably with the sign convention of C strcmp -- confirm.

// string_res versus string_res
int strcmp( const string_res &, const string_res & );
bool ?==?( const string_res &, const string_res & );
bool ?!=?( const string_res &, const string_res & );
bool ?>?( const string_res &, const string_res & );
bool ?>=?( const string_res &, const string_res & );
bool ?<=?( const string_res &, const string_res & );
bool ?<?( const string_res &, const string_res & );

// string_res versus C string
int strcmp( const string_res &, const char * );
bool ?==?( const string_res &, const char * );
bool ?!=?( const string_res &, const char * );
bool ?>?( const string_res &, const char * );
bool ?>=?( const string_res &, const char * );
bool ?<=?( const string_res &, const char * );
bool ?<?( const string_res &, const char * );

// C string versus string_res
int strcmp( const char *, const string_res & );
bool ?==?( const char *, const string_res & );
bool ?!=?( const char *, const string_res & );
bool ?>?( const char *, const string_res & );
bool ?>=?( const char *, const string_res & );
bool ?<=?( const char *, const string_res & );
bool ?<?( const char *, const string_res & );
 | 
|---|
| 217 | 
 | 
|---|
// String search
// Most operations accept the search text as a string_res, a C string, or a buffer with an
// explicit size.
// NOTE(review): find/findFrom/include/exclude return int; the value reported when nothing
// matches is not visible in this header -- confirm against the implementation.

bool contains( const string_res & s, char ch );     // single character

int find( const string_res & s, char search );
int find( const string_res & s, const string_res & search );
int find( const string_res & s, const char * search );
int find( const string_res & s, const char * search, size_t searchsize );

// As find, with an additional starting position fromPos.
int findFrom( const string_res & s, size_t fromPos, char search );
int findFrom( const string_res & s, size_t fromPos, const string_res & search );
int findFrom( const string_res & s, size_t fromPos, const char * search );
int findFrom( const string_res & s, size_t fromPos, const char * search, size_t searchsize );

bool includes( const string_res & s, const string_res & search );
bool includes( const string_res & s, const char * search );
bool includes( const string_res & s, const char * search, size_t searchsize );

bool startsWith( const string_res & s, const string_res & prefix );
bool startsWith( const string_res & s, const char * prefix );
bool startsWith( const string_res & s, const char * prefix, size_t prefixsize );

bool endsWith( const string_res & s, const string_res & suffix );
bool endsWith( const string_res & s, const char * suffix );
bool endsWith( const string_res & s, const char * suffix, size_t suffixsize );

// Scans of s against a character class.
int include( const string_res & s, const charclass_res & mask );
int exclude( const string_res & s, const charclass_res & mask );
 | 
|---|
| 245 | 
 | 
|---|
// Modifiers
// Padding at the start or end of s.  The three-argument forms take the padding character;
// the character used by the two-argument forms is chosen by the implementation (not visible here).
void padStart( string_res & s, size_t n );
void padStart( string_res & s, size_t n, char padding );
void padEnd( string_res & s, size_t n );
void padEnd( string_res & s, size_t n, char padding );
 | 
|---|
| 251 | 
 | 
|---|