[f450f2f] | 1 | // |
---|
| 2 | // Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo |
---|
| 3 | // |
---|
| 4 | // The contents of this file are covered under the licence agreement in the |
---|
| 5 | // file "LICENCE" distributed with Cforall. |
---|
| 6 | // |
---|
| 7 | // string_res -- variable-length, mutable run of text, with resource semantics |
---|
| 8 | // |
---|
| 9 | // Author : Michael L. Brooks |
---|
| 10 | // Created On : Fri Sep 03 11:00:00 2021 |
---|
[9ca5e56] | 11 | // Last Modified By : Peter A. Buhr |
---|
[211def2] | 12 | // Last Modified On : Wed Feb 7 21:24:40 2024 |
---|
| 13 | // Update Count : 59 |
---|
[f450f2f] | 14 | // |
---|
| 15 | |
---|
| 16 | #pragma once |
---|
| 17 | |
---|
| 18 | #include <fstream.hfa> |
---|
[4e8df745] | 19 | #include <string.h> // e.g. strlen |
---|
[f450f2f] | 20 | |
---|
| 21 | |
---|
| 22 | //######################### HandleNode ######################### |
---|
| 23 | //private |
---|
| 24 | |
---|
| 25 | struct VbyteHeap; /* forward declaration: this header only ever uses pointers to VbyteHeap */ |
---|
| 26 | /* HandleNode: link pointers plus the (pointer, length) description of one byte string. */ |
---|
| 27 | struct HandleNode { |
---|
| 28 | HandleNode *flink; // forward link (to another HandleNode) |
---|
| 29 | HandleNode *blink; // backward link (to another HandleNode) |
---|
[0f781fb8] | 30 | VbyteHeap *ulink; // upward link, to a VbyteHeap (presumably the heap holding the bytes -- confirm) |
---|
[f450f2f] | 31 | |
---|
[9ca5e56] | 32 | char *s; // pointer to byte string |
---|
[f450f2f] | 33 | unsigned int lnth; // length of byte string, kept explicitly alongside s |
---|
| 34 | }; // HandleNode |
---|
| 35 | /* Debug and tuning hooks, declared only. The per-function notes are inferred from the names -- confirm against the implementation. */ |
---|
[0f781fb8] | 36 | VbyteHeap * DEBUG_string_heap(); /* presumably the heap currently used for string storage */ |
---|
[7b0e8b7] | 37 | size_t DEBUG_string_bytes_in_heap( VbyteHeap * heap ); /* presumably the number of bytes in use in heap */ |
---|
[6cc87c0] | 38 | size_t DEBUG_string_bytes_avail_until_gc( VbyteHeap * heap ); /* presumably the bytes still free before a garbage collection is triggered */ |
---|
| 39 | const char * DEBUG_string_heap_start( VbyteHeap * heap ); /* presumably the address of the first byte of heap's storage */ |
---|
| 40 | |
---|
[08ed947] | 41 | void TUNING_set_string_heap_liveness_threshold( double val ); /* sets a heap "liveness threshold" to val; units and valid range are not visible here -- TODO confirm */ |
---|
[f450f2f] | 42 | |
---|
| 43 | //######################### String ######################### |
---|
| 44 | |
---|
| 45 | // A dynamically-sized string with resource semantics: its copy constructors are deleted further down, so it cannot be passed or returned by value |
---|
| 46 | struct string_res { |
---|
| 47 | HandleNode Handle; // chars, start, end, global neighbours |
---|
[804bf677] | 48 | bool shareEditSet_owns_ulink; /* NOTE(review): presumably true when this share-edit set owns the heap at Handle.ulink -- confirm */ |
---|
[f450f2f] | 49 | string_res * shareEditSet_prev; /* previous string_res in this string's share-edit set (presumably strings built with SHARE_EDITS -- confirm) */ |
---|
| 50 | string_res * shareEditSet_next; /* next string_res in this string's share-edit set */ |
---|
| 51 | }; |
---|
| 52 | |
---|
| 53 | |
---|
| 54 | //######################### charclass_res ######################### |
---|
| 55 | /* Character class: wraps a string_res named chars; passed as the mask argument of include() and exclude(). */ |
---|
| 56 | struct charclass_res { |
---|
| 57 | string_res chars; /* presumably the member characters of the class -- confirm in the implementation */ |
---|
| 58 | }; |
---|
| 59 | /* charclass_res lifecycle. "= void" deletes a function: no default construction, no copy construction, no assignment. */ |
---|
| 60 | void ?{}( charclass_res & ) = void; |
---|
| 61 | void ?{}( charclass_res &, charclass_res) = void; |
---|
| 62 | charclass_res ?=?( charclass_res &, charclass_res) = void; |
---|
| 63 | void ?{}( charclass_res &, const string_res & chars); /* build from a string_res */ |
---|
| 64 | void ?{}( charclass_res &, const char * chars ); /* build from a C string (presumably NUL-terminated, since no length is passed) */ |
---|
| 65 | void ?{}( charclass_res &, const char * chars, size_t charssize ); /* build from a buffer with explicit size charssize */ |
---|
| 66 | void ^?{}( charclass_res & ); /* destructor */ |
---|
| 67 | |
---|
| 68 | |
---|
| 69 | //######################### String ######################### |
---|
| 70 | |
---|
| 71 | // Getters |
---|
[681e12f] | 72 | size_t size(const string_res & s); /* length of s; the inline constructors below use it to bound copies (presumably Handle.lnth -- confirm) */ |
---|
[f450f2f] | 73 | |
---|
| 74 | // Constructors, Assignment Operators, Destructor |
---|
[681e12f] | 75 | void ?{}(string_res & s); // construct s as the empty string |
---|
| 76 | void ?{}(string_res & s, const char * buffer, size_t bsize); // construct s by copying a specific length (bsize) from buffer |
---|
| 77 | static inline void ?{}( string_res & s, const char * rhs ) { // construct from a NUL-terminated C string; the length is taken with strlen |
---|
[4e8df745] | 78 | ?{}( s, rhs, strlen( rhs ) ); // delegate to the (buffer, bsize) constructor |
---|
| 79 | } // ?{} |
---|
[7abc3de] | 80 | static inline void ?{}( string_res & s, char c ) { // construct s as the one-character string c |
---|
| 81 | (s){ &c, 1 }; // pass the by-value parameter as a buffer of length 1 |
---|
| 82 | } // ?{} |
---|
[f450f2f] | 83 | |
---|
[7abc3de] | 84 | // Deleting the copy constructors ("= void") makes the compiler reject any attempt to pass or return a string_res by value |
---|
[681e12f] | 85 | void ?{}(string_res & s, const string_res & s2) = void; |
---|
| 86 | void ?{}(string_res & s, string_res & s2) = void; /* non-const overload deleted as well */ |
---|
[f450f2f] | 87 | |
---|
| 88 | enum StrResInitMode { COPY_VALUE, SHARE_EDITS }; /* mode selector for constructing from another string_res; presumably COPY_VALUE takes an independent copy and SHARE_EDITS joins src's share-edit set -- confirm */ |
---|
[e8b3717] | 89 | void ?{}(string_res & s, const string_res & src, StrResInitMode, size_t start, size_t len ); /* construct s from src in the given mode; presumably takes len chars of src beginning at index start -- confirm */ |
---|
[681e12f] | 90 | static inline void ?{}( string_res & s, const string_res & src, StrResInitMode mode ) { // construct from all of src in the given mode |
---|
[f450f2f] | 91 | (s){ src, mode, 0, size( src ) }; // start 0, len = whole of src |
---|
| 92 | } // ?{} |
---|
[7abc3de] | 93 | static inline void ?{}( string_res & s, const string_res & src, StrResInitMode mode, size_t maxlen ) { // construct from at most maxlen leading chars of src |
---|
| 94 | (s){ src, mode, 0, (maxlen < size( src )) ? maxlen : size( src ) }; // len = smaller of maxlen and size( src ) |
---|
| 95 | } // ?{} |
---|
[f2898df] | 96 | void ?{}( string_res & s, ssize_t rhs ); /* numeric constructors, one per operand type; presumably s receives the number's text form -- formatting is defined in the implementation */ |
---|
| 97 | void ?{}( string_res & s, size_t rhs ); |
---|
| 98 | void ?{}( string_res & s, double rhs ); |
---|
| 99 | void ?{}( string_res & s, long double rhs ); |
---|
| 100 | void ?{}( string_res & s, double _Complex rhs ); |
---|
| 101 | void ?{}( string_res & s, long double _Complex rhs ); |
---|
[f450f2f] | 102 | |
---|
[e891349] | 103 | string_res & assign(string_res & s, const string_res & src, size_t maxlen); // copy specific length from other string; maxlen is presumably an upper bound on the chars taken from src -- confirm |
---|
[681e12f] | 104 | string_res & assign(string_res & s, const char * buffer, size_t bsize); // copy specific length (bsize) from buffer |
---|
| 105 | static inline string_res & ?=?( string_res & s, const char * c ) { // assign from a NUL-terminated C string |
---|
| 106 | size_t clen = strlen( c ); return assign( s, c, clen ); // measure c, then delegate to the (buffer, bsize) assign |
---|
[4e8df745] | 107 | } // ?=? |
---|
[681e12f] | 108 | string_res & ?=?(string_res & s, const string_res & c); /* assign from another string_res */ |
---|
| 109 | string_res & ?=?(string_res & s, string_res & c); /* non-const overload of the above */ |
---|
| 110 | string_res & ?=?(string_res & s, char c); /* assign a single character */ |
---|
[f450f2f] | 111 | |
---|
[f2898df] | 112 | string_res & ?=?( string_res & s, ssize_t rhs ); /* numeric assignments, same operand types as the numeric constructors; presumably store the number's text form -- confirm */ |
---|
| 113 | string_res & ?=?( string_res & s, size_t rhs ); |
---|
| 114 | string_res & ?=?( string_res & s, double rhs ); |
---|
| 115 | string_res & ?=?( string_res & s, long double rhs ); |
---|
| 116 | string_res & ?=?( string_res & s, double _Complex rhs ); |
---|
| 117 | string_res & ?=?( string_res & s, long double _Complex rhs ); |
---|
| 118 | |
---|
[681e12f] | 119 | void ^?{}(string_res & s); /* destructor for string_res */ |
---|
[f450f2f] | 120 | |
---|
| 121 | // IO Operator |
---|
[681e12f] | 122 | ofstream & ?|?(ofstream & out, const string_res & s); /* write s to out; returns an ofstream & (presumably out, for chaining) */ |
---|
| 123 | void ?|?(ofstream & out, const string_res & s); /* void-returning form of the same write; NOTE(review): presumably used when the expression ends a statement -- confirm */ |
---|
| 124 | ifstream & ?|?(ifstream & in, string_res & s); /* read from in into s; returns an ifstream & (presumably in) */ |
---|
[211def2] | 125 | |
---|
| 126 | struct _Istream_Rwidth { /* input manipulator: a target string plus the scanning options held in _Istream_str_base */ |
---|
| 127 | string_res * s; /* set to the address of the caller's string by the helper functions below */ |
---|
| 128 | inline _Istream_str_base; /* embedded base; its members (scanset, delimiters, wd, flags) are accessed directly on this struct below */ |
---|
| 129 | }; // _Istream_Rwidth |
---|
| 130 | |
---|
| 131 | struct _Istream_Rquoted { /* distinct type for quoted reads; its only member is an _Istream_Rwidth */ |
---|
| 132 | // string_res * s; |
---|
| 133 | // inline _Istream_str_base; |
---|
| 134 | _Istream_Rwidth rstr; /* sole member: quoted() below returns an _Istream_Rwidth & viewed as _Istream_Rquoted &, which relies on this */ |
---|
| 135 | }; // _Istream_Rquoted |
---|
[ff56dd2e] | 136 | |
---|
| 137 | struct _Istream_Rstr { /* same two members, in the same order, as _Istream_Rwidth; the helpers below cast between the two, so keep them in step */ |
---|
| 138 | string_res * s; |
---|
| 139 | inline _Istream_str_base; |
---|
[211def2] | 140 | // _Istream_Rwidth rstr; |
---|
[ff56dd2e] | 141 | }; // _Istream_Rstr |
---|
| 142 | |
---|
| 143 | static inline { /* qualifier distribution: every function inside these braces is declared static inline */ |
---|
| 144 | // read width does not include null terminator |
---|
[211def2] | 145 | _Istream_Rwidth wdi( unsigned int rwd, string_res & s ) { return (_Istream_Rwidth)@{ .s : &s, { {.scanset : 0p}, .wd : rwd, {.flags.rwd : true} } }; } /* width-limited read into s: wd = rwd, null scanset, rwd flag set */ |
---|
[ff56dd2e] | 146 | _Istream_Rstr getline( string_res & s, const char delimiter = '\n' ) { /* delimited read into s: delimiters = { delimiter, '\0' }, wd = -1, delimiter flag set */ |
---|
[211def2] | 147 | // return (_Istream_Rstr)@{ { .s : &s, { {.delimiters : { delimiter, '\0' } }, .wd : -1, {.flags.delimiter : true} } } }; |
---|
| 148 | return (_Istream_Rstr)@{ .s : &s, { {.delimiters : { delimiter, '\0' } }, .wd : -1, {.flags.delimiter : true} } }; |
---|
| 149 | } |
---|
| 150 | _Istream_Rstr & getline( _Istream_Rwidth & f, const char delimiter = '\n' ) { /* add a delimiter to an existing width manipulator; returns f viewed as _Istream_Rstr */ |
---|
| 151 | f.delimiters[0] = delimiter; f.delimiters[1] = '\0'; f.flags.delimiter = true; return (_Istream_Rstr &)f; |
---|
| 152 | } |
---|
| 153 | _Istream_Rquoted quoted( string_res & s, const char Ldelimiter = '\"', const char Rdelimiter = '\0' ) { /* quoted read into s: delimiters = { Ldelimiter, Rdelimiter, '\0' }, wd = -1, rwd flag set */ |
---|
| 154 | return (_Istream_Rquoted)@{ { .s : &s, { {.delimiters : { Ldelimiter, Rdelimiter, '\0' }}, .wd : -1, {.flags.rwd : true} } } }; |
---|
[ff56dd2e] | 155 | } |
---|
[211def2] | 156 | _Istream_Rquoted & quoted( _Istream_Rwidth & f, const char Ldelimiter = '"', const char Rdelimiter = '\0' ) { /* set the quote delimiters on an existing width manipulator; returns f viewed as _Istream_Rquoted */ |
---|
| 157 | f.delimiters[0] = Ldelimiter; f.delimiters[1] = Rdelimiter; f.delimiters[2] = '\0'; |
---|
| 158 | return (_Istream_Rquoted &)f; |
---|
[ff56dd2e] | 159 | } |
---|
[211def2] | 160 | _Istream_Rstr incl( const char scanset[], string_res & s ) { return (_Istream_Rstr)@{ .s : &s, { {.scanset : scanset}, .wd : -1, {.flags.inex : false} } }; } /* scanset read into s with inex = false */ |
---|
| 161 | _Istream_Rstr & incl( const char scanset[], _Istream_Rwidth & f ) { f.scanset = scanset; f.flags.inex = false; return (_Istream_Rstr &)f; } /* same settings applied to an existing width manipulator */ |
---|
| 162 | _Istream_Rstr excl( const char scanset[], string_res & s ) { return (_Istream_Rstr)@{ .s : &s, { {.scanset : scanset}, .wd : -1, {.flags.inex : true} } }; } /* scanset read into s with inex = true */ |
---|
| 163 | _Istream_Rstr & excl( const char scanset[], _Istream_Rwidth & f ) { f.scanset = scanset; f.flags.inex = true; return (_Istream_Rstr &)f; } /* same settings applied to an existing width manipulator */ |
---|
| 164 | _Istream_Rstr ignore( string_res & s ) { return (_Istream_Rstr)@{ .s : &s, { {.scanset : 0p}, .wd : -1, {.flags.ignore : true} } }; } /* manipulator bound to s with the ignore flag set */ |
---|
| 165 | _Istream_Rstr & ignore( _Istream_Rwidth & f ) { f.flags.ignore = true; return (_Istream_Rstr &)f; } /* set the ignore flag on a width manipulator */ |
---|
| 166 | _Istream_Rquoted & ignore( _Istream_Rquoted & f ) { f.rstr.flags.ignore = true; return (_Istream_Rquoted &)f; } /* set the ignore flag on a quoted manipulator (through its rstr member) */ |
---|
| 167 | _Istream_Rstr & ignore( _Istream_Rstr & f ) { f.flags.ignore = true; return (_Istream_Rstr &)f; } /* set the ignore flag on an _Istream_Rstr manipulator */ |
---|
[ff56dd2e] | 168 | } // distribution |
---|
[211def2] | 169 | ifstream & ?|?( ifstream & is, _Istream_Rquoted f ); /* input through a quoted manipulator; declared only, defined elsewhere */ |
---|
[ff56dd2e] | 170 | ifstream & ?|?( ifstream & is, _Istream_Rstr f ); /* input through an _Istream_Rstr manipulator; declared only, defined elsewhere */ |
---|
[211def2] | 171 | static inline ifstream & ?|?( ifstream & is, _Istream_Rwidth f ) { return is | *(_Istream_Rstr *)&f; } /* width manipulator: view f as an _Istream_Rstr (same members) and reuse the overload above */ |
---|
[f450f2f] | 172 | |
---|
| 173 | // Concatenation |
---|
[e891349] | 174 | void ?+=?(string_res & s, const string_res & s2); /* concatenation: append s2 to s */ |
---|
| 175 | void ?+=?(string_res & s, char c); /* append a single character */ |
---|
| 176 | void append(string_res & s, const string_res & s2, size_t maxlen); /* append from s2; maxlen is presumably an upper bound on the chars taken -- confirm */ |
---|
| 177 | void ?+=?(string_res & s, const char * c); /* append a C string (presumably NUL-terminated, since no length is passed) */ |
---|
[681e12f] | 178 | void append(string_res & s, const char * buffer, size_t bsize); /* append from buffer with explicit size bsize */ |
---|
[e891349] | 179 | |
---|
| 180 | static inline string_res & strcat( string_res & s, const string_res & s2 ) { ?+=?( s, s2 ); return s; } // C-style name for s += s2; returns s |
---|
| 181 | static inline string_res & strcat( string_res & s, const char * c ) { ?+=?( s, c ); return s; } // C-style name for s += c; returns s |
---|
| 182 | static inline string_res & strncat( string_res & s, const string_res & s2, size_t maxlen ) { append( s, s2, maxlen ); return s; } // C-style name for append( s, s2, maxlen ); returns s |
---|
| 183 | static inline string_res & strncat( string_res & s, const char * buffer, size_t bsize ) { append( s, buffer, bsize ); return s; } // C-style name for append( s, buffer, bsize ); returns s |
---|
[f450f2f] | 184 | |
---|
[38951c31] | 185 | // Repetition |
---|
| 186 | void ?*=?(string_res & s, size_t factor); /* repetition; presumably replaces s with factor copies of itself -- confirm, including the factor == 0 case */ |
---|
| 187 | |
---|
[f450f2f] | 188 | // Character access |
---|
[681e12f] | 189 | void assignAt(const string_res & s, size_t index, char val); /* presumably stores val at position index of s; NOTE(review): s is taken as const although the name implies a write -- confirm intent */ |
---|
| 190 | char ?[?](const string_res & s, size_t index); // returns the character at index by value (Mike changed this from Sunjay's by-reference return, to match Peter's) |
---|
| 191 | //char codePointAt(const string_res & s, size_t index); // revisit under Unicode |
---|
[f450f2f] | 192 | |
---|
| 193 | // Comparisons |
---|
[681e12f] | 194 | int strcmp (const string_res &, const string_res &); /* three-way compare of two string_res; presumably negative/zero/positive like C strcmp -- confirm */ |
---|
[416b443] | 195 | bool ?==?(const string_res &, const string_res &); /* relational operators on two string_res follow */ |
---|
| 196 | bool ?!=?(const string_res &, const string_res &); |
---|
| 197 | bool ?>? (const string_res &, const string_res &); |
---|
| 198 | bool ?>=?(const string_res &, const string_res &); |
---|
| 199 | bool ?<=?(const string_res &, const string_res &); |
---|
| 200 | bool ?<? (const string_res &, const string_res &); |
---|
| 201 | |
---|
[681e12f] | 202 | int strcmp(const string_res &, const char *); /* same comparison set with a C string on the right (presumably NUL-terminated, since no length is passed) */ |
---|
| 203 | bool ?==?(const string_res &, const char *); |
---|
| 204 | bool ?!=?(const string_res &, const char *); |
---|
| 205 | bool ?>? (const string_res &, const char *); |
---|
| 206 | bool ?>=?(const string_res &, const char *); |
---|
| 207 | bool ?<=?(const string_res &, const char *); |
---|
| 208 | bool ?<? (const string_res &, const char *); |
---|
| 209 | |
---|
| 210 | int strcmp(const char *, const string_res &); /* same comparison set with a C string on the left */ |
---|
| 211 | bool ?==?(const char *, const string_res &); |
---|
| 212 | bool ?!=?(const char *, const string_res &); |
---|
| 213 | bool ?>? (const char *, const string_res &); |
---|
| 214 | bool ?>=?(const char *, const string_res &); |
---|
| 215 | bool ?<=?(const char *, const string_res &); |
---|
| 216 | bool ?<? (const char *, const string_res &); |
---|
[f450f2f] | 217 | |
---|
| 218 | // String search |
---|
[681e12f] | 219 | bool contains(const string_res & s, char ch); // single character: presumably true when ch occurs anywhere in s -- confirm |
---|
[f450f2f] | 220 | |
---|
[681e12f] | 221 | int find(const string_res & s, char search); /* search s for search; presumably returns the position of the first match -- the not-found value is not visible in this header, check the implementation */ |
---|
| 222 | int find(const string_res & s, const string_res & search); |
---|
| 223 | int find(const string_res & s, const char * search); |
---|
| 224 | int find(const string_res & s, const char * search, size_t searchsize); /* buffer form with explicit size searchsize */ |
---|
[f450f2f] | 225 | |
---|
[681e12f] | 226 | int findFrom(const string_res & s, size_t fromPos, char search); /* like find, with a starting position fromPos (presumably the first index examined -- confirm) */ |
---|
| 227 | int findFrom(const string_res & s, size_t fromPos, const string_res & search); |
---|
| 228 | int findFrom(const string_res & s, size_t fromPos, const char * search); |
---|
| 229 | int findFrom(const string_res & s, size_t fromPos, const char * search, size_t searchsize); /* buffer form with explicit size searchsize */ |
---|
[08ed947] | 230 | |
---|
[681e12f] | 231 | bool includes(const string_res & s, const string_res & search); /* presumably true when search occurs as a substring of s -- confirm */ |
---|
| 232 | bool includes(const string_res & s, const char * search); |
---|
| 233 | bool includes(const string_res & s, const char * search, size_t searchsize); /* buffer form with explicit size searchsize */ |
---|
[f450f2f] | 234 | |
---|
[681e12f] | 235 | bool startsWith(const string_res & s, const string_res & prefix); /* presumably true when s begins with prefix -- confirm */ |
---|
| 236 | bool startsWith(const string_res & s, const char * prefix); |
---|
| 237 | bool startsWith(const string_res & s, const char * prefix, size_t prefixsize); /* buffer form with explicit size prefixsize */ |
---|
[f450f2f] | 238 | |
---|
[681e12f] | 239 | bool endsWith(const string_res & s, const string_res & suffix); /* presumably true when s ends with suffix -- confirm */ |
---|
| 240 | bool endsWith(const string_res & s, const char * suffix); |
---|
| 241 | bool endsWith(const string_res & s, const char * suffix, size_t suffixsize); /* buffer form with explicit size suffixsize */ |
---|
[f450f2f] | 242 | |
---|
[681e12f] | 243 | int include(const string_res & s, const charclass_res & mask); /* NOTE(review): presumably the length of the leading run of s whose characters are in mask -- confirm */ |
---|
| 244 | int exclude(const string_res & s, const charclass_res & mask); /* NOTE(review): presumably the length of the leading run of s whose characters are not in mask -- confirm */ |
---|
[f450f2f] | 245 | |
---|
| 246 | // Modifiers |
---|
[681e12f] | 247 | void padStart(string_res & s, size_t n); /* NOTE(review): presumably pads s at the front up to length n with a default pad character -- confirm */ |
---|
| 248 | void padStart(string_res & s, size_t n, char padding); /* as above, with the pad character given explicitly */ |
---|
| 249 | void padEnd(string_res & s, size_t n); /* end-of-string counterpart of padStart( s, n ) */ |
---|
[f450f2f] | 250 | void padEnd(string_res &s, size_t n, char padding); /* end-of-string counterpart of padStart( s, n, padding ) */ |
---|
| 251 | |
---|