source: libcfa/src/collections/string_res.hfa@ 2572add

Last change on this file since 2572add was ed5023d1, checked in by Peter A. Buhr <pabuhr@…>, 9 months ago

fix substring error being outside of string, simplify comparison operations, start refactoring string search operations

  • Property mode set to 100644
File size: 12.0 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// string_res -- variable-length, mutable run of text, with resource semantics
8//
9// Author : Michael L. Brooks
10// Created On : Fri Sep 03 11:00:00 2021
11// Last Modified By : Peter A. Buhr
12// Last Modified On : Sun Apr 6 07:35:44 2025
13// Update Count : 70
14//
15
16#pragma once
17
18#include <fstream.hfa>
19#include <string.h> // e.g. strlen
20
21
22//######################### HandleNode #########################
23//private
24
25struct VbyteHeap;
26
// Private bookkeeping node for one run of text: it records where the bytes are (s) and how
// many there are (lnth), and carries forward, backward and upward links.
27struct HandleNode {
28 HandleNode * flink; // forward link to another HandleNode
29 HandleNode * blink; // backward link to another HandleNode
30 VbyteHeap * ulink; // upward link to a VbyteHeap; NOTE(review): presumably the heap that holds s -- confirm
31
32 char * s; // pointer to byte string
33 unsigned int lnth; // length of byte string (declared unsigned int, whereas the public API uses size_t)
34}; // HandleNode
35
// Debugging and tuning hooks for the string heap.  Only the prototypes are visible in this
// header; the per-function notes are read off the names -- TODO confirm against the implementation.
36VbyteHeap * DEBUG_string_heap(); // presumably the heap used for string storage
37size_t DEBUG_string_bytes_in_heap( VbyteHeap * heap ); // presumably the number of bytes currently held in heap
38size_t DEBUG_string_bytes_avail_until_gc( VbyteHeap * heap ); // presumably the bytes still available before a collection
39const char * DEBUG_string_heap_start( VbyteHeap * heap ); // presumably the address where heap's storage begins
40
41void TUNING_set_string_heap_liveness_threshold( double val ); // sets a liveness threshold; units and range of val are not visible here
42
43//######################### String #########################
44
45// A dynamically-sized string
// Resource-style string object: one HandleNode plus the links that chain this string_res to
// other string_res objects (shareSet_prev / shareSet_next).
46struct string_res {
47 HandleNode Handle; // chars, start, end, global neighbours
48 bool shareSet_owns_ulink; // NOTE(review): presumably whether this share-set owns Handle.ulink -- confirm
49 string_res * shareSet_prev; // previous string_res in the share-set chain
50 string_res * shareSet_next; // next string_res in the share-set chain
51};
52
53
54//######################### charclass_res #########################
55
// Character class backed by a single string_res member.
56struct charclass_res {
57 string_res chars; // NOTE(review): presumably the characters belonging to the class -- confirm
58};
59
// Default construction, by-value copy construction and by-value assignment are deleted
// (declared "= void"), so a charclass_res must be built from explicit character data.
60void ?{}( charclass_res & ) = void;
61void ?{}( charclass_res &, charclass_res) = void;
62charclass_res ?=?( charclass_res &, charclass_res) = void;
// Constructors from a string_res, a C string, or a buffer with an explicit size.
63void ?{}( charclass_res &, const string_res & chars);
64void ?{}( charclass_res &, const char * chars );
65void ?{}( charclass_res &, const char * chars, size_t charssize );
// Destructor.
66void ^?{}( charclass_res & );
67
68
69//######################### String #########################
70
71// Getters
// Length of s as a size_t.  NOTE(review): presumably the character count held in s.Handle.lnth -- confirm.
72size_t len( const string_res & s);
73
74// Constructors, Assignment Operators, Destructor
// Default constructor.
75void ?{}(string_res & s); // empty string
// Constructor from a buffer plus an explicit size; the inline C-string and single-char
// constructors in this header both forward to this one.
76void ?{}(string_res & s, const char * buffer, size_t bsize); // copy specific length from buffer
// Construct s from a NUL-terminated C string: measure it with strlen, then forward to the
// (buffer, size) constructor.
static inline void ?{}( string_res & s, const char * rhs ) {
	size_t rhsLen = strlen( rhs );
	?{}( s, rhs, rhsLen );
}
// Construct s as a one-character string: treat c as a buffer of length 1 and forward to the
// (buffer, size) constructor.
static inline void ?{}( string_res & s, char c ) {
	(s){ &c, 1 };
}
83
84// Deleting the copy constructors makes the compiler reject an attempt to call/return by value
// Both copy-constructor forms (const and non-const source) are deleted with "= void"; use the
// constructors that take an explicit StrResInitMode instead.
85void ?{}(string_res & s, const string_res & s2) = void;
86void ?{}(string_res & s, string_res & s2) = void;
87
// Selects how a string_res built from another string_res relates to its source.
// NOTE(review): by the names, COPY_VALUE takes an independent copy and SHARE_EDITS keeps
// edits visible through both strings -- confirm against the implementation.
88enum StrResInitMode { COPY_VALUE, SHARE_EDITS };
// Construct s from part of src: start and len select the range (the whole-string overload in
// this header passes start = 0 and len = len(src)).
89void ?{}(string_res & s, const string_res & src, StrResInitMode, size_t start, size_t len );
// Construct s from the whole of src in the requested mode: the range starts at 0 and spans
// len( src ) characters.
static inline void ?{}( string_res & s, const string_res & src, StrResInitMode mode ) {
	(s){ src, mode, 0, len( src ) };
}
// Construct s from the leading part of src in the requested mode, taking at most maxlen
// characters: the range starts at 0 and its length is the smaller of len( src ) and maxlen.
static inline void ?{}( string_res & s, const string_res & src, StrResInitMode mode, size_t maxlen ) {
	size_t count = len( src );
	if ( count > maxlen ) {
		count = maxlen;
	}
	?{}( s, src, mode, 0, count );
}
// Constructors from numeric values (signed/unsigned size, floating point, complex).
// NOTE(review): presumably s receives the textual form of rhs; the formatting is not visible here -- confirm.
96void ?{}( string_res & s, ssize_t rhs );
97void ?{}( string_res & s, size_t rhs );
98void ?{}( string_res & s, double rhs );
99void ?{}( string_res & s, long double rhs );
100void ?{}( string_res & s, double _Complex rhs );
101void ?{}( string_res & s, long double _Complex rhs );
102
// Length-limited assignment into s.  Both overloads return a string_res reference
// (NOTE(review): presumably s itself -- confirm).
103string_res & assign(string_res & s, const string_res & src, size_t maxlen); // copy specific length from other string
104string_res & assign(string_res & s, const char * buffer, size_t bsize); // copy specific length from buffer
// Assign a NUL-terminated C string to s: measure it with strlen and forward to
// assign( s, buffer, size ), returning whatever assign returns.
static inline string_res & ?=?( string_res & s, const char * c ) {
	size_t cLen = strlen( c );
	return assign( s, c, cLen );
}
// Assignment from another string_res (const and non-const source) and from a single char.
108string_res & ?=?(string_res & s, const string_res & c);
109string_res & ?=?(string_res & s, string_res & c);
110string_res & ?=?(string_res & s, char c);
111
// Assignment from numeric values, mirroring the numeric constructors.
// NOTE(review): presumably s receives the textual form of rhs -- confirm.
112string_res & ?=?( string_res & s, ssize_t rhs );
113string_res & ?=?( string_res & s, size_t rhs );
114string_res & ?=?( string_res & s, double rhs );
115string_res & ?=?( string_res & s, long double rhs );
116string_res & ?=?( string_res & s, double _Complex rhs );
117string_res & ?=?( string_res & s, long double _Complex rhs );
118
// Destructor.
119void ^?{}(string_res & s);
120
121// IO Operator
// Output: the first form returns the stream so that further | operations can be chained; the
// second returns void.  NOTE(review): presumably the void form ends the output expression -- confirm.
122ofstream & ?|?(ofstream & out, const string_res & s);
123void ?|?(ofstream & out, const string_res & s);
// Input: read from in into s and return the stream.
124ifstream & ?|?(ifstream & in, string_res & s);
125
// Input manipulator record: the destination string plus an inlined _Istream_str_base, whose
// fields (scanset, delimiters, wd, flags) are accessed directly through this type by the
// manipulator functions in this header.  The member order matches _Istream_Rstr, and the
// manipulators cast references between the two types.
126struct _Istream_Rwidth {
127 string_res * s; // destination string for the read
128 inline _Istream_str_base;
129}; // _Istream_Rwidth
130
// Manipulator record for quoted input: a wrapper whose only member is an _Istream_Rwidth.
// quoted( _Istream_Rwidth & ) in this header returns its argument cast to this type.
131struct _Istream_Rquoted {
132 // string_res * s;
133 // inline _Istream_str_base;
134 _Istream_Rwidth rstr; // wrapped manipulator; its flags are reached as rstr.flags
135}; // _Istream_Rquoted
136
// General string-input manipulator record.  It has the same members in the same order as
// _Istream_Rwidth; the manipulator functions in this header rely on that when casting an
// _Istream_Rwidth reference or pointer to _Istream_Rstr.
137struct _Istream_Rstr {
138 string_res * s; // destination string for the read
139 inline _Istream_str_base;
140// _Istream_Rwidth rstr;
141}; // _Istream_Rstr
142
// Input manipulators for string_res.  Overloads that take a string_res build a fresh manipulator
// record whose s member is &s.  Overloads that take an existing manipulator update it in place
// and return that same object through a reference cast.
// NOTE(review): wd = -1 is used wherever no width is supplied; presumably it means "no limit" -- confirm.
143static inline {
144 // read width does not include null terminator
	// wdi: record with wd = rwd and flags.rwd set; scanset is the null pointer (0p).
145 _Istream_Rwidth wdi( unsigned int rwd, string_res & s ) { return (_Istream_Rwidth)@{ .s = &s, { {.scanset = 0p}, .wd = rwd, {.flags.rwd = true} } }; }
	// getline( s ): record with delimiters = { delimiter, '\0' }, wd = -1 and flags.delimiter set.
146 _Istream_Rstr getline( string_res & s, const char delimiter = '\n' ) {
147// return (_Istream_Rstr)@{ { .s = &s, { {.delimiters = { delimiter, '\0' } }, .wd = -1, {.flags.delimiter = true} } } };
148 return (_Istream_Rstr)@{ .s = &s, { {.delimiters = { delimiter, '\0' } }, .wd = -1, {.flags.delimiter = true} } };
149 }
	// getline( f ): stores the delimiter in f, sets flags.delimiter, and returns f viewed as an _Istream_Rstr.
150 _Istream_Rstr & getline( _Istream_Rwidth & f, const char delimiter = '\n' ) {
151 f.delimiters[0] = delimiter; f.delimiters[1] = '\0'; f.flags.delimiter = true; return (_Istream_Rstr &)f;
152 }
	// quoted( s ): record with delimiters = { Ldelimiter, Rdelimiter, '\0' }, wd = -1 and flags.rwd set.
	// NOTE(review): flags.rwd is set although no width is given -- confirm this is intended.
153 _Istream_Rquoted quoted( string_res & s, const char Ldelimiter = '\"', const char Rdelimiter = '\0' ) {
154 return (_Istream_Rquoted)@{ { .s = &s, { {.delimiters = { Ldelimiter, Rdelimiter, '\0' }}, .wd = -1, {.flags.rwd = true} } } };
155 }
	// quoted( f ): stores both delimiters in f (no flag is changed) and returns f viewed as an _Istream_Rquoted.
156 _Istream_Rquoted & quoted( _Istream_Rwidth & f, const char Ldelimiter = '"', const char Rdelimiter = '\0' ) {
157 f.delimiters[0] = Ldelimiter; f.delimiters[1] = Rdelimiter; f.delimiters[2] = '\0';
158 return (_Istream_Rquoted &)f;
159 }
	// incl / excl: install scanset; incl sets flags.inex to false, excl sets it to true.
160 _Istream_Rstr incl( const char scanset[], string_res & s ) { return (_Istream_Rstr)@{ .s = &s, { {.scanset = scanset}, .wd = -1, {.flags.inex = false} } }; }
161 _Istream_Rstr & incl( const char scanset[], _Istream_Rwidth & f ) { f.scanset = scanset; f.flags.inex = false; return (_Istream_Rstr &)f; }
162 _Istream_Rstr excl( const char scanset[], string_res & s ) { return (_Istream_Rstr)@{ .s = &s, { {.scanset = scanset}, .wd = -1, {.flags.inex = true} } }; }
163 _Istream_Rstr & excl( const char scanset[], _Istream_Rwidth & f ) { f.scanset = scanset; f.flags.inex = true; return (_Istream_Rstr &)f; }
	// ignore: sets flags.ignore, either on a fresh record for s or on the manipulator passed in.
164 _Istream_Rstr ignore( string_res & s ) { return (_Istream_Rstr)@{ .s = &s, { {.scanset = 0p}, .wd = -1, {.flags.ignore = true} } }; }
165 _Istream_Rstr & ignore( _Istream_Rwidth & f ) { f.flags.ignore = true; return (_Istream_Rstr &)f; }
166 _Istream_Rquoted & ignore( _Istream_Rquoted & f ) { f.rstr.flags.ignore = true; return (_Istream_Rquoted &)f; }
167 _Istream_Rstr & ignore( _Istream_Rstr & f ) { f.flags.ignore = true; return (_Istream_Rstr &)f; }
168} // distribution
// Read from is as directed by a manipulator record (passed by value) and return the stream.
169ifstream & ?|?( ifstream & is, _Istream_Rquoted f );
170ifstream & ?|?( ifstream & is, _Istream_Rstr f );
// Read using a width manipulator: view the _Istream_Rwidth argument as an _Istream_Rstr
// through a pointer cast and forward to the | operator with that view.
static inline ifstream & ?|?( ifstream & is, _Istream_Rwidth f ) {
	_Istream_Rstr * rstr = (_Istream_Rstr *)&f;
	return is | *rstr;
}
172
173// Concatenation
// Append to s from a buffer with an explicit size, or from another string_res limited by
// maxlen.  The += operators and strcat/strncat wrappers in this header forward to these.
174void append( string_res & s, const char * buffer, size_t bsize );
175void append( string_res & s, const string_res & s2, size_t maxlen );
// Append all of s2 to s, passing s2's byte pointer and length to append.
static inline void ?+=?( string_res & s, const string_res & s2 ) {
	append( s, s2.Handle.s, s2.Handle.lnth );
}

// Append the single character c to s, as a buffer of length 1.
static inline void ?+=?( string_res & s, char c ) {
	append( s, &c, 1 );
}

// Append the NUL-terminated C string c to s; its length is measured with strlen.
static inline void ?+=?( string_res & s, const char * c ) {
	size_t cLen = strlen( c );
	append( s, c, cLen );
}
// strcat: append all of s2 (or of the C string c) to s with +=, then return s.
static inline string_res & strcat( string_res & s, const string_res & s2 ) {
	?+=?( s, s2 );
	return s;
}

static inline string_res & strcat( string_res & s, const char * c ) {
	?+=?( s, c );
	return s;
}

// strncat: length-limited append via append, then return s.
static inline string_res & strncat( string_res & s, const string_res & s2, size_t maxlen ) {
	append( s, s2, maxlen );
	return s;
}

static inline string_res & strncat( string_res & s, const char * buffer, size_t bsize ) {
	append( s, buffer, bsize );
	return s;
}
183
184// Repetition
// Repetition in place.  NOTE(review): presumably s becomes factor copies of its previous
// value; the result for factor == 0 is not visible here -- confirm.
185void ?*=?(string_res & s, size_t factor);
186
187// Character access
// Store val at position index.  NOTE(review): s is declared const although the name implies
// a write into the string; bounds behaviour is not visible here -- confirm.
188void assignAt( const string_res & s, size_t index, char val);
// Read the character at position index; the result is returned by value.
189char ?[?]( const string_res & s, size_t index); // Mike changed to ret by val from Sunjay's ref, to match Peter's
190//char codePointAt( const string_res & s, size_t index); // revisit under Unicode
191
192// Comparisons
// Comparison primitive over two (pointer, length) pairs.  Every comparison operator in this
// header tests its result against 0.  NOTE(review): presumably negative / zero / positive in
// the manner of C strcmp -- confirm.
193int strcmp$( const char * s1, size_t l1, const char * s2, size_t l2 );
194
// string_res vs string_res: strcmp passes both (pointer, length) pairs to strcmp$.
static inline int strcmp( const string_res & s1, const string_res & s2 ) {
	return strcmp$( s1.Handle.s, s1.Handle.lnth, s2.Handle.s, s2.Handle.lnth );
}

// Relational operators: each one tests the result of strcmp( s1, s2 ) against 0.
static inline bool ?==?( const string_res & s1, const string_res & s2 ) {
	int order = strcmp( s1, s2 );
	return order == 0;
}

static inline bool ?!=?( const string_res & s1, const string_res & s2 ) {
	int order = strcmp( s1, s2 );
	return order != 0;
}

static inline bool ?>?( const string_res & s1, const string_res & s2 ) {
	int order = strcmp( s1, s2 );
	return order > 0;
}

static inline bool ?>=?( const string_res & s1, const string_res & s2 ) {
	int order = strcmp( s1, s2 );
	return order >= 0;
}

static inline bool ?<=?( const string_res & s1, const string_res & s2 ) {
	int order = strcmp( s1, s2 );
	return order <= 0;
}

static inline bool ?<?( const string_res & s1, const string_res & s2 ) {
	int order = strcmp( s1, s2 );
	return order < 0;
}
202
// string_res vs C string: the right operand's length is measured with strlen before both
// (pointer, length) pairs are passed to strcmp$.
static inline int strcmp( const string_res & s1, const char * s2 ) {
	size_t s2Len = strlen( s2 );
	return strcmp$( s1.Handle.s, s1.Handle.lnth, s2, s2Len );
}

// Relational operators: each one tests the result of strcmp( s1, s2 ) against 0.
static inline bool ?==?( const string_res & s1, const char * s2 ) {
	int order = strcmp( s1, s2 );
	return order == 0;
}

static inline bool ?!=?( const string_res & s1, const char * s2 ) {
	int order = strcmp( s1, s2 );
	return order != 0;
}

static inline bool ?>?( const string_res & s1, const char * s2 ) {
	int order = strcmp( s1, s2 );
	return order > 0;
}

static inline bool ?>=?( const string_res & s1, const char * s2 ) {
	int order = strcmp( s1, s2 );
	return order >= 0;
}

static inline bool ?<=?( const string_res & s1, const char * s2 ) {
	int order = strcmp( s1, s2 );
	return order <= 0;
}

static inline bool ?<?( const string_res & s1, const char * s2 ) {
	int order = strcmp( s1, s2 );
	return order < 0;
}
210
// C string vs string_res: the left operand's length is measured with strlen before both
// (pointer, length) pairs are passed to strcmp$.
static inline int strcmp( const char * s1, const string_res & s2 ) {
	size_t s1Len = strlen( s1 );
	return strcmp$( s1, s1Len, s2.Handle.s, s2.Handle.lnth );
}

// Relational operators: each one tests the result of strcmp( s1, s2 ) against 0.
static inline bool ?==?( const char * s1, const string_res & s2 ) {
	int order = strcmp( s1, s2 );
	return order == 0;
}

static inline bool ?!=?( const char * s1, const string_res & s2 ) {
	int order = strcmp( s1, s2 );
	return order != 0;
}

static inline bool ?>?( const char * s1, const string_res & s2 ) {
	int order = strcmp( s1, s2 );
	return order > 0;
}

static inline bool ?>=?( const char * s1, const string_res & s2 ) {
	int order = strcmp( s1, s2 );
	return order >= 0;
}

static inline bool ?<=?( const char * s1, const string_res & s2 ) {
	int order = strcmp( s1, s2 );
	return order <= 0;
}

static inline bool ?<?( const char * s1, const string_res & s2 ) {
	int order = strcmp( s1, s2 );
	return order < 0;
}
218
219// String search
// Membership test for one character.
220bool contains( const string_res & s, char ch); // single character
221
// find: search s for a character, another string_res, a C string, or a buffer with an
// explicit size.  NOTE(review): the int result is presumably a position within s; the value
// returned when nothing is found is not visible in this header -- confirm.
222int find( const string_res & s, char search);
223int find( const string_res & s, const string_res & search);
224int find( const string_res & s, const char * search);
225int find( const string_res & s, const char * search, size_t searchsize);
226
// findFrom: same overload set as find, with an extra fromPos argument.
// NOTE(review): presumably the search begins at fromPos -- confirm.
227int findFrom( const string_res & s, size_t fromPos, char search);
228int findFrom( const string_res & s, size_t fromPos, const string_res & search);
229int findFrom( const string_res & s, size_t fromPos, const char * search);
230int findFrom( const string_res & s, size_t fromPos, const char * search, size_t searchsize);
231
// Boolean substring tests; each comes in string_res, C-string and (buffer, size) forms.
// NOTE(review): by the names, includes tests for search anywhere in s, startsWith at the
// beginning and endsWith at the end; behaviour for an empty argument is not visible here -- confirm.
232bool includes( const string_res & s, const string_res & search);
233bool includes( const string_res & s, const char * search);
234bool includes( const string_res & s, const char * search, size_t searchsize);
235
236bool startsWith( const string_res & s, const string_res & prefix);
237bool startsWith( const string_res & s, const char * prefix);
238bool startsWith( const string_res & s, const char * prefix, size_t prefixsize);
239
240bool endsWith( const string_res & s, const string_res & suffix);
241bool endsWith( const string_res & s, const char * suffix);
242bool endsWith( const string_res & s, const char * suffix, size_t suffixsize);
243
// Character-class scans over s using mask.  NOTE(review): the meaning of the int result
// (a length or a position) is not visible in this header -- confirm.
244int include( const string_res & s, const charclass_res & mask);
245int exclude( const string_res & s, const charclass_res & mask);
246
247// Modifiers
// Padding modifiers, each with and without an explicit padding character.
// NOTE(review): presumably s is padded at its start or end up to length n, and the two-argument
// forms use a default padding character that is not visible here -- confirm.
248void padStart(string_res & s, size_t n);
249void padStart(string_res & s, size_t n, char padding);
250void padEnd(string_res & s, size_t n);
251void padEnd(string_res &s, size_t n, char padding);
252
Note: See TracBrowser for help on using the repository browser.