source: libcfa/src/containers/string_res.cfa@ 6f7aff3

ADT ast-experimental enum pthread-emulation qualifiedEnum
Last change on this file since 6f7aff3 was 6f7aff3, checked in by Michael Brooks <mlbrooks@…>, 4 years ago

String hybrid assignment to unshared now optimizes to overwrite instead of copy.

  • Property mode set to 100644
File size: 37.5 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// string_res -- variable-length, mutable run of text, with resource semantics
8//
9// Author : Michael L. Brooks
10// Created On : Fri Sep 03 11:00:00 2021
11// Last Modified By : Michael L. Brooks
12// Last Modified On : Fri Sep 03 11:00:00 2021
13// Update Count : 1
14//
15
16#include "string_res.hfa"
17#include "string_sharectx.hfa"
18
19#include <stdlib.hfa> // e.g. malloc
20#include <string.h> // e.g. strlen
21#include <assert.h>
22
23//######################### VbyteHeap "header" #########################
24
25#ifdef VbyteDebug
26HandleNode *HeaderPtr;
27#endif // VbyteDebug
28
29struct VbyteHeap {
30
31 int NoOfCompactions; // number of compactions of the byte area
32 int NoOfExtensions; // number of extensions in the size of the byte area
33 int NoOfReductions; // number of reductions in the size of the byte area
34
35 int InitSize; // initial number of bytes in the byte-string area
36 int CurrSize; // current number of bytes in the byte-string area
37 char *StartVbyte; // pointer to the `st byte of the start of the byte-string area
38 char *EndVbyte; // pointer to the next byte after the end of the currently used portion of byte-string area
39 void *ExtVbyte; // pointer to the next byte after the end of the byte-string area
40
41 HandleNode Header; // header node for handle list
42}; // VbyteHeap
43
44
45static inline void compaction( VbyteHeap & ); // compaction of the byte area
46static inline void garbage( VbyteHeap &, int ); // garbage collect the byte area
47static inline void extend( VbyteHeap &, int ); // extend the size of the byte area
48static inline void reduce( VbyteHeap &, int ); // reduce the size of the byte area
49
50static inline void ?{}( VbyteHeap &, size_t = 1000 );
51static inline void ^?{}( VbyteHeap & );
52static inline void ByteCopy( char *, int, int, char *, int, int ); // copy a block of bytes from one location in the heap to another
53static inline int ByteCmp( char *, int, int, char *, int, int ); // compare 2 blocks of bytes
54static inline char *VbyteAlloc( VbyteHeap &, int ); // allocate a block bytes in the heap
55static inline char *VbyteTryAdjustLast( VbyteHeap &, int );
56
57static inline void AddThisAfter( HandleNode &, HandleNode & );
58static inline void DeleteNode( HandleNode & );
59static inline void MoveThisAfter( HandleNode &, const HandleNode & ); // move current handle after parameter handle
60
61
62// Allocate the storage for the variable sized area and intialize the heap variables.
63
64static inline void ?{}( VbyteHeap & this, size_t Size ) with(this) {
65#ifdef VbyteDebug
66 serr | "enter:VbyteHeap::VbyteHeap, this:" | &this | " Size:" | Size;
67#endif // VbyteDebug
68 NoOfCompactions = NoOfExtensions = NoOfReductions = 0;
69 InitSize = CurrSize = Size;
70 StartVbyte = EndVbyte = alloc(CurrSize);
71 ExtVbyte = (void *)( StartVbyte + CurrSize );
72 Header.flink = Header.blink = &Header;
73 Header.ulink = & this;
74#ifdef VbyteDebug
75 HeaderPtr = &Header;
76 serr | "exit:VbyteHeap::VbyteHeap, this:" | &this;
77#endif // VbyteDebug
78} // VbyteHeap
79
80
81// Release the dynamically allocated storage for the byte area.
82
83static inline void ^?{}( VbyteHeap & this ) with(this) {
84 free( StartVbyte );
85} // ~VbyteHeap
86
87
88//######################### HandleNode #########################
89
90
91// Create a handle node. The handle is not linked into the handle list. This is the responsibilitiy of the handle
92// creator.
93
94static inline void ?{}( HandleNode & this ) with(this) {
95#ifdef VbyteDebug
96 serr | "enter:HandleNode::HandleNode, this:" | &this;
97#endif // VbyteDebug
98 s = 0;
99 lnth = 0;
100#ifdef VbyteDebug
101 serr | "exit:HandleNode::HandleNode, this:" | &this;
102#endif // VbyteDebug
103} // HandleNode
104
105// Create a handle node. The handle is linked into the handle list at the end. This means that this handle will NOT be
106// in order by string address, but this is not a problem because a string with length zero does nothing during garbage
107// collection.
108
109static inline void ?{}( HandleNode & this, VbyteHeap & vh ) with(this) {
110#ifdef VbyteDebug
111 serr | "enter:HandleNode::HandleNode, this:" | &this;
112#endif // VbyteDebug
113 s = 0;
114 lnth = 0;
115 ulink = &vh;
116 AddThisAfter( this, *vh.Header.blink );
117#ifdef VbyteDebug
118 serr | "exit:HandleNode::HandleNode, this:" | &this;
119#endif // VbyteDebug
120} // HandleNode
121
122
123// Delete a node from the handle list by unchaining it from the list. If the handle node was allocated dynamically, it
124// is the responsibility of the creator to destroy it.
125
126static inline void ^?{}( HandleNode & this ) with(this) {
127#ifdef VbyteDebug
128 serr | "enter:HandleNode::~HandleNode, this:" | & this;
129 {
130 serr | nlOff;
131 serr | " lnth:" | lnth | " s:" | (void *)s | ",\"";
132 for ( int i = 0; i < lnth; i += 1 ) {
133 serr | s[i];
134 } // for
135 serr | "\" flink:" | flink | " blink:" | blink | nl;
136 serr | nlOn;
137 }
138#endif // VbyteDebug
139 DeleteNode( this );
140} // ~HandleNode
141
142
143//######################### String Sharing Context #########################
144
145static string_sharectx * ambient_string_sharectx; // fickle top of stack
146static string_sharectx default_string_sharectx = {NEW_SHARING}; // stable bottom of stack
147
148void ?{}( string_sharectx & this, StringSharectx_Mode mode ) with( this ) {
149 (older){ ambient_string_sharectx };
150 if ( mode == NEW_SHARING ) {
151 (activeHeap){ new( (size_t) 1000 ) };
152 } else {
153 verify( mode == NO_SHARING );
154 (activeHeap){ 0p };
155 }
156 ambient_string_sharectx = & this;
157}
158
159void ^?{}( string_sharectx & this ) with( this ) {
160 if ( activeHeap ) delete( activeHeap );
161
162 // unlink this from older-list starting from ambient_string_sharectx
163 // usually, this==ambient_string_sharectx and the loop runs zero times
164 string_sharectx *& c = ambient_string_sharectx;
165 while ( c != &this ) &c = &c->older; // find this
166 c = this.older; // unlink
167}
168
169//######################### String Resource #########################
170
171
172VbyteHeap * DEBUG_string_heap() {
173 assert( ambient_string_sharectx->activeHeap && "No sharing context is active" );
174 return ambient_string_sharectx->activeHeap;
175}
176
177size_t DEBUG_string_bytes_avail_until_gc( VbyteHeap * heap ) {
178 return ((char*)heap->ExtVbyte) - heap->EndVbyte;
179}
180
181size_t DEBUG_string_bytes_in_heap( VbyteHeap * heap ) {
182 return heap->CurrSize;
183}
184
185const char * DEBUG_string_heap_start( VbyteHeap * heap ) {
186 return heap->StartVbyte;
187}
188
189// Returns the size of the string in bytes
190size_t size(const string_res &s) with(s) {
191 return Handle.lnth;
192}
193
194// Output operator
195ofstream & ?|?(ofstream &out, const string_res &s) {
196 // Store auto-newline state so it can be restored
197 bool anl = getANL$(out);
198 nlOff(out);
199 for (size_t i = 0; i < s.Handle.lnth; i++) {
200 out | s[i];
201 }
202 out | sep;
203 // Re-apply newlines after done, for chaining version
204 if (anl) nlOn(out);
205 return out;
206}
207
208void ?|?(ofstream &out, const string_res &s) {
209 // Store auto-newline state so it can be restored
210 bool anl = getANL$(out);
211 nlOff(out);
212 for (size_t i = 0; i < s.Handle.lnth; i++) {
213 // Need to re-apply on the last output operator, for whole-statement version
214 if (anl && i == s.Handle.lnth-1) nlOn(out);
215 out | s[i];
216 }
217 return out;
218}
219
220// Empty constructor
221void ?{}(string_res &s) with(s) {
222 if( ambient_string_sharectx->activeHeap ) {
223 (Handle){ * ambient_string_sharectx->activeHeap };
224 (shareEditSet_owns_ulink){ false };
225 } else {
226 (Handle){ * new( (size_t) 10 ) }; // TODO: can I lazily avoid allocating for empty string
227 (shareEditSet_owns_ulink){ true };
228 }
229 s.shareEditSet_prev = &s;
230 s.shareEditSet_next = &s;
231}
232
233static inline void eagerCopyCtorHelper(string_res &s, const char* rhs, size_t rhslnth) with(s) {
234 if( ambient_string_sharectx->activeHeap ) {
235 (Handle){ * ambient_string_sharectx->activeHeap };
236 (shareEditSet_owns_ulink){ false };
237 } else {
238 (Handle){ * new( rhslnth ) };
239 (shareEditSet_owns_ulink){ true };
240 }
241 Handle.s = VbyteAlloc(*Handle.ulink, rhslnth);
242 Handle.lnth = rhslnth;
243 for ( int i = 0; i < rhslnth; i += 1 ) { // copy characters
244 Handle.s[i] = rhs[i];
245 } // for
246 s.shareEditSet_prev = &s;
247 s.shareEditSet_next = &s;
248}
249
250// Constructor from a raw buffer and size
251void ?{}(string_res &s, const char* rhs, size_t rhslnth) with(s) {
252 eagerCopyCtorHelper(s, rhs, rhslnth);
253}
254
255// String literal constructor
256void ?{}(string_res &s, const char* rhs) {
257 (s){ rhs, strlen(rhs) };
258}
259
260// General copy constructor
261void ?{}(string_res &s, const string_res & s2, StrResInitMode mode, size_t start, size_t end ) {
262
263 verify( start <= end && end <= s2.Handle.lnth );
264
265 if (s2.Handle.ulink == ambient_string_sharectx->activeHeap) {
266 // same heap: allow overlap
267 (s.Handle){};
268 s.Handle.s = s2.Handle.s + start;
269 s.Handle.lnth = end - start;
270 s.Handle.ulink = ambient_string_sharectx->activeHeap;
271 (s.shareEditSet_owns_ulink){ false };
272 AddThisAfter(s.Handle, s2.Handle ); // insert this handle after rhs handle
273 // ^ bug? skip others at early point in string
274
275 if (mode == COPY_VALUE) {
276 // make s alone in its shareEditSet
277 s.shareEditSet_prev = &s;
278 s.shareEditSet_next = &s;
279 } else {
280 verify( mode == SHARE_EDITS );
281
282 // s2 is logically const but not implementation const
283 string_res & s2mod = (string_res &) s2;
284
285 // insert s after s2 on shareEditSet
286 s.shareEditSet_next = s2mod.shareEditSet_next;
287 s.shareEditSet_prev = &s2mod;
288 s.shareEditSet_next->shareEditSet_prev = &s;
289 s.shareEditSet_prev->shareEditSet_next = &s;
290 }
291 } else {
292 // crossing heaps: eager copy
293 assert( mode == COPY_VALUE && "need to solidify context-crossing rules for requesting shared edits");
294 eagerCopyCtorHelper(s, s2.Handle.s + start, end - start);
295 verify(s.shareEditSet_prev == &s);
296 verify(s.shareEditSet_next == &s);
297 }
298}
299
300static void assignEditSet(string_res & this, string_res * shareEditSetStartPeer, string_res * shareEditSetEndPeer,
301 char * resultSesStart,
302 size_t resultSesLnth,
303 HandleNode * resultPadPosition, size_t bsize ) {
304
305 char * beforeBegin = shareEditSetStartPeer->Handle.s;
306 size_t beforeLen = this.Handle.s - beforeBegin;
307
308 char * afterBegin = this.Handle.s + this.Handle.lnth;
309 size_t afterLen = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - afterBegin;
310
311 size_t oldLnth = this.Handle.lnth;
312
313 this.Handle.s = resultSesStart + beforeLen;
314 this.Handle.lnth = bsize;
315 if (resultPadPosition)
316 MoveThisAfter( this.Handle, *resultPadPosition );
317
318 // adjust all substring string and handle locations, and check if any substring strings are outside the new base string
319 char *limit = resultSesStart + resultSesLnth;
320 for (string_res * p = this.shareEditSet_next; p != &this; p = p->shareEditSet_next) {
321 verify (p->Handle.s >= beforeBegin);
322 if ( p->Handle.s >= afterBegin ) {
323 verify ( p->Handle.s <= afterBegin + afterLen );
324 verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
325 // p starts after the edit
326 // take start and end as end-anchored
327 size_t startOffsetFromEnd = afterBegin + afterLen - p->Handle.s;
328 p->Handle.s = limit - startOffsetFromEnd;
329 // p->Handle.lnth unaffected
330 } else if ( p->Handle.s <= beforeBegin + beforeLen ) {
331 // p starts before, or at the start of, the edit
332 if ( p->Handle.s + p->Handle.lnth <= beforeBegin + beforeLen ) {
333 // p ends before the edit
334 // take end as start-anchored too
335 // p->Handle.lnth unaffected
336 } else if ( p->Handle.s + p->Handle.lnth < afterBegin ) {
337 // p ends during the edit; p does not include the last character replaced
338 // clip end of p to end at start of edit
339 p->Handle.lnth = beforeLen - ( p->Handle.s - beforeBegin );
340 } else {
341 // p ends after the edit
342 verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
343 // take end as end-anchored
344 // stretch-shrink p according to the edit
345 p->Handle.lnth += this.Handle.lnth;
346 p->Handle.lnth -= oldLnth;
347 }
348 // take start as start-anchored
349 size_t startOffsetFromStart = p->Handle.s - beforeBegin;
350 p->Handle.s = resultSesStart + startOffsetFromStart;
351 } else {
352 verify ( p->Handle.s < afterBegin );
353 // p starts during the edit
354 verify( p->Handle.s + p->Handle.lnth >= beforeBegin + beforeLen );
355 if ( p->Handle.s + p->Handle.lnth < afterBegin ) {
356 // p ends during the edit; p does not include the last character replaced
357 // set p to empty string at start of edit
358 p->Handle.s = this.Handle.s;
359 p->Handle.lnth = 0;
360 } else {
361 // p includes the end of the edit
362 // clip start of p to start at end of edit
363 int charsToClip = afterBegin - p->Handle.s;
364 p->Handle.s = this.Handle.s + this.Handle.lnth;
365 p->Handle.lnth -= charsToClip;
366 }
367 }
368 if (resultPadPosition)
369 MoveThisAfter( p->Handle, *resultPadPosition ); // move substring handle to maintain sorted order by string position
370 }
371}
372
373static void assign_(string_res &this, const char* buffer, size_t bsize, const string_res & valSrc) {
374
375 // traverse the incumbent share-edit set (SES) to recover the range of a base string to which `this` belongs
376 string_res * shareEditSetStartPeer = & this;
377 string_res * shareEditSetEndPeer = & this;
378 for (string_res * editPeer = this.shareEditSet_next; editPeer != &this; editPeer = editPeer->shareEditSet_next) {
379 if ( editPeer->Handle.s < shareEditSetStartPeer->Handle.s ) {
380 shareEditSetStartPeer = editPeer;
381 }
382 if ( shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth < editPeer->Handle.s + editPeer->Handle.lnth) {
383 shareEditSetEndPeer = editPeer;
384 }
385 }
386
387 verify( shareEditSetEndPeer->Handle.s >= shareEditSetStartPeer->Handle.s );
388 size_t editSetLength = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - shareEditSetStartPeer->Handle.s;
389 verify( editSetLength >= this.Handle.lnth );
390
391 if ( this.shareEditSet_owns_ulink ) { // assigning to private context
392 // ok to overwrite old value within LHS
393 char * prefixStartOrig = shareEditSetStartPeer->Handle.s;
394 int prefixLen = this.Handle.s - prefixStartOrig;
395 char * suffixStartOrig = this.Handle.s + this.Handle.lnth;
396 int suffixLen = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - suffixStartOrig;
397
398 int delta = bsize - this.Handle.lnth;
399 if ( char * oldBytes = VbyteTryAdjustLast( *this.Handle.ulink, delta ) ) {
400 // growing: copy from old to new
401 char * dest = VbyteAlloc( *this.Handle.ulink, editSetLength );
402 char *destCursor = dest; memcpy(destCursor, prefixStartOrig, prefixLen);
403 destCursor += prefixLen; memcpy(destCursor, buffer , bsize );
404 destCursor += this.Handle.lnth; memcpy(destCursor, suffixStartOrig, suffixLen);
405 assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
406 dest,
407 editSetLength,
408 0p, bsize);
409 free( oldBytes );
410 } else {
411 // room is already allocated in-place: bubble suffix and overwite middle
412 memmove( suffixStartOrig + delta, suffixStartOrig, suffixLen );
413 memcpy( this.Handle.s, buffer, bsize );
414
415 assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
416 shareEditSetStartPeer->Handle.s,
417 editSetLength,
418 0p, bsize);
419 }
420
421 } else if ( // assigning to shared context
422 this.Handle.lnth == editSetLength && // overwriting entire run of SES
423 & valSrc && // sourcing from a managed string
424 valSrc.Handle.ulink == this.Handle.ulink ) { // sourcing from same heap
425
426 // SES's result will only use characters from the source string => reuse source
427 assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
428 valSrc.Handle.s,
429 valSrc.Handle.lnth,
430 &((string_res&)valSrc).Handle, bsize);
431
432 } else {
433 // overwriting a proper substring of some string: mash characters from old and new together (copy on write)
434 // OR we are importing characters: need to copy eagerly (can't refer to source)
435
436 // full string is from start of shareEditSetStartPeer thru end of shareEditSetEndPeer
437 // `this` occurs in the middle of it, to be replaced
438 // build up the new text in `pasting`
439
440 // super private string ctor: ignore ambient context
441 void ?{}( string_res &s, VbyteHeap & heap, const char* rhs, size_t rhslnth ) with(s) {
442 (Handle){ heap };
443 Handle.s = VbyteAlloc(*Handle.ulink, rhslnth);
444 Handle.lnth = rhslnth;
445 (s.shareEditSet_owns_ulink){ false };
446 for ( int i = 0; i < rhslnth; i += 1 ) { // copy characters
447 Handle.s[i] = rhs[i];
448 } // for
449 s.shareEditSet_prev = &s;
450 s.shareEditSet_next = &s;
451 }
452
453 string_res pasting = {
454 * this.Handle.ulink, // maintain same heap, regardless of context
455 shareEditSetStartPeer->Handle.s, // start of SES
456 this.Handle.s - shareEditSetStartPeer->Handle.s }; // length of SES, before this
457 append( pasting,
458 buffer, // start of replacement for this
459 bsize ); // length of replacement for this
460 append( pasting,
461 this.Handle.s + this.Handle.lnth, // start of SES after this
462 shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth -
463 (this.Handle.s + this.Handle.lnth) ); // length of SES, after this
464
465 // The above string building can trigger compaction.
466 // The reference points (that are arguments of the string building) may move during that building.
467 // From this point on, they are stable.
468
469 assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
470 pasting.Handle.s,
471 pasting.Handle.lnth,
472 &pasting.Handle, bsize);
473 }
474}
475
476void assign(string_res &this, const char* buffer, size_t bsize) {
477 assign_(this, buffer, bsize, *0p);
478}
479
480void ?=?(string_res &s, const char* other) {
481 assign(s, other, strlen(other));
482}
483
484void ?=?(string_res &s, char other) {
485 assign(s, &other, 1);
486}
487
488// Copy assignment operator
489void ?=?(string_res & this, const string_res & rhs) with( this ) {
490 assign_(this, rhs.Handle.s, rhs.Handle.lnth, rhs);
491}
492
493void ?=?(string_res & this, string_res & rhs) with( this ) {
494 const string_res & rhs2 = rhs;
495 this = rhs2;
496}
497
498
499// Destructor
500void ^?{}(string_res &s) with(s) {
501 // much delegated to implied ^VbyteSM
502
503 // sever s from its share-edit peers, if any (four no-ops when already solo)
504 s.shareEditSet_prev->shareEditSet_next = s.shareEditSet_next;
505 s.shareEditSet_next->shareEditSet_prev = s.shareEditSet_prev;
506 // s.shareEditSet_next = &s;
507 // s.shareEditSet_prev = &s;
508
509 if (shareEditSet_owns_ulink && s.shareEditSet_next == &s) { // last one out
510 delete( s.Handle.ulink );
511 }
512}
513
514
515// Returns the character at the given index
516// With unicode support, this may be different from just the byte at the given
517// offset from the start of the string.
518char ?[?](const string_res &s, size_t index) with(s) {
519 //TODO: Check if index is valid (no exceptions yet)
520 return Handle.s[index];
521}
522
523void assignAt(const string_res &s, size_t index, char val) {
524 string_res editZone = { s, SHARE_EDITS, index, index+1 };
525 assign(editZone, &val, 1);
526}
527
528
529///////////////////////////////////////////////////////////////////
530// Concatenation
531
532void append(string_res &str1, const char * buffer, size_t bsize) {
533 size_t clnth = size(str1) + bsize;
534 if ( str1.Handle.s + size(str1) == buffer ) { // already juxtapose ?
535 // no-op
536 } else { // must copy some text
537 if ( str1.Handle.s + size(str1) == VbyteAlloc(*str1.Handle.ulink, 0) ) { // str1 at end of string area ?
538 VbyteAlloc( *str1.Handle.ulink, bsize ); // create room for 2nd part at the end of string area
539 } else { // copy the two parts
540 char * str1oldBuf = str1.Handle.s;
541 str1.Handle.s = VbyteAlloc( *str1.Handle.ulink, clnth );
542 ByteCopy( str1.Handle.s, 0, str1.Handle.lnth, str1oldBuf, 0, str1.Handle.lnth);
543 } // if
544 ByteCopy( str1.Handle.s, str1.Handle.lnth, bsize, (char*)buffer, 0, (int)bsize);
545 // VbyteHeap & this, char *Dst, int DstStart, int DstLnth, char *Src, int SrcStart, int SrcLnth
546 } // if
547 str1.Handle.lnth = clnth;
548}
549
550void ?+=?(string_res &str1, const string_res &str2) {
551 append( str1, str2.Handle.s, str2.Handle.lnth );
552}
553
554void ?+=?(string_res &s, char other) {
555 append( s, &other, 1 );
556}
557
558void ?+=?(string_res &s, const char* other) {
559 append( s, other, strlen(other) );
560}
561
562
563
564
565//////////////////////////////////////////////////////////
566// Comparisons
567
568
569bool ?==?(const string_res &s1, const string_res &s2) {
570 return ByteCmp( s1.Handle.s, 0, s1.Handle.lnth, s2.Handle.s, 0, s2.Handle.lnth) == 0;
571}
572
573bool ?!=?(const string_res &s1, const string_res &s2) {
574 return !(s1 == s2);
575}
576bool ?==?(const string_res &s, const char* other) {
577 string_res sother = other;
578 return s == sother;
579}
580bool ?!=?(const string_res &s, const char* other) {
581 return !(s == other);
582}
583
584
585//////////////////////////////////////////////////////////
586// Search
587
588bool contains(const string_res &s, char ch) {
589 for (i; size(s)) {
590 if (s[i] == ch) return true;
591 }
592 return false;
593}
594
595int find(const string_res &s, char search) {
596 for (i; size(s)) {
597 if (s[i] == search) return i;
598 }
599 return size(s);
600}
601
602 /* Remaining implementations essentially ported from Sunjay's work */
603
604int find(const string_res &s, const string_res &search) {
605 return find(s, search.Handle.s, search.Handle.lnth);
606}
607
608int find(const string_res &s, const char* search) {
609 return find(s, search, strlen(search));
610}
611
612int find(const string_res &s, const char* search, size_t searchsize) {
613 // FIXME: This is a naive algorithm. We probably want to switch to someting
614 // like Boyer-Moore in the future.
615 // https://en.wikipedia.org/wiki/String_searching_algorithm
616
617 // Always find the empty string
618 if (searchsize == 0) {
619 return 0;
620 }
621
622 for (size_t i = 0; i < s.Handle.lnth; i++) {
623 size_t remaining = s.Handle.lnth - i;
624 // Never going to find the search string if the remaining string is
625 // smaller than search
626 if (remaining < searchsize) {
627 break;
628 }
629
630 bool matched = true;
631 for (size_t j = 0; j < searchsize; j++) {
632 if (search[j] != s.Handle.s[i + j]) {
633 matched = false;
634 break;
635 }
636 }
637 if (matched) {
638 return i;
639 }
640 }
641
642 return s.Handle.lnth;
643}
644
645bool includes(const string_res &s, const string_res &search) {
646 return includes(s, search.Handle.s, search.Handle.lnth);
647}
648
649bool includes(const string_res &s, const char* search) {
650 return includes(s, search, strlen(search));
651}
652
653bool includes(const string_res &s, const char* search, size_t searchsize) {
654 return find(s, search, searchsize) < s.Handle.lnth;
655}
656
657bool startsWith(const string_res &s, const string_res &prefix) {
658 return startsWith(s, prefix.Handle.s, prefix.Handle.lnth);
659}
660
661bool startsWith(const string_res &s, const char* prefix) {
662 return startsWith(s, prefix, strlen(prefix));
663}
664
665bool startsWith(const string_res &s, const char* prefix, size_t prefixsize) {
666 if (s.Handle.lnth < prefixsize) {
667 return false;
668 }
669 return memcmp(s.Handle.s, prefix, prefixsize) == 0;
670}
671
672bool endsWith(const string_res &s, const string_res &suffix) {
673 return endsWith(s, suffix.Handle.s, suffix.Handle.lnth);
674}
675
676bool endsWith(const string_res &s, const char* suffix) {
677 return endsWith(s, suffix, strlen(suffix));
678}
679
680bool endsWith(const string_res &s, const char* suffix, size_t suffixsize) {
681 if (s.Handle.lnth < suffixsize) {
682 return false;
683 }
684 // Amount to offset the bytes pointer so that we are comparing the end of s
685 // to suffix. s.bytes + offset should be the first byte to compare against suffix
686 size_t offset = s.Handle.lnth - suffixsize;
687 return memcmp(s.Handle.s + offset, suffix, suffixsize) == 0;
688}
689
690 /* Back to Mike's work */
691
692
693///////////////////////////////////////////////////////////////////////////
694// charclass, include, exclude
695
696void ?{}( charclass_res & this, const string_res & chars) {
697 (this){ chars.Handle.s, chars.Handle.lnth };
698}
699
700void ?{}( charclass_res & this, const char * chars ) {
701 (this){ chars, strlen(chars) };
702}
703
704void ?{}( charclass_res & this, const char * chars, size_t charssize ) {
705 (this.chars){ chars, charssize };
706 // now sort it ?
707}
708
709void ^?{}( charclass_res & this ) {
710 ^(this.chars){};
711}
712
713static bool test( const charclass_res & mask, char c ) {
714 // instead, use sorted char list?
715 return contains( mask.chars, c );
716}
717
718int exclude(const string_res &s, const charclass_res &mask) {
719 for (int i = 0; i < size(s); i++) {
720 if ( test(mask, s[i]) ) return i;
721 }
722 return size(s);
723}
724
725int include(const string_res &s, const charclass_res &mask) {
726 for (int i = 0; i < size(s); i++) {
727 if ( ! test(mask, s[i]) ) return i;
728 }
729 return size(s);
730}
731
732//######################### VbyteHeap "implementation" #########################
733
734
735// Add a new HandleNode node n after the current HandleNode node.
736
737static inline void AddThisAfter( HandleNode & this, HandleNode & n ) with(this) {
738#ifdef VbyteDebug
739 serr | "enter:AddThisAfter, this:" | &this | " n:" | &n;
740#endif // VbyteDebug
741 verify( n.ulink != 0p );
742 verify( this.ulink == n.ulink );
743 flink = n.flink;
744 blink = &n;
745 n.flink->blink = &this;
746 n.flink = &this;
747#ifdef VbyteDebug
748 {
749 serr | "HandleList:";
750 serr | nlOff;
751 for ( HandleNode *ni = HeaderPtr->flink; ni != HeaderPtr; ni = ni->flink ) {
752 serr | "\tnode:" | ni | " lnth:" | ni->lnth | " s:" | (void *)ni->s | ",\"";
753 for ( int i = 0; i < ni->lnth; i += 1 ) {
754 serr | ni->s[i];
755 } // for
756 serr | "\" flink:" | ni->flink | " blink:" | ni->blink | nl;
757 } // for
758 serr | nlOn;
759 }
760 serr | "exit:AddThisAfter";
761#endif // VbyteDebug
762} // AddThisAfter
763
764
765// Delete the current HandleNode node.
766
767static inline void DeleteNode( HandleNode & this ) with(this) {
768#ifdef VbyteDebug
769 serr | "enter:DeleteNode, this:" | &this;
770#endif // VbyteDebug
771 flink->blink = blink;
772 blink->flink = flink;
773#ifdef VbyteDebug
774 serr | "exit:DeleteNode";
775#endif // VbyteDebug
776} // DeleteNode
777
778
779
780// Allocates specified storage for a string from byte-string area. If not enough space remains to perform the
781// allocation, the garbage collection routine is called.
782
783static inline char * VbyteAlloc( VbyteHeap & this, int size ) with(this) {
784#ifdef VbyteDebug
785 serr | "enter:VbyteAlloc, size:" | size;
786#endif // VbyteDebug
787 uintptr_t NoBytes;
788 char *r;
789
790 NoBytes = ( uintptr_t )EndVbyte + size;
791 if ( NoBytes > ( uintptr_t )ExtVbyte ) { // enough room for new byte-string ?
792 garbage( this, size ); // firer up the garbage collector
793 NoBytes = ( uintptr_t )EndVbyte + size; // try again
794 if ( NoBytes > ( uintptr_t )ExtVbyte ) { // enough room for new byte-string ?
795 assert( 0 && "garbage run did not free up required space" );
796 } // if
797 } // if
798 r = EndVbyte;
799 EndVbyte += size;
800#ifdef VbyteDebug
801 serr | "exit:VbyteAlloc, r:" | (void *)r | " EndVbyte:" | (void *)EndVbyte | " ExtVbyte:" | ExtVbyte;
802#endif // VbyteDebug
803 return r;
804} // VbyteAlloc
805
806
807// Adjusts the last allocation in this heap by delta bytes, or resets this heap to be able to offer
808// new allocations of its original size + delta bytes. Positive delta means bigger;
809// negative means smaller. A null return indicates that the original heap location has room for
810// the requested growth. A non-null return indicates that copying to a new location is required
811// but has not been done; the returned value is the old heap storage location; `this` heap is
812// modified to reference the new location. In the copy-requred case, the caller should use
813// VbyteAlloc to claim the new space, while doing optimal copying from old to new, then free old.
814
815static inline char * VbyteTryAdjustLast( VbyteHeap & this, int delta ) with(this) {
816
817 if ( ( uintptr_t )EndVbyte + delta <= ( uintptr_t )ExtVbyte ) {
818 // room available
819 EndVbyte += delta;
820 return 0p;
821 }
822
823 char *oldBytes = StartVbyte;
824
825 NoOfExtensions += 1;
826 CurrSize *= 2;
827 StartVbyte = EndVbyte = alloc(CurrSize);
828 ExtVbyte = StartVbyte + CurrSize;
829
830 return oldBytes;
831}
832
833
834// Move an existing HandleNode node h somewhere after the current HandleNode node so that it is in ascending order by
835// the address in the byte string area.
836
837static inline void MoveThisAfter( HandleNode & this, const HandleNode & h ) with(this) {
838#ifdef VbyteDebug
839 serr | "enter:MoveThisAfter, this:" | & this | " h:" | & h;
840#endif // VbyteDebug
841 verify( h.ulink != 0p );
842 verify( this.ulink == h.ulink );
843 if ( s < h.s ) { // check argument values
844 // serr | "VbyteSM: Error - Cannot move byte string starting at:" | s | " after byte string starting at:"
845 // | ( h->s ) | " and keep handles in ascending order";
846 // exit(-1 );
847 verify( 0 && "VbyteSM: Error - Cannot move byte strings as requested and keep handles in ascending order");
848 } // if
849
850 HandleNode *i;
851 for ( i = h.flink; i->s != 0 && s > ( i->s ); i = i->flink ); // find the position for this node after h
852 if ( & this != i->blink ) {
853 DeleteNode( this );
854 AddThisAfter( this, *i->blink );
855 } // if
856#ifdef VbyteDebug
857 {
858 serr | "HandleList:";
859 serr | nlOff;
860 for ( HandleNode *n = HeaderPtr->flink; n != HeaderPtr; n = n->flink ) {
861 serr | "\tnode:" | n | " lnth:" | n->lnth | " s:" | (void *)n->s | ",\"";
862 for ( int i = 0; i < n->lnth; i += 1 ) {
863 serr | n->s[i];
864 } // for
865 serr | "\" flink:" | n->flink | " blink:" | n->blink | nl;
866 } // for
867 serr | nlOn;
868 }
869 serr | "exit:MoveThisAfter";
870#endif // VbyteDebug
871} // MoveThisAfter
872
873
874
875
876
877//######################### VbyteHeap #########################
878
879// Move characters from one location in the byte-string area to another. The routine handles the following situations:
880//
881// if the |Src| > |Dst| => truncate
882// if the |Dst| > |Src| => pad Dst with blanks
883
884void ByteCopy( char *Dst, int DstStart, int DstLnth, char *Src, int SrcStart, int SrcLnth ) {
885 for ( int i = 0; i < DstLnth; i += 1 ) {
886 if ( i == SrcLnth ) { // |Dst| > |Src|
887 for ( ; i < DstLnth; i += 1 ) { // pad Dst with blanks
888 Dst[DstStart + i] = ' ';
889 } // for
890 break;
891 } // exit
892 Dst[DstStart + i] = Src[SrcStart + i];
893 } // for
894} // ByteCopy
895
896// Compare two byte strings in the byte-string area. The routine returns the following values:
897//
898// 1 => Src1-byte-string > Src2-byte-string
899// 0 => Src1-byte-string = Src2-byte-string
900// -1 => Src1-byte-string < Src2-byte-string
901
902int ByteCmp( char *Src1, int Src1Start, int Src1Lnth, char *Src2, int Src2Start, int Src2Lnth ) {
903#ifdef VbyteDebug
904 serr | "enter:ByteCmp, Src1Start:" | Src1Start | " Src1Lnth:" | Src1Lnth | " Src2Start:" | Src2Start | " Src2Lnth:" | Src2Lnth;
905#endif // VbyteDebug
906 int cmp;
907
908 CharZip: for ( int i = 0; ; i += 1 ) {
909 if ( i == Src2Lnth - 1 ) {
910 for ( ; ; i += 1 ) {
911 if ( i == Src1Lnth - 1 ) {
912 cmp = 0;
913 break CharZip;
914 } // exit
915 if ( Src1[Src1Start + i] != ' ') {
916 // SUSPECTED BUG: this could be be why Peter got the bug report about == " " (why is this case here at all?)
917 cmp = 1;
918 break CharZip;
919 } // exit
920 } // for
921 } // exit
922 if ( i == Src1Lnth - 1 ) {
923 for ( ; ; i += 1 ) {
924 if ( i == Src2Lnth - 1 ) {
925 cmp = 0;
926 break CharZip;
927 } // exit
928 if ( Src2[Src2Start + i] != ' ') {
929 cmp = -1;
930 break CharZip;
931 } // exit
932 } // for
933 } // exit
934 if ( Src2[Src2Start + i] != Src1[Src1Start+ i]) {
935 cmp = Src1[Src1Start + i] > Src2[Src2Start + i] ? 1 : -1;
936 break CharZip;
937 } // exit
938 } // for
939#ifdef VbyteDebug
940 serr | "exit:ByteCmp, cmp:" | cmp;
941#endif // VbyteDebug
942 return cmp;
943} // ByteCmp
944
945
946// The compaction moves all of the byte strings currently in use to the beginning of the byte-string area and modifies
947// the handles to reflect the new positions of the byte strings. Compaction assumes that the handle list is in ascending
948// order by pointers into the byte-string area. The strings associated with substrings do not have to be moved because
949// the containing string has been moved. Hence, they only require that their string pointers be adjusted.
950
951void compaction(VbyteHeap & this) with(this) {
952 HandleNode *h;
953 char *obase, *nbase, *limit;
954
955 NoOfCompactions += 1;
956 EndVbyte = StartVbyte;
957 h = Header.flink; // ignore header node
958 for (;;) {
959 ByteCopy( EndVbyte, 0, h->lnth, h->s, 0, h->lnth );
960 obase = h->s;
961 h->s = EndVbyte;
962 nbase = h->s;
963 EndVbyte += h->lnth;
964 limit = obase + h->lnth;
965 h = h->flink;
966
967 // check if any substrings are allocated within a string
968
969 for (;;) {
970 if ( h == &Header ) break; // end of header list ?
971 if ( h->s >= limit ) break; // outside of current string ?
972 h->s = nbase + (( uintptr_t )h->s - ( uintptr_t )obase );
973 h = h->flink;
974 } // for
975 if ( h == &Header ) break; // end of header list ?
976 } // for
977} // compaction
978
979
980// Garbage determines the amount of free space left in the heap and then reduces, leave the same, or extends the size of
981// the heap. The heap is then compacted in the existing heap or into the newly allocated heap.
982
983void garbage(VbyteHeap & this, int minreq ) with(this) {
984#ifdef VbyteDebug
985 serr | "enter:garbage";
986 {
987 serr | "HandleList:";
988 for ( HandleNode *n = Header.flink; n != &Header; n = n->flink ) {
989 serr | nlOff;
990 serr | "\tnode:" | n | " lnth:" | n->lnth | " s:" | (void *)n->s | ",\"";
991 for ( int i = 0; i < n->lnth; i += 1 ) {
992 serr | n->s[i];
993 } // for
994 serr | nlOn;
995 serr | "\" flink:" | n->flink | " blink:" | n->blink;
996 } // for
997 }
998#endif // VbyteDebug
999 int AmountUsed, AmountFree;
1000
1001 AmountUsed = 0;
1002 for ( HandleNode *i = Header.flink; i != &Header; i = i->flink ) { // calculate amount of byte area used
1003 AmountUsed += i->lnth;
1004 } // for
1005 AmountFree = ( uintptr_t )ExtVbyte - ( uintptr_t )StartVbyte - AmountUsed;
1006
1007 if ( ( double ) AmountFree < ( CurrSize * 0.1 ) || AmountFree < minreq ) { // free space less than 10% or not enough to serve cur request
1008
1009 extend( this, max( CurrSize, minreq ) ); // extend the heap
1010
1011 // Peter says, "This needs work before it should be used."
1012 // } else if ( AmountFree > CurrSize / 2 ) { // free space greater than 3 times the initial allocation ?
1013 // reduce(( AmountFree / CurrSize - 3 ) * CurrSize ); // reduce the memory
1014
1015 } // if
1016 compaction(this); // compact the byte area, in the same or new heap area
1017#ifdef VbyteDebug
1018 {
1019 serr | "HandleList:";
1020 for ( HandleNode *n = Header.flink; n != &Header; n = n->flink ) {
1021 serr | nlOff;
1022 serr | "\tnode:" | n | " lnth:" | n->lnth | " s:" | (void *)n->s | ",\"";
1023 for ( int i = 0; i < n->lnth; i += 1 ) {
1024 serr | n->s[i];
1025 } // for
1026 serr | nlOn;
1027 serr | "\" flink:" | n->flink | " blink:" | n->blink;
1028 } // for
1029 }
1030 serr | "exit:garbage";
1031#endif // VbyteDebug
1032} // garbage
1033
1034#undef VbyteDebug
1035
1036
1037
1038// Extend the size of the byte-string area by creating a new area and copying the old area into it. The old byte-string
1039// area is deleted.
1040
1041void extend( VbyteHeap & this, int size ) with (this) {
1042#ifdef VbyteDebug
1043 serr | "enter:extend, size:" | size;
1044#endif // VbyteDebug
1045 char *OldStartVbyte;
1046
1047 NoOfExtensions += 1;
1048 OldStartVbyte = StartVbyte; // save previous byte area
1049
1050 CurrSize += size > InitSize ? size : InitSize; // minimum extension, initial size
1051 StartVbyte = EndVbyte = alloc(CurrSize);
1052 ExtVbyte = (void *)( StartVbyte + CurrSize );
1053 compaction(this); // copy from old heap to new & adjust pointers to new heap
1054 free( OldStartVbyte ); // release old heap
1055#ifdef VbyteDebug
1056 serr | "exit:extend, CurrSize:" | CurrSize;
1057#endif // VbyteDebug
1058} // extend
1059
1060//WIP
1061#if 0
1062
1063// Extend the size of the byte-string area by creating a new area and copying the old area into it. The old byte-string
1064// area is deleted.
1065
1066void VbyteHeap::reduce( int size ) {
1067#ifdef VbyteDebug
1068 serr | "enter:reduce, size:" | size;
1069#endif // VbyteDebug
1070 char *OldStartVbyte;
1071
1072 NoOfReductions += 1;
1073 OldStartVbyte = StartVbyte; // save previous byte area
1074
1075 CurrSize -= size;
1076 StartVbyte = EndVbyte = new char[CurrSize];
1077 ExtVbyte = (void *)( StartVbyte + CurrSize );
1078 compaction(); // copy from old heap to new & adjust pointers to new heap
1079 delete OldStartVbyte; // release old heap
1080#ifdef VbyteDebug
1081 serr | "exit:reduce, CurrSize:" | CurrSize;
1082#endif // VbyteDebug
1083} // reduce
1084
1085
1086#endif
Note: See TracBrowser for help on using the repository browser.