source: libcfa/src/containers/string_res.cfa @ 2b30370

ADTast-experimentalenumpthread-emulationqualifiedEnum
Last change on this file since 2b30370 was 2b30370, checked in by Michael Brooks <mlbrooks@…>, 2 years ago

Bug fixes for empty-string from no-arg ctor with sharing off

  • Property mode set to 100644
File size: 38.0 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// string_res -- variable-length, mutable run of text, with resource semantics
8//
9// Author           : Michael L. Brooks
10// Created On       : Fri Sep 03 11:00:00 2021
11// Last Modified By : Michael L. Brooks
12// Last Modified On : Fri Sep 03 11:00:00 2021
13// Update Count     : 1
14//
15
16#include "string_res.hfa"
17#include "string_sharectx.hfa"
18
19#include <stdlib.hfa>  // e.g. malloc
20#include <string.h>    // e.g. strlen
21#include <assert.h>
22
23//######################### VbyteHeap "header" #########################
24
25#ifdef VbyteDebug
26HandleNode *HeaderPtr;
27#endif // VbyteDebug
28
29struct VbyteHeap {
30
31    int NoOfCompactions;                                // number of compactions of the byte area
32    int NoOfExtensions;                                 // number of extensions in the size of the byte area
33    int NoOfReductions;                                 // number of reductions in the size of the byte area
34   
35    int InitSize;                                       // initial number of bytes in the byte-string area
36    int CurrSize;                                       // current number of bytes in the byte-string area
37    char *StartVbyte;                                   // pointer to the `st byte of the start of the byte-string area
38    char *EndVbyte;                                     // pointer to the next byte after the end of the currently used portion of byte-string area
39    void *ExtVbyte;                                     // pointer to the next byte after the end of the byte-string area
40
41    HandleNode Header;                                  // header node for handle list
42}; // VbyteHeap
43
44   
45static inline void compaction( VbyteHeap & );                           // compaction of the byte area
46static inline void garbage( VbyteHeap &, int );                         // garbage collect the byte area
47static inline void extend( VbyteHeap &, int );                  // extend the size of the byte area
48static inline void reduce( VbyteHeap &, int );                  // reduce the size of the byte area
49
50static inline void ?{}( VbyteHeap &, size_t = 1000 );
51static inline void ^?{}( VbyteHeap & );
52static inline void ByteCopy( char *, int, int, char *, int, int ); // copy a block of bytes from one location in the heap to another
53static inline int ByteCmp( char *, int, int, char *, int, int );        // compare 2 blocks of bytes
54static inline char *VbyteAlloc( VbyteHeap &, int );                     // allocate a block bytes in the heap
55static inline char *VbyteTryAdjustLast( VbyteHeap &, int );
56
57static inline void AddThisAfter( HandleNode &, HandleNode & );
58static inline void DeleteNode( HandleNode & );
59static inline void MoveThisAfter( HandleNode &, const HandleNode & );           // move current handle after parameter handle
60
61
62// Allocate the storage for the variable sized area and intialize the heap variables.
63
64static inline void ?{}( VbyteHeap & this, size_t Size ) with(this) {
65#ifdef VbyteDebug
66    serr | "enter:VbyteHeap::VbyteHeap, this:" | &this | " Size:" | Size;
67#endif // VbyteDebug
68    NoOfCompactions = NoOfExtensions = NoOfReductions = 0;
69    InitSize = CurrSize = Size;
70    StartVbyte = EndVbyte = alloc(CurrSize);
71    ExtVbyte = (void *)( StartVbyte + CurrSize );
72    Header.flink = Header.blink = &Header;
73    Header.ulink = & this;
74#ifdef VbyteDebug
75    HeaderPtr = &Header;
76    serr | "exit:VbyteHeap::VbyteHeap, this:" | &this;
77#endif // VbyteDebug
78} // VbyteHeap
79
80
81// Release the dynamically allocated storage for the byte area.
82
83static inline void ^?{}( VbyteHeap & this ) with(this) {
84    free( StartVbyte );
85} // ~VbyteHeap
86
87
88//######################### HandleNode #########################
89
90
91// Create a handle node. The handle is not linked into the handle list.  This is the responsibilitiy of the handle
92// creator.
93
94static inline void ?{}( HandleNode & this ) with(this) {
95#ifdef VbyteDebug
96    serr | "enter:HandleNode::HandleNode, this:" | &this;
97#endif // VbyteDebug
98    s = 0;
99    lnth = 0;
100#ifdef VbyteDebug
101    serr | "exit:HandleNode::HandleNode, this:" | &this;
102#endif // VbyteDebug
103} // HandleNode
104
105// Create a handle node. The handle is linked into the handle list at the end. This means that this handle will NOT be
106// in order by string address, but this is not a problem because a string with length zero does nothing during garbage
107// collection.
108
109static inline void ?{}( HandleNode & this, VbyteHeap & vh ) with(this) {
110#ifdef VbyteDebug
111    serr | "enter:HandleNode::HandleNode, this:" | &this;
112#endif // VbyteDebug
113    s = 0;
114    lnth = 0;
115    ulink = &vh;
116    AddThisAfter( this, *vh.Header.blink );
117#ifdef VbyteDebug
118    serr | "exit:HandleNode::HandleNode, this:" | &this;
119#endif // VbyteDebug
120} // HandleNode
121
122
123// Delete a node from the handle list by unchaining it from the list. If the handle node was allocated dynamically, it
124// is the responsibility of the creator to destroy it.
125
126static inline void ^?{}( HandleNode & this ) with(this) {
127#ifdef VbyteDebug
128    serr | "enter:HandleNode::~HandleNode, this:" | & this;
129    {
130        serr | nlOff;
131        serr | " lnth:" | lnth | " s:" | (void *)s | ",\"";
132        for ( int i = 0; i < lnth; i += 1 ) {
133            serr | s[i];
134        } // for
135        serr | "\" flink:" | flink | " blink:" | blink | nl;
136        serr | nlOn;
137    }
138#endif // VbyteDebug
139    DeleteNode( this );
140} // ~HandleNode
141
142
143//######################### String Sharing Context #########################
144
145static string_sharectx * ambient_string_sharectx;               // fickle top of stack
146static string_sharectx default_string_sharectx = {NEW_SHARING}; // stable bottom of stack
147
148void ?{}( string_sharectx & this, StringSharectx_Mode mode ) with( this ) {
149    (older){ ambient_string_sharectx };
150    if ( mode == NEW_SHARING ) {
151        (activeHeap){ new( (size_t) 1000 ) };
152    } else {
153        verify( mode == NO_SHARING );
154        (activeHeap){ 0p };
155    }
156    ambient_string_sharectx = & this;
157}
158
159void ^?{}( string_sharectx & this ) with( this ) {
160    if ( activeHeap ) delete( activeHeap );
161
162    // unlink this from older-list starting from ambient_string_sharectx
163    // usually, this==ambient_string_sharectx and the loop runs zero times
164    string_sharectx *& c = ambient_string_sharectx;
165    while ( c != &this ) &c = &c->older;              // find this
166    c = this.older;                                   // unlink
167}
168
169//######################### String Resource #########################
170
171
172VbyteHeap * DEBUG_string_heap() {
173    assert( ambient_string_sharectx->activeHeap && "No sharing context is active" );
174    return ambient_string_sharectx->activeHeap;
175}
176
177size_t DEBUG_string_bytes_avail_until_gc( VbyteHeap * heap ) {
178    return ((char*)heap->ExtVbyte) - heap->EndVbyte;
179}
180
181size_t DEBUG_string_bytes_in_heap( VbyteHeap * heap ) {
182    return heap->CurrSize;
183}
184
185const char * DEBUG_string_heap_start( VbyteHeap * heap ) {
186    return heap->StartVbyte;
187}
188
189// Returns the size of the string in bytes
190size_t size(const string_res &s) with(s) {
191    return Handle.lnth;
192}
193
194// Output operator
195ofstream & ?|?(ofstream &out, const string_res &s) {
196    // Store auto-newline state so it can be restored
197    bool anl = getANL$(out);
198    nlOff(out);
199    for (size_t i = 0; i < s.Handle.lnth; i++) {
200        out | s[i];
201    }
202    out | sep;
203    // Re-apply newlines after done, for chaining version
204    if (anl) nlOn(out);
205    return out;
206}
207
208void ?|?(ofstream &out, const string_res &s) {
209    // Store auto-newline state so it can be restored
210    bool anl = getANL$(out);
211    if( s.Handle.lnth == 0 ) {
212        sout | "";
213    } else {
214        nlOff(out);
215        for (size_t i = 0; i < s.Handle.lnth; i++) {
216            // Need to re-apply on the last output operator, for whole-statement version
217            if (anl && i == s.Handle.lnth-1) nlOn(out);
218            out | s[i];
219        }
220    }
221}
222
223// Empty constructor
224void ?{}(string_res &s) with(s) {
225    if( ambient_string_sharectx->activeHeap ) {
226        (Handle){ * ambient_string_sharectx->activeHeap };
227        (shareEditSet_owns_ulink){ false };
228        verify( Handle.s == 0p && Handle.lnth == 0 );
229    } else {
230        (Handle){ * new( (size_t) 10 ) };  // TODO: can I lazily avoid allocating for empty string
231        (shareEditSet_owns_ulink){ true };
232        Handle.s = Handle.ulink->StartVbyte;
233        verify( Handle.lnth == 0 );
234    }
235    s.shareEditSet_prev = &s;
236    s.shareEditSet_next = &s;
237}
238
239static inline void eagerCopyCtorHelper(string_res &s, const char* rhs, size_t rhslnth) with(s) {
240    if( ambient_string_sharectx->activeHeap ) {
241        (Handle){ * ambient_string_sharectx->activeHeap };
242        (shareEditSet_owns_ulink){ false };
243    } else {
244        (Handle){ * new( rhslnth ) };
245        (shareEditSet_owns_ulink){ true };
246    }
247    Handle.s = VbyteAlloc(*Handle.ulink, rhslnth);
248    Handle.lnth = rhslnth;
249    for ( int i = 0; i < rhslnth; i += 1 ) {            // copy characters
250        Handle.s[i] = rhs[i];
251    } // for
252    s.shareEditSet_prev = &s;
253    s.shareEditSet_next = &s;
254}
255
256// Constructor from a raw buffer and size
257void ?{}(string_res &s, const char* rhs, size_t rhslnth) with(s) {
258    eagerCopyCtorHelper(s, rhs, rhslnth);
259}
260
261// String literal constructor
262void ?{}(string_res &s, const char* rhs) {
263    (s){ rhs, strlen(rhs) };
264}
265
266// private ctor (not in header): use specified heap (ignore ambient) and copy chars in
267void ?{}( string_res &s, VbyteHeap & heap, const char* rhs, size_t rhslnth ) with(s) {
268    (Handle){ heap };
269    Handle.s = VbyteAlloc(*Handle.ulink, rhslnth);
270    Handle.lnth = rhslnth;
271    (s.shareEditSet_owns_ulink){ false };
272    for ( int i = 0; i < rhslnth; i += 1 ) {            // copy characters
273        Handle.s[i] = rhs[i];
274    } // for
275    s.shareEditSet_prev = &s;
276    s.shareEditSet_next = &s;
277}
278
279// General copy constructor
280void ?{}(string_res &s, const string_res & s2, StrResInitMode mode, size_t start, size_t end ) {
281
282    verify( start <= end && end <= s2.Handle.lnth );
283
284    if (s2.Handle.ulink != ambient_string_sharectx->activeHeap && mode == COPY_VALUE) {
285        // crossing heaps (including private): copy eagerly
286        eagerCopyCtorHelper(s, s2.Handle.s + start, end - start);
287        verify(s.shareEditSet_prev == &s);
288        verify(s.shareEditSet_next == &s);
289    } else {
290        (s.Handle){};
291        s.Handle.s = s2.Handle.s + start;
292        s.Handle.lnth = end - start;
293        s.Handle.ulink = s2.Handle.ulink;
294
295        AddThisAfter(s.Handle, s2.Handle );                     // insert this handle after rhs handle
296        // ^ bug?  skip others at early point in string
297
298        if (mode == COPY_VALUE) {
299            verify(s2.Handle.ulink == ambient_string_sharectx->activeHeap);
300            // requested logical copy in same heap: defer copy until write
301
302            (s.shareEditSet_owns_ulink){ false };
303
304            // make s alone in its shareEditSet
305            s.shareEditSet_prev = &s;
306            s.shareEditSet_next = &s;
307        } else {
308            verify( mode == SHARE_EDITS );
309            // sharing edits with source forces same heap as source (ignore context)
310
311            (s.shareEditSet_owns_ulink){ s2.shareEditSet_owns_ulink };
312
313            // s2 is logically const but not implementation const
314            string_res & s2mod = (string_res &) s2;
315
316            // insert s after s2 on shareEditSet
317            s.shareEditSet_next = s2mod.shareEditSet_next;
318            s.shareEditSet_prev = &s2mod;
319            s.shareEditSet_next->shareEditSet_prev = &s;
320            s.shareEditSet_prev->shareEditSet_next = &s;
321        }
322    }
323}
324
325static void assignEditSet(string_res & this, string_res * shareEditSetStartPeer, string_res * shareEditSetEndPeer,
326    char * resultSesStart,
327    size_t resultSesLnth,
328    HandleNode * resultPadPosition, size_t bsize ) {
329
330    char * beforeBegin = shareEditSetStartPeer->Handle.s;
331    size_t beforeLen = this.Handle.s - beforeBegin;
332
333    char * afterBegin = this.Handle.s + this.Handle.lnth;
334    size_t afterLen = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - afterBegin;
335
336    size_t oldLnth = this.Handle.lnth;
337
338    this.Handle.s = resultSesStart + beforeLen;
339    this.Handle.lnth = bsize;
340    if (resultPadPosition)
341        MoveThisAfter( this.Handle, *resultPadPosition );
342
343    // adjust all substring string and handle locations, and check if any substring strings are outside the new base string
344    char *limit = resultSesStart + resultSesLnth;
345    for (string_res * p = this.shareEditSet_next; p != &this; p = p->shareEditSet_next) {
346        verify (p->Handle.s >= beforeBegin);
347        if ( p->Handle.s >= afterBegin ) {
348            verify ( p->Handle.s <= afterBegin + afterLen );
349            verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
350            // p starts after the edit
351            // take start and end as end-anchored
352            size_t startOffsetFromEnd = afterBegin + afterLen - p->Handle.s;
353            p->Handle.s = limit - startOffsetFromEnd;
354            // p->Handle.lnth unaffected
355        } else if ( p->Handle.s <= beforeBegin + beforeLen ) {
356            // p starts before, or at the start of, the edit
357            if ( p->Handle.s + p->Handle.lnth <= beforeBegin + beforeLen ) {
358                // p ends before the edit
359                // take end as start-anchored too
360                // p->Handle.lnth unaffected
361            } else if ( p->Handle.s + p->Handle.lnth < afterBegin ) {
362                // p ends during the edit; p does not include the last character replaced
363                // clip end of p to end at start of edit
364                p->Handle.lnth = beforeLen - ( p->Handle.s - beforeBegin );
365            } else {
366                // p ends after the edit
367                verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
368                // take end as end-anchored
369                // stretch-shrink p according to the edit
370                p->Handle.lnth += this.Handle.lnth;
371                p->Handle.lnth -= oldLnth;
372            }
373            // take start as start-anchored
374            size_t startOffsetFromStart = p->Handle.s - beforeBegin;
375            p->Handle.s = resultSesStart + startOffsetFromStart;
376        } else {
377            verify ( p->Handle.s < afterBegin );
378            // p starts during the edit
379            verify( p->Handle.s + p->Handle.lnth >= beforeBegin + beforeLen );
380            if ( p->Handle.s + p->Handle.lnth < afterBegin ) {
381                // p ends during the edit; p does not include the last character replaced
382                // set p to empty string at start of edit
383                p->Handle.s = this.Handle.s;
384                p->Handle.lnth = 0;
385            } else {
386                // p includes the end of the edit
387                // clip start of p to start at end of edit
388                int charsToClip = afterBegin - p->Handle.s;
389                p->Handle.s = this.Handle.s + this.Handle.lnth;
390                p->Handle.lnth -= charsToClip;
391            }
392        }
393        if (resultPadPosition)
394            MoveThisAfter( p->Handle, *resultPadPosition );     // move substring handle to maintain sorted order by string position
395    }
396}
397
398static void assign_(string_res &this, const char* buffer, size_t bsize, const string_res & valSrc) {
399
400    // traverse the incumbent share-edit set (SES) to recover the range of a base string to which `this` belongs
401    string_res * shareEditSetStartPeer = & this;
402    string_res * shareEditSetEndPeer = & this;
403    for (string_res * editPeer = this.shareEditSet_next; editPeer != &this; editPeer = editPeer->shareEditSet_next) {
404        if ( editPeer->Handle.s < shareEditSetStartPeer->Handle.s ) {
405            shareEditSetStartPeer = editPeer;
406        }
407        if ( shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth < editPeer->Handle.s + editPeer->Handle.lnth) {
408            shareEditSetEndPeer = editPeer;
409        }
410    }
411
412    verify( shareEditSetEndPeer->Handle.s >= shareEditSetStartPeer->Handle.s );
413    size_t origEditSetLength = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - shareEditSetStartPeer->Handle.s;
414    verify( origEditSetLength >= this.Handle.lnth );
415
416    if ( this.shareEditSet_owns_ulink ) {                 // assigning to private context
417        // ok to overwrite old value within LHS
418        char * prefixStartOrig = shareEditSetStartPeer->Handle.s;
419        int prefixLen = this.Handle.s - prefixStartOrig;
420        char * suffixStartOrig = this.Handle.s + this.Handle.lnth;
421        int suffixLen = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - suffixStartOrig;
422
423        int delta = bsize - this.Handle.lnth;
424        if ( char * oldBytes = VbyteTryAdjustLast( *this.Handle.ulink, delta ) ) {
425            // growing: copy from old to new
426            char * dest = VbyteAlloc( *this.Handle.ulink, origEditSetLength + delta );
427            char *destCursor = dest;  memcpy(destCursor, prefixStartOrig, prefixLen);
428            destCursor += prefixLen;  memcpy(destCursor, buffer         , bsize    );
429            destCursor += bsize;      memcpy(destCursor, suffixStartOrig, suffixLen);
430            assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
431                dest,
432                origEditSetLength + delta,
433                0p, bsize);
434            free( oldBytes );
435        } else {
436            // room is already allocated in-place: bubble suffix and overwite middle
437            memmove( suffixStartOrig + delta, suffixStartOrig, suffixLen );
438            memcpy( this.Handle.s, buffer, bsize );
439
440            assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
441                shareEditSetStartPeer->Handle.s,
442                origEditSetLength + delta,
443                0p, bsize);
444        }
445
446    } else if (                                           // assigning to shared context
447        this.Handle.lnth == origEditSetLength &&          // overwriting entire run of SES
448        & valSrc &&                                       // sourcing from a managed string
449        valSrc.Handle.ulink == this.Handle.ulink  ) {     // sourcing from same heap
450
451        // SES's result will only use characters from the source string => reuse source
452        assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
453            valSrc.Handle.s,
454            valSrc.Handle.lnth,
455            &((string_res&)valSrc).Handle, bsize);
456       
457    } else {
458        // overwriting a proper substring of some string: mash characters from old and new together (copy on write)
459        // OR we are importing characters: need to copy eagerly (can't refer to source)
460
461        // full string is from start of shareEditSetStartPeer thru end of shareEditSetEndPeer
462        // `this` occurs in the middle of it, to be replaced
463        // build up the new text in `pasting`
464
465        string_res pasting = {
466            * this.Handle.ulink,                               // maintain same heap, regardless of context
467            shareEditSetStartPeer->Handle.s,                   // start of SES
468            this.Handle.s - shareEditSetStartPeer->Handle.s }; // length of SES, before this
469        append( pasting,
470            buffer,                                            // start of replacement for this
471            bsize );                                           // length of replacement for this
472        append( pasting,
473            this.Handle.s + this.Handle.lnth,                  // start of SES after this
474            shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth -
475            (this.Handle.s + this.Handle.lnth) );              // length of SES, after this
476
477        // The above string building can trigger compaction.
478        // The reference points (that are arguments of the string building) may move during that building.
479        // From this point on, they are stable.
480
481        assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
482            pasting.Handle.s,
483            pasting.Handle.lnth,
484            &pasting.Handle, bsize);
485    }
486}
487
488void assign(string_res &this, const char* buffer, size_t bsize) {
489    assign_(this, buffer, bsize, *0p);
490}
491
492void ?=?(string_res &s, const char* other) {
493    assign(s, other, strlen(other));
494}
495
496void ?=?(string_res &s, char other) {
497    assign(s, &other, 1);
498}
499
500// Copy assignment operator
501void ?=?(string_res & this, const string_res & rhs) with( this ) {
502    assign_(this, rhs.Handle.s, rhs.Handle.lnth, rhs);
503}
504
505void ?=?(string_res & this, string_res & rhs) with( this ) {
506    const string_res & rhs2 = rhs;
507    this = rhs2;
508}
509
510
511// Destructor
512void ^?{}(string_res &s) with(s) {
513    // much delegated to implied ^VbyteSM
514
515    // sever s from its share-edit peers, if any (four no-ops when already solo)
516    s.shareEditSet_prev->shareEditSet_next = s.shareEditSet_next;
517    s.shareEditSet_next->shareEditSet_prev = s.shareEditSet_prev;
518    // s.shareEditSet_next = &s;
519    // s.shareEditSet_prev = &s;
520
521    if (shareEditSet_owns_ulink && s.shareEditSet_next == &s) { // last one out
522        delete( s.Handle.ulink );
523    }
524}
525
526
527// Returns the character at the given index
528// With unicode support, this may be different from just the byte at the given
529// offset from the start of the string.
530char ?[?](const string_res &s, size_t index) with(s) {
531    //TODO: Check if index is valid (no exceptions yet)
532    return Handle.s[index];
533}
534
535void assignAt(const string_res &s, size_t index, char val) {
536    string_res editZone = { s, SHARE_EDITS, index, index+1 };
537    assign(editZone, &val, 1);
538}
539
540
541///////////////////////////////////////////////////////////////////
542// Concatenation
543
544void append(string_res &str1, const char * buffer, size_t bsize) {
545    size_t clnth = size(str1) + bsize;
546    if ( str1.Handle.s + size(str1) == buffer ) { // already juxtapose ?
547        // no-op
548    } else {                                            // must copy some text
549        if ( str1.Handle.s + size(str1) == VbyteAlloc(*str1.Handle.ulink, 0) ) { // str1 at end of string area ?
550            VbyteAlloc( *str1.Handle.ulink, bsize ); // create room for 2nd part at the end of string area
551        } else {                                        // copy the two parts
552            char * str1newBuf = VbyteAlloc( *str1.Handle.ulink, clnth );
553            char * str1oldBuf = str1.Handle.s;  // must read after VbyteAlloc call in case it gs's
554            str1.Handle.s = str1newBuf;
555            ByteCopy( str1.Handle.s, 0, str1.Handle.lnth, str1oldBuf, 0, str1.Handle.lnth);
556        } // if
557        ByteCopy( str1.Handle.s, str1.Handle.lnth, bsize, (char*)buffer, 0, (int)bsize);
558        //       VbyteHeap & this, char *Dst, int DstStart, int DstLnth, char *Src, int SrcStart, int SrcLnth
559    } // if
560    str1.Handle.lnth = clnth;
561}
562
563void ?+=?(string_res &str1, const string_res &str2) {
564    append( str1, str2.Handle.s, str2.Handle.lnth );
565}
566
567void ?+=?(string_res &s, char other) {
568    append( s, &other, 1 );
569}
570
571void ?+=?(string_res &s, const char* other) {
572    append( s, other, strlen(other) );
573}
574
575
576
577
578//////////////////////////////////////////////////////////
579// Comparisons
580
581
582bool ?==?(const string_res &s1, const string_res &s2) {
583    return ByteCmp( s1.Handle.s, 0, s1.Handle.lnth, s2.Handle.s, 0, s2.Handle.lnth) == 0;
584}
585
586bool ?!=?(const string_res &s1, const string_res &s2) {
587    return !(s1 == s2);
588}
589bool ?==?(const string_res &s, const char* other) {
590    string_res sother = other;
591    return s == sother;
592}
593bool ?!=?(const string_res &s, const char* other) {
594    return !(s == other);
595}
596
597
598//////////////////////////////////////////////////////////
599// Search
600
601bool contains(const string_res &s, char ch) {
602    for (i; size(s)) {
603        if (s[i] == ch) return true;
604    }
605    return false;
606}
607
608int find(const string_res &s, char search) {
609    for (i; size(s)) {
610        if (s[i] == search) return i;
611    }
612    return size(s);
613}
614
615    /* Remaining implementations essentially ported from Sunjay's work */
616
617int find(const string_res &s, const string_res &search) {
618    return find(s, search.Handle.s, search.Handle.lnth);
619}
620
621int find(const string_res &s, const char* search) {
622    return find(s, search, strlen(search));
623}
624
625int find(const string_res &s, const char* search, size_t searchsize) {
626    // FIXME: This is a naive algorithm. We probably want to switch to someting
627    // like Boyer-Moore in the future.
628    // https://en.wikipedia.org/wiki/String_searching_algorithm
629
630    // Always find the empty string
631    if (searchsize == 0) {
632        return 0;
633    }
634
635    for (size_t i = 0; i < s.Handle.lnth; i++) {
636        size_t remaining = s.Handle.lnth - i;
637        // Never going to find the search string if the remaining string is
638        // smaller than search
639        if (remaining < searchsize) {
640            break;
641        }
642
643        bool matched = true;
644        for (size_t j = 0; j < searchsize; j++) {
645            if (search[j] != s.Handle.s[i + j]) {
646                matched = false;
647                break;
648            }
649        }
650        if (matched) {
651            return i;
652        }
653    }
654
655    return s.Handle.lnth;
656}
657
658bool includes(const string_res &s, const string_res &search) {
659    return includes(s, search.Handle.s, search.Handle.lnth);
660}
661
662bool includes(const string_res &s, const char* search) {
663    return includes(s, search, strlen(search));
664}
665
666bool includes(const string_res &s, const char* search, size_t searchsize) {
667    return find(s, search, searchsize) < s.Handle.lnth;
668}
669
670bool startsWith(const string_res &s, const string_res &prefix) {
671    return startsWith(s, prefix.Handle.s, prefix.Handle.lnth);
672}
673
674bool startsWith(const string_res &s, const char* prefix) {
675    return startsWith(s, prefix, strlen(prefix));
676}
677
678bool startsWith(const string_res &s, const char* prefix, size_t prefixsize) {
679    if (s.Handle.lnth < prefixsize) {
680        return false;
681    }
682    return memcmp(s.Handle.s, prefix, prefixsize) == 0;
683}
684
685bool endsWith(const string_res &s, const string_res &suffix) {
686    return endsWith(s, suffix.Handle.s, suffix.Handle.lnth);
687}
688
689bool endsWith(const string_res &s, const char* suffix) {
690    return endsWith(s, suffix, strlen(suffix));
691}
692
693bool endsWith(const string_res &s, const char* suffix, size_t suffixsize) {
694    if (s.Handle.lnth < suffixsize) {
695        return false;
696    }
697    // Amount to offset the bytes pointer so that we are comparing the end of s
698    // to suffix. s.bytes + offset should be the first byte to compare against suffix
699    size_t offset = s.Handle.lnth - suffixsize;
700    return memcmp(s.Handle.s + offset, suffix, suffixsize) == 0;
701}
702
703    /* Back to Mike's work */
704
705
706///////////////////////////////////////////////////////////////////////////
707// charclass, include, exclude
708
709void ?{}( charclass_res & this, const string_res & chars) {
710    (this){ chars.Handle.s, chars.Handle.lnth };
711}
712
713void ?{}( charclass_res & this, const char * chars ) {
714    (this){ chars, strlen(chars) };
715}
716
717void ?{}( charclass_res & this, const char * chars, size_t charssize ) {
718    (this.chars){ chars, charssize };
719    // now sort it ?
720}
721
722void ^?{}( charclass_res & this ) {
723    ^(this.chars){};
724}
725
726static bool test( const charclass_res & mask, char c ) {
727    // instead, use sorted char list?
728    return contains( mask.chars, c );
729}
730
731int exclude(const string_res &s, const charclass_res &mask) {
732    for (int i = 0; i < size(s); i++) {
733        if ( test(mask, s[i]) ) return i;
734    }
735    return size(s);
736}
737
738int include(const string_res &s, const charclass_res &mask) {
739    for (int i = 0; i < size(s); i++) {
740        if ( ! test(mask, s[i]) ) return i;
741    }
742    return size(s);
743}
744
745//######################### VbyteHeap "implementation" #########################
746
747
748// Add a new HandleNode node n after the current HandleNode node.
749
750static inline void AddThisAfter( HandleNode & this, HandleNode & n ) with(this) {
751#ifdef VbyteDebug
752    serr | "enter:AddThisAfter, this:" | &this | " n:" | &n;
753#endif // VbyteDebug
754    verify( n.ulink != 0p );
755    verify( this.ulink == n.ulink );
756    flink = n.flink;
757    blink = &n;
758    n.flink->blink = &this;
759    n.flink = &this;
760#ifdef VbyteDebug
761    {
762                serr | "HandleList:";
763                serr | nlOff;
764                for ( HandleNode *ni = HeaderPtr->flink; ni != HeaderPtr; ni = ni->flink ) {
765                        serr | "\tnode:" | ni | " lnth:" | ni->lnth | " s:" | (void *)ni->s | ",\"";
766                        for ( int i = 0; i < ni->lnth; i += 1 ) {
767                                serr | ni->s[i];
768                        } // for
769                        serr | "\" flink:" | ni->flink | " blink:" | ni->blink | nl;
770                } // for
771                serr | nlOn;
772    }
773    serr | "exit:AddThisAfter";
774#endif // VbyteDebug
775} // AddThisAfter
776
777
778// Delete the current HandleNode node.
779
780static inline void DeleteNode( HandleNode & this ) with(this) {
781#ifdef VbyteDebug
782    serr | "enter:DeleteNode, this:" | &this;
783#endif // VbyteDebug
784    flink->blink = blink;
785    blink->flink = flink;
786#ifdef VbyteDebug
787    serr | "exit:DeleteNode";
788#endif // VbyteDebug
789} //  DeleteNode
790
791
792
793// Allocates specified storage for a string from byte-string area. If not enough space remains to perform the
794// allocation, the garbage collection routine is called.
795
796static inline char * VbyteAlloc( VbyteHeap & this, int size ) with(this) {
797#ifdef VbyteDebug
798    serr | "enter:VbyteAlloc, size:" | size;
799#endif // VbyteDebug
800    uintptr_t NoBytes;
801    char *r;
802
803    NoBytes = ( uintptr_t )EndVbyte + size;
804    if ( NoBytes > ( uintptr_t )ExtVbyte ) {            // enough room for new byte-string ?
805                garbage( this, size );                                  // firer up the garbage collector
806                NoBytes = ( uintptr_t )EndVbyte + size;         // try again
807                if ( NoBytes > ( uintptr_t )ExtVbyte ) {        // enough room for new byte-string ?
808            assert( 0 && "garbage run did not free up required space" );
809                } // if
810    } // if
811    r = EndVbyte;
812    EndVbyte += size;
813#ifdef VbyteDebug
814    serr | "exit:VbyteAlloc, r:" | (void *)r | " EndVbyte:" | (void *)EndVbyte | " ExtVbyte:" | ExtVbyte;
815#endif // VbyteDebug
816    return r;
817} // VbyteAlloc
818
819
820// Adjusts the last allocation in this heap by delta bytes, or resets this heap to be able to offer
821// new allocations of its original size + delta bytes. Positive delta means bigger;
822// negative means smaller.  A null return indicates that the original heap location has room for
823// the requested growth.  A non-null return indicates that copying to a new location is required
824// but has not been done; the returned value is the old heap storage location; `this` heap is
825// modified to reference the new location.  In the copy-requred case, the caller should use
826// VbyteAlloc to claim the new space, while doing optimal copying from old to new, then free old.
827
828static inline char * VbyteTryAdjustLast( VbyteHeap & this, int delta ) with(this) {
829
830    if ( ( uintptr_t )EndVbyte + delta <= ( uintptr_t )ExtVbyte ) {
831        // room available
832        EndVbyte += delta;
833        return 0p;
834    }
835
836    char *oldBytes = StartVbyte;
837
838    NoOfExtensions += 1;
839    CurrSize *= 2;
840    StartVbyte = EndVbyte = alloc(CurrSize);
841    ExtVbyte = StartVbyte + CurrSize;
842
843    return oldBytes;
844}
845
846
847// Move an existing HandleNode node h somewhere after the current HandleNode node so that it is in ascending order by
848// the address in the byte string area.
849
850static inline void MoveThisAfter( HandleNode & this, const HandleNode  & h ) with(this) {
851#ifdef VbyteDebug
852    serr | "enter:MoveThisAfter, this:" | & this | " h:" | & h;
853#endif // VbyteDebug
854    verify( h.ulink != 0p );
855    verify( this.ulink == h.ulink );
856    if ( s < h.s ) {                                    // check argument values
857                // serr | "VbyteSM: Error - Cannot move byte string starting at:" | s | " after byte string starting at:"
858                //      | ( h->s ) | " and keep handles in ascending order";
859                // exit(-1 );
860                verify( 0 && "VbyteSM: Error - Cannot move byte strings as requested and keep handles in ascending order");
861    } // if
862
863    HandleNode *i;
864    for ( i = h.flink; i->s != 0 && s > ( i->s ); i = i->flink ); // find the position for this node after h
865    if ( & this != i->blink ) {
866                DeleteNode( this );
867                AddThisAfter( this, *i->blink );
868    } // if
869#ifdef VbyteDebug
870    {
871        serr | "HandleList:";
872        serr | nlOff;
873        for ( HandleNode *n = HeaderPtr->flink; n != HeaderPtr; n = n->flink ) {
874            serr | "\tnode:" | n | " lnth:" | n->lnth | " s:" | (void *)n->s | ",\"";
875            for ( int i = 0; i < n->lnth; i += 1 ) {
876                serr | n->s[i];
877            } // for
878            serr | "\" flink:" | n->flink | " blink:" | n->blink | nl;
879        } // for
880        serr | nlOn;
881    }
882    serr | "exit:MoveThisAfter";
883#endif // VbyteDebug
884} // MoveThisAfter
885
886
887
888
889
890//######################### VbyteHeap #########################
891
892// Move characters from one location in the byte-string area to another. The routine handles the following situations:
893//
894// if the |Src| > |Dst| => truncate
895// if the |Dst| > |Src| => pad Dst with blanks
896
897void ByteCopy( char *Dst, int DstStart, int DstLnth, char *Src, int SrcStart, int SrcLnth ) {
898    for ( int i = 0; i < DstLnth; i += 1 ) {
899      if ( i == SrcLnth ) {                             // |Dst| > |Src|
900            for ( ; i < DstLnth; i += 1 ) {             // pad Dst with blanks
901                Dst[DstStart + i] = ' ';
902            } // for
903            break;
904        } // exit
905        Dst[DstStart + i] = Src[SrcStart + i];
906    } // for
907} // ByteCopy
908
909// Compare two byte strings in the byte-string area. The routine returns the following values:
910//
911// 1 => Src1-byte-string > Src2-byte-string
912// 0 => Src1-byte-string = Src2-byte-string
913// -1 => Src1-byte-string < Src2-byte-string
914
915int ByteCmp( char *Src1, int Src1Start, int Src1Lnth, char *Src2, int Src2Start, int Src2Lnth )  {
916#ifdef VbyteDebug
917    serr | "enter:ByteCmp, Src1Start:" | Src1Start | " Src1Lnth:" | Src1Lnth | " Src2Start:" | Src2Start | " Src2Lnth:" | Src2Lnth;
918#endif // VbyteDebug
919    int cmp;
920
921    CharZip: for ( int i = 0; ; i += 1 ) {
922        if ( i == Src2Lnth - 1 ) {
923            for ( ; ; i += 1 ) {
924                if ( i == Src1Lnth - 1 ) {
925                    cmp = 0;
926                    break CharZip;
927                } // exit
928                if ( Src1[Src1Start + i] != ' ') {
929                        // SUSPECTED BUG:  this could be be why Peter got the bug report about == " "  (why is this case here at all?)
930                    cmp = 1;
931                    break CharZip;
932                } // exit
933            } // for
934        } // exit
935        if ( i == Src1Lnth - 1 ) {
936            for ( ; ; i += 1 ) {
937                if ( i == Src2Lnth - 1 ) {
938                    cmp = 0;
939                    break CharZip;
940                } // exit
941                if ( Src2[Src2Start + i] != ' ') {
942                    cmp = -1;
943                    break CharZip;
944                } // exit
945            } // for
946        } // exit
947      if ( Src2[Src2Start + i] != Src1[Src1Start+ i]) {
948            cmp = Src1[Src1Start + i] > Src2[Src2Start + i] ? 1 : -1;
949            break CharZip;
950        } // exit
951    } // for
952#ifdef VbyteDebug
953    serr | "exit:ByteCmp, cmp:" | cmp;
954#endif // VbyteDebug
955    return cmp;
956} // ByteCmp
957
958
959// The compaction moves all of the byte strings currently in use to the beginning of the byte-string area and modifies
960// the handles to reflect the new positions of the byte strings. Compaction assumes that the handle list is in ascending
961// order by pointers into the byte-string area.  The strings associated with substrings do not have to be moved because
962// the containing string has been moved. Hence, they only require that their string pointers be adjusted.
963
964void compaction(VbyteHeap & this) with(this) {
965    HandleNode *h;
966    char *obase, *nbase, *limit;
967   
968    NoOfCompactions += 1;
969    EndVbyte = StartVbyte;
970    h = Header.flink;                                   // ignore header node
971    for (;;) {
972                ByteCopy( EndVbyte, 0, h->lnth, h->s, 0, h->lnth );
973                obase = h->s;
974                h->s = EndVbyte;
975                nbase = h->s;
976                EndVbyte += h->lnth;
977                limit = obase + h->lnth;
978                h = h->flink;
979               
980                // check if any substrings are allocated within a string
981               
982                for (;;) {
983                        if ( h == &Header ) break;                      // end of header list ?
984                        if ( h->s >= limit ) break;                     // outside of current string ?
985                        h->s = nbase + (( uintptr_t )h->s - ( uintptr_t )obase );
986                        h = h->flink;
987                } // for
988                if ( h == &Header ) break;                      // end of header list ?
989    } // for
990} // compaction
991
992
993// Garbage determines the amount of free space left in the heap and then reduces, leave the same, or extends the size of
994// the heap.  The heap is then compacted in the existing heap or into the newly allocated heap.
995
996void garbage(VbyteHeap & this, int minreq ) with(this) {
997#ifdef VbyteDebug
998    serr | "enter:garbage";
999    {
1000                serr | "HandleList:";
1001                for ( HandleNode *n = Header.flink; n != &Header; n = n->flink ) {
1002                        serr | nlOff;
1003                        serr | "\tnode:" | n | " lnth:" | n->lnth | " s:" | (void *)n->s | ",\"";
1004                        for ( int i = 0; i < n->lnth; i += 1 ) {
1005                                serr | n->s[i];
1006                        } // for
1007                        serr | nlOn;
1008                        serr | "\" flink:" | n->flink | " blink:" | n->blink;
1009                } // for
1010    }
1011#endif // VbyteDebug
1012    int AmountUsed, AmountFree;
1013
1014    AmountUsed = 0;
1015    for ( HandleNode *i = Header.flink; i != &Header; i = i->flink ) { // calculate amount of byte area used
1016                AmountUsed += i->lnth;
1017    } // for
1018    AmountFree = ( uintptr_t )ExtVbyte - ( uintptr_t )StartVbyte - AmountUsed;
1019   
1020    if ( ( double ) AmountFree < ( CurrSize * 0.1 ) || AmountFree < minreq ) {  // free space less than 10%  or not enough to serve cur request
1021
1022                extend( this, max( CurrSize, minreq ) );                                // extend the heap
1023
1024                        //  Peter says, "This needs work before it should be used."
1025                        //  } else if ( AmountFree > CurrSize / 2 ) {           // free space greater than 3 times the initial allocation ?
1026                        //              reduce(( AmountFree / CurrSize - 3 ) * CurrSize ); // reduce the memory
1027
1028    } // if
1029    compaction(this);                                   // compact the byte area, in the same or new heap area
1030#ifdef VbyteDebug
1031    {
1032                serr | "HandleList:";
1033                for ( HandleNode *n = Header.flink; n != &Header; n = n->flink ) {
1034                        serr | nlOff;
1035                        serr | "\tnode:" | n | " lnth:" | n->lnth | " s:" | (void *)n->s | ",\"";
1036                        for ( int i = 0; i < n->lnth; i += 1 ) {
1037                                serr | n->s[i];
1038                        } // for
1039                        serr | nlOn;
1040                        serr | "\" flink:" | n->flink | " blink:" | n->blink;
1041                } // for
1042    }
1043    serr | "exit:garbage";
1044#endif // VbyteDebug
1045} // garbage
1046
1047#undef VbyteDebug
1048
1049
1050
1051// Extend the size of the byte-string area by creating a new area and copying the old area into it. The old byte-string
1052// area is deleted.
1053
1054void extend( VbyteHeap & this, int size ) with (this) {
1055#ifdef VbyteDebug
1056    serr | "enter:extend, size:" | size;
1057#endif // VbyteDebug
1058    char *OldStartVbyte;
1059
1060    NoOfExtensions += 1;
1061    OldStartVbyte = StartVbyte;                         // save previous byte area
1062   
1063    CurrSize += size > InitSize ? size : InitSize;      // minimum extension, initial size
1064    StartVbyte = EndVbyte = alloc(CurrSize);
1065    ExtVbyte = (void *)( StartVbyte + CurrSize );
1066    compaction(this);                                   // copy from old heap to new & adjust pointers to new heap
1067    free( OldStartVbyte );                              // release old heap
1068#ifdef VbyteDebug
1069    serr | "exit:extend, CurrSize:" | CurrSize;
1070#endif // VbyteDebug
1071} // extend
1072
1073//WIP
1074#if 0
1075
1076// Extend the size of the byte-string area by creating a new area and copying the old area into it. The old byte-string
1077// area is deleted.
1078
1079void VbyteHeap::reduce( int size ) {
1080#ifdef VbyteDebug
1081    serr | "enter:reduce, size:" | size;
1082#endif // VbyteDebug
1083    char *OldStartVbyte;
1084
1085    NoOfReductions += 1;
1086    OldStartVbyte = StartVbyte;                         // save previous byte area
1087   
1088    CurrSize -= size;
1089    StartVbyte = EndVbyte = new char[CurrSize];
1090    ExtVbyte = (void *)( StartVbyte + CurrSize );
1091    compaction();                                       // copy from old heap to new & adjust pointers to new heap
1092    delete  OldStartVbyte;                              // release old heap
1093#ifdef VbyteDebug
1094    serr | "exit:reduce, CurrSize:" | CurrSize;
1095#endif // VbyteDebug
1096} // reduce
1097
1098
1099#endif
Note: See TracBrowser for help on using the repository browser.