source: libcfa/src/containers/string_res.cfa @ 6f7aff3

ADTast-experimentalenumpthread-emulationqualifiedEnum
Last change on this file since 6f7aff3 was 6f7aff3, checked in by Michael Brooks <mlbrooks@…>, 3 years ago

String hybrid assignment to unshared now optimizes to overwrite instead of copy.

  • Property mode set to 100644
File size: 37.5 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// string_res -- variable-length, mutable run of text, with resource semantics
8//
9// Author           : Michael L. Brooks
10// Created On       : Fri Sep 03 11:00:00 2021
11// Last Modified By : Michael L. Brooks
12// Last Modified On : Fri Sep 03 11:00:00 2021
13// Update Count     : 1
14//
15
16#include "string_res.hfa"
17#include "string_sharectx.hfa"
18
19#include <stdlib.hfa>  // e.g. malloc
20#include <string.h>    // e.g. strlen
21#include <assert.h>
22
23//######################### VbyteHeap "header" #########################
24
25#ifdef VbyteDebug
26HandleNode *HeaderPtr;
27#endif // VbyteDebug
28
29struct VbyteHeap {
30
31    int NoOfCompactions;                                // number of compactions of the byte area
32    int NoOfExtensions;                                 // number of extensions in the size of the byte area
33    int NoOfReductions;                                 // number of reductions in the size of the byte area
34   
35    int InitSize;                                       // initial number of bytes in the byte-string area
36    int CurrSize;                                       // current number of bytes in the byte-string area
37    char *StartVbyte;                                   // pointer to the `st byte of the start of the byte-string area
38    char *EndVbyte;                                     // pointer to the next byte after the end of the currently used portion of byte-string area
39    void *ExtVbyte;                                     // pointer to the next byte after the end of the byte-string area
40
41    HandleNode Header;                                  // header node for handle list
42}; // VbyteHeap
43
44   
45static inline void compaction( VbyteHeap & );                           // compaction of the byte area
46static inline void garbage( VbyteHeap &, int );                         // garbage collect the byte area
47static inline void extend( VbyteHeap &, int );                  // extend the size of the byte area
48static inline void reduce( VbyteHeap &, int );                  // reduce the size of the byte area
49
50static inline void ?{}( VbyteHeap &, size_t = 1000 );
51static inline void ^?{}( VbyteHeap & );
52static inline void ByteCopy( char *, int, int, char *, int, int ); // copy a block of bytes from one location in the heap to another
53static inline int ByteCmp( char *, int, int, char *, int, int );        // compare 2 blocks of bytes
54static inline char *VbyteAlloc( VbyteHeap &, int );                     // allocate a block bytes in the heap
55static inline char *VbyteTryAdjustLast( VbyteHeap &, int );
56
57static inline void AddThisAfter( HandleNode &, HandleNode & );
58static inline void DeleteNode( HandleNode & );
59static inline void MoveThisAfter( HandleNode &, const HandleNode & );           // move current handle after parameter handle
60
61
62// Allocate the storage for the variable sized area and intialize the heap variables.
63
64static inline void ?{}( VbyteHeap & this, size_t Size ) with(this) {
65#ifdef VbyteDebug
66    serr | "enter:VbyteHeap::VbyteHeap, this:" | &this | " Size:" | Size;
67#endif // VbyteDebug
68    NoOfCompactions = NoOfExtensions = NoOfReductions = 0;
69    InitSize = CurrSize = Size;
70    StartVbyte = EndVbyte = alloc(CurrSize);
71    ExtVbyte = (void *)( StartVbyte + CurrSize );
72    Header.flink = Header.blink = &Header;
73    Header.ulink = & this;
74#ifdef VbyteDebug
75    HeaderPtr = &Header;
76    serr | "exit:VbyteHeap::VbyteHeap, this:" | &this;
77#endif // VbyteDebug
78} // VbyteHeap
79
80
81// Release the dynamically allocated storage for the byte area.
82
83static inline void ^?{}( VbyteHeap & this ) with(this) {
84    free( StartVbyte );
85} // ~VbyteHeap
86
87
88//######################### HandleNode #########################
89
90
91// Create a handle node. The handle is not linked into the handle list.  This is the responsibilitiy of the handle
92// creator.
93
94static inline void ?{}( HandleNode & this ) with(this) {
95#ifdef VbyteDebug
96    serr | "enter:HandleNode::HandleNode, this:" | &this;
97#endif // VbyteDebug
98    s = 0;
99    lnth = 0;
100#ifdef VbyteDebug
101    serr | "exit:HandleNode::HandleNode, this:" | &this;
102#endif // VbyteDebug
103} // HandleNode
104
105// Create a handle node. The handle is linked into the handle list at the end. This means that this handle will NOT be
106// in order by string address, but this is not a problem because a string with length zero does nothing during garbage
107// collection.
108
109static inline void ?{}( HandleNode & this, VbyteHeap & vh ) with(this) {
110#ifdef VbyteDebug
111    serr | "enter:HandleNode::HandleNode, this:" | &this;
112#endif // VbyteDebug
113    s = 0;
114    lnth = 0;
115    ulink = &vh;
116    AddThisAfter( this, *vh.Header.blink );
117#ifdef VbyteDebug
118    serr | "exit:HandleNode::HandleNode, this:" | &this;
119#endif // VbyteDebug
120} // HandleNode
121
122
123// Delete a node from the handle list by unchaining it from the list. If the handle node was allocated dynamically, it
124// is the responsibility of the creator to destroy it.
125
126static inline void ^?{}( HandleNode & this ) with(this) {
127#ifdef VbyteDebug
128    serr | "enter:HandleNode::~HandleNode, this:" | & this;
129    {
130        serr | nlOff;
131        serr | " lnth:" | lnth | " s:" | (void *)s | ",\"";
132        for ( int i = 0; i < lnth; i += 1 ) {
133            serr | s[i];
134        } // for
135        serr | "\" flink:" | flink | " blink:" | blink | nl;
136        serr | nlOn;
137    }
138#endif // VbyteDebug
139    DeleteNode( this );
140} // ~HandleNode
141
142
143//######################### String Sharing Context #########################
144
145static string_sharectx * ambient_string_sharectx;               // fickle top of stack
146static string_sharectx default_string_sharectx = {NEW_SHARING}; // stable bottom of stack
147
148void ?{}( string_sharectx & this, StringSharectx_Mode mode ) with( this ) {
149    (older){ ambient_string_sharectx };
150    if ( mode == NEW_SHARING ) {
151        (activeHeap){ new( (size_t) 1000 ) };
152    } else {
153        verify( mode == NO_SHARING );
154        (activeHeap){ 0p };
155    }
156    ambient_string_sharectx = & this;
157}
158
159void ^?{}( string_sharectx & this ) with( this ) {
160    if ( activeHeap ) delete( activeHeap );
161
162    // unlink this from older-list starting from ambient_string_sharectx
163    // usually, this==ambient_string_sharectx and the loop runs zero times
164    string_sharectx *& c = ambient_string_sharectx;
165    while ( c != &this ) &c = &c->older;              // find this
166    c = this.older;                                   // unlink
167}
168
169//######################### String Resource #########################
170
171
172VbyteHeap * DEBUG_string_heap() {
173    assert( ambient_string_sharectx->activeHeap && "No sharing context is active" );
174    return ambient_string_sharectx->activeHeap;
175}
176
177size_t DEBUG_string_bytes_avail_until_gc( VbyteHeap * heap ) {
178    return ((char*)heap->ExtVbyte) - heap->EndVbyte;
179}
180
181size_t DEBUG_string_bytes_in_heap( VbyteHeap * heap ) {
182    return heap->CurrSize;
183}
184
185const char * DEBUG_string_heap_start( VbyteHeap * heap ) {
186    return heap->StartVbyte;
187}
188
189// Returns the size of the string in bytes
190size_t size(const string_res &s) with(s) {
191    return Handle.lnth;
192}
193
194// Output operator
195ofstream & ?|?(ofstream &out, const string_res &s) {
196    // Store auto-newline state so it can be restored
197    bool anl = getANL$(out);
198    nlOff(out);
199    for (size_t i = 0; i < s.Handle.lnth; i++) {
200        out | s[i];
201    }
202    out | sep;
203    // Re-apply newlines after done, for chaining version
204    if (anl) nlOn(out);
205    return out;
206}
207
208void ?|?(ofstream &out, const string_res &s) {
209    // Store auto-newline state so it can be restored
210    bool anl = getANL$(out);
211    nlOff(out);
212    for (size_t i = 0; i < s.Handle.lnth; i++) {
213        // Need to re-apply on the last output operator, for whole-statement version
214        if (anl && i == s.Handle.lnth-1) nlOn(out);
215        out | s[i];
216    }
217    return out;
218}
219
220// Empty constructor
221void ?{}(string_res &s) with(s) {
222    if( ambient_string_sharectx->activeHeap ) {
223        (Handle){ * ambient_string_sharectx->activeHeap };
224        (shareEditSet_owns_ulink){ false };
225    } else {
226        (Handle){ * new( (size_t) 10 ) };  // TODO: can I lazily avoid allocating for empty string
227        (shareEditSet_owns_ulink){ true };
228    }
229    s.shareEditSet_prev = &s;
230    s.shareEditSet_next = &s;
231}
232
233static inline void eagerCopyCtorHelper(string_res &s, const char* rhs, size_t rhslnth) with(s) {
234    if( ambient_string_sharectx->activeHeap ) {
235        (Handle){ * ambient_string_sharectx->activeHeap };
236        (shareEditSet_owns_ulink){ false };
237    } else {
238        (Handle){ * new( rhslnth ) };
239        (shareEditSet_owns_ulink){ true };
240    }
241    Handle.s = VbyteAlloc(*Handle.ulink, rhslnth);
242    Handle.lnth = rhslnth;
243    for ( int i = 0; i < rhslnth; i += 1 ) {            // copy characters
244        Handle.s[i] = rhs[i];
245    } // for
246    s.shareEditSet_prev = &s;
247    s.shareEditSet_next = &s;
248}
249
250// Constructor from a raw buffer and size
251void ?{}(string_res &s, const char* rhs, size_t rhslnth) with(s) {
252    eagerCopyCtorHelper(s, rhs, rhslnth);
253}
254
255// String literal constructor
256void ?{}(string_res &s, const char* rhs) {
257    (s){ rhs, strlen(rhs) };
258}
259
260// General copy constructor
261void ?{}(string_res &s, const string_res & s2, StrResInitMode mode, size_t start, size_t end ) {
262
263    verify( start <= end && end <= s2.Handle.lnth );
264
265    if (s2.Handle.ulink == ambient_string_sharectx->activeHeap) {
266        // same heap: allow overlap
267        (s.Handle){};
268        s.Handle.s = s2.Handle.s + start;
269        s.Handle.lnth = end - start;
270        s.Handle.ulink = ambient_string_sharectx->activeHeap;
271        (s.shareEditSet_owns_ulink){ false };
272        AddThisAfter(s.Handle, s2.Handle );                     // insert this handle after rhs handle
273        // ^ bug?  skip others at early point in string
274   
275        if (mode == COPY_VALUE) {
276            // make s alone in its shareEditSet
277            s.shareEditSet_prev = &s;
278            s.shareEditSet_next = &s;
279        } else {
280            verify( mode == SHARE_EDITS );
281
282            // s2 is logically const but not implementation const
283            string_res & s2mod = (string_res &) s2;
284
285            // insert s after s2 on shareEditSet
286            s.shareEditSet_next = s2mod.shareEditSet_next;
287            s.shareEditSet_prev = &s2mod;
288            s.shareEditSet_next->shareEditSet_prev = &s;
289            s.shareEditSet_prev->shareEditSet_next = &s;
290        }
291    } else {
292        // crossing heaps: eager copy
293        assert( mode == COPY_VALUE && "need to solidify context-crossing rules for requesting shared edits");
294        eagerCopyCtorHelper(s, s2.Handle.s + start, end - start);
295        verify(s.shareEditSet_prev == &s);
296        verify(s.shareEditSet_next == &s);
297    }
298}
299
300static void assignEditSet(string_res & this, string_res * shareEditSetStartPeer, string_res * shareEditSetEndPeer,
301    char * resultSesStart,
302    size_t resultSesLnth,
303    HandleNode * resultPadPosition, size_t bsize ) {
304
305    char * beforeBegin = shareEditSetStartPeer->Handle.s;
306    size_t beforeLen = this.Handle.s - beforeBegin;
307
308    char * afterBegin = this.Handle.s + this.Handle.lnth;
309    size_t afterLen = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - afterBegin;
310
311    size_t oldLnth = this.Handle.lnth;
312
313    this.Handle.s = resultSesStart + beforeLen;
314    this.Handle.lnth = bsize;
315    if (resultPadPosition)
316        MoveThisAfter( this.Handle, *resultPadPosition );
317
318    // adjust all substring string and handle locations, and check if any substring strings are outside the new base string
319    char *limit = resultSesStart + resultSesLnth;
320    for (string_res * p = this.shareEditSet_next; p != &this; p = p->shareEditSet_next) {
321        verify (p->Handle.s >= beforeBegin);
322        if ( p->Handle.s >= afterBegin ) {
323            verify ( p->Handle.s <= afterBegin + afterLen );
324            verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
325            // p starts after the edit
326            // take start and end as end-anchored
327            size_t startOffsetFromEnd = afterBegin + afterLen - p->Handle.s;
328            p->Handle.s = limit - startOffsetFromEnd;
329            // p->Handle.lnth unaffected
330        } else if ( p->Handle.s <= beforeBegin + beforeLen ) {
331            // p starts before, or at the start of, the edit
332            if ( p->Handle.s + p->Handle.lnth <= beforeBegin + beforeLen ) {
333                // p ends before the edit
334                // take end as start-anchored too
335                // p->Handle.lnth unaffected
336            } else if ( p->Handle.s + p->Handle.lnth < afterBegin ) {
337                // p ends during the edit; p does not include the last character replaced
338                // clip end of p to end at start of edit
339                p->Handle.lnth = beforeLen - ( p->Handle.s - beforeBegin );
340            } else {
341                // p ends after the edit
342                verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
343                // take end as end-anchored
344                // stretch-shrink p according to the edit
345                p->Handle.lnth += this.Handle.lnth;
346                p->Handle.lnth -= oldLnth;
347            }
348            // take start as start-anchored
349            size_t startOffsetFromStart = p->Handle.s - beforeBegin;
350            p->Handle.s = resultSesStart + startOffsetFromStart;
351        } else {
352            verify ( p->Handle.s < afterBegin );
353            // p starts during the edit
354            verify( p->Handle.s + p->Handle.lnth >= beforeBegin + beforeLen );
355            if ( p->Handle.s + p->Handle.lnth < afterBegin ) {
356                // p ends during the edit; p does not include the last character replaced
357                // set p to empty string at start of edit
358                p->Handle.s = this.Handle.s;
359                p->Handle.lnth = 0;
360            } else {
361                // p includes the end of the edit
362                // clip start of p to start at end of edit
363                int charsToClip = afterBegin - p->Handle.s;
364                p->Handle.s = this.Handle.s + this.Handle.lnth;
365                p->Handle.lnth -= charsToClip;
366            }
367        }
368        if (resultPadPosition)
369            MoveThisAfter( p->Handle, *resultPadPosition );     // move substring handle to maintain sorted order by string position
370    }
371}
372
373static void assign_(string_res &this, const char* buffer, size_t bsize, const string_res & valSrc) {
374
375    // traverse the incumbent share-edit set (SES) to recover the range of a base string to which `this` belongs
376    string_res * shareEditSetStartPeer = & this;
377    string_res * shareEditSetEndPeer = & this;
378    for (string_res * editPeer = this.shareEditSet_next; editPeer != &this; editPeer = editPeer->shareEditSet_next) {
379        if ( editPeer->Handle.s < shareEditSetStartPeer->Handle.s ) {
380            shareEditSetStartPeer = editPeer;
381        }
382        if ( shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth < editPeer->Handle.s + editPeer->Handle.lnth) {
383            shareEditSetEndPeer = editPeer;
384        }
385    }
386
387    verify( shareEditSetEndPeer->Handle.s >= shareEditSetStartPeer->Handle.s );
388    size_t editSetLength = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - shareEditSetStartPeer->Handle.s;
389    verify( editSetLength >= this.Handle.lnth );
390
391    if ( this.shareEditSet_owns_ulink ) {                 // assigning to private context
392        // ok to overwrite old value within LHS
393        char * prefixStartOrig = shareEditSetStartPeer->Handle.s;
394        int prefixLen = this.Handle.s - prefixStartOrig;
395        char * suffixStartOrig = this.Handle.s + this.Handle.lnth;
396        int suffixLen = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - suffixStartOrig;
397
398        int delta = bsize - this.Handle.lnth;
399        if ( char * oldBytes = VbyteTryAdjustLast( *this.Handle.ulink, delta ) ) {
400            // growing: copy from old to new
401            char * dest = VbyteAlloc( *this.Handle.ulink, editSetLength );
402            char *destCursor = dest;        memcpy(destCursor, prefixStartOrig, prefixLen);
403            destCursor += prefixLen;        memcpy(destCursor, buffer         , bsize    );
404            destCursor += this.Handle.lnth; memcpy(destCursor, suffixStartOrig, suffixLen);
405            assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
406                dest,
407                editSetLength,
408                0p, bsize);
409            free( oldBytes );
410        } else {
411            // room is already allocated in-place: bubble suffix and overwite middle
412            memmove( suffixStartOrig + delta, suffixStartOrig, suffixLen );
413            memcpy( this.Handle.s, buffer, bsize );
414
415            assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
416                shareEditSetStartPeer->Handle.s,
417                editSetLength,
418                0p, bsize);
419        }
420
421    } else if (                                           // assigning to shared context
422        this.Handle.lnth == editSetLength &&              // overwriting entire run of SES
423        & valSrc &&                                       // sourcing from a managed string
424        valSrc.Handle.ulink == this.Handle.ulink  ) {     // sourcing from same heap
425
426        // SES's result will only use characters from the source string => reuse source
427        assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
428            valSrc.Handle.s,
429            valSrc.Handle.lnth,
430            &((string_res&)valSrc).Handle, bsize);
431       
432    } else {
433        // overwriting a proper substring of some string: mash characters from old and new together (copy on write)
434        // OR we are importing characters: need to copy eagerly (can't refer to source)
435
436        // full string is from start of shareEditSetStartPeer thru end of shareEditSetEndPeer
437        // `this` occurs in the middle of it, to be replaced
438        // build up the new text in `pasting`
439
440        // super private string ctor: ignore ambient context
441        void ?{}( string_res &s, VbyteHeap & heap, const char* rhs, size_t rhslnth ) with(s) {
442            (Handle){ heap };
443            Handle.s = VbyteAlloc(*Handle.ulink, rhslnth);
444            Handle.lnth = rhslnth;
445            (s.shareEditSet_owns_ulink){ false };
446            for ( int i = 0; i < rhslnth; i += 1 ) {            // copy characters
447                Handle.s[i] = rhs[i];
448            } // for
449            s.shareEditSet_prev = &s;
450            s.shareEditSet_next = &s;
451        }
452
453        string_res pasting = {
454            * this.Handle.ulink,                               // maintain same heap, regardless of context
455            shareEditSetStartPeer->Handle.s,                   // start of SES
456            this.Handle.s - shareEditSetStartPeer->Handle.s }; // length of SES, before this
457        append( pasting,
458            buffer,                                            // start of replacement for this
459            bsize );                                           // length of replacement for this
460        append( pasting,
461            this.Handle.s + this.Handle.lnth,                  // start of SES after this
462            shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth -
463            (this.Handle.s + this.Handle.lnth) );              // length of SES, after this
464
465        // The above string building can trigger compaction.
466        // The reference points (that are arguments of the string building) may move during that building.
467        // From this point on, they are stable.
468
469        assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
470            pasting.Handle.s,
471            pasting.Handle.lnth,
472            &pasting.Handle, bsize);
473    }
474}
475
476void assign(string_res &this, const char* buffer, size_t bsize) {
477    assign_(this, buffer, bsize, *0p);
478}
479
480void ?=?(string_res &s, const char* other) {
481    assign(s, other, strlen(other));
482}
483
484void ?=?(string_res &s, char other) {
485    assign(s, &other, 1);
486}
487
488// Copy assignment operator
489void ?=?(string_res & this, const string_res & rhs) with( this ) {
490    assign_(this, rhs.Handle.s, rhs.Handle.lnth, rhs);
491}
492
493void ?=?(string_res & this, string_res & rhs) with( this ) {
494    const string_res & rhs2 = rhs;
495    this = rhs2;
496}
497
498
499// Destructor
500void ^?{}(string_res &s) with(s) {
501    // much delegated to implied ^VbyteSM
502
503    // sever s from its share-edit peers, if any (four no-ops when already solo)
504    s.shareEditSet_prev->shareEditSet_next = s.shareEditSet_next;
505    s.shareEditSet_next->shareEditSet_prev = s.shareEditSet_prev;
506    // s.shareEditSet_next = &s;
507    // s.shareEditSet_prev = &s;
508
509    if (shareEditSet_owns_ulink && s.shareEditSet_next == &s) { // last one out
510        delete( s.Handle.ulink );
511    }
512}
513
514
515// Returns the character at the given index
516// With unicode support, this may be different from just the byte at the given
517// offset from the start of the string.
518char ?[?](const string_res &s, size_t index) with(s) {
519    //TODO: Check if index is valid (no exceptions yet)
520    return Handle.s[index];
521}
522
523void assignAt(const string_res &s, size_t index, char val) {
524    string_res editZone = { s, SHARE_EDITS, index, index+1 };
525    assign(editZone, &val, 1);
526}
527
528
529///////////////////////////////////////////////////////////////////
530// Concatenation
531
532void append(string_res &str1, const char * buffer, size_t bsize) {
533    size_t clnth = size(str1) + bsize;
534    if ( str1.Handle.s + size(str1) == buffer ) { // already juxtapose ?
535        // no-op
536    } else {                                            // must copy some text
537        if ( str1.Handle.s + size(str1) == VbyteAlloc(*str1.Handle.ulink, 0) ) { // str1 at end of string area ?
538            VbyteAlloc( *str1.Handle.ulink, bsize ); // create room for 2nd part at the end of string area
539        } else {                                        // copy the two parts
540            char * str1oldBuf = str1.Handle.s;
541            str1.Handle.s = VbyteAlloc( *str1.Handle.ulink, clnth );
542            ByteCopy( str1.Handle.s, 0, str1.Handle.lnth, str1oldBuf, 0, str1.Handle.lnth);
543        } // if
544        ByteCopy( str1.Handle.s, str1.Handle.lnth, bsize, (char*)buffer, 0, (int)bsize);
545        //       VbyteHeap & this, char *Dst, int DstStart, int DstLnth, char *Src, int SrcStart, int SrcLnth
546    } // if
547    str1.Handle.lnth = clnth;
548}
549
550void ?+=?(string_res &str1, const string_res &str2) {
551    append( str1, str2.Handle.s, str2.Handle.lnth );
552}
553
554void ?+=?(string_res &s, char other) {
555    append( s, &other, 1 );
556}
557
558void ?+=?(string_res &s, const char* other) {
559    append( s, other, strlen(other) );
560}
561
562
563
564
565//////////////////////////////////////////////////////////
566// Comparisons
567
568
569bool ?==?(const string_res &s1, const string_res &s2) {
570    return ByteCmp( s1.Handle.s, 0, s1.Handle.lnth, s2.Handle.s, 0, s2.Handle.lnth) == 0;
571}
572
573bool ?!=?(const string_res &s1, const string_res &s2) {
574    return !(s1 == s2);
575}
576bool ?==?(const string_res &s, const char* other) {
577    string_res sother = other;
578    return s == sother;
579}
580bool ?!=?(const string_res &s, const char* other) {
581    return !(s == other);
582}
583
584
585//////////////////////////////////////////////////////////
586// Search
587
588bool contains(const string_res &s, char ch) {
589    for (i; size(s)) {
590        if (s[i] == ch) return true;
591    }
592    return false;
593}
594
595int find(const string_res &s, char search) {
596    for (i; size(s)) {
597        if (s[i] == search) return i;
598    }
599    return size(s);
600}
601
602    /* Remaining implementations essentially ported from Sunjay's work */
603
604int find(const string_res &s, const string_res &search) {
605    return find(s, search.Handle.s, search.Handle.lnth);
606}
607
608int find(const string_res &s, const char* search) {
609    return find(s, search, strlen(search));
610}
611
612int find(const string_res &s, const char* search, size_t searchsize) {
613    // FIXME: This is a naive algorithm. We probably want to switch to someting
614    // like Boyer-Moore in the future.
615    // https://en.wikipedia.org/wiki/String_searching_algorithm
616
617    // Always find the empty string
618    if (searchsize == 0) {
619        return 0;
620    }
621
622    for (size_t i = 0; i < s.Handle.lnth; i++) {
623        size_t remaining = s.Handle.lnth - i;
624        // Never going to find the search string if the remaining string is
625        // smaller than search
626        if (remaining < searchsize) {
627            break;
628        }
629
630        bool matched = true;
631        for (size_t j = 0; j < searchsize; j++) {
632            if (search[j] != s.Handle.s[i + j]) {
633                matched = false;
634                break;
635            }
636        }
637        if (matched) {
638            return i;
639        }
640    }
641
642    return s.Handle.lnth;
643}
644
645bool includes(const string_res &s, const string_res &search) {
646    return includes(s, search.Handle.s, search.Handle.lnth);
647}
648
649bool includes(const string_res &s, const char* search) {
650    return includes(s, search, strlen(search));
651}
652
653bool includes(const string_res &s, const char* search, size_t searchsize) {
654    return find(s, search, searchsize) < s.Handle.lnth;
655}
656
657bool startsWith(const string_res &s, const string_res &prefix) {
658    return startsWith(s, prefix.Handle.s, prefix.Handle.lnth);
659}
660
661bool startsWith(const string_res &s, const char* prefix) {
662    return startsWith(s, prefix, strlen(prefix));
663}
664
665bool startsWith(const string_res &s, const char* prefix, size_t prefixsize) {
666    if (s.Handle.lnth < prefixsize) {
667        return false;
668    }
669    return memcmp(s.Handle.s, prefix, prefixsize) == 0;
670}
671
672bool endsWith(const string_res &s, const string_res &suffix) {
673    return endsWith(s, suffix.Handle.s, suffix.Handle.lnth);
674}
675
676bool endsWith(const string_res &s, const char* suffix) {
677    return endsWith(s, suffix, strlen(suffix));
678}
679
680bool endsWith(const string_res &s, const char* suffix, size_t suffixsize) {
681    if (s.Handle.lnth < suffixsize) {
682        return false;
683    }
684    // Amount to offset the bytes pointer so that we are comparing the end of s
685    // to suffix. s.bytes + offset should be the first byte to compare against suffix
686    size_t offset = s.Handle.lnth - suffixsize;
687    return memcmp(s.Handle.s + offset, suffix, suffixsize) == 0;
688}
689
690    /* Back to Mike's work */
691
692
693///////////////////////////////////////////////////////////////////////////
694// charclass, include, exclude
695
696void ?{}( charclass_res & this, const string_res & chars) {
697    (this){ chars.Handle.s, chars.Handle.lnth };
698}
699
700void ?{}( charclass_res & this, const char * chars ) {
701    (this){ chars, strlen(chars) };
702}
703
704void ?{}( charclass_res & this, const char * chars, size_t charssize ) {
705    (this.chars){ chars, charssize };
706    // now sort it ?
707}
708
709void ^?{}( charclass_res & this ) {
710    ^(this.chars){};
711}
712
713static bool test( const charclass_res & mask, char c ) {
714    // instead, use sorted char list?
715    return contains( mask.chars, c );
716}
717
718int exclude(const string_res &s, const charclass_res &mask) {
719    for (int i = 0; i < size(s); i++) {
720        if ( test(mask, s[i]) ) return i;
721    }
722    return size(s);
723}
724
725int include(const string_res &s, const charclass_res &mask) {
726    for (int i = 0; i < size(s); i++) {
727        if ( ! test(mask, s[i]) ) return i;
728    }
729    return size(s);
730}
731
732//######################### VbyteHeap "implementation" #########################
733
734
735// Add a new HandleNode node n after the current HandleNode node.
736
737static inline void AddThisAfter( HandleNode & this, HandleNode & n ) with(this) {
738#ifdef VbyteDebug
739    serr | "enter:AddThisAfter, this:" | &this | " n:" | &n;
740#endif // VbyteDebug
741    verify( n.ulink != 0p );
742    verify( this.ulink == n.ulink );
743    flink = n.flink;
744    blink = &n;
745    n.flink->blink = &this;
746    n.flink = &this;
747#ifdef VbyteDebug
748    {
749                serr | "HandleList:";
750                serr | nlOff;
751                for ( HandleNode *ni = HeaderPtr->flink; ni != HeaderPtr; ni = ni->flink ) {
752                        serr | "\tnode:" | ni | " lnth:" | ni->lnth | " s:" | (void *)ni->s | ",\"";
753                        for ( int i = 0; i < ni->lnth; i += 1 ) {
754                                serr | ni->s[i];
755                        } // for
756                        serr | "\" flink:" | ni->flink | " blink:" | ni->blink | nl;
757                } // for
758                serr | nlOn;
759    }
760    serr | "exit:AddThisAfter";
761#endif // VbyteDebug
762} // AddThisAfter
763
764
765// Delete the current HandleNode node.
766
767static inline void DeleteNode( HandleNode & this ) with(this) {
768#ifdef VbyteDebug
769    serr | "enter:DeleteNode, this:" | &this;
770#endif // VbyteDebug
771    flink->blink = blink;
772    blink->flink = flink;
773#ifdef VbyteDebug
774    serr | "exit:DeleteNode";
775#endif // VbyteDebug
776} //  DeleteNode
777
778
779
780// Allocates specified storage for a string from byte-string area. If not enough space remains to perform the
781// allocation, the garbage collection routine is called.
782
783static inline char * VbyteAlloc( VbyteHeap & this, int size ) with(this) {
784#ifdef VbyteDebug
785    serr | "enter:VbyteAlloc, size:" | size;
786#endif // VbyteDebug
787    uintptr_t NoBytes;
788    char *r;
789
790    NoBytes = ( uintptr_t )EndVbyte + size;
791    if ( NoBytes > ( uintptr_t )ExtVbyte ) {            // enough room for new byte-string ?
792                garbage( this, size );                                  // firer up the garbage collector
793                NoBytes = ( uintptr_t )EndVbyte + size;         // try again
794                if ( NoBytes > ( uintptr_t )ExtVbyte ) {        // enough room for new byte-string ?
795            assert( 0 && "garbage run did not free up required space" );
796                } // if
797    } // if
798    r = EndVbyte;
799    EndVbyte += size;
800#ifdef VbyteDebug
801    serr | "exit:VbyteAlloc, r:" | (void *)r | " EndVbyte:" | (void *)EndVbyte | " ExtVbyte:" | ExtVbyte;
802#endif // VbyteDebug
803    return r;
804} // VbyteAlloc
805
806
807// Adjusts the last allocation in this heap by delta bytes, or resets this heap to be able to offer
808// new allocations of its original size + delta bytes. Positive delta means bigger;
809// negative means smaller.  A null return indicates that the original heap location has room for
810// the requested growth.  A non-null return indicates that copying to a new location is required
811// but has not been done; the returned value is the old heap storage location; `this` heap is
812// modified to reference the new location.  In the copy-requred case, the caller should use
813// VbyteAlloc to claim the new space, while doing optimal copying from old to new, then free old.
814
815static inline char * VbyteTryAdjustLast( VbyteHeap & this, int delta ) with(this) {
816
817    if ( ( uintptr_t )EndVbyte + delta <= ( uintptr_t )ExtVbyte ) {
818        // room available
819        EndVbyte += delta;
820        return 0p;
821    }
822
823    char *oldBytes = StartVbyte;
824
825    NoOfExtensions += 1;
826    CurrSize *= 2;
827    StartVbyte = EndVbyte = alloc(CurrSize);
828    ExtVbyte = StartVbyte + CurrSize;
829
830    return oldBytes;
831}
832
833
834// Move an existing HandleNode node h somewhere after the current HandleNode node so that it is in ascending order by
835// the address in the byte string area.
836
837static inline void MoveThisAfter( HandleNode & this, const HandleNode  & h ) with(this) {
838#ifdef VbyteDebug
839    serr | "enter:MoveThisAfter, this:" | & this | " h:" | & h;
840#endif // VbyteDebug
841    verify( h.ulink != 0p );
842    verify( this.ulink == h.ulink );
843    if ( s < h.s ) {                                    // check argument values
844                // serr | "VbyteSM: Error - Cannot move byte string starting at:" | s | " after byte string starting at:"
845                //      | ( h->s ) | " and keep handles in ascending order";
846                // exit(-1 );
847                verify( 0 && "VbyteSM: Error - Cannot move byte strings as requested and keep handles in ascending order");
848    } // if
849
850    HandleNode *i;
851    for ( i = h.flink; i->s != 0 && s > ( i->s ); i = i->flink ); // find the position for this node after h
852    if ( & this != i->blink ) {
853                DeleteNode( this );
854                AddThisAfter( this, *i->blink );
855    } // if
856#ifdef VbyteDebug
857    {
858        serr | "HandleList:";
859        serr | nlOff;
860        for ( HandleNode *n = HeaderPtr->flink; n != HeaderPtr; n = n->flink ) {
861            serr | "\tnode:" | n | " lnth:" | n->lnth | " s:" | (void *)n->s | ",\"";
862            for ( int i = 0; i < n->lnth; i += 1 ) {
863                serr | n->s[i];
864            } // for
865            serr | "\" flink:" | n->flink | " blink:" | n->blink | nl;
866        } // for
867        serr | nlOn;
868    }
869    serr | "exit:MoveThisAfter";
870#endif // VbyteDebug
871} // MoveThisAfter
872
873
874
875
876
877//######################### VbyteHeap #########################
878
879// Move characters from one location in the byte-string area to another. The routine handles the following situations:
880//
881// if the |Src| > |Dst| => truncate
882// if the |Dst| > |Src| => pad Dst with blanks
883
884void ByteCopy( char *Dst, int DstStart, int DstLnth, char *Src, int SrcStart, int SrcLnth ) {
885    for ( int i = 0; i < DstLnth; i += 1 ) {
886      if ( i == SrcLnth ) {                             // |Dst| > |Src|
887            for ( ; i < DstLnth; i += 1 ) {             // pad Dst with blanks
888                Dst[DstStart + i] = ' ';
889            } // for
890            break;
891        } // exit
892        Dst[DstStart + i] = Src[SrcStart + i];
893    } // for
894} // ByteCopy
895
896// Compare two byte strings in the byte-string area. The routine returns the following values:
897//
898// 1 => Src1-byte-string > Src2-byte-string
899// 0 => Src1-byte-string = Src2-byte-string
900// -1 => Src1-byte-string < Src2-byte-string
901
902int ByteCmp( char *Src1, int Src1Start, int Src1Lnth, char *Src2, int Src2Start, int Src2Lnth )  {
903#ifdef VbyteDebug
904    serr | "enter:ByteCmp, Src1Start:" | Src1Start | " Src1Lnth:" | Src1Lnth | " Src2Start:" | Src2Start | " Src2Lnth:" | Src2Lnth;
905#endif // VbyteDebug
906    int cmp;
907
908    CharZip: for ( int i = 0; ; i += 1 ) {
909        if ( i == Src2Lnth - 1 ) {
910            for ( ; ; i += 1 ) {
911                if ( i == Src1Lnth - 1 ) {
912                    cmp = 0;
913                    break CharZip;
914                } // exit
915                if ( Src1[Src1Start + i] != ' ') {
916                        // SUSPECTED BUG:  this could be be why Peter got the bug report about == " "  (why is this case here at all?)
917                    cmp = 1;
918                    break CharZip;
919                } // exit
920            } // for
921        } // exit
922        if ( i == Src1Lnth - 1 ) {
923            for ( ; ; i += 1 ) {
924                if ( i == Src2Lnth - 1 ) {
925                    cmp = 0;
926                    break CharZip;
927                } // exit
928                if ( Src2[Src2Start + i] != ' ') {
929                    cmp = -1;
930                    break CharZip;
931                } // exit
932            } // for
933        } // exit
934      if ( Src2[Src2Start + i] != Src1[Src1Start+ i]) {
935            cmp = Src1[Src1Start + i] > Src2[Src2Start + i] ? 1 : -1;
936            break CharZip;
937        } // exit
938    } // for
939#ifdef VbyteDebug
940    serr | "exit:ByteCmp, cmp:" | cmp;
941#endif // VbyteDebug
942    return cmp;
943} // ByteCmp
944
945
946// The compaction moves all of the byte strings currently in use to the beginning of the byte-string area and modifies
947// the handles to reflect the new positions of the byte strings. Compaction assumes that the handle list is in ascending
948// order by pointers into the byte-string area.  The strings associated with substrings do not have to be moved because
949// the containing string has been moved. Hence, they only require that their string pointers be adjusted.
950
951void compaction(VbyteHeap & this) with(this) {
952    HandleNode *h;
953    char *obase, *nbase, *limit;
954   
955    NoOfCompactions += 1;
956    EndVbyte = StartVbyte;
957    h = Header.flink;                                   // ignore header node
958    for (;;) {
959                ByteCopy( EndVbyte, 0, h->lnth, h->s, 0, h->lnth );
960                obase = h->s;
961                h->s = EndVbyte;
962                nbase = h->s;
963                EndVbyte += h->lnth;
964                limit = obase + h->lnth;
965                h = h->flink;
966               
967                // check if any substrings are allocated within a string
968               
969                for (;;) {
970                        if ( h == &Header ) break;                      // end of header list ?
971                        if ( h->s >= limit ) break;                     // outside of current string ?
972                        h->s = nbase + (( uintptr_t )h->s - ( uintptr_t )obase );
973                        h = h->flink;
974                } // for
975                if ( h == &Header ) break;                      // end of header list ?
976    } // for
977} // compaction
978
979
980// Garbage determines the amount of free space left in the heap and then reduces, leave the same, or extends the size of
981// the heap.  The heap is then compacted in the existing heap or into the newly allocated heap.
982
983void garbage(VbyteHeap & this, int minreq ) with(this) {
984#ifdef VbyteDebug
985    serr | "enter:garbage";
986    {
987                serr | "HandleList:";
988                for ( HandleNode *n = Header.flink; n != &Header; n = n->flink ) {
989                        serr | nlOff;
990                        serr | "\tnode:" | n | " lnth:" | n->lnth | " s:" | (void *)n->s | ",\"";
991                        for ( int i = 0; i < n->lnth; i += 1 ) {
992                                serr | n->s[i];
993                        } // for
994                        serr | nlOn;
995                        serr | "\" flink:" | n->flink | " blink:" | n->blink;
996                } // for
997    }
998#endif // VbyteDebug
999    int AmountUsed, AmountFree;
1000
1001    AmountUsed = 0;
1002    for ( HandleNode *i = Header.flink; i != &Header; i = i->flink ) { // calculate amount of byte area used
1003                AmountUsed += i->lnth;
1004    } // for
1005    AmountFree = ( uintptr_t )ExtVbyte - ( uintptr_t )StartVbyte - AmountUsed;
1006   
1007    if ( ( double ) AmountFree < ( CurrSize * 0.1 ) || AmountFree < minreq ) {  // free space less than 10%  or not enough to serve cur request
1008
1009                extend( this, max( CurrSize, minreq ) );                                // extend the heap
1010
1011                        //  Peter says, "This needs work before it should be used."
1012                        //  } else if ( AmountFree > CurrSize / 2 ) {           // free space greater than 3 times the initial allocation ?
1013                        //              reduce(( AmountFree / CurrSize - 3 ) * CurrSize ); // reduce the memory
1014
1015    } // if
1016    compaction(this);                                   // compact the byte area, in the same or new heap area
1017#ifdef VbyteDebug
1018    {
1019                serr | "HandleList:";
1020                for ( HandleNode *n = Header.flink; n != &Header; n = n->flink ) {
1021                        serr | nlOff;
1022                        serr | "\tnode:" | n | " lnth:" | n->lnth | " s:" | (void *)n->s | ",\"";
1023                        for ( int i = 0; i < n->lnth; i += 1 ) {
1024                                serr | n->s[i];
1025                        } // for
1026                        serr | nlOn;
1027                        serr | "\" flink:" | n->flink | " blink:" | n->blink;
1028                } // for
1029    }
1030    serr | "exit:garbage";
1031#endif // VbyteDebug
1032} // garbage
1033
1034#undef VbyteDebug
1035
1036
1037
1038// Extend the size of the byte-string area by creating a new area and copying the old area into it. The old byte-string
1039// area is deleted.
1040
1041void extend( VbyteHeap & this, int size ) with (this) {
1042#ifdef VbyteDebug
1043    serr | "enter:extend, size:" | size;
1044#endif // VbyteDebug
1045    char *OldStartVbyte;
1046
1047    NoOfExtensions += 1;
1048    OldStartVbyte = StartVbyte;                         // save previous byte area
1049   
1050    CurrSize += size > InitSize ? size : InitSize;      // minimum extension, initial size
1051    StartVbyte = EndVbyte = alloc(CurrSize);
1052    ExtVbyte = (void *)( StartVbyte + CurrSize );
1053    compaction(this);                                   // copy from old heap to new & adjust pointers to new heap
1054    free( OldStartVbyte );                              // release old heap
1055#ifdef VbyteDebug
1056    serr | "exit:extend, CurrSize:" | CurrSize;
1057#endif // VbyteDebug
1058} // extend
1059
1060//WIP
1061#if 0
1062
1063// Extend the size of the byte-string area by creating a new area and copying the old area into it. The old byte-string
1064// area is deleted.
1065
1066void VbyteHeap::reduce( int size ) {
1067#ifdef VbyteDebug
1068    serr | "enter:reduce, size:" | size;
1069#endif // VbyteDebug
1070    char *OldStartVbyte;
1071
1072    NoOfReductions += 1;
1073    OldStartVbyte = StartVbyte;                         // save previous byte area
1074   
1075    CurrSize -= size;
1076    StartVbyte = EndVbyte = new char[CurrSize];
1077    ExtVbyte = (void *)( StartVbyte + CurrSize );
1078    compaction();                                       // copy from old heap to new & adjust pointers to new heap
1079    delete  OldStartVbyte;                              // release old heap
1080#ifdef VbyteDebug
1081    serr | "exit:reduce, CurrSize:" | CurrSize;
1082#endif // VbyteDebug
1083} // reduce
1084
1085
1086#endif
Note: See TracBrowser for help on using the repository browser.