source: libcfa/src/containers/string_res.cfa @ 804bf677

ADTast-experimentalenumpthread-emulationqualifiedEnum
Last change on this file since 804bf677 was 804bf677, checked in by Michael Brooks <mlbrooks@…>, 3 years ago

String hybrid: Basic cases of solo alloc now working

  • Property mode set to 100644
File size: 35.0 KB
Line 
1//
2// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// string_res -- variable-length, mutable run of text, with resource semantics
8//
9// Author           : Michael L. Brooks
10// Created On       : Fri Sep 03 11:00:00 2021
11// Last Modified By : Michael L. Brooks
12// Last Modified On : Fri Sep 03 11:00:00 2021
13// Update Count     : 1
14//
15
16#include "string_res.hfa"
17#include "string_sharectx.hfa"
18
19#include <stdlib.hfa>  // e.g. malloc
20#include <string.h>    // e.g. strlen
21#include <assert.h>
22
23//######################### VbyteHeap "header" #########################
24
25#ifdef VbyteDebug
26HandleNode *HeaderPtr;
27#endif // VbyteDebug
28
29struct VbyteHeap {
30
31    int NoOfCompactions;                                // number of compactions of the byte area
32    int NoOfExtensions;                                 // number of extensions in the size of the byte area
33    int NoOfReductions;                                 // number of reductions in the size of the byte area
34   
35    int InitSize;                                       // initial number of bytes in the byte-string area
36    int CurrSize;                                       // current number of bytes in the byte-string area
37    char *StartVbyte;                                   // pointer to the `st byte of the start of the byte-string area
38    char *EndVbyte;                                     // pointer to the next byte after the end of the currently used portion of byte-string area
39    void *ExtVbyte;                                     // pointer to the next byte after the end of the byte-string area
40
41    HandleNode Header;                                  // header node for handle list
42}; // VbyteHeap
43
44   
45static inline void compaction( VbyteHeap & );                           // compaction of the byte area
46static inline void garbage( VbyteHeap & );                              // garbage collect the byte area
47static inline void extend( VbyteHeap &, int );                  // extend the size of the byte area
48static inline void reduce( VbyteHeap &, int );                  // reduce the size of the byte area
49
50static inline void ?{}( VbyteHeap &, size_t = 1000 );
51static inline void ^?{}( VbyteHeap & );
52static inline void ByteCopy( char *, int, int, char *, int, int ); // copy a block of bytes from one location in the heap to another
53static inline int ByteCmp( char *, int, int, char *, int, int );        // compare 2 blocks of bytes
54static inline char *VbyteAlloc( VbyteHeap &, int );                     // allocate a block bytes in the heap
55
56
57static inline void AddThisAfter( HandleNode &, HandleNode & );
58static inline void DeleteNode( HandleNode & );
59static inline void MoveThisAfter( HandleNode &, const HandleNode & );           // move current handle after parameter handle
60
61
62// Allocate the storage for the variable sized area and intialize the heap variables.
63
64static inline void ?{}( VbyteHeap & this, size_t Size ) with(this) {
65#ifdef VbyteDebug
66    serr | "enter:VbyteHeap::VbyteHeap, this:" | &this | " Size:" | Size;
67#endif // VbyteDebug
68    NoOfCompactions = NoOfExtensions = NoOfReductions = 0;
69    InitSize = CurrSize = Size;
70    StartVbyte = EndVbyte = alloc(CurrSize);
71    ExtVbyte = (void *)( StartVbyte + CurrSize );
72    Header.flink = Header.blink = &Header;
73    Header.ulink = & this;
74#ifdef VbyteDebug
75    HeaderPtr = &Header;
76    serr | "exit:VbyteHeap::VbyteHeap, this:" | &this;
77#endif // VbyteDebug
78} // VbyteHeap
79
80
81// Release the dynamically allocated storage for the byte area.
82
83static inline void ^?{}( VbyteHeap & this ) with(this) {
84    free( StartVbyte );
85} // ~VbyteHeap
86
87
88//######################### HandleNode #########################
89
90
91// Create a handle node. The handle is not linked into the handle list.  This is the responsibilitiy of the handle
92// creator.
93
94static inline void ?{}( HandleNode & this ) with(this) {
95#ifdef VbyteDebug
96    serr | "enter:HandleNode::HandleNode, this:" | &this;
97#endif // VbyteDebug
98    s = 0;
99    lnth = 0;
100#ifdef VbyteDebug
101    serr | "exit:HandleNode::HandleNode, this:" | &this;
102#endif // VbyteDebug
103} // HandleNode
104
105// Create a handle node. The handle is linked into the handle list at the end. This means that this handle will NOT be
106// in order by string address, but this is not a problem because a string with length zero does nothing during garbage
107// collection.
108
109static inline void ?{}( HandleNode & this, VbyteHeap & vh ) with(this) {
110#ifdef VbyteDebug
111    serr | "enter:HandleNode::HandleNode, this:" | &this;
112#endif // VbyteDebug
113    s = 0;
114    lnth = 0;
115    ulink = &vh;
116    AddThisAfter( this, *vh.Header.blink );
117#ifdef VbyteDebug
118    serr | "exit:HandleNode::HandleNode, this:" | &this;
119#endif // VbyteDebug
120} // HandleNode
121
122
123// Delete a node from the handle list by unchaining it from the list. If the handle node was allocated dynamically, it
124// is the responsibility of the creator to destroy it.
125
126static inline void ^?{}( HandleNode & this ) with(this) {
127#ifdef VbyteDebug
128    serr | "enter:HandleNode::~HandleNode, this:" | & this;
129    {
130        serr | nlOff;
131        serr | " lnth:" | lnth | " s:" | (void *)s | ",\"";
132        for ( int i = 0; i < lnth; i += 1 ) {
133            serr | s[i];
134        } // for
135        serr | "\" flink:" | flink | " blink:" | blink | nl;
136        serr | nlOn;
137    }
138#endif // VbyteDebug
139    DeleteNode( this );
140} // ~HandleNode
141
142
143//######################### String Sharing Context #########################
144
145static string_sharectx * ambient_string_sharectx;               // fickle top of stack
146static string_sharectx default_string_sharectx = {NEW_SHARING}; // stable bottom of stack
147
148void ?{}( string_sharectx & this, StringSharectx_Mode mode ) with( this ) {
149    (older){ ambient_string_sharectx };
150    if ( mode == NEW_SHARING ) {
151        (activeHeap){ new( (size_t) 1000 ) };
152    } else {
153        verify( mode == NO_SHARING );
154        (activeHeap){ 0p };
155    }
156    ambient_string_sharectx = & this;
157}
158
159void ^?{}( string_sharectx & this ) with( this ) {
160    if ( activeHeap ) delete( activeHeap );
161
162    // unlink this from older-list starting from ambient_string_sharectx
163    // usually, this==ambient_string_sharectx and the loop runs zero times
164    string_sharectx *& c = ambient_string_sharectx;
165    while ( c != &this ) &c = &c->older;              // find this
166    c = this.older;                                   // unlink
167}
168
169//######################### String Resource #########################
170
171
172VbyteHeap * DEBUG_string_heap() {
173    assert( ambient_string_sharectx->activeHeap && "No sharing context is active" );
174    return ambient_string_sharectx->activeHeap;
175}
176
177size_t DEBUG_string_bytes_avail_until_gc( VbyteHeap * heap ) {
178    return ((char*)heap->ExtVbyte) - heap->EndVbyte;
179}
180
181const char * DEBUG_string_heap_start( VbyteHeap * heap ) {
182    return heap->StartVbyte;
183}
184
185
186// Returns the size of the string in bytes
187size_t size(const string_res &s) with(s) {
188    return Handle.lnth;
189}
190
191// Output operator
192ofstream & ?|?(ofstream &out, const string_res &s) {
193    // Store auto-newline state so it can be restored
194    bool anl = getANL$(out);
195    nlOff(out);
196    for (size_t i = 0; i < s.Handle.lnth; i++) {
197        out | s[i];
198    }
199    out | sep;
200    // Re-apply newlines after done, for chaining version
201    if (anl) nlOn(out);
202    return out;
203}
204
205void ?|?(ofstream &out, const string_res &s) {
206    // Store auto-newline state so it can be restored
207    bool anl = getANL$(out);
208    nlOff(out);
209    for (size_t i = 0; i < s.Handle.lnth; i++) {
210        // Need to re-apply on the last output operator, for whole-statement version
211        if (anl && i == s.Handle.lnth-1) nlOn(out);
212        out | s[i];
213    }
214    return out;
215}
216
217// Empty constructor
218void ?{}(string_res &s) with(s) {
219    if( ambient_string_sharectx->activeHeap ) {
220        (Handle){ * ambient_string_sharectx->activeHeap };
221        (shareEditSet_owns_ulink){ false };
222    } else {
223        (Handle){ * new( (size_t) 10 ) };  // TODO: can I lazily avoid allocating for empty string
224        (shareEditSet_owns_ulink){ true };
225    }
226    s.shareEditSet_prev = &s;
227    s.shareEditSet_next = &s;
228}
229
230static inline void eagerCopyCtorHelper(string_res &s, const char* rhs, size_t rhslnth) with(s) {
231    if( ambient_string_sharectx->activeHeap ) {
232        (Handle){ * ambient_string_sharectx->activeHeap };
233        (shareEditSet_owns_ulink){ false };
234    } else {
235        (Handle){ * new( rhslnth ) };
236        (shareEditSet_owns_ulink){ true };
237    }
238    Handle.s = VbyteAlloc(*Handle.ulink, rhslnth);
239    Handle.lnth = rhslnth;
240    for ( int i = 0; i < rhslnth; i += 1 ) {            // copy characters
241        Handle.s[i] = rhs[i];
242    } // for
243    s.shareEditSet_prev = &s;
244    s.shareEditSet_next = &s;
245}
246
247// Constructor from a raw buffer and size
248void ?{}(string_res &s, const char* rhs, size_t rhslnth) with(s) {
249    eagerCopyCtorHelper(s, rhs, rhslnth);
250}
251
252// String literal constructor
253void ?{}(string_res &s, const char* rhs) {
254    (s){ rhs, strlen(rhs) };
255}
256
257// General copy constructor
258void ?{}(string_res &s, const string_res & s2, StrResInitMode mode, size_t start, size_t end ) {
259
260    verify( start <= end && end <= s2.Handle.lnth );
261
262    if (s2.Handle.ulink == ambient_string_sharectx->activeHeap) {
263        // same heap: allow overlap
264        (s.Handle){};
265        s.Handle.s = s2.Handle.s + start;
266        s.Handle.lnth = end - start;
267        s.Handle.ulink = ambient_string_sharectx->activeHeap;
268        (s.shareEditSet_owns_ulink){ false };
269        AddThisAfter(s.Handle, s2.Handle );                     // insert this handle after rhs handle
270        // ^ bug?  skip others at early point in string
271   
272        if (mode == COPY_VALUE) {
273            // make s alone in its shareEditSet
274            s.shareEditSet_prev = &s;
275            s.shareEditSet_next = &s;
276        } else {
277            verify( mode == SHARE_EDITS );
278
279            // s2 is logically const but not implementation const
280            string_res & s2mod = (string_res &) s2;
281
282            // insert s after s2 on shareEditSet
283            s.shareEditSet_next = s2mod.shareEditSet_next;
284            s.shareEditSet_prev = &s2mod;
285            s.shareEditSet_next->shareEditSet_prev = &s;
286            s.shareEditSet_prev->shareEditSet_next = &s;
287        }
288    } else {
289        // crossing heaps: eager copy
290        assert( mode == COPY_VALUE && "need to solidify context-crossing rules for requesting shared edits");
291        eagerCopyCtorHelper(s, s2.Handle.s + start, end - start);
292        verify(s.shareEditSet_prev == &s);
293        verify(s.shareEditSet_next == &s);
294    }
295}
296
297static void assignEditSet(string_res & this, string_res * shareEditSetStartPeer, string_res * shareEditSetEndPeer,
298    char * resultSesStart,
299    size_t resultSesLnth,
300    HandleNode * resultPadPosition, size_t bsize ) {
301
302    char * beforeBegin = shareEditSetStartPeer->Handle.s;
303    size_t beforeLen = this.Handle.s - beforeBegin;
304
305    char * afterBegin = this.Handle.s + this.Handle.lnth;
306    size_t afterLen = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - afterBegin;
307
308    size_t oldLnth = this.Handle.lnth;
309
310    this.Handle.s = resultSesStart + beforeLen;
311    this.Handle.lnth = bsize;
312    MoveThisAfter( this.Handle, *resultPadPosition );
313
314    // adjust all substring string and handle locations, and check if any substring strings are outside the new base string
315    char *limit = resultSesStart + resultSesLnth;
316    for (string_res * p = this.shareEditSet_next; p != &this; p = p->shareEditSet_next) {
317        verify (p->Handle.s >= beforeBegin);
318        if ( p->Handle.s >= afterBegin ) {
319            verify ( p->Handle.s <= afterBegin + afterLen );
320            verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
321            // p starts after the edit
322            // take start and end as end-anchored
323            size_t startOffsetFromEnd = afterBegin + afterLen - p->Handle.s;
324            p->Handle.s = limit - startOffsetFromEnd;
325            // p->Handle.lnth unaffected
326        } else if ( p->Handle.s <= beforeBegin + beforeLen ) {
327            // p starts before, or at the start of, the edit
328            if ( p->Handle.s + p->Handle.lnth <= beforeBegin + beforeLen ) {
329                // p ends before the edit
330                // take end as start-anchored too
331                // p->Handle.lnth unaffected
332            } else if ( p->Handle.s + p->Handle.lnth < afterBegin ) {
333                // p ends during the edit; p does not include the last character replaced
334                // clip end of p to end at start of edit
335                p->Handle.lnth = beforeLen - ( p->Handle.s - beforeBegin );
336            } else {
337                // p ends after the edit
338                verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
339                // take end as end-anchored
340                // stretch-shrink p according to the edit
341                p->Handle.lnth += this.Handle.lnth;
342                p->Handle.lnth -= oldLnth;
343            }
344            // take start as start-anchored
345            size_t startOffsetFromStart = p->Handle.s - beforeBegin;
346            p->Handle.s = resultSesStart + startOffsetFromStart;
347        } else {
348            verify ( p->Handle.s < afterBegin );
349            // p starts during the edit
350            verify( p->Handle.s + p->Handle.lnth >= beforeBegin + beforeLen );
351            if ( p->Handle.s + p->Handle.lnth < afterBegin ) {
352                // p ends during the edit; p does not include the last character replaced
353                // set p to empty string at start of edit
354                p->Handle.s = this.Handle.s;
355                p->Handle.lnth = 0;
356            } else {
357                // p includes the end of the edit
358                // clip start of p to start at end of edit
359                int charsToClip = afterBegin - p->Handle.s;
360                p->Handle.s = this.Handle.s + this.Handle.lnth;
361                p->Handle.lnth -= charsToClip;
362            }
363        }
364        MoveThisAfter( p->Handle, *resultPadPosition ); // move substring handle to maintain sorted order by string position
365    }
366}
367
368static void assign_(string_res &this, const char* buffer, size_t bsize, const string_res & valSrc) {
369
370    // traverse the incumbent share-edit set (SES) to recover the range of a base string to which `this` belongs
371    string_res * shareEditSetStartPeer = & this;
372    string_res * shareEditSetEndPeer = & this;
373    for (string_res * editPeer = this.shareEditSet_next; editPeer != &this; editPeer = editPeer->shareEditSet_next) {
374        if ( editPeer->Handle.s < shareEditSetStartPeer->Handle.s ) {
375            shareEditSetStartPeer = editPeer;
376        }
377        if ( shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth < editPeer->Handle.s + editPeer->Handle.lnth) {
378            shareEditSetEndPeer = editPeer;
379        }
380    }
381
382    verify( shareEditSetEndPeer->Handle.s >= shareEditSetStartPeer->Handle.s );
383    size_t editSetLength = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - shareEditSetStartPeer->Handle.s;
384    verify( editSetLength >= this.Handle.lnth );
385
386
387    if ( this.Handle.lnth == editSetLength                // the entire run of the share-edit set is being overwritten: SES's result will only use characters from the source string
388        && & valSrc                                       // sourcing from a managed string
389        && valSrc.Handle.ulink == this.Handle.ulink  ) {  // sourcing from same heap
390
391        assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
392            valSrc.Handle.s,
393            valSrc.Handle.lnth,
394            &((string_res&)valSrc).Handle, bsize);
395       
396    } else {
397
398        // full string is from start of shareEditSetStartPeer thru end of shareEditSetEndPeer
399        // `this` occurs in the middle of it, to be replaced
400        // build up the new text in `pasting`
401
402        // super private string ctor: ignore ambient context
403        void ?{}( string_res &s, VbyteHeap & heap, const char* rhs, size_t rhslnth ) with(s) {
404            (Handle){ heap };
405            Handle.s = VbyteAlloc(*Handle.ulink, rhslnth);
406            Handle.lnth = rhslnth;
407            (s.shareEditSet_owns_ulink){ false };
408            for ( int i = 0; i < rhslnth; i += 1 ) {            // copy characters
409                Handle.s[i] = rhs[i];
410            } // for
411            s.shareEditSet_prev = &s;
412            s.shareEditSet_next = &s;
413        }
414
415        // we are only overwriting a proper substring of some string: need to mash characters from old and new together
416        // OR we are importing characters: need to copy eagerly
417        string_res pasting = {
418            * this.Handle.ulink,                               // maintain same heap, regardless of context
419            shareEditSetStartPeer->Handle.s,                   // start of SES
420            this.Handle.s - shareEditSetStartPeer->Handle.s }; // length of SES, before this
421        append( pasting,
422            buffer,                                            // start of replacement for this
423            bsize );                                           // length of replacement for this
424        append( pasting,
425            this.Handle.s + this.Handle.lnth,                  // start of SES after this
426            shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth -
427            (this.Handle.s + this.Handle.lnth) );              // length of SES, after this
428
429        // The above string building can trigger compaction.
430        // The reference points (that are arguments of the string building) may move during that building.
431        // From this point on, they are stable.
432
433        assignEditSet(this, shareEditSetStartPeer, shareEditSetEndPeer,
434            pasting.Handle.s,
435            pasting.Handle.lnth,
436            &pasting.Handle, bsize);
437    }
438
439    // So now, capture their values for use in the overlap cases, below.
440    // Do not factor these definitions with the arguments used in string building above.
441
442}
443
444void assign(string_res &this, const char* buffer, size_t bsize) {
445    assign_(this, buffer, bsize, *0p);
446}
447
448void ?=?(string_res &s, const char* other) {
449    assign(s, other, strlen(other));
450}
451
452void ?=?(string_res &s, char other) {
453    assign(s, &other, 1);
454}
455
456// Copy assignment operator
457void ?=?(string_res & this, const string_res & rhs) with( this ) {
458    assign_(this, rhs.Handle.s, rhs.Handle.lnth, rhs);
459}
460
461void ?=?(string_res & this, string_res & rhs) with( this ) {
462    const string_res & rhs2 = rhs;
463    this = rhs2;
464}
465
466
467// Destructor
468void ^?{}(string_res &s) with(s) {
469    // much delegated to implied ^VbyteSM
470
471    // sever s from its share-edit peers, if any (four no-ops when already solo)
472    s.shareEditSet_prev->shareEditSet_next = s.shareEditSet_next;
473    s.shareEditSet_next->shareEditSet_prev = s.shareEditSet_prev;
474    // s.shareEditSet_next = &s;
475    // s.shareEditSet_prev = &s;
476
477    if (shareEditSet_owns_ulink && s.shareEditSet_next == &s) { // last one out
478        delete( s.Handle.ulink );
479    }
480}
481
482
483// Returns the character at the given index
484// With unicode support, this may be different from just the byte at the given
485// offset from the start of the string.
486char ?[?](const string_res &s, size_t index) with(s) {
487    //TODO: Check if index is valid (no exceptions yet)
488    return Handle.s[index];
489}
490
491void assignAt(const string_res &s, size_t index, char val) {
492    string_res editZone = { s, SHARE_EDITS, index, index+1 };
493    assign(editZone, &val, 1);
494}
495
496
497///////////////////////////////////////////////////////////////////
498// Concatenation
499
500void append(string_res &str1, const char * buffer, size_t bsize) {
501    size_t clnth = size(str1) + bsize;
502    if ( str1.Handle.s + size(str1) == buffer ) { // already juxtapose ?
503        // no-op
504    } else {                                            // must copy some text
505        if ( str1.Handle.s + size(str1) == VbyteAlloc(*str1.Handle.ulink, 0) ) { // str1 at end of string area ?
506            VbyteAlloc( *str1.Handle.ulink, bsize ); // create room for 2nd part at the end of string area
507        } else {                                        // copy the two parts
508            char * str1oldBuf = str1.Handle.s;
509            str1.Handle.s = VbyteAlloc( *str1.Handle.ulink, clnth );
510            ByteCopy( str1.Handle.s, 0, str1.Handle.lnth, str1oldBuf, 0, str1.Handle.lnth);
511        } // if
512        ByteCopy( str1.Handle.s, str1.Handle.lnth, bsize, (char*)buffer, 0, (int)bsize);
513        //       VbyteHeap & this, char *Dst, int DstStart, int DstLnth, char *Src, int SrcStart, int SrcLnth
514    } // if
515    str1.Handle.lnth = clnth;
516}
517
518void ?+=?(string_res &str1, const string_res &str2) {
519    append( str1, str2.Handle.s, str2.Handle.lnth );
520}
521
522void ?+=?(string_res &s, char other) {
523    append( s, &other, 1 );
524}
525
526void ?+=?(string_res &s, const char* other) {
527    append( s, other, strlen(other) );
528}
529
530
531
532
533//////////////////////////////////////////////////////////
534// Comparisons
535
536
537bool ?==?(const string_res &s1, const string_res &s2) {
538    return ByteCmp( s1.Handle.s, 0, s1.Handle.lnth, s2.Handle.s, 0, s2.Handle.lnth) == 0;
539}
540
541bool ?!=?(const string_res &s1, const string_res &s2) {
542    return !(s1 == s2);
543}
544bool ?==?(const string_res &s, const char* other) {
545    string_res sother = other;
546    return s == sother;
547}
548bool ?!=?(const string_res &s, const char* other) {
549    return !(s == other);
550}
551
552
553//////////////////////////////////////////////////////////
554// Search
555
556bool contains(const string_res &s, char ch) {
557    for (i; size(s)) {
558        if (s[i] == ch) return true;
559    }
560    return false;
561}
562
563int find(const string_res &s, char search) {
564    for (i; size(s)) {
565        if (s[i] == search) return i;
566    }
567    return size(s);
568}
569
570    /* Remaining implementations essentially ported from Sunjay's work */
571
572int find(const string_res &s, const string_res &search) {
573    return find(s, search.Handle.s, search.Handle.lnth);
574}
575
576int find(const string_res &s, const char* search) {
577    return find(s, search, strlen(search));
578}
579
580int find(const string_res &s, const char* search, size_t searchsize) {
581    // FIXME: This is a naive algorithm. We probably want to switch to someting
582    // like Boyer-Moore in the future.
583    // https://en.wikipedia.org/wiki/String_searching_algorithm
584
585    // Always find the empty string
586    if (searchsize == 0) {
587        return 0;
588    }
589
590    for (size_t i = 0; i < s.Handle.lnth; i++) {
591        size_t remaining = s.Handle.lnth - i;
592        // Never going to find the search string if the remaining string is
593        // smaller than search
594        if (remaining < searchsize) {
595            break;
596        }
597
598        bool matched = true;
599        for (size_t j = 0; j < searchsize; j++) {
600            if (search[j] != s.Handle.s[i + j]) {
601                matched = false;
602                break;
603            }
604        }
605        if (matched) {
606            return i;
607        }
608    }
609
610    return s.Handle.lnth;
611}
612
613bool includes(const string_res &s, const string_res &search) {
614    return includes(s, search.Handle.s, search.Handle.lnth);
615}
616
617bool includes(const string_res &s, const char* search) {
618    return includes(s, search, strlen(search));
619}
620
621bool includes(const string_res &s, const char* search, size_t searchsize) {
622    return find(s, search, searchsize) < s.Handle.lnth;
623}
624
625bool startsWith(const string_res &s, const string_res &prefix) {
626    return startsWith(s, prefix.Handle.s, prefix.Handle.lnth);
627}
628
629bool startsWith(const string_res &s, const char* prefix) {
630    return startsWith(s, prefix, strlen(prefix));
631}
632
633bool startsWith(const string_res &s, const char* prefix, size_t prefixsize) {
634    if (s.Handle.lnth < prefixsize) {
635        return false;
636    }
637    return memcmp(s.Handle.s, prefix, prefixsize) == 0;
638}
639
640bool endsWith(const string_res &s, const string_res &suffix) {
641    return endsWith(s, suffix.Handle.s, suffix.Handle.lnth);
642}
643
644bool endsWith(const string_res &s, const char* suffix) {
645    return endsWith(s, suffix, strlen(suffix));
646}
647
648bool endsWith(const string_res &s, const char* suffix, size_t suffixsize) {
649    if (s.Handle.lnth < suffixsize) {
650        return false;
651    }
652    // Amount to offset the bytes pointer so that we are comparing the end of s
653    // to suffix. s.bytes + offset should be the first byte to compare against suffix
654    size_t offset = s.Handle.lnth - suffixsize;
655    return memcmp(s.Handle.s + offset, suffix, suffixsize) == 0;
656}
657
658    /* Back to Mike's work */
659
660
661///////////////////////////////////////////////////////////////////////////
662// charclass, include, exclude
663
664void ?{}( charclass_res & this, const string_res & chars) {
665    (this){ chars.Handle.s, chars.Handle.lnth };
666}
667
668void ?{}( charclass_res & this, const char * chars ) {
669    (this){ chars, strlen(chars) };
670}
671
672void ?{}( charclass_res & this, const char * chars, size_t charssize ) {
673    (this.chars){ chars, charssize };
674    // now sort it ?
675}
676
677void ^?{}( charclass_res & this ) {
678    ^(this.chars){};
679}
680
681static bool test( const charclass_res & mask, char c ) {
682    // instead, use sorted char list?
683    return contains( mask.chars, c );
684}
685
686int exclude(const string_res &s, const charclass_res &mask) {
687    for (int i = 0; i < size(s); i++) {
688        if ( test(mask, s[i]) ) return i;
689    }
690    return size(s);
691}
692
693int include(const string_res &s, const charclass_res &mask) {
694    for (int i = 0; i < size(s); i++) {
695        if ( ! test(mask, s[i]) ) return i;
696    }
697    return size(s);
698}
699
700//######################### VbyteHeap "implementation" #########################
701
702
703// Add a new HandleNode node n after the current HandleNode node.
704
705static inline void AddThisAfter( HandleNode & this, HandleNode & n ) with(this) {
706#ifdef VbyteDebug
707    serr | "enter:AddThisAfter, this:" | &this | " n:" | &n;
708#endif // VbyteDebug
709    verify( n.ulink != 0p );
710    verify( this.ulink == n.ulink );
711    flink = n.flink;
712    blink = &n;
713    n.flink->blink = &this;
714    n.flink = &this;
715#ifdef VbyteDebug
716    {
717                serr | "HandleList:";
718                serr | nlOff;
719                for ( HandleNode *ni = HeaderPtr->flink; ni != HeaderPtr; ni = ni->flink ) {
720                        serr | "\tnode:" | ni | " lnth:" | ni->lnth | " s:" | (void *)ni->s | ",\"";
721                        for ( int i = 0; i < ni->lnth; i += 1 ) {
722                                serr | ni->s[i];
723                        } // for
724                        serr | "\" flink:" | ni->flink | " blink:" | ni->blink | nl;
725                } // for
726                serr | nlOn;
727    }
728    serr | "exit:AddThisAfter";
729#endif // VbyteDebug
730} // AddThisAfter
731
732
733// Delete the current HandleNode node.
734
735static inline void DeleteNode( HandleNode & this ) with(this) {
736#ifdef VbyteDebug
737    serr | "enter:DeleteNode, this:" | &this;
738#endif // VbyteDebug
739    flink->blink = blink;
740    blink->flink = flink;
741#ifdef VbyteDebug
742    serr | "exit:DeleteNode";
743#endif // VbyteDebug
744} //  DeleteNode
745
746
747
748// Allocates specified storage for a string from byte-string area. If not enough space remains to perform the
749// allocation, the garbage collection routine is called and a second attempt is made to allocate the space. If the
750// second attempt fails, a further attempt is made to create a new, larger byte-string area.
751
752static inline char * VbyteAlloc( VbyteHeap & this, int size ) with(this) {
753#ifdef VbyteDebug
754    serr | "enter:VbyteAlloc, size:" | size;
755#endif // VbyteDebug
756    uintptr_t NoBytes;
757    char *r;
758
759    NoBytes = ( uintptr_t )EndVbyte + size;
760    if ( NoBytes > ( uintptr_t )ExtVbyte ) {            // enough room for new byte-string ?
761                garbage( this );                                        // firer up the garbage collector
762                NoBytes = ( uintptr_t )EndVbyte + size;         // try again
763                if ( NoBytes > ( uintptr_t )ExtVbyte ) {        // enough room for new byte-string ?
764// unimplemented feature - assert, not verify
765assert( 0 && "need to implement actual growth" );
766                        // extend( size );                              // extend the byte-string area
767                } // if
768    } // if
769    r = EndVbyte;
770    EndVbyte += size;
771#ifdef VbyteDebug
772    serr | "exit:VbyteAlloc, r:" | (void *)r | " EndVbyte:" | (void *)EndVbyte | " ExtVbyte:" | ExtVbyte;
773#endif // VbyteDebug
774    return r;
775} // VbyteAlloc
776
777
778// Move an existing HandleNode node h somewhere after the current HandleNode node so that it is in ascending order by
779// the address in the byte string area.
780
781static inline void MoveThisAfter( HandleNode & this, const HandleNode  & h ) with(this) {
782#ifdef VbyteDebug
783    serr | "enter:MoveThisAfter, this:" | & this | " h:" | & h;
784#endif // VbyteDebug
785    verify( h.ulink != 0p );
786    verify( this.ulink == h.ulink );
787    if ( s < h.s ) {                                    // check argument values
788                // serr | "VbyteSM: Error - Cannot move byte string starting at:" | s | " after byte string starting at:"
789                //      | ( h->s ) | " and keep handles in ascending order";
790                // exit(-1 );
791                verify( 0 && "VbyteSM: Error - Cannot move byte strings as requested and keep handles in ascending order");
792    } // if
793
794    HandleNode *i;
795    for ( i = h.flink; i->s != 0 && s > ( i->s ); i = i->flink ); // find the position for this node after h
796    if ( & this != i->blink ) {
797                DeleteNode( this );
798                AddThisAfter( this, *i->blink );
799    } // if
800#ifdef VbyteDebug
801    {
802        serr | "HandleList:";
803        serr | nlOff;
804        for ( HandleNode *n = HeaderPtr->flink; n != HeaderPtr; n = n->flink ) {
805            serr | "\tnode:" | n | " lnth:" | n->lnth | " s:" | (void *)n->s | ",\"";
806            for ( int i = 0; i < n->lnth; i += 1 ) {
807                serr | n->s[i];
808            } // for
809            serr | "\" flink:" | n->flink | " blink:" | n->blink | nl;
810        } // for
811        serr | nlOn;
812    }
813    serr | "exit:MoveThisAfter";
814#endif // VbyteDebug
815} // MoveThisAfter
816
817
818
819
820
821//######################### VbyteHeap #########################
822
823// Move characters from one location in the byte-string area to another. The routine handles the following situations:
824//
825// if the |Src| > |Dst| => truncate
826// if the |Dst| > |Src| => pad Dst with blanks
827
828void ByteCopy( char *Dst, int DstStart, int DstLnth, char *Src, int SrcStart, int SrcLnth ) {
829    for ( int i = 0; i < DstLnth; i += 1 ) {
830      if ( i == SrcLnth ) {                             // |Dst| > |Src|
831            for ( ; i < DstLnth; i += 1 ) {             // pad Dst with blanks
832                Dst[DstStart + i] = ' ';
833            } // for
834            break;
835        } // exit
836        Dst[DstStart + i] = Src[SrcStart + i];
837    } // for
838} // ByteCopy
839
840// Compare two byte strings in the byte-string area. The routine returns the following values:
841//
842// 1 => Src1-byte-string > Src2-byte-string
843// 0 => Src1-byte-string = Src2-byte-string
844// -1 => Src1-byte-string < Src2-byte-string
845
846int ByteCmp( char *Src1, int Src1Start, int Src1Lnth, char *Src2, int Src2Start, int Src2Lnth )  {
847#ifdef VbyteDebug
848    serr | "enter:ByteCmp, Src1Start:" | Src1Start | " Src1Lnth:" | Src1Lnth | " Src2Start:" | Src2Start | " Src2Lnth:" | Src2Lnth;
849#endif // VbyteDebug
850    int cmp;
851
852    CharZip: for ( int i = 0; ; i += 1 ) {
853        if ( i == Src2Lnth - 1 ) {
854            for ( ; ; i += 1 ) {
855                if ( i == Src1Lnth - 1 ) {
856                    cmp = 0;
857                    break CharZip;
858                } // exit
859                if ( Src1[Src1Start + i] != ' ') {
860                        // SUSPECTED BUG:  this could be be why Peter got the bug report about == " "  (why is this case here at all?)
861                    cmp = 1;
862                    break CharZip;
863                } // exit
864            } // for
865        } // exit
866        if ( i == Src1Lnth - 1 ) {
867            for ( ; ; i += 1 ) {
868                if ( i == Src2Lnth - 1 ) {
869                    cmp = 0;
870                    break CharZip;
871                } // exit
872                if ( Src2[Src2Start + i] != ' ') {
873                    cmp = -1;
874                    break CharZip;
875                } // exit
876            } // for
877        } // exit
878      if ( Src2[Src2Start + i] != Src1[Src1Start+ i]) {
879            cmp = Src1[Src1Start + i] > Src2[Src2Start + i] ? 1 : -1;
880            break CharZip;
881        } // exit
882    } // for
883#ifdef VbyteDebug
884    serr | "exit:ByteCmp, cmp:" | cmp;
885#endif // VbyteDebug
886    return cmp;
887} // ByteCmp
888
889
890// The compaction moves all of the byte strings currently in use to the beginning of the byte-string area and modifies
891// the handles to reflect the new positions of the byte strings. Compaction assumes that the handle list is in ascending
892// order by pointers into the byte-string area.  The strings associated with substrings do not have to be moved because
893// the containing string has been moved. Hence, they only require that their string pointers be adjusted.
894
895void compaction(VbyteHeap & this) with(this) {
896    HandleNode *h;
897    char *obase, *nbase, *limit;
898   
899    NoOfCompactions += 1;
900    EndVbyte = StartVbyte;
901    h = Header.flink;                                   // ignore header node
902    for (;;) {
903                ByteCopy( EndVbyte, 0, h->lnth, h->s, 0, h->lnth );
904                obase = h->s;
905                h->s = EndVbyte;
906                nbase = h->s;
907                EndVbyte += h->lnth;
908                limit = obase + h->lnth;
909                h = h->flink;
910               
911                // check if any substrings are allocated within a string
912               
913                for (;;) {
914                        if ( h == &Header ) break;                      // end of header list ?
915                        if ( h->s >= limit ) break;                     // outside of current string ?
916                        h->s = nbase + (( uintptr_t )h->s - ( uintptr_t )obase );
917                        h = h->flink;
918                } // for
919                if ( h == &Header ) break;                      // end of header list ?
920    } // for
921} // compaction
922
923
924// Garbage determines the amount of free space left in the heap and then reduces, leave the same, or extends the size of
925// the heap.  The heap is then compacted in the existing heap or into the newly allocated heap.
926
927void garbage(VbyteHeap & this ) with(this) {
928#ifdef VbyteDebug
929    serr | "enter:garbage";
930    {
931                serr | "HandleList:";
932                for ( HandleNode *n = Header.flink; n != &Header; n = n->flink ) {
933                        serr | nlOff;
934                        serr | "\tnode:" | n | " lnth:" | n->lnth | " s:" | (void *)n->s | ",\"";
935                        for ( int i = 0; i < n->lnth; i += 1 ) {
936                                serr | n->s[i];
937                        } // for
938                        serr | nlOn;
939                        serr | "\" flink:" | n->flink | " blink:" | n->blink;
940                } // for
941    }
942#endif // VbyteDebug
943    int AmountUsed, AmountFree;
944
945    AmountUsed = 0;
946    for ( HandleNode *i = Header.flink; i != &Header; i = i->flink ) { // calculate amount of byte area used
947                AmountUsed += i->lnth;
948    } // for
949    AmountFree = ( uintptr_t )ExtVbyte - ( uintptr_t )StartVbyte - AmountUsed;
950   
951    if ( AmountFree < ( int )( CurrSize * 0.1 )) {      // free space less than 10% ?
952
953// unimplemented feature - assert, not verify
954assert( 0 && "need to implement actual growth" );
955//              extend( CurrSize );                             // extend the heap
956
957                        //  Peter says, "This needs work before it should be used."
958                        //  } else if ( AmountFree > CurrSize / 2 ) {           // free space greater than 3 times the initial allocation ?
959                        //              reduce(( AmountFree / CurrSize - 3 ) * CurrSize ); // reduce the memory
960
961    } // if
962    compaction(this);                                   // compact the byte area, in the same or new heap area
963#ifdef VbyteDebug
964    {
965                serr | "HandleList:";
966                for ( HandleNode *n = Header.flink; n != &Header; n = n->flink ) {
967                        serr | nlOff;
968                        serr | "\tnode:" | n | " lnth:" | n->lnth | " s:" | (void *)n->s | ",\"";
969                        for ( int i = 0; i < n->lnth; i += 1 ) {
970                                serr | n->s[i];
971                        } // for
972                        serr | nlOn;
973                        serr | "\" flink:" | n->flink | " blink:" | n->blink;
974                } // for
975    }
976    serr | "exit:garbage";
977#endif // VbyteDebug
978} // garbage
979
980#undef VbyteDebug
981
982//WIP
983#if 0
984
985
986// Extend the size of the byte-string area by creating a new area and copying the old area into it. The old byte-string
987// area is deleted.
988
989void VbyteHeap::extend( int size ) {
990#ifdef VbyteDebug
991    serr | "enter:extend, size:" | size;
992#endif // VbyteDebug
993    char *OldStartVbyte;
994
995    NoOfExtensions += 1;
996    OldStartVbyte = StartVbyte;                         // save previous byte area
997   
998    CurrSize += size > InitSize ? size : InitSize;      // minimum extension, initial size
999    StartVbyte = EndVbyte = new char[CurrSize];
1000    ExtVbyte = (void *)( StartVbyte + CurrSize );
1001    compaction();                                       // copy from old heap to new & adjust pointers to new heap
1002    delete OldStartVbyte;                               // release old heap
1003#ifdef VbyteDebug
1004    serr | "exit:extend, CurrSize:" | CurrSize;
1005#endif // VbyteDebug
1006} // extend
1007
1008
1009// Extend the size of the byte-string area by creating a new area and copying the old area into it. The old byte-string
1010// area is deleted.
1011
1012void VbyteHeap::reduce( int size ) {
1013#ifdef VbyteDebug
1014    serr | "enter:reduce, size:" | size;
1015#endif // VbyteDebug
1016    char *OldStartVbyte;
1017
1018    NoOfReductions += 1;
1019    OldStartVbyte = StartVbyte;                         // save previous byte area
1020   
1021    CurrSize -= size;
1022    StartVbyte = EndVbyte = new char[CurrSize];
1023    ExtVbyte = (void *)( StartVbyte + CurrSize );
1024    compaction();                                       // copy from old heap to new & adjust pointers to new heap
1025    delete  OldStartVbyte;                              // release old heap
1026#ifdef VbyteDebug
1027    serr | "exit:reduce, CurrSize:" | CurrSize;
1028#endif // VbyteDebug
1029} // reduce
1030
1031
1032#endif
Note: See TracBrowser for help on using the repository browser.