source: libcfa/src/collections/string_res.cfa@ 45d1ab9

stuck-waitfor-destruct
Last change on this file since 45d1ab9 was e8b3717, checked in by Michael Brooks <mlbrooks@…>, 2 years ago

Modify substring interface from start-end to start-len, and add a missing test.

  • Property mode set to 100644
File size: 42.9 KB
RevLine 
[f450f2f]1//
2// Cforall Version 1.0.0 Copyright (C) 2016 University of Waterloo
3//
4// The contents of this file are covered under the licence agreement in the
5// file "LICENCE" distributed with Cforall.
6//
7// string_res -- variable-length, mutable run of text, with resource semantics
8//
9// Author : Michael L. Brooks
10// Created On : Fri Sep 03 11:00:00 2021
[7d25f44]11// Last Modified By : Peter A. Buhr
[06280ad]12// Last Modified On : Tue Jan 16 22:19:27 2024
13// Update Count : 35
[f450f2f]14//
15
16#include "string_res.hfa"
[0f781fb8]17#include "string_sharectx.hfa"
[08ed947]18#include "stdlib.hfa"
[d32679d5]19#include <ctype.h>
[08ed947]20
21// Workaround for observed performance penalty from calling CFA's alloc.
22// Workaround is: EndVbyte = TEMP_ALLOC(char, CurrSize)
23// Should be: EndVbyte = alloc(CurrSize)
// Allocate room for n objects of type T with malloc (uninitialized); the result is cast to T *.
// The count argument is parenthesized so an expression such as `a + b` is scaled as a whole.
#define TEMP_ALLOC(T, n) (( T * ) malloc( (n) * sizeof( T ) ))
[0f781fb8]25
[218096f]26#include <assert.h>
[f450f2f]27
28//######################### VbyteHeap "header" #########################
29
// Debug builds only: file-scope pointer that the VbyteHeap constructor sets to the heap's handle-list header.
30#ifdef VbyteDebug
[6cc87c0]31HandleNode *HeaderPtr;
[f450f2f]32#endif // VbyteDebug
33
// Bookkeeping for one byte-string area: activity counters, the bounds of the byte area, and the header of the
// list of handles that refer into it.
34struct VbyteHeap {
[681e12f]35 int NoOfCompactions; // number of compactions of the byte area
36 int NoOfExtensions; // number of extensions in the size of the byte area
37 int NoOfReductions; // number of reductions in the size of the byte area
[f450f2f]38
[681e12f]39 int InitSize; // initial number of bytes in the byte-string area
40 int CurrSize; // current number of bytes in the byte-string area
41 char *StartVbyte; // pointer to the 1st byte of the start of the byte-string area
42 char *EndVbyte; // pointer to the next byte after the end of the currently used portion of byte-string area
43 void *ExtVbyte; // pointer to the next byte after the end of the byte-string area
[f450f2f]44
[681e12f]45 HandleNode Header; // header node for handle list
[f450f2f]46}; // VbyteHeap
47
48
// Forward declarations of the file-private heap and handle-list routines; none of them is defined in the
// portion of the file above the string_res operations.
[681e12f]49static void compaction( VbyteHeap & ); // compaction of the byte area
50static void garbage( VbyteHeap &, int ); // garbage collect the byte area
[4e8df745]51static void extend( VbyteHeap &, int ); // extend the size of the byte area
52static void reduce( VbyteHeap &, int ); // reduce the size of the byte area
[f450f2f]53
// heap constructor (default size 1000 bytes) and destructor
[4e8df745]54static void ?{}( VbyteHeap &, size_t = 1000 );
55static void ^?{}( VbyteHeap & );
[94647b0b]56
[4e8df745]57static int ByteCmp( char *, int, int, char *, int, int ); // compare 2 blocks of bytes
58static char *VbyteAlloc( VbyteHeap &, int ); // allocate a block bytes in the heap
59static char *VbyteTryAdjustLast( VbyteHeap &, int );
[f450f2f]60
// handle-list maintenance
[4e8df745]61static void AddThisAfter( HandleNode &, HandleNode & );
62static void DeleteNode( HandleNode & );
63static void MoveThisAfter( HandleNode &, const HandleNode & ); // move current handle after parameter handle
[f450f2f]64
65
66// Allocate the storage for the variable sized area and initialize the heap variables.
// Size is the number of bytes in the byte area; it becomes both InitSize and CurrSize.
67
[681e12f]68static void ?{}( VbyteHeap & s, size_t Size ) with(s) {
[f450f2f]69#ifdef VbyteDebug
[681e12f]70 serr | "enter:VbyteHeap::VbyteHeap, s:" | &s | " Size:" | Size;
[f450f2f]71#endif // VbyteDebug
72 NoOfCompactions = NoOfExtensions = NoOfReductions = 0;
73 InitSize = CurrSize = Size;
// nothing is in use yet, so the used-portion end coincides with the start of the area
[08ed947]74 StartVbyte = EndVbyte = TEMP_ALLOC(char, CurrSize);
[f450f2f]75 ExtVbyte = (void *)( StartVbyte + CurrSize );
// empty handle list: the header node links to itself in both directions
76 Header.flink = Header.blink = &Header;
[681e12f]77 Header.ulink = &s;
[f450f2f]78#ifdef VbyteDebug
79 HeaderPtr = &Header;
[681e12f]80 serr | "exit:VbyteHeap::VbyteHeap, s:" | &s;
[f450f2f]81#endif // VbyteDebug
82} // VbyteHeap
83
84
85// Release the dynamically allocated storage for the byte area.
// Only the byte area (StartVbyte) is freed here; the handle list is not touched.
86
[681e12f]87static void ^?{}( VbyteHeap & s ) with(s) {
[f450f2f]88 free( StartVbyte );
89} // ~VbyteHeap
90
91
92//######################### HandleNode #########################
93
94
95// Create a handle node. The handle is not linked into the handle list. This is the responsibility of the handle
96// creator.
// The node starts with a zero string pointer and zero length; the link fields are not set here.
97
[681e12f]98static void ?{}( HandleNode & s ) with(s) {
[f450f2f]99#ifdef VbyteDebug
[681e12f]100 serr | "enter:HandleNode::HandleNode, s:" | &s;
[f450f2f]101#endif // VbyteDebug
// NOTE(review): the parameter and the string-pointer field are both named `s`; presumably `s = 0` resolves to
// the field because of the with clause -- confirm.
102 s = 0;
103 lnth = 0;
104#ifdef VbyteDebug
[681e12f]105 serr | "exit:HandleNode::HandleNode, s:" | &s;
[f450f2f]106#endif // VbyteDebug
107} // HandleNode
108
109// Create a handle node. The handle is linked into the handle list at the end. This means that this handle will NOT be
110// in order by string address, but this is not a problem because a string with length zero does nothing during garbage
111// collection.
// vh is the heap that the new handle belongs to (recorded in ulink).
112
[681e12f]113static void ?{}( HandleNode & s, VbyteHeap & vh ) with(s) {
[f450f2f]114#ifdef VbyteDebug
[681e12f]115 serr | "enter:HandleNode::HandleNode, s:" | &s;
[f450f2f]116#endif // VbyteDebug
117 s = 0;
118 lnth = 0;
[0f781fb8]119 ulink = &vh;
// link after the node currently preceding the header, i.e., at the end of the list
[681e12f]120 AddThisAfter( s, *vh.Header.blink );
[f450f2f]121#ifdef VbyteDebug
[681e12f]122 serr | "exit:HandleNode::HandleNode, s:" | &s;
[f450f2f]123#endif // VbyteDebug
124} // HandleNode
125
126
127// Delete a node from the handle list by unchaining it from the list. If the handle node was allocated dynamically, it
128// is the responsibility of the creator to destroy it.
129
[681e12f]130static void ^?{}( HandleNode & s ) with(s) {
[f450f2f]131#ifdef VbyteDebug
// debug trace: length, address and characters of the string, then the node's links, on one output line
[681e12f]132 serr | "enter:HandleNode::~HandleNode, s:" | & s;
[f450f2f]133 {
134 serr | nlOff;
135 serr | " lnth:" | lnth | " s:" | (void *)s | ",\"";
[9ca5e56]136 for ( i; lnth ) {
[f450f2f]137 serr | s[i];
138 } // for
139 serr | "\" flink:" | flink | " blink:" | blink | nl;
140 serr | nlOn;
141 }
142#endif // VbyteDebug
[681e12f]143 DeleteNode( s );
[f450f2f]144} // ~HandleNode
145
146
[0f781fb8]147//######################### String Sharing Context #########################
[f450f2f]148
// Sharing contexts form a stack linked through `older`: each constructor pushes itself as the ambient context,
// and the default context below is constructed in NEW_SHARING mode.
[0f781fb8]149static string_sharectx * ambient_string_sharectx; // fickle top of stack
150static string_sharectx default_string_sharectx = {NEW_SHARING}; // stable bottom of stack
[f450f2f]151
// Construct a sharing context and make it the ambient one.
// mode == NEW_SHARING: the context gets its own freshly allocated heap (1000 bytes).
// mode == NO_SHARING:  the context has no heap (activeHeap is null).
[681e12f]152void ?{}( string_sharectx & s, StringSharectx_Mode mode ) with( s ) {
// remember the context being superseded
[0f781fb8]153 (older){ ambient_string_sharectx };
154 if ( mode == NEW_SHARING ) {
[804bf677]155 (activeHeap){ new( (size_t) 1000 ) };
[0f781fb8]156 } else {
157 verify( mode == NO_SHARING );
158 (activeHeap){ 0p };
159 }
[681e12f]160 ambient_string_sharectx = & s;
[0f781fb8]161}
162
// Destroy a sharing context: delete its heap, if it has one, and remove the context from the stack of contexts.
[681e12f]163void ^?{}( string_sharectx & s ) with( s ) {
[0f781fb8]164 if ( activeHeap ) delete( activeHeap );
165
[681e12f]166 // unlink s from older-list starting from ambient_string_sharectx
167 // usually, s==ambient_string_sharectx and the loop runs zero times
// c is a rebindable reference to the link being examined: first the ambient pointer, then successive `older` fields
[0f781fb8]168 string_sharectx *& c = ambient_string_sharectx;
[681e12f]169 while ( c != &s ) &c = &c->older; // find s
170 c = s.older; // unlink
[0f781fb8]171}
172
173//######################### String Resource #########################
174
175
// Debug accessor: the heap of the ambient sharing context; asserts that the ambient context has one.
176VbyteHeap * DEBUG_string_heap() {
177 assert( ambient_string_sharectx->activeHeap && "No sharing context is active" );
178 return ambient_string_sharectx->activeHeap;
179}
[6cc87c0]180
// Debug accessor: number of bytes between the end of the used portion and the end of the byte area.
181size_t DEBUG_string_bytes_avail_until_gc( VbyteHeap * heap ) {
[06280ad]182 return ((char *)heap->ExtVbyte) - heap->EndVbyte;
[6cc87c0]183}
184
// Debug accessor: current size of the byte area.
[7b0e8b7]185size_t DEBUG_string_bytes_in_heap( VbyteHeap * heap ) {
186 return heap->CurrSize;
187}
188
// Debug accessor: address of the first byte of the byte area.
[6cc87c0]189const char * DEBUG_string_heap_start( VbyteHeap * heap ) {
190 return heap->StartVbyte;
191}
192
[f450f2f]193// Returns the size of the string in bytes
// (the length recorded in the string's handle; no scan of the text is performed)
[681e12f]194size_t size(const string_res & s) with(s) {
[f450f2f]195 return Handle.lnth;
196}
197
198// Output operator
// Chaining form: writes the string's characters and returns the stream so further output can follow.
[681e12f]199ofstream & ?|?(ofstream & out, const string_res & s) {
[9ca5e56]200 // CFA string is NOT null terminated, so print exactly lnth characters in a minimum width of 0.
201 out | wd( 0, s.Handle.lnth, s.Handle.s ) | nonl;
[f450f2f]202 return out;
203}
204
// Statement form: delegates to the chaining form above, then calls ends() on the stream.
[681e12f]205void ?|?(ofstream & out, const string_res & s) {
[9ca5e56]206 (ofstream &)(out | s); ends( out );
[f450f2f]207}
208
[d32679d5]209// Input operator
// Chaining form: reads text from `in` straight into the unused tail of a temporary string's heap, growing the
// heap and reading again whenever a read is cut short, then assigns the temporary to s.
// s is left unchanged when nothing was read.
[681e12f]210ifstream & ?|?(ifstream & in, string_res & s) {
[d32679d5]211 // Reading into a temp before assigning to s is near zero overhead in typical cases because of sharing.
212 // If s is a substring of something larger, simple assignment takes care of that case correctly.
213 // But directly reading a variable amount of text into the middle of a larger context is not practical.
214 string_res temp;
215
216 // Read in chunks. Often, one chunk is enough. Keep the string that accumulates chunks last in the heap,
217 // so available room is rest of heap. When a chunk fills the heap, force growth then take the next chunk.
[0860d9c]218 for (bool cont = true; cont; ) {
219 cont = false;
220
[d32679d5]221 // Append dummy content to temp, forcing expansion when applicable (occurs always on subsequent loops)
222 // length 2 ensures room for at least one real char, plus scanf/pipe-cstr's null terminator
223 temp += "--";
224 assert( temp.Handle.ulink->EndVbyte == temp.Handle.s + temp.Handle.lnth ); // last in heap
225
226 // reset, to overwrite the appended "--"
227 temp.Handle.lnth -= 2;
228 temp.Handle.ulink->EndVbyte -= 2;
229
[0860d9c]230 // rest of heap is available to read into
[06280ad]231 int lenReadable = (char *)temp.Handle.ulink->ExtVbyte - temp.Handle.ulink->EndVbyte;
[0860d9c]232 assert (lenReadable >= 2);
[d32679d5]233
234 // get bytes
[0860d9c]235 try {
[37ceccb]236 *(temp.Handle.ulink->EndVbyte) = '\0'; // pre-assign empty cstring
[0860d9c]237 in | wdi( lenReadable, temp.Handle.ulink->EndVbyte );
[681e12f]238 } catch (cstring_length *) {
// the read was cut short by the width limit: go around again for the rest
[0860d9c]239 cont = true;
240 }
// the bytes just read are null terminated, so their count is recovered with strlen
[d32679d5]241 int lenWasRead = strlen(temp.Handle.ulink->EndVbyte);
242
243 // update metadata
244 temp.Handle.lnth += lenWasRead;
245 temp.Handle.ulink->EndVbyte += lenWasRead;
246 }
247
[37ceccb]248 if ( temp.Handle.lnth > 0 ) s = temp;
[d32679d5]249 return in;
250}
251
// Statement form: delegates to the chaining form above and discards the returned stream.
[681e12f]252void ?|?( ifstream & in, string_res & s ) {
253 (ifstream &)(in | s);
[ff56dd2e]254}
255
// Chaining form of manipulator-controlled input: reads through a fixed-size C-string buffer, one chunk at a
// time, and stores the text in the string that f.s points to.  The first chunk is assigned; each further chunk
// (read only while the previous read raised cstring_length) is appended.  Nothing is stored when the
// manipulator's ignore flag is set.
256ifstream & ?|?( ifstream & is, _Istream_Rstr f ) {
257 // .---------------,
258 // | | | | |...|0|0| null terminator and guard if missing
259 // `---------------'
260 enum { gwd = 128 + 1, wd = gwd - 1 }; // guard and unguard width
261 char cstr[gwd]; // read in chunks
262 bool cont = false;
263
264 _Istream_Cstr cf = { cstr, (_Istream_str_base)f };
// no width requested by the caller: limit each read to the buffer's unguarded width
265 if ( ! cf.flags.rwd ) cf.wd = wd;
266
267 cstr[wd] = '\0'; // guard null terminate string
268 try {
[37ceccb]269 cstr[0] = '\0'; // pre-assign as empty cstring
[ff56dd2e]270 is | cf;
271 } catch( cstring_length * ) {
272 cont = true;
273 } finally {
[681e12f]274 if ( ! cf.flags.ignore // ok to initialize string
275// && cstr[0] != '\0' // something was read
276 ) {
[37ceccb]277 *(f.s) = cstr;
278 }
[ff56dd2e]279 } // try
280 for ( ; cont; ) { // overflow read ?
281 cont = false;
282 try {
[37ceccb]283 cstr[0] = '\0'; // pre-assign as empty cstring
[ff56dd2e]284 is | cf;
285 } catch( cstring_length * ) {
286 cont = true; // continue not allowed
287 } finally {
[681e12f]288 if ( ! cf.flags.ignore && cstr[0] != '\0' ) { // something was read
[37ceccb]289 *(f.s) += cstr; // build string chunk at a time
290 }
[ff56dd2e]291 } // try
292 } // for
293 return is;
294} // ?|?
295
// Statement form: delegates to the chaining form above and discards the returned stream.
296void ?|?( ifstream & in, _Istream_Rstr f ) {
[f842032]297 (ifstream &)(in | f);
[ff56dd2e]298}
299
[d32679d5]300
[f450f2f]301// Empty constructor
// With an ambient heap, the new string's handle is linked into that heap and the string does not own it.
// Without one, the string gets a private 10-byte heap that it owns.
// Either way the string starts with length zero and alone in its share-edit set.
[681e12f]302void ?{}(string_res & s) with(s) {
[804bf677]303 if( ambient_string_sharectx->activeHeap ) {
304 (Handle){ * ambient_string_sharectx->activeHeap };
305 (shareEditSet_owns_ulink){ false };
[2b30370]306 verify( Handle.s == 0p && Handle.lnth == 0 );
[804bf677]307 } else {
308 (Handle){ * new( (size_t) 10 ) }; // TODO: can I lazily avoid allocating for empty string
309 (shareEditSet_owns_ulink){ true };
[2b30370]310 Handle.s = Handle.ulink->StartVbyte;
311 verify( Handle.lnth == 0 );
[804bf677]312 }
// a one-element circular list: s is its own predecessor and successor
[f450f2f]313 s.shareEditSet_prev = &s;
314 s.shareEditSet_next = &s;
315}
316
// Construct s as an independent copy of the rhslnth bytes at rhs.
// The bytes go into the ambient heap when there is one (s does not own it); otherwise into a new private heap of
// rhslnth bytes that s owns.  s ends up alone in its share-edit set.
[06280ad]317static void eagerCopyCtorHelper(string_res & s, const char * rhs, size_t rhslnth) with(s) {
[804bf677]318 if( ambient_string_sharectx->activeHeap ) {
319 (Handle){ * ambient_string_sharectx->activeHeap };
320 (shareEditSet_owns_ulink){ false };
321 } else {
322 (Handle){ * new( rhslnth ) };
323 (shareEditSet_owns_ulink){ true };
324 }
[0f781fb8]325 Handle.s = VbyteAlloc(*Handle.ulink, rhslnth);
[f450f2f]326 Handle.lnth = rhslnth;
[08ed947]327 memmove( Handle.s, rhs, rhslnth );
[f450f2f]328 s.shareEditSet_prev = &s;
329 s.shareEditSet_next = &s;
330}
331
[4b3b352]332// Constructor from a raw buffer and size
// Copies rhslnth bytes from rhs; heap selection is delegated to eagerCopyCtorHelper.
[06280ad]333void ?{}(string_res & s, const char * rhs, size_t rhslnth) with(s) {
[4b3b352]334 eagerCopyCtorHelper(s, rhs, rhslnth);
335}
336
[fe18b46]337// private ctor (not in header): use specified heap (ignore ambient) and copy chars in
// s never owns `heap`, and ends up alone in its share-edit set.
[06280ad]338void ?{}( string_res & s, VbyteHeap & heap, const char * rhs, size_t rhslnth ) with(s) {
[fe18b46]339 (Handle){ heap };
340 Handle.s = VbyteAlloc(*Handle.ulink, rhslnth);
341 Handle.lnth = rhslnth;
342 (s.shareEditSet_owns_ulink){ false };
[08ed947]343 memmove( Handle.s, rhs, rhslnth );
[fe18b46]344 s.shareEditSet_prev = &s;
345 s.shareEditSet_next = &s;
346}
347
[e8b3717]348
[f450f2f]349// General copy constructor
// Construct s from the len bytes of s2 that begin at offset start.
//   mode == COPY_VALUE,  s2 outside the ambient heap: the bytes are copied immediately.
//   mode == COPY_VALUE,  s2 in the ambient heap: s refers to s2's bytes, in a share-edit set of its own.
//   mode == SHARE_EDITS: s refers to s2's bytes and joins s2's share-edit set, directly after s2.
// The requested range must lie within s2 (checked with verify).
[e8b3717]350void ?{}(string_res & s, const string_res & s2, StrResInitMode mode, size_t start, size_t len ) {
[f450f2f]351
[e8b3717]352 size_t end = start + len;
[218096f]353 verify( start <= end && end <= s2.Handle.lnth );
354
[fe18b46]355 if (s2.Handle.ulink != ambient_string_sharectx->activeHeap && mode == COPY_VALUE) {
356 // crossing heaps (including private): copy eagerly
357 eagerCopyCtorHelper(s, s2.Handle.s + start, end - start);
358 verify(s.shareEditSet_prev == &s);
359 verify(s.shareEditSet_next == &s);
360 } else {
// no bytes are copied: the new handle points into s2's text and belongs to s2's heap
[4b3b352]361 (s.Handle){};
362 s.Handle.s = s2.Handle.s + start;
363 s.Handle.lnth = end - start;
[fe18b46]364 s.Handle.ulink = s2.Handle.ulink;
365
[4b3b352]366 AddThisAfter(s.Handle, s2.Handle ); // insert this handle after rhs handle
367 // ^ bug? skip others at early point in string
[fe18b46]368
[4b3b352]369 if (mode == COPY_VALUE) {
[fe18b46]370 verify(s2.Handle.ulink == ambient_string_sharectx->activeHeap);
371 // requested logical copy in same heap: defer copy until write
372
373 (s.shareEditSet_owns_ulink){ false };
374
[4b3b352]375 // make s alone in its shareEditSet
376 s.shareEditSet_prev = &s;
377 s.shareEditSet_next = &s;
378 } else {
379 verify( mode == SHARE_EDITS );
[fe18b46]380 // sharing edits with source forces same heap as source (ignore context)
381
382 (s.shareEditSet_owns_ulink){ s2.shareEditSet_owns_ulink };
[f450f2f]383
[4b3b352]384 // s2 is logically const but not implementation const
385 string_res & s2mod = (string_res &) s2;
[f450f2f]386
[4b3b352]387 // insert s after s2 on shareEditSet
388 s.shareEditSet_next = s2mod.shareEditSet_next;
389 s.shareEditSet_prev = &s2mod;
390 s.shareEditSet_next->shareEditSet_prev = &s;
391 s.shareEditSet_prev->shareEditSet_next = &s;
[6cc87c0]392 }
[f450f2f]393 }
[4b3b352]394}
[f450f2f]395
// Re-point s and every other member of its share-edit set (SES) at the text that results from replacing s's old
// characters with bsize new ones.
//   shareEditSetStartPeer / shareEditSetEndPeer: the members where the old SES text starts / ends
//   resultSesStart, resultSesLnth: location and length of the complete new SES text
//   resultPadPosition: when non-null, each handle of the set is moved in the handle list to follow this handle
//   bsize: length of s's new value
// The old text is viewed as before-part | s | after-part.  s gets the new middle; each peer's start and end are
// kept relative to the before-part or after-part they fall in, and are clipped when they fall inside the
// replaced middle.
[681e12f]396static void assignEditSet(string_res & s, string_res * shareEditSetStartPeer, string_res * shareEditSetEndPeer,
[4b3b352]397 char * resultSesStart,
398 size_t resultSesLnth,
399 HandleNode * resultPadPosition, size_t bsize ) {
[6cc87c0]400
// old layout, captured before any handle is changed
401 char * beforeBegin = shareEditSetStartPeer->Handle.s;
[681e12f]402 size_t beforeLen = s.Handle.s - beforeBegin;
[f450f2f]403
[681e12f]404 char * afterBegin = s.Handle.s + s.Handle.lnth;
[6cc87c0]405 size_t afterLen = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - afterBegin;
[f450f2f]406
[681e12f]407 size_t oldLnth = s.Handle.lnth;
[f450f2f]408
// s itself: same distance from the start of the SES text, new length
[681e12f]409 s.Handle.s = resultSesStart + beforeLen;
410 s.Handle.lnth = bsize;
[6f7aff3]411 if (resultPadPosition)
[681e12f]412 MoveThisAfter( s.Handle, *resultPadPosition );
[f450f2f]413
414 // adjust all substring string and handle locations, and check if any substring strings are outside the new base string
[4b3b352]415 char *limit = resultSesStart + resultSesLnth;
[681e12f]416 for ( string_res * p = s.shareEditSet_next; p != &s; p = p->shareEditSet_next ) {
[218096f]417 verify (p->Handle.s >= beforeBegin);
[6cc87c0]418 if ( p->Handle.s >= afterBegin ) {
[218096f]419 verify ( p->Handle.s <= afterBegin + afterLen );
420 verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
[6cc87c0]421 // p starts after the edit
422 // take start and end as end-anchored
423 size_t startOffsetFromEnd = afterBegin + afterLen - p->Handle.s;
424 p->Handle.s = limit - startOffsetFromEnd;
425 // p->Handle.lnth unaffected
426 } else if ( p->Handle.s <= beforeBegin + beforeLen ) {
427 // p starts before, or at the start of, the edit
428 if ( p->Handle.s + p->Handle.lnth <= beforeBegin + beforeLen ) {
[f450f2f]429 // p ends before the edit
430 // take end as start-anchored too
431 // p->Handle.lnth unaffected
432 } else if ( p->Handle.s + p->Handle.lnth < afterBegin ) {
[6cc87c0]433 // p ends during the edit; p does not include the last character replaced
[f450f2f]434 // clip end of p to end at start of edit
435 p->Handle.lnth = beforeLen - ( p->Handle.s - beforeBegin );
436 } else {
437 // p ends after the edit
[218096f]438 verify ( p->Handle.s + p->Handle.lnth <= afterBegin + afterLen );
[f450f2f]439 // take end as end-anchored
440 // stretch-shrink p according to the edit
[681e12f]441 p->Handle.lnth += s.Handle.lnth;
[f450f2f]442 p->Handle.lnth -= oldLnth;
443 }
444 // take start as start-anchored
445 size_t startOffsetFromStart = p->Handle.s - beforeBegin;
[4b3b352]446 p->Handle.s = resultSesStart + startOffsetFromStart;
[6cc87c0]447 } else {
[218096f]448 verify ( p->Handle.s < afterBegin );
[f450f2f]449 // p starts during the edit
[218096f]450 verify( p->Handle.s + p->Handle.lnth >= beforeBegin + beforeLen );
[f450f2f]451 if ( p->Handle.s + p->Handle.lnth < afterBegin ) {
[6cc87c0]452 // p ends during the edit; p does not include the last character replaced
[f450f2f]453 // set p to empty string at start of edit
[681e12f]454 p->Handle.s = s.Handle.s;
[f450f2f]455 p->Handle.lnth = 0;
456 } else {
[6cc87c0]457 // p includes the end of the edit
[f450f2f]458 // clip start of p to start at end of edit
[6cc87c0]459 int charsToClip = afterBegin - p->Handle.s;
[681e12f]460 p->Handle.s = s.Handle.s + s.Handle.lnth;
[6cc87c0]461 p->Handle.lnth -= charsToClip;
[f450f2f]462 }
463 }
[6f7aff3]464 if (resultPadPosition)
465 MoveThisAfter( p->Handle, *resultPadPosition ); // move substring handle to maintain sorted order by string position
[4b3b352]466 }
467}
468
// Walk the circular share-edit set (SES) that `s` belongs to and report the extent of its base string:
//   shareEditSetStartPeer <- the member whose text starts at the lowest address
//   shareEditSetEndPeer   <- the member whose text ends at the highest address
// Both are `s` itself when `s` is alone in its set; on ties the member visited first is kept.
static void locateInShareEditSet( string_res & s, string_res *& shareEditSetStartPeer, string_res *& shareEditSetEndPeer ) {
	string_res * lowest = &s;
	string_res * highest = &s;

	string_res * peer = s.shareEditSet_next;
	while ( peer != &s ) {
		if ( peer->Handle.s < lowest->Handle.s ) lowest = peer;

		char * highestEnd = highest->Handle.s + highest->Handle.lnth;
		char * peerEnd = peer->Handle.s + peer->Handle.lnth;
		if ( highestEnd < peerEnd ) highest = peer;

		peer = peer->shareEditSet_next;
	}

	shareEditSetStartPeer = lowest;
	shareEditSetEndPeer = highest;
}
482
// Common implementation of assignment: replace the text of s with the bsize bytes at buffer, keeping every member
// of s's share-edit set (SES) consistent.  Returns s.
//   valSrc: the string_res that buffer belongs to, or a null reference (*0p) when buffer is not known to be the
//           text of a managed string.
// Three cases are handled below: s's SES owns its heap (edit in place, or in fresh heap storage); the whole SES
// text is replaced from a string in the same heap (refer to the source's characters); anything else (build the
// resulting text in a new string in s's heap).
[06280ad]483static string_res & assign_(string_res & s, const char * buffer, size_t bsize, const string_res & valSrc) {
[d32679d5]484
485 string_res * shareEditSetStartPeer;
486 string_res * shareEditSetEndPeer;
[681e12f]487 locateInShareEditSet( s, shareEditSetStartPeer, shareEditSetEndPeer );
[4b3b352]488
489 verify( shareEditSetEndPeer->Handle.s >= shareEditSetStartPeer->Handle.s );
[0ca15b7]490 size_t origEditSetLength = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - shareEditSetStartPeer->Handle.s;
[681e12f]491 verify( origEditSetLength >= s.Handle.lnth );
[4b3b352]492
[681e12f]493 if ( s.shareEditSet_owns_ulink ) { // assigning to private context
[6f7aff3]494 // ok to overwrite old value within LHS
495 char * prefixStartOrig = shareEditSetStartPeer->Handle.s;
[681e12f]496 int prefixLen = s.Handle.s - prefixStartOrig;
497 char * suffixStartOrig = s.Handle.s + s.Handle.lnth;
[6f7aff3]498 int suffixLen = shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth - suffixStartOrig;
499
// delta: change in total SES length caused by the assignment (negative when shrinking)
[681e12f]500 int delta = bsize - s.Handle.lnth;
// NOTE(review): a non-null result is treated as the previous byte area, which this branch frees after
// copying out of it -- confirm against VbyteTryAdjustLast.
501 if ( char * oldBytes = VbyteTryAdjustLast( *s.Handle.ulink, delta ) ) {
[6f7aff3]502 // growing: copy from old to new
[681e12f]503 char * dest = VbyteAlloc( *s.Handle.ulink, origEditSetLength + delta );
[fe18b46]504 char *destCursor = dest; memcpy(destCursor, prefixStartOrig, prefixLen);
505 destCursor += prefixLen; memcpy(destCursor, buffer , bsize );
506 destCursor += bsize; memcpy(destCursor, suffixStartOrig, suffixLen);
[681e12f]507 assignEditSet(s, shareEditSetStartPeer, shareEditSetEndPeer,
[6f7aff3]508 dest,
[0ca15b7]509 origEditSetLength + delta,
[6f7aff3]510 0p, bsize);
511 free( oldBytes );
512 } else {
513 // room is already allocated in-place: bubble suffix and overwrite middle
514 memmove( suffixStartOrig + delta, suffixStartOrig, suffixLen );
[681e12f]515 memcpy( s.Handle.s, buffer, bsize );
[6f7aff3]516
[681e12f]517 assignEditSet(s, shareEditSetStartPeer, shareEditSetEndPeer,
[6f7aff3]518 shareEditSetStartPeer->Handle.s,
[0ca15b7]519 origEditSetLength + delta,
[6f7aff3]520 0p, bsize);
521 }
[4b3b352]522
[6f7aff3]523 } else if ( // assigning to shared context
[681e12f]524 s.Handle.lnth == origEditSetLength && // overwriting entire run of SES
[6f7aff3]525 & valSrc && // sourcing from a managed string
[681e12f]526 valSrc.Handle.ulink == s.Handle.ulink ) { // sourcing from same heap
[4b3b352]527
[6f7aff3]528 // SES's result will only use characters from the source string => reuse source
[681e12f]529 assignEditSet(s, shareEditSetStartPeer, shareEditSetEndPeer,
[4b3b352]530 valSrc.Handle.s,
531 valSrc.Handle.lnth,
532 &((string_res&)valSrc).Handle, bsize);
533
534 } else {
[6f7aff3]535 // overwriting a proper substring of some string: mash characters from old and new together (copy on write)
536 // OR we are importing characters: need to copy eagerly (can't refer to source)
[4b3b352]537
538 // full string is from start of shareEditSetStartPeer thru end of shareEditSetEndPeer
[681e12f]539 // `s` occurs in the middle of it, to be replaced
[4b3b352]540 // build up the new text in `pasting`
541
542 string_res pasting = {
[681e12f]543 * s.Handle.ulink, // maintain same heap, regardless of context
[4b3b352]544 shareEditSetStartPeer->Handle.s, // start of SES
[681e12f]545 s.Handle.s - shareEditSetStartPeer->Handle.s }; // length of SES, before s
[4b3b352]546 append( pasting,
[681e12f]547 buffer, // start of replacement for s
548 bsize ); // length of replacement for s
[4b3b352]549 append( pasting,
[681e12f]550 s.Handle.s + s.Handle.lnth, // start of SES after s
[4b3b352]551 shareEditSetEndPeer->Handle.s + shareEditSetEndPeer->Handle.lnth -
[681e12f]552 (s.Handle.s + s.Handle.lnth) ); // length of SES, after s
[4b3b352]553
554 // The above string building can trigger compaction.
555 // The reference points (that are arguments of the string building) may move during that building.
[681e12f]556 // From s point on, they are stable.
[4b3b352]557
[681e12f]558 assignEditSet(s, shareEditSetStartPeer, shareEditSetEndPeer,
[4b3b352]559 pasting.Handle.s,
560 pasting.Handle.lnth,
561 &pasting.Handle, bsize);
562 }
[1733184]563
[681e12f]564 return s;
[4b3b352]565}
566
// Assign to s at most maxlen leading characters of src.  src is passed on as plain bytes (null valSrc).
[e891349]567string_res & assign(string_res & s, const string_res & src, size_t maxlen) {
568 return assign_(s, src.Handle.s, min(src.Handle.lnth, maxlen), *0p);
569}
570
// Assign to s the bsize bytes at buffer.
[06280ad]571string_res & assign(string_res & s, const char * buffer, size_t bsize) {
[681e12f]572 return assign_(s, buffer, bsize, *0p);
[f450f2f]573}
574
// Assign to s the single character c.
[681e12f]575string_res & ?=?(string_res & s, char c) {
576 return assign(s, &c, 1);
[d8d512e]577}
578
579// Copy assignment operator
// rhs is also passed as valSrc so that assign_ can recognize a source in the same heap.
[681e12f]580string_res & ?=?(string_res & s, const string_res & rhs) with( s ) {
581 return assign_(s, rhs.Handle.s, rhs.Handle.lnth, rhs);
[d8d512e]582}
583
// Overload for a non-const right-hand side; forwards to the const version.
[681e12f]584string_res & ?=?(string_res & s, string_res & rhs) with( s ) {
[f450f2f]585 const string_res & rhs2 = rhs;
[681e12f]586 return s = rhs2;
[f450f2f]587}
588
589
590// Destructor
// Removes s from its share-edit set and, when the set owns its heap and s was the only member left, deletes
// that heap.
[681e12f]591void ^?{}(string_res & s) with(s) {
[f450f2f]592 // much delegated to implied ^VbyteSM
593
594 // sever s from its share-edit peers, if any (four no-ops when already solo)
595 s.shareEditSet_prev->shareEditSet_next = s.shareEditSet_next;
596 s.shareEditSet_next->shareEditSet_prev = s.shareEditSet_prev;
[218096f]597 // s.shareEditSet_next = &s;
598 // s.shareEditSet_prev = &s;
[804bf677]599
// s's own links are left as they were, so next == &s exactly when s had no peers
600 if (shareEditSet_owns_ulink && s.shareEditSet_next == &s) { // last one out
601 delete( s.Handle.ulink );
602 }
[f450f2f]603}
604
605
606// Returns the character at the given index
607// With unicode support, this may be different from just the byte at the given
608// offset from the start of the string.
// As written, this returns the byte at offset `index`; the index is not range checked.
[681e12f]609char ?[?](const string_res & s, size_t index) with(s) {
[f450f2f]610 //TODO: Check if index is valid (no exceptions yet)
611 return Handle.s[index];
612}
613
// Replace the character of s at position index with val.  The edit is made through a temporary length-1
// string that shares edits with s, so it is visible in s even though s is declared const.
[681e12f]614void assignAt(const string_res & s, size_t index, char val) {
[e8b3717]615 // caution: not tested (not reachable by string-api-coverage interface)
616 // equivalent form at string level is `s[index] = val`,
617 // which uses the overload that returns a length-1 string
618 string_res editZone = { s, SHARE_EDITS, index, 1 };
[218096f]619 assign(editZone, &val, 1);
620}
621
[d8d512e]622
[f450f2f]623///////////////////////////////////////////////////////////////////
[d8d512e]624// Concatenation
[f450f2f]625
// Append the bsize bytes at buffer to str1.
// No bytes are copied when buffer already sits directly after str1's text.  Only the new bytes are copied when
// str1's text is the last thing in its heap.  Otherwise str1's text is moved to fresh heap storage first.
// Only str1's own handle is updated.
[681e12f]626void append(string_res & str1, const char * buffer, size_t bsize) {
[4e8df745]627 size_t clnth = str1.Handle.lnth + bsize;
628 if ( str1.Handle.s + str1.Handle.lnth == buffer ) { // already juxtapose ?
[d8d512e]629 // no-op
[f450f2f]630 } else { // must copy some text
// a zero-byte allocation is used to learn where the next allocation would be placed
[4e8df745]631 if ( str1.Handle.s + str1.Handle.lnth == VbyteAlloc(*str1.Handle.ulink, 0) ) { // str1 at end of string area ?
[0f781fb8]632 VbyteAlloc( *str1.Handle.ulink, bsize ); // create room for 2nd part at the end of string area
[f450f2f]633 } else { // copy the two parts
[fe18b46]634 char * str1newBuf = VbyteAlloc( *str1.Handle.ulink, clnth );
635 char * str1oldBuf = str1.Handle.s; // must read after VbyteAlloc call in case it gc's
636 str1.Handle.s = str1newBuf;
[4e8df745]637 memcpy( str1.Handle.s, str1oldBuf, str1.Handle.lnth );
[f450f2f]638 } // if
[4e8df745]639 memcpy( str1.Handle.s + str1.Handle.lnth, buffer, bsize );
[f450f2f]640 } // if
641 str1.Handle.lnth = clnth;
642}
643
// Append all of str2 to str1.
[681e12f]644void ?+=?(string_res & str1, const string_res & str2) {
[d8d512e]645 append( str1, str2.Handle.s, str2.Handle.lnth );
[f450f2f]646}
647
// Append at most maxlen leading characters of str2 to str1.
[e891349]648void append(string_res & str1, const string_res & str2, size_t maxlen) {
649 append( str1, str2.Handle.s, min(str2.Handle.lnth, maxlen) );
650}
651
// Append the single character c to s.
[681e12f]652void ?+=?(string_res & s, char c) {
653 append( s, & c, 1 );
[f450f2f]654}
// Append the null-terminated C string c to s (the terminator is not appended).
[e891349]655void ?+=?(string_res & s, const char * c) {
656 append( s, c, strlen(c) );
657}
[f450f2f]658
[38951c31]659///////////////////////////////////////////////////////////////////
660// Repetition
661
// Replace s with `factor` back-to-back copies of its current value; a factor of 0 leaves s empty.
662void ?*=?(string_res & s, size_t factor) {
// keep the original value, because s is emptied before the copies are appended
[e8b3717]663 string_res s2 = { s, COPY_VALUE };
[38951c31]664 s = "";
665 for (factor) s += s2;
666}
667
[f450f2f]668//////////////////////////////////////////////////////////
669// Comparisons
670
// Three-way comparison of two strings: negative, zero or positive as s1 orders before, equal to or after s2.
// The bytes of the common-length prefix decide first; when they tie, the difference of the lengths decides.
int strcmp(const string_res & s1, const string_res & s2) {
	int prefixOrder = memcmp( s1.Handle.s, s2.Handle.s, min( s1.Handle.lnth, s2.Handle.lnth ) );
	if ( prefixOrder == 0 ) return s1.Handle.lnth - s2.Handle.lnth;
	return prefixOrder;
}
677
// Relational operators between two string_res values, each defined by the sign of strcmp(s1, s2).
[681e12f]678bool ?==?(const string_res & s1, const string_res & s2) { return strcmp(s1, s2) == 0; }
679bool ?!=?(const string_res & s1, const string_res & s2) { return strcmp(s1, s2) != 0; }
680bool ?>? (const string_res & s1, const string_res & s2) { return strcmp(s1, s2) > 0; }
681bool ?>=?(const string_res & s1, const string_res & s2) { return strcmp(s1, s2) >= 0; }
682bool ?<=?(const string_res & s1, const string_res & s2) { return strcmp(s1, s2) <= 0; }
683bool ?<? (const string_res & s1, const string_res & s2) { return strcmp(s1, s2) < 0; }
[416b443]684
// Three-way comparison of a string with a null-terminated C string: negative, zero or positive as s1 orders
// before, equal to or after s2.  The bytes of the common-length prefix decide first, then the lengths.
// The C string is compared where it lies; no temporary string_res (and so no string-heap allocation) is made.
int strcmp (const string_res & s1, const char * s2) {
	size_t lnth1 = s1.Handle.lnth;
	size_t lnth2 = strlen( s2 );
	int ans = memcmp( s1.Handle.s, s2, min( lnth1, lnth2 ) );
	if ( ans != 0 ) return ans;
	return lnth1 - lnth2;
}
[416b443]689
// Relational operators between a string_res and a C string, each defined by the sign of strcmp(s1, s2).
[06280ad]690bool ?==?(const string_res & s1, const char * s2) { return strcmp(s1, s2) == 0; }
691bool ?!=?(const string_res & s1, const char * s2) { return strcmp(s1, s2) != 0; }
692bool ?>? (const string_res & s1, const char * s2) { return strcmp(s1, s2) > 0; }
693bool ?>=?(const string_res & s1, const char * s2) { return strcmp(s1, s2) >= 0; }
694bool ?<=?(const string_res & s1, const char * s2) { return strcmp(s1, s2) <= 0; }
695bool ?<? (const string_res & s1, const char * s2) { return strcmp(s1, s2) < 0; }
[416b443]696
// Three-way comparison of a null-terminated C string with a string: negative, zero or positive as s1 orders
// before, equal to or after s2.  The bytes of the common-length prefix decide first, then the lengths.
// The C string is compared where it lies; no temporary string_res (and so no string-heap allocation) is made.
int strcmp (const char * s1, const string_res & s2) {
	size_t lnth1 = strlen( s1 );
	size_t lnth2 = s2.Handle.lnth;
	int ans = memcmp( s1, s2.Handle.s, min( lnth1, lnth2 ) );
	if ( ans != 0 ) return ans;
	return lnth1 - lnth2;
}
701
// Relational operators between a C string and a string_res, each defined by the sign of strcmp(s1, s2).
[06280ad]702bool ?==?(const char * s1, const string_res & s2) { return strcmp(s1, s2) == 0; }
703bool ?!=?(const char * s1, const string_res & s2) { return strcmp(s1, s2) != 0; }
704bool ?>? (const char * s1, const string_res & s2) { return strcmp(s1, s2) > 0; }
705bool ?>=?(const char * s1, const string_res & s2) { return strcmp(s1, s2) >= 0; }
706bool ?<=?(const char * s1, const string_res & s2) { return strcmp(s1, s2) <= 0; }
707bool ?<? (const char * s1, const string_res & s2) { return strcmp(s1, s2) < 0; }
[416b443]708
709
[f450f2f]710
711//////////////////////////////////////////////////////////
712// Search
713
// True when the character ch occurs anywhere in s; false for an empty s.
bool contains(const string_res & s, char ch) {
	size_t remaining = size(s);
	const char * cursor = s.Handle.s;
	while ( remaining > 0 ) {
		if ( *cursor == ch ) return true;
		cursor += 1;
		remaining -= 1;
	}
	return false;
}
720
// Position of the first occurrence of the character `search` in s, searching from the start of s.
[681e12f]721int find(const string_res & s, char search) {
[08ed947]722 return findFrom(s, 0, search);
[f450f2f]723}
724
// Position of the first occurrence of the character `search` in s at or after position fromPos.
// Returns the length of s when there is no such occurrence, including when fromPos is at or beyond the end.
int findFrom(const string_res & s, size_t fromPos, char search) {
	// FIXME: This paricular overload (find of single char) is optimized to use memchr.
	// The general overload (find of string, memchr applying to its first character) and `contains` should be adjusted to match.
	size_t lnth = s.Handle.lnth;
	// Nothing left to scan.  Without this guard lnth - fromPos would wrap around for fromPos > lnth and
	// memchr would be asked to search far beyond the end of the string.
	if ( fromPos >= lnth ) return lnth;

	int searchVal = search;
	char * foundAt = (char *) memchr( s.Handle.s + fromPos, searchVal, lnth - fromPos );
	if ( foundAt == 0p ) return lnth;
	return foundAt - s.Handle.s;
}
[f450f2f]735
// Convenience overloads of find/findFrom.  Each forwards to the (s, fromPos, buffer, length) form of findFrom;
// the find variants search from position 0.
[681e12f]736int find(const string_res & s, const string_res & search) {
[08ed947]737 return findFrom(s, 0, search);
738}
739
[681e12f]740int findFrom(const string_res & s, size_t fromPos, const string_res & search) {
[08ed947]741 return findFrom(s, fromPos, search.Handle.s, search.Handle.lnth);
[f450f2f]742}
743
// C-string forms: the length of the search text is taken with strlen
[06280ad]744int find(const string_res & s, const char * search) {
[08ed947]745 return findFrom(s, 0, search);
746}
[06280ad]747int findFrom(const string_res & s, size_t fromPos, const char * search) {
[08ed947]748 return findFrom(s, fromPos, search, strlen(search));
[f450f2f]749}
750
[06280ad]751int find(const string_res & s, const char * search, size_t searchsize) {
[08ed947]752 return findFrom(s, 0, search, searchsize);
753}
754
// Position of the first occurrence, at or after fromPos, of the searchsize bytes at `search` within s.
// Returns the length of s when there is no occurrence.  An empty search text is always reported at position 0,
// whatever fromPos is.
int findFrom(const string_res & s, size_t fromPos, const char * search, size_t searchsize) {
	/* Remaining implementations essentially ported from Sunjay's work */

	// FIXME: This is a naive algorithm. We probably want to switch to someting
	// like Boyer-Moore in the future.
	// https://en.wikipedia.org/wiki/String_searching_algorithm

	// Always find the empty string
	if ( searchsize == 0 ) return 0;

	size_t lnth = s.Handle.lnth;
	// try each start position in turn, stopping once too few characters remain for a match to fit
	for ( size_t at = fromPos; at < lnth && lnth - at >= searchsize; at += 1 ) {
		if ( memcmp( s.Handle.s + at, search, searchsize ) == 0 ) return at;
	}
	return lnth;
}
791
// Substring-membership test with the search text given as a string_res; forwards its byte pointer
// and length to the (pointer, length) overload.
bool includes(const string_res & s, const string_res & search) {
	const char * needle = search.Handle.s;
	size_t needleLen = search.Handle.lnth;
	return includes(s, needle, needleLen);
}
795
// Substring-membership test with a C-string search text; its length is measured with strlen.
bool includes(const string_res & s, const char * search) {
	size_t searchsize = strlen(search);
	return includes(s, search, searchsize);
}
799
// Reports whether the searchsize bytes at `search` occur anywhere in s.
// A zero-length search is included in every string, the empty string among them.
bool includes(const string_res & s, const char * search, size_t searchsize) {
	// find answers 0 for an empty search and the length of s for "not found"; when s is itself empty
	// those two answers coincide, so the empty search must be settled before consulting find.
	if ( searchsize == 0 ) return true;
	return find(s, search, searchsize) < s.Handle.lnth;
}
803
// Prefix test with the prefix given as a string_res; forwards its byte pointer and length to the
// (pointer, length) overload.
bool startsWith(const string_res & s, const string_res & prefix) {
	const char * head = prefix.Handle.s;
	size_t headLen = prefix.Handle.lnth;
	return startsWith(s, head, headLen);
}
807
// Prefix test with a C-string prefix; its length is measured with strlen.
bool startsWith(const string_res & s, const char * prefix) {
	size_t prefixsize = strlen(prefix);
	return startsWith(s, prefix, prefixsize);
}
811
// True when the first prefixsize bytes of s equal the prefixsize bytes at `prefix`.
// A prefix longer than s never matches.
bool startsWith(const string_res & s, const char * prefix, size_t prefixsize) {
	return ! ( s.Handle.lnth < prefixsize )
		&& memcmp(s.Handle.s, prefix, prefixsize) == 0;
}
818
// Suffix test with the suffix given as a string_res; forwards its byte pointer and length to the
// (pointer, length) overload.
bool endsWith(const string_res & s, const string_res & suffix) {
	const char * tailText = suffix.Handle.s;
	size_t tailLen = suffix.Handle.lnth;
	return endsWith(s, tailText, tailLen);
}
822
// Suffix test with a C-string suffix; its length is measured with strlen.
bool endsWith(const string_res & s, const char * suffix) {
	size_t suffixsize = strlen(suffix);
	return endsWith(s, suffix, suffixsize);
}
826
// True when the last suffixsize bytes of s equal the suffixsize bytes at `suffix`.
// A suffix longer than s never matches.
bool endsWith(const string_res & s, const char * suffix, size_t suffixsize) {
	if ( suffixsize > s.Handle.lnth ) return false;

	// First byte of s that lines up with the first byte of suffix.
	const char * tail = s.Handle.s + ( s.Handle.lnth - suffixsize );
	return memcmp( tail, suffix, suffixsize ) == 0;
}
836
837 /* Back to Mike's work */
838
839
840///////////////////////////////////////////////////////////////////////////
841// charclass, include, exclude
842
// Constructs a character class from the text of an existing string_res, by constructing s from
// that string's byte pointer and length.
// NOTE(review): presumably this resolves to the (const char *, size_t) constructor -- confirm.
[681e12f]843void ?{}( charclass_res & s, const string_res & chars) {
844 (s){ chars.Handle.s, chars.Handle.lnth };
[f450f2f]845}
846
// Constructs a character class from a C string, by constructing s from the pointer and its strlen.
// NOTE(review): presumably this resolves to the (const char *, size_t) constructor -- confirm.
[681e12f]847void ?{}( charclass_res & s, const char * chars ) {
848 (s){ chars, strlen(chars) };
[f450f2f]849}
850
// Constructs a character class from the first charssize bytes at chars: the member s.chars is
// constructed from that pointer and length. The characters are stored as given (not sorted).
[681e12f]851void ?{}( charclass_res & s, const char * chars, size_t charssize ) {
852 (s.chars){ chars, charssize };
[f450f2f]853 // now sort it ?
854}
855
// Destroys a character class by explicitly running the destructor of its s.chars member.
[681e12f]856void ^?{}( charclass_res & s ) {
857 ^(s.chars){};
[f450f2f]858}
859
// Membership test: is character c one of the characters held in mask.chars ?
static bool test( const charclass_res & mask, char c ) {
	// instead, use sorted char list?
	bool member = contains( mask.chars, c );
	return member;
}
864
// Index of the first character of s that belongs to mask, or size(s) when no character of s does.
// Put differently: the length of the leading run of s made only of characters outside mask.
int exclude(const string_res & s, const charclass_res & mask) {
	size_t len = size(s);
	size_t pos = 0;
	while ( pos < len && ! test(mask, s[pos]) ) {
		pos += 1;
	}
	return pos;
}
871
// Index of the first character of s that does not belong to mask, or size(s) when every character does.
// Put differently: the length of the leading run of s made only of characters inside mask.
int include(const string_res & s, const charclass_res & mask) {
	size_t len = size(s);
	size_t pos = 0;
	while ( pos < len && test(mask, s[pos]) ) {
		pos += 1;
	}
	return pos;
}
878
879//######################### VbyteHeap "implementation" #########################
880
881
882// Add the current HandleNode node s into the list immediately after HandleNode node n.
883
// Links node s into the doubly-linked handle list directly behind node n, i.e. between n and the
// node that was n's forward neighbour on entry. Both nodes are expected to share the same ulink.
static void AddThisAfter( HandleNode & s, HandleNode & n ) with(s) {
#ifdef VbyteDebug
	serr | "enter:AddThisAfter, s:" | &s | " n:" | &n;
#endif // VbyteDebug
	// Performance note: we are on the critical path here. MB has ensured that the verifies don't contribute to runtime (are compiled away, like they're supposed to be).
	verify( n.ulink != 0p );
	verify( s.ulink == n.ulink );

	HandleNode * succ = n.flink;						// n's forward neighbour before the splice
	s.flink = succ;
	s.blink = &n;
	succ->blink = &s;
	n.flink = &s;
#ifdef VbyteDebug
	{
		serr | "HandleList:";
		serr | nlOff;
		for ( HandleNode * cur = HeaderPtr->flink; cur != HeaderPtr; cur = cur->flink ) {
			serr | "\tnode:" | cur | " lnth:" | cur->lnth | " s:" | (void *)cur->s | ",\"";
			for ( k; cur->lnth ) {
				serr | cur->s[k];
			} // for
			serr | "\" flink:" | cur->flink | " blink:" | cur->blink | nl;
		} // for
		serr | nlOn;
	}
	serr | "exit:AddThisAfter";
#endif // VbyteDebug
} // AddThisAfter
911
912
913// Delete the current HandleNode node.
914
// Unlinks node s from the doubly-linked handle list by joining its two neighbours to each other.
// The link fields of s itself are left as they were.
static void DeleteNode( HandleNode & s ) with(s) {
#ifdef VbyteDebug
	serr | "enter:DeleteNode, s:" | &s;
#endif // VbyteDebug
	HandleNode * next = s.flink;
	HandleNode * prev = s.blink;
	next->blink = prev;
	prev->flink = next;
#ifdef VbyteDebug
	serr | "exit:DeleteNode";
#endif // VbyteDebug
} // DeleteNode
925
926
927// Allocates specified storage for a string from byte-string area. If not enough space remains to perform the
[6f7aff3]928// allocation, the garbage collection routine is called.
[f450f2f]929
// Claims `size` bytes at the current end of the byte-string area and returns their address.
// When the request would run past ExtVbyte, garbage() is invoked first to make room.
static char * VbyteAlloc( VbyteHeap & s, int size ) with(s) {
#ifdef VbyteDebug
	serr | "enter:VbyteAlloc, size:" | size;
#endif // VbyteDebug
	// Address just past the end of the requested bytes, were they placed at EndVbyte.
	uintptr_t wantedEnd = ( uintptr_t )EndVbyte + size;
	if ( wantedEnd > ( uintptr_t )ExtVbyte ) {			// not enough room for new byte-string ?
		garbage( s, size );								// fire up the garbage collector
		verify( (( uintptr_t )EndVbyte + size) <= ( uintptr_t )ExtVbyte && "garbage run did not free up required space" );
	} // if

	// Hand out the bytes beginning at the current end, then move the end marker past them.
	char * claimed = EndVbyte;
	EndVbyte += size;
#ifdef VbyteDebug
	serr | "exit:VbyteAlloc, r:" | (void *)claimed | " EndVbyte:" | (void *)EndVbyte | " ExtVbyte:" | ExtVbyte;
#endif // VbyteDebug
	return claimed;
} // VbyteAlloc
949
950
[6f7aff3]951// Adjusts the last allocation in this heap by delta bytes, or resets this heap to be able to offer
952// new allocations of its original size + delta bytes. Positive delta means bigger;
953// negative means smaller. A null return indicates that the original heap location has room for
954// the requested growth. A non-null return indicates that copying to a new location is required
955// but has not been done; the returned value is the old heap storage location; `this` heap is
956// modified to reference the new location. In the copy-required case, the caller should use
957// VbyteAlloc to claim the new space, while doing optimal copying from old to new, then free old.
958
// Tries to move EndVbyte by delta bytes within the current byte area.
// Returns 0p when the adjusted end still fits before ExtVbyte (EndVbyte has then been moved).
// Otherwise the heap is pointed at a freshly allocated, empty area of twice the previous CurrSize
// and the address of the previous area is returned; nothing is copied or freed here.
static char * VbyteTryAdjustLast( VbyteHeap & s, int delta ) with(s) {
	bool fits = ( uintptr_t )EndVbyte + delta <= ( uintptr_t )ExtVbyte;
	if ( ! fits ) {
		char * previousArea = StartVbyte;

		NoOfExtensions += 1;
		CurrSize *= 2;
		char * freshArea = TEMP_ALLOC(char, CurrSize);
		StartVbyte = freshArea;
		EndVbyte = freshArea;
		ExtVbyte = StartVbyte + CurrSize;

		return previousArea;
	}

	// room available
	EndVbyte += delta;
	return 0p;
}
975
976
[f450f2f]977// Move the current HandleNode node somewhere after an existing HandleNode node h so that the list stays in ascending order by
978// the address in the byte string area.
979
// Repositions node s in the handle list relative to node h: starting at h's forward neighbour, the list
// is walked until a node is met whose byte-string pointer is 0 or is not below that of s; s is then
// unlinked and re-linked directly in front of that node (unless it already sits there).
// Both nodes are expected to share the same, non-null ulink.
// NOTE(review): inside with(s), the bare `s` in `s < h.s` and `s > ( i->s )` presumably resolves to the
// node's byte-string pointer field, while `& s` and the call arguments denote the node -- confirm
// against the HandleNode declaration.
[681e12f]980static void MoveThisAfter( HandleNode & s, const HandleNode & h ) with(s) {
[f450f2f]981#ifdef VbyteDebug
[681e12f]982 serr | "enter:MoveThisAfter, s:" | & s | " h:" | & h;
[f450f2f]983#endif // VbyteDebug
[0f781fb8]984 verify( h.ulink != 0p );
[681e12f]985 verify( s.ulink == h.ulink );
[f450f2f]986 if ( s < h.s ) { // check argument values
987 // serr | "VbyteSM: Error - Cannot move byte string starting at:" | s | " after byte string starting at:"
988 // | ( h->s ) | " and keep handles in ascending order";
989 // exit(-1 );
[218096f]990 verify( 0 && "VbyteSM: Error - Cannot move byte strings as requested and keep handles in ascending order");
[f450f2f]991 } // if
992
993 HandleNode *i;
 // The loop body is empty: it only advances i to the insertion point.
994 for ( i = h.flink; i->s != 0 && s > ( i->s ); i = i->flink ); // find the position for this node after h
 // Relink only when s is not already the node directly in front of i.
[681e12f]995 if ( & s != i->blink ) {
996 DeleteNode( s );
997 AddThisAfter( s, *i->blink );
[f450f2f]998 } // if
999#ifdef VbyteDebug
1000 {
1001 serr | "HandleList:";
1002 serr | nlOff;
1003 for ( HandleNode *n = HeaderPtr->flink; n != HeaderPtr; n = n->flink ) {
1004 serr | "\tnode:" | n | " lnth:" | n->lnth | " s:" | (void *)n->s | ",\"";
 // This loop index i is a fresh variable that hides the HandleNode *i declared above.
[9ca5e56]1005 for ( i; n->lnth ) {
1006 serr | n->s[i];
[f450f2f]1007 } // for
[6cc87c0]1008 serr | "\" flink:" | n->flink | " blink:" | n->blink | nl;
[f450f2f]1009 } // for
1010 serr | nlOn;
1011 }
[6cc87c0]1012 serr | "exit:MoveThisAfter";
[f450f2f]1013#endif // VbyteDebug
1014} // MoveThisAfter
1015
1016
1017
1018
1019
1020//######################### VbyteHeap #########################
1021
1022// Compare two byte strings in the byte-string area. The routine returns the following values:
1023//
1024// 1 => Src1-byte-string > Src2-byte-string
1025// 0 => Src1-byte-string = Src2-byte-string
1026// -1 => Src1-byte-string < Src2-byte-string
1027
// Compares the run of Src1 beginning at Src1Start with the run of Src2 beginning at Src2Start and
// returns 1, 0 or -1. Characters are compared pairwise from the front; once one run reaches its
// stopping index, the other run counts as equal only if its remaining examined characters are blanks.
// NOTE(review): every stopping test is `i == SrcNLnth - 1`, so the character at index Lnth - 1 of each
// run is never examined, and a length of 0 never satisfies the test for i >= 0. Confirm what the
// callers pass as lengths before relying on this routine.
[0f781fb8]1028int ByteCmp( char *Src1, int Src1Start, int Src1Lnth, char *Src2, int Src2Start, int Src2Lnth ) {
[f450f2f]1029#ifdef VbyteDebug
1030 serr | "enter:ByteCmp, Src1Start:" | Src1Start | " Src1Lnth:" | Src1Lnth | " Src2Start:" | Src2Start | " Src2Lnth:" | Src2Lnth;
1031#endif // VbyteDebug
1032 int cmp;
1033
 // Every exit from the nested loops is a labelled break out of CharZip with cmp already set.
1034 CharZip: for ( int i = 0; ; i += 1 ) {
 // Src2 reached its stopping index: the rest of Src1 decides between "equal" and "greater".
1035 if ( i == Src2Lnth - 1 ) {
1036 for ( ; ; i += 1 ) {
1037 if ( i == Src1Lnth - 1 ) {
1038 cmp = 0;
1039 break CharZip;
1040 } // exit
1041 if ( Src1[Src1Start + i] != ' ') {
1042 // SUSPECTED BUG: this could be why Peter got the bug report about == " " (why is this case here at all?)
1043 cmp = 1;
1044 break CharZip;
1045 } // exit
1046 } // for
1047 } // exit
 // Src1 reached its stopping index: the rest of Src2 decides between "equal" and "less".
1048 if ( i == Src1Lnth - 1 ) {
1049 for ( ; ; i += 1 ) {
1050 if ( i == Src2Lnth - 1 ) {
1051 cmp = 0;
1052 break CharZip;
1053 } // exit
1054 if ( Src2[Src2Start + i] != ' ') {
1055 cmp = -1;
1056 break CharZip;
1057 } // exit
1058 } // for
1059 } // exit
 // Neither run has stopped: the first differing pair of characters decides the result.
1060 if ( Src2[Src2Start + i] != Src1[Src1Start+ i]) {
1061 cmp = Src1[Src1Start + i] > Src2[Src2Start + i] ? 1 : -1;
1062 break CharZip;
1063 } // exit
1064 } // for
1065#ifdef VbyteDebug
1066 serr | "exit:ByteCmp, cmp:" | cmp;
1067#endif // VbyteDebug
1068 return cmp;
1069} // ByteCmp
1070
1071
1072// The compaction moves all of the byte strings currently in use to the beginning of the byte-string area and modifies
1073// the handles to reflect the new positions of the byte strings. Compaction assumes that the handle list is in ascending
1074// order by pointers into the byte-string area. The strings associated with substrings do not have to be moved because
1075// the containing string has been moved. Hence, they only require that their string pointers be adjusted.
1076
// Packs the byte strings referenced by the handle list contiguously from StartVbyte upward and
// repoints each handle at its string's new position; EndVbyte ends just past the last packed string.
// The outer loop copies one string; the inner loop then rebases every following handle whose pointer
// lies inside that string's old extent, instead of copying its bytes again.
// NOTE(review): the first handle is processed before any end-of-list test, so on an empty list the
// header node itself goes through the memmove -- presumably its lnth is 0; confirm.
[681e12f]1077void compaction(VbyteHeap & s) with(s) {
[f450f2f]1078 HandleNode *h;
1079 char *obase, *nbase, *limit;
1080
1081 NoOfCompactions += 1;
 // Restart the used region at the beginning of the area; it grows again as strings are packed.
1082 EndVbyte = StartVbyte;
1083 h = Header.flink; // ignore header node
[9ca5e56]1084 for () {
 // memmove is used because source and destination may overlap.
[94647b0b]1085 memmove( EndVbyte, h->s, h->lnth );
 // obase = where the string was, nbase = where it is now, limit = one past its old last byte.
[f450f2f]1086 obase = h->s;
1087 h->s = EndVbyte;
1088 nbase = h->s;
1089 EndVbyte += h->lnth;
1090 limit = obase + h->lnth;
1091 h = h->flink;
1092
1093 // check if any substrings are allocated within a string
1094
[9ca5e56]1095 for () {
[f450f2f]1096 if ( h == &Header ) break; // end of header list ?
1097 if ( h->s >= limit ) break; // outside of current string ?
 // Keep the handle's offset within the string, relative to the string's new base.
1098 h->s = nbase + (( uintptr_t )h->s - ( uintptr_t )obase );
1099 h = h->flink;
1100 } // for
1101 if ( h == &Header ) break; // end of header list ?
1102 } // for
1103} // compaction
1104
1105
// Fraction of the current heap size that must be free (i.e. garbage) for garbage() to compact in
// place; with less free space than this, the heap is extended instead.
// The 0.1 default (expand when less than 10% is free) is inherited from prior work; it is probably
// an unreasonable default, but early-round tests on changing it still need to be assessed.
static double heap_expansion_freespace_threshold = 0.1;

// Sets the threshold from its complement: val is the liveness fraction, and the free-space
// threshold stored is 1.0 - val.
void TUNING_set_string_heap_liveness_threshold( double val ) {
	const double freeFraction = 1.0 - val;
	heap_expansion_freespace_threshold = freeFraction;
}
1112
1113
[f450f2f]1114// Garbage determines the amount of free space left in the heap and then reduces, leaves the same, or extends the size of
1115// the heap. The heap is then compacted in the existing heap or into the newly allocated heap.
1116
// Reclaims space in heap s so that a request of minreq bytes can be served.
// The lengths of all handles are totalled to estimate the bytes in use. If the free remainder is
// below the configured fraction of CurrSize, or below minreq, the heap is extended (which compacts
// into the new area); otherwise the strings are compacted in place.
void garbage(VbyteHeap & s, int minreq ) with(s) {
#ifdef VbyteDebug
	serr | "enter:garbage";
	{
		serr | "HandleList:";
		for ( HandleNode * node = Header.flink; node != &Header; node = node->flink ) {
			serr | nlOff;
			serr | "\tnode:" | node | " lnth:" | node->lnth | " s:" | (void *)node->s | ",\"";
			for ( k; node->lnth ) {
				serr | node->s[k];
			} // for
			serr | nlOn;
			serr | "\" flink:" | node->flink | " blink:" | node->blink;
		} // for
	}
#endif // VbyteDebug
	// calculate amount of byte area used
	int usedBytes = 0;
	HandleNode * cur = Header.flink;
	while ( cur != &Header ) {
		usedBytes += cur->lnth;
		cur = cur->flink;
	} // while
	int freeBytes = ( uintptr_t )ExtVbyte - ( uintptr_t )StartVbyte - usedBytes;

	bool belowThreshold = ( double ) freeBytes < ( CurrSize * heap_expansion_freespace_threshold );
	bool tooSmallForRequest = freeBytes < minreq;

	if ( ! belowThreshold && ! tooSmallForRequest ) {
		compaction(s);									// in-place
	} else {
		// free space less than threshold or not enough to serve cur request
		extend( s, max( CurrSize, minreq ) );			// extend the heap; `extend` implies a `compaction` during the copy

		// Peter says, "This needs work before it should be used."
		//		} else if ( AmountFree > CurrSize / 2 ) {		// free space greater than 3 times the initial allocation ?
		//			reduce(( AmountFree / CurrSize - 3 ) * CurrSize ); // reduce the memory
	} // if
#ifdef VbyteDebug
	{
		serr | "HandleList:";
		for ( HandleNode * node = Header.flink; node != &Header; node = node->flink ) {
			serr | nlOff;
			serr | "\tnode:" | node | " lnth:" | node->lnth | " s:" | (void *)node->s | ",\"";
			for ( k; node->lnth ) {
				serr | node->s[k];
			} // for
			serr | nlOn;
			serr | "\" flink:" | node->flink | " blink:" | node->blink;
		} // for
	}
	serr | "exit:garbage";
#endif // VbyteDebug
} // garbage
1170
1171#undef VbyteDebug
1172
1173
1174
1175// Extend the size of the byte-string area by creating a new area and copying the old area into it. The old byte-string
1176// area is deleted.
1177
// Grows heap s: CurrSize is increased by `size`, or by InitSize when `size` is not larger than it;
// a new area of that total size is allocated, the live strings are compacted into it, and the
// previous area is freed.
void extend( VbyteHeap & s, int size ) with (s) {
#ifdef VbyteDebug
	serr | "enter:extend, size:" | size;
#endif // VbyteDebug
	NoOfExtensions += 1;
	char * retiredArea = StartVbyte;					// save previous byte area

	int growth = InitSize;								// minimum extension, initial size
	if ( size > InitSize ) growth = size;
	CurrSize += growth;

	char * freshArea = TEMP_ALLOC(char, CurrSize);
	StartVbyte = freshArea;
	EndVbyte = freshArea;
	ExtVbyte = (void *)( StartVbyte + CurrSize );

	compaction(s);										// copy from old heap to new & adjust pointers to new heap
	free( retiredArea );								// release old heap
#ifdef VbyteDebug
	serr | "exit:extend, CurrSize:" | CurrSize;
#endif // VbyteDebug
} // extend
1196
[7b0e8b7]1197//WIP
1198#if 0
[f450f2f]1199
1200// Reduce the size of the byte-string area by creating a new area and copying the old area into it. The old byte-string
1201// area is deleted.
1202
// Work in progress, disabled by the surrounding #if 0. Written as a member function with new/delete,
// unlike the rest of this file.
// Shrinks the byte area: CurrSize is lowered by `size`, a new area of the lowered size is allocated,
// compaction is run to move the strings across, and the previous area is released.
1203void VbyteHeap::reduce( int size ) {
1204#ifdef VbyteDebug
1205 serr | "enter:reduce, size:" | size;
1206#endif // VbyteDebug
1207 char *OldStartVbyte;
1208
1209 NoOfReductions += 1;
1210 OldStartVbyte = StartVbyte; // save previous byte area
1211
 // NOTE(review): nothing here checks that the lowered CurrSize still holds the live strings -- confirm before enabling.
1212 CurrSize -= size;
1213 StartVbyte = EndVbyte = new char[CurrSize];
1214 ExtVbyte = (void *)( StartVbyte + CurrSize );
1215 compaction(); // copy from old heap to new & adjust pointers to new heap
1216 delete OldStartVbyte; // release old heap
1217#ifdef VbyteDebug
1218 serr | "exit:reduce, CurrSize:" | CurrSize;
1219#endif // VbyteDebug
1220} // reduce
1221
1222
1223#endif
Note: See TracBrowser for help on using the repository browser.