Ignore:
Timestamp:
Oct 5, 2023, 4:17:14 PM (9 months ago)
Author:
Michael Brooks <mlbrooks@…>
Branches:
master
Children:
4d860ea3
Parents:
b67b632
Message:

Fix read-to-variable-length-string cases when internal buffer fills.

Also fix read-to-cstring ability to give no-exception cases when an entire buffer fills.

The added test cases run, and fail, when run against prior libcfa.
Doing so illustrates a CFA-string-level bug.
Doing so also illustrates a C-string-level change in semantics.

At the CFA-string level, the bug was, when reading strings of just the right length,
what should be two reads ("abc" then "def") gets mashed into one ("abcdef").
These cases are clearly bugs because a test program that just echoes chunks of delimited input would do so inaccurately.
They're just hard to drive because the relevant chunk lengths are implementation-dependent, and sometimes big.

At the C-string level, the semantic change concerns when to throw the cstring_length exception.
This change turns the original semantics,
"An exception means the maximum number of characters was read," into
"An exception means that if the buffer were larger, then more characters would have been read."

The added test cases cover the respective stop conditions for manipulator state "%s", include, exclude, getline, and getline/delimiter.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • tests/collections/string-istream-manip.cfa

    rb67b632 r0860d9c  
    33#include <collections/string.hfa>
    44#include <collections/string_res.hfa>
     5#include <stdio.h>
     6
     7// No-op manipulators.
     8// Temporary hack while there are two code paths in the string implementation.
     9// (One for reading plain strings, the other for reading via a manipulator.)
     10// The test cases that use plainjane(-) are exercising the via-manipulator code path,
     11// just with trivial manipulation.
     12static _Istream_Sstr plainjane( string     & s )  { return (_Istream_Sstr)@{  s, {{0p}, -1, {.flags.rwd : false}} }; }
     13static _Istream_Rstr plainjane( string_res & s )  { return (_Istream_Rstr)@{ &s, {{0p}, -1, {.flags.rwd : false}} }; }
     14
     15static void forceStringHeapFreeSpaceTo(int desiredSize) {
     16    for (1_000_000) {
     17        string x = "a";
     18        (void)x;
     19      if (desiredSize == DEBUG_string_bytes_avail_until_gc(DEBUG_string_heap())) return;
     20    }
     21    sout | "Unable to force size" | desiredSize | "in 1,000,000 tries";
     22}
    523
    624int main() {
     25    // These "pre" cases deal with issues analogous to the "pre" cases of io/manipulatorsInput.
     26    // The acceptance criterion is simpler but driving the cases is harder.
     27    // The tests just read strings and echo what they read; acceptance of simple echoing assures
     28    // no spurious splitting or merging.
     29    // The lengths of the strings are chosen to match white-box knowledge of when the string layer
     30    // has to drive the cstring layer through a second iteration:
     31    //  - for no-manip, lengths are near the room at end of string heap
     32    //    (chosen target size of 9 showed the original bug on preS2, aligned with the other cases)
     33    //  - for manip, lengths are near the auxiliary buffer size of 128
     34    // Only first case repeats for string_res; rest run only from the passthru string layer.
     35    // Similarly, the manipulator breadth isn't checked at the cstring layer either.
     36    {
     37        // S: string, no manipulator
     38        void echoTillX(const char * casename) {
     39            string s;
     40            do {
     41                forceStringHeapFreeSpaceTo(9);
     42                sin | s;
     43                sout | casename | s;
     44            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     45        }
     46        echoTillX("preS1");
     47        echoTillX("preS2");
     48        echoTillX("preS3");
     49        echoTillX("preS4");
     50    }
     51    {
     52        // SMN: string, manipulator for no-op
     53        void echoTillX(const char * casename) {
     54            string s;
     55            do {
     56                sin | plainjane( s );
     57                sout | casename | s;
     58            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     59        }
     60        echoTillX("preSMN1");
     61        echoTillX("preSMN2");
     62        echoTillX("preSMN3");
     63        echoTillX("preSMN4");
     64    }
     65    {
     66        // RMN: string_res, manipulator for no-op
     67        void echoTillX(const char * casename) {
     68            string_res s;
     69            do {
     70                sin | plainjane( s );
     71                sout | casename | s;
     72            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     73        }
     74        echoTillX("preRMN1");
     75        echoTillX("preRMN2");
     76        echoTillX("preRMN3");
     77        echoTillX("preRMN4");
     78    }
     79    {
     80        // SMI: string, manipulator `incl`
     81        void echoTillX(const char * casename) {
     82            string s;
     83            do {
     84                sin | skip("-\n");
     85                sin | incl( ".:|# x", s );
     86                sout | casename | " \"" | s | "\"";
     87            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     88        }
     89        echoTillX("preSMI1");
     90        echoTillX("preSMI2");
     91        echoTillX("preSMI3");
     92        echoTillX("preSMI4");
     93    }
     94    {
     95        // SME: string, manipulator `excl`
     96        void echoTillX(const char * casename) {
     97            string s;
     98            do {
     99                sin | skip("-\n");
     100                sin | excl( "-\n", s );
     101                sout | casename | " \"" | s | "\"";
     102            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     103        }
     104        echoTillX("preSME1");
     105        echoTillX("preSME2");
     106        echoTillX("preSME3");
     107        echoTillX("preSME4");
     108    }
     109    sin | skip("-\n");
     110    {
     111        // SMG: string, manipulator `getline`
     112        void echoTillX(const char * casename) {
     113            string s;
     114            do {
     115                sin | getline( s );
     116                sout | casename | s;
     117            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     118        }
     119        echoTillX("preSMG1");
     120        echoTillX("preSMG2");
     121        echoTillX("preSMG3");
     122        echoTillX("preSMG4");
     123    }
     124    {
     125        // SMD: string, manipulator (`getline` with custom) delimiter
     126        void echoTillX(const char * casename) {
     127            string s;
     128            do {
     129                sin | getline( s, '@' );
     130                sout | casename | s;
     131            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     132            sin | skip(" \n");
     133        }
     134        echoTillX("preSMD1");
     135        echoTillX("preSMD2");
     136        echoTillX("preSMD3");
     137        echoTillX("preSMD4");
     138    }
     139
    7140    /* Keep harmonized with io/manipulatorsInput */
    8141    {
     
    31164                sin | "\n";
    32165        }
     166    // Full repeat on string_res layer assures the full manipulator vocabulary is supported there.
    33167    {
    34168        string_res s = "yyyyyyyyyyyyyyyyyyyy";
Note: See TracChangeset for help on using the changeset viewer.