Changeset 0860d9c


Ignore:
Timestamp:
Oct 5, 2023, 4:17:14 PM (17 months ago)
Author:
Michael Brooks <mlbrooks@…>
Branches:
master
Children:
4d860ea3
Parents:
b67b632
Message:

Fix read-to-variable-length-string cases when internal buffer fills.

Also fix read-to-cstring ability to give no-exception cases when an entire buffer fills.

The added test cases run, and fail, when run against prior libcfa.
Doing so illustrates a CFA-string-level bug.
Doing so illustrates a C-string-level changed semantics.

At the CFA-string level, the bug was, when reading strings of just the right length,
what should be two reads ("abc" then "def") gets mashed into one ("abcdef").
These cases are clearly bugs because a test program that just echoes chuncks of delimeted input would do so inaccurately.
They're just hard to drive because the relevant chunk lengths are implementation-dependent, and sometimes big.

At the C-string level, the semantic change concerns when to throw the cstring_length exception.
By this change, make the original semantics,
"An exception means the maximum number of characters was read," into
"An exception means that if the buffer were larger, then more characters would have been read."

The added test cases cover the respective stop conditions for manipulator state "%s", include, exclude, getline, and getline/delimiter.

Files:
10 edited

Legend:

Unmodified
Added
Removed
  • TabularUnified libcfa/src/collections/string_res.cfa

    rb67b632 r0860d9c  
    218218    // Read in chunks.  Often, one chunk is enough.  Keep the string that accumulates chunks last in the heap,
    219219    // so available room is rest of heap.  When a chunk fills the heap, force growth then take the next chunk.
    220     for (;;) {
     220    for (bool cont = true; cont; ) {
     221        cont = false;
     222
    221223        // Append dummy content to temp, forcing expansion when applicable (occurs always on subsequent loops)
    222224        // length 2 ensures room for at least one real char, plus scanf/pipe-cstr's null terminator
     
    228230        temp.Handle.ulink->EndVbyte -= 2;
    229231
    230         // rest of heap, less 1 byte for null terminator, is available to read into
    231         int lenReadable = (char*)temp.Handle.ulink->ExtVbyte - temp.Handle.ulink->EndVbyte - 1;
    232         assert (lenReadable >= 1);
     232        // rest of heap is available to read into
     233        int lenReadable = (char*)temp.Handle.ulink->ExtVbyte - temp.Handle.ulink->EndVbyte;
     234        assert (lenReadable >= 2);
    233235
    234236        // get bytes
    235         in | wdi( lenReadable + 1, lenReadable, temp.Handle.ulink->EndVbyte );
     237        try {
     238            in | wdi( lenReadable, temp.Handle.ulink->EndVbyte );
     239        } catch (cstring_length*) {
     240            cont = true;
     241        }
    236242        int lenWasRead = strlen(temp.Handle.ulink->EndVbyte);
    237243
     
    239245        temp.Handle.lnth += lenWasRead;
    240246        temp.Handle.ulink->EndVbyte += lenWasRead;
    241 
    242       if (lenWasRead < lenReadable) break;
    243247    }
    244248
  • TabularUnified libcfa/src/iostream.cfa

    rb67b632 r0860d9c  
    2222#include <float.h>                                                                              // DBL_DIG, LDBL_DIG
    2323#include <complex.h>                                                                    // creal, cimag
     24#include <ctype.h>                                                                              // isspace
    2425//#include <stdio.h>
    2526
     
    2930extern char *strcpy (char *__restrict __dest, const char *__restrict __src) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1, 2)));
    3031extern void *memcpy (void *__restrict __dest, const void *__restrict __src, size_t __n) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1, 2)));
     32extern char *strchr(const char *str, int ch);
    3133} // extern "C"
    3234
     
    991993                        // wd is buffer bytes available (for input chars + null terminator)
    992994                        // rwd is count of input chars
    993                         int rwd = f.flags.rwd ? f.wd : (f.wd - 1);
     995                        int rwd;
     996                        if (f.flags.rwd) {
     997                                verify (f.wd >= 0);
     998                                rwd = f.wd;
     999                        } else {
     1000                                verify (f.wd >= 1);
     1001                                rwd = f.wd - 1;
     1002                        } // if
    9941003                        start += sprintf( &fmtstr[start], "%d", rwd );
    9951004                }
     
    10181027                //fprintf( stderr, "KK %s %zd %d %c %s\n", fmtstr, len, check, f.s[check], f.s );
    10191028
    1020                 if (! f.flags.ignore && ! f.flags.rwd && f.s[check] != '\0' ) // sentinel overwritten ?
    1021                         throw (cstring_length){ &cstring_length_vt };
     1029                if ( ! f.flags.ignore && ! f.flags.rwd && f.s[check] != '\0' ) { // sentinel overwritten ?
     1030                        // buffer filled, but would we have kept going?
     1031                        if ( ! eof( is ) ) {
     1032                                char peek;
     1033                                fmt( is, "%c", &peek );
     1034                                ungetc( is, peek );
     1035                                bool hasMore;
     1036                                if (f.flags.delimiter) { // getline
     1037                                        hasMore = (peek != f.delimiter[0]);
     1038                                } else if (f.scanset) { // incl/excl
     1039                                        bool peekMatch = strchr(f.scanset, peek) != 0p;
     1040                                        hasMore = f.flags.inex ? (!peekMatch) : (peekMatch);
     1041                                } else { // %s
     1042                                        hasMore = !isspace(peek);
     1043                                }
     1044                                if (hasMore) throw (cstring_length){ &cstring_length_vt };
     1045                        } // if
     1046                } // if
    10221047
    10231048                if ( f.flags.delimiter ) {                                              // getline ?
  • TabularUnified tests/collections/.expect/string-istream-manip.txt

    rb67b632 r0860d9c  
     1preS1 0123456
     2preS1 x
     3preS2 01234567
     4preS2 x
     5preS3 012345678
     6preS3 x
     7preS4 0123456789
     8preS4 x
     9preSMN1 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456
     10preSMN1 x
     11preSMN2 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...01234567
     12preSMN2 x
     13preSMN3 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...012345678
     14preSMN3 x
     15preSMN4 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456789
     16preSMN4 x
     17preRMN1 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456
     18preRMN1 x
     19preRMN2 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...01234567
     20preRMN2 x
     21preRMN3 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...012345678
     22preRMN3 x
     23preRMN4 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456789
     24preRMN4 x
     25preSMI1 "...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...       "
     26preSMI1 "x"
     27preSMI2 "...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...        "
     28preSMI2 "x"
     29preSMI3 "...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...         "
     30preSMI3 "x"
     31preSMI4 "...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...          "
     32preSMI4 "x"
     33preSME1 "...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...       "
     34preSME1 "x"
     35preSME2 "...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...        "
     36preSME2 "x"
     37preSME3 "...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...         "
     38preSME3 "x"
     39preSME4 "...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...          "
     40preSME4 "x"
     41preSMG1 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456
     42preSMG1 x
     43preSMG2 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...01234567
     44preSMG2 x
     45preSMG3 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...012345678
     46preSMG3 x
     47preSMG4 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456789
     48preSMG4 x
     49preSMD1 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456
     50preSMD1 x
     51preSMD2 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...01234567
     52preSMD2 x
     53preSMD3 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...012345678
     54preSMD3 x
     55preSMD4 ...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456789
     56preSMD4 x
    1571 yyyyyyyyyyyyyyyyyyyy
    2582 abcxxx
  • TabularUnified tests/collections/.in/string-istream-manip.txt

    rb67b632 r0860d9c  
     10123456 x
     201234567 x
     3012345678 x
     40123456789 x
     5...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456 x
     6...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...01234567 x
     7...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...012345678 x
     8...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456789 x
     9...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456 x
     10...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...01234567 x
     11...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...012345678 x
     12...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456789 x
     13...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...       -x-
     14...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...        -x-
     15...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...         -x-
     16...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...          -x-
     17...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...       -x-
     18...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...        -x-
     19...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...         -x-
     20...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...          -x-
     21...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456
     22x
     23...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...01234567
     24x
     25...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...012345678
     26x
     27...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456789
     28x
     29...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456@x@
     30...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...01234567@x@
     31...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...012345678@x@
     32...:...:...:...|...:...:...:...|...:...:...:...|...:...:...:...#...:...:...:...|...:...:...:...|...:...:...:...|...:...0123456789@x@
    133abc
    234cccccb
  • TabularUnified tests/collections/string-istream-manip.cfa

    rb67b632 r0860d9c  
    33#include <collections/string.hfa>
    44#include <collections/string_res.hfa>
     5#include <stdio.h>
     6
     7// No-op manipulators.
     8// Temporary hack while there are two code paths in the string implementation.
     9// (One for reading plain strings, the other for reading via a manipulator.)
     10// The test cases that use plainjane(-) are exercising the via-manipulator code path,
     11// just with trivial manipulation.
     12static _Istream_Sstr plainjane( string     & s )  { return (_Istream_Sstr)@{  s, {{0p}, -1, {.flags.rwd : false}} }; }
     13static _Istream_Rstr plainjane( string_res & s )  { return (_Istream_Rstr)@{ &s, {{0p}, -1, {.flags.rwd : false}} }; }
     14
     15static void forceStringHeapFreeSpaceTo(int desiredSize) {
     16    for (1_000_000) {
     17        string x = "a";
     18        (void)x;
     19      if (desiredSize == DEBUG_string_bytes_avail_until_gc(DEBUG_string_heap())) return;
     20    }
     21    sout | "Unable to force size" | desiredSize | "in 1,000,000 tries";
     22}
    523
    624int main() {
     25    // These "pre" cases deal with issues analogous to the "pre" cases of io/manipulatorsInput.
     26    // The acceptance criterion is simpler but driving the cases is harder.
     27    // The tests just read strings and echo what they read; acceptance of simple echoing assures
     28    // no spurious splitting merging.
     29    // The lengths of the strings are chosen to match white-box knowledge of when the string layer
     30    // has tor drive the cstring layer through a second iteration:
     31    //  - for no-manip, lengths are near the room at end of string heap
     32    //    (chosen target size of 9 showed the original bug on preS2, aligned with the other cases)
     33    //  - for manip, lengths are near the auxiliary buffer size of 128
     34    // Only first case repeats for string_res; rest run only from the passthru string layer.
     35    // Similarly, the manipulator breadth isn't checked at the cstring layer either.
     36    {
     37        // S: string, no manipulator
     38        void echoTillX(const char * casename) {
     39            string s;
     40            do {
     41                forceStringHeapFreeSpaceTo(9);
     42                sin | s;
     43                sout | casename | s;
     44            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     45        }
     46        echoTillX("preS1");
     47        echoTillX("preS2");
     48        echoTillX("preS3");
     49        echoTillX("preS4");
     50    }
     51    {
     52        // SMN: string, manipulator for no-op
     53        void echoTillX(const char * casename) {
     54            string s;
     55            do {
     56                sin | plainjane( s );
     57                sout | casename | s;
     58            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     59        }
     60        echoTillX("preSMN1");
     61        echoTillX("preSMN2");
     62        echoTillX("preSMN3");
     63        echoTillX("preSMN4");
     64    }
     65    {
     66        // RMN: string_res, manipulator for no-op
     67        void echoTillX(const char * casename) {
     68            string_res s;
     69            do {
     70                sin | plainjane( s );
     71                sout | casename | s;
     72            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     73        }
     74        echoTillX("preRMN1");
     75        echoTillX("preRMN2");
     76        echoTillX("preRMN3");
     77        echoTillX("preRMN4");
     78    }
     79    {
     80        // SMI: string, manipulator `incl`
     81        void echoTillX(const char * casename) {
     82            string s;
     83            do {
     84                sin | skip("-\n");
     85                sin | incl( ".:|# x", s );
     86                sout | casename | " \"" | s | "\"";
     87            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     88        }
     89        echoTillX("preSMI1");
     90        echoTillX("preSMI2");
     91        echoTillX("preSMI3");
     92        echoTillX("preSMI4");
     93    }
     94    {
     95        // SME: string, manipulator `excl`
     96        void echoTillX(const char * casename) {
     97            string s;
     98            do {
     99                sin | skip("-\n");
     100                sin | excl( "-\n", s );
     101                sout | casename | " \"" | s | "\"";
     102            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     103        }
     104        echoTillX("preSME1");
     105        echoTillX("preSME2");
     106        echoTillX("preSME3");
     107        echoTillX("preSME4");
     108    }
     109    sin | skip("-\n");
     110    {
     111        // SMG: string, manipulator `getline`
     112        void echoTillX(const char * casename) {
     113            string s;
     114            do {
     115                sin | getline( s );
     116                sout | casename | s;
     117            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     118        }
     119        echoTillX("preSMG1");
     120        echoTillX("preSMG2");
     121        echoTillX("preSMG3");
     122        echoTillX("preSMG4");
     123    }
     124    {
     125        // SMD: string, manipulator (`getline` with custom) delimiter
     126        void echoTillX(const char * casename) {
     127            string s;
     128            do {
     129                sin | getline( s, '@' );
     130                sout | casename | s;
     131            } while ( size(s) > 0 && s[size(s)-1] != 'x' );
     132            sin | skip(" \n");
     133        }
     134        echoTillX("preSMD1");
     135        echoTillX("preSMD2");
     136        echoTillX("preSMD3");
     137        echoTillX("preSMD4");
     138    }
     139
    7140    /* Keep harmonized with io/manipulatorsInput */
    8141    {
     
    31164                sin | "\n";
    32165        }
     166    // Full repeat on string_res layer assures the full manipulator vocabulary is supported there.
    33167    {
    34168        string_res s = "yyyyyyyyyyyyyyyyyyyy";
  • TabularUnified tests/io/.expect/manipulatorsInput.arm64.txt

    rb67b632 r0860d9c  
    11pre1 "123456", canary ok
    2 pre2a "1234567", exception occurred, canary ok
    3 pre2b "89", canary ok
     2pre2 "1234567", canary ok
     3pre3a "1234567", exception occurred, canary ok
     4pre3b "8", canary ok
     5pre4a "1234567", exception occurred, canary ok
     6pre4b "89", canary ok
    471 yyyyyyyyyyyyyyyyyyyy
    582 abcxxx
  • TabularUnified tests/io/.expect/manipulatorsInput.x64.txt

    rb67b632 r0860d9c  
    11pre1 "123456", canary ok
    2 pre2a "1234567", exception occurred, canary ok
    3 pre2b "89", canary ok
     2pre2 "1234567", canary ok
     3pre3a "1234567", exception occurred, canary ok
     4pre3b "8", canary ok
     5pre4a "1234567", exception occurred, canary ok
     6pre4b "89", canary ok
    471 yyyyyyyyyyyyyyyyyyyy
    582 abcxxx
  • TabularUnified tests/io/.expect/manipulatorsInput.x86.txt

    rb67b632 r0860d9c  
    11pre1 "123456", canary ok
    2 pre2a "1234567", exception occurred, canary ok
    3 pre2b "89", canary ok
     2pre2 "1234567", canary ok
     3pre3a "1234567", exception occurred, canary ok
     4pre3b "8", canary ok
     5pre4a "1234567", exception occurred, canary ok
     6pre4b "89", canary ok
    471 yyyyyyyyyyyyyyyyyyyy
    582 abcxxx
  • TabularUnified tests/io/.in/manipulatorsInput.txt

    rb67b632 r0860d9c  
    11123456
     21234567
     312345678
    24123456789
    35abc
  • TabularUnified tests/io/manipulatorsInput.cfa

    rb67b632 r0860d9c  
    4545                }
    4646
    47                 rep("pre1");
    48                 rep("pre2a");
    49                 rep("pre2b");
     47                rep("pre1");    // 123456     |  123456
     48                rep("pre2");    // 1234567    |  1234567
     49                rep("pre3a");   // 12345678   |  1234567
     50                rep("pre3b");   //            |  8
     51                rep("pre4a");   // 123456789  |  1234567
     52                rep("pre4b");   //            |  89
     53
    5054                scanf("\n");  // next test does not start with %s so does not tolerate leading whitespace
    5155        }
Note: See TracChangeset for help on using the changeset viewer.