Changeset 20c2ade


Ignore:
Timestamp:
Sep 11, 2024, 3:49:00 PM (2 months ago)
Author:
Andrew Beach <ajbeach@…>
Branches:
master
Children:
5b95e67
Parents:
6325bd4
Message:

Parse string literals to get their length, taking into account escape sequences. Getting this from the lexer would avoid redundant work, but that is not set up. This corrects some typeof evaluations which led to incorrect sizeof expressions.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • src/Parser/ExpressionNode.cpp

    r6325bd4 r20c2ade  
    461461} // build_constantChar
    462462
     463static bool isoctal( char ch ) {
     464        return ('0' <= ch && ch <= '7');
     465}
     466
     467static bool ishexadecimal( char ch ) {
     468        return (('0' <= ch && ch <= '9')
     469                || ('a' <= ch && ch <= 'f')
     470                || ('A' <= ch && ch <= 'F'));
     471}
     472
     473// A "sequence" is the series of characters in a character/string literal
     474// that becomes a single character value in the runtime value.
     475static size_t sequenceLength( const std::string & str, size_t pos ) {
     476        // Most "sequences" are just a single character, filter those out:
     477        if ( '\\' != str[pos] ) return 1;
     478        switch ( str[pos + 1] ) {
     479        // Simple Escape Sequence (\_ where _ is one of the following):
     480        case '\'': case '\"': case '?': case '\\':
     481        case 'a': case 'b': case 'f': case 'n': case 'r': case 't': case 'v':
     482        // GCC Escape Sequence (as simple, just some different letters):
     483        case 'e':
     484                return 2;
     485        // Numeric Escape Sequence (\___ where _ is 1-3 octal digits):
     486        case '0': case '1': case '2': case '3':
     487        case '4': case '5': case '6': case '7':
     488                return ( !isoctal( str[pos + 2] ) ) ? 2 :
     489                       ( !isoctal( str[pos + 3] ) ) ? 3 : 4;
     490        // Numeric Escape Sequence (\x_ where _ is 1 or more hexadecimal digits):
     491        case 'x': {
     492                size_t length = 2;
     493                while ( ishexadecimal( str[pos + length] ) ) ++length;
     494                return length;
     495        }
     496        // Uniersal Character Name (\u____ where _ is 4 decimal digits):
     497        case 'u':
     498                return 6;
     499        // Uniersal Character Name (\U________ where _ is 8 decimal digits):
     500        case 'U':
     501                return 10;
     502        default:
     503                assertf( false, "Unknown escape sequence (start %c).", str[pos] );
     504                return 1;
     505        }
     506}
     507
    463508ast::Expr * build_constantStr(
    464509                const CodeLocation & location,
     
    485530                strtype = new ast::BasicType( ast::BasicKind::Char );
    486531        } // switch
     532
     533        // The dimension value of the type is equal to the number of "sequences"
     534        // not including the opening and closing quotes in the literal plus 1
     535        // for the implicit null terminator.
     536        size_t dimension = 1;
     537        for ( size_t pos = 1 ; pos < str.size() - 1 ;
     538                        pos += sequenceLength( str, pos ) ) {
     539                dimension += 1;
     540        }
     541
    487542        ast::ArrayType * at = new ast::ArrayType(
    488543                strtype,
    489                 // Length is adjusted: +1 for '\0' and -2 for '"'
    490                 ast::ConstantExpr::from_ulong( location, str.size() + 1 - 2 ),
     544                ast::ConstantExpr::from_ulong( location, dimension ),
    491545                ast::FixedLen,
    492546                ast::DynamicDim );
Note: See TracChangeset for help on using the changeset viewer.