Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • src/Parser/lex.ll

    ra67b60e rbd4d011  
    55 * file "LICENCE" distributed with Cforall.
    66 *
    7  * lex.ll --
     7 * lex.l --
    88 *
    99 * Author           : Peter A. Buhr
    1010 * Created On       : Sat Sep 22 08:58:10 2001
    1111 * Last Modified By : Peter A. Buhr
    12  * Last Modified On : Wed Jun 28 21:03:45 2017
    13  * Update Count     : 529
     12 * Last Modified On : Mon Mar 13 08:36:17 2017
     13 * Update Count     : 506
    1414 */
    1515
     
    2727#include <cstdio>                                                                               // FILENAME_MAX
    2828
     29#include "lex.h"
     30#include "parser.h"                                                                             // YACC generated definitions based on C++ grammar
    2931#include "ParseNode.h"
    3032#include "TypedefTable.h"
     
    7577                                // numeric constants, CFA: '_' in constant
    7678hex_quad {hex}("_"?{hex}){3}
    77 integer_suffix "_"?(([uU](("ll"|"LL"|[lL])[iI]|[iI]?("ll"|"LL"|[lL])?))|([iI](("ll"|"LL"|[lL])[uU]|[uU]?("ll"|"LL"|[lL])?))|(("ll"|"LL"|[lL])([iI][uU]|[uU]?[iI]?)))
     79integer_suffix "_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)
    7880
    7981octal_digits ({octal})|({octal}({octal}|"_")*{octal})
     
    8991
    9092decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
    91 real_decimal {decimal_digits}"."{exponent}?{floating_suffix}?
    92 real_fraction "."{decimal_digits}{exponent}?{floating_suffix}?
    93 real_constant {decimal_digits}{real_fraction}
     93real_decimal {decimal_digits}"."
     94real_fraction "."{decimal_digits}
     95real_constant {decimal_digits}?{real_fraction}
    9496exponent "_"?[eE]"_"?[+-]?{decimal_digits}
    95                                 // GCC: D (double) and iI (imaginary) suffixes, and DL (long double)
    96 floating_suffix "_"?([fFdDlL][iI]?|[iI][lLfFdD]?|"DL")
     97                                // GCC: D (double), DL (long double) and iI (imaginary) suffixes
     98floating_suffix "_"?([fFdDlL][iI]?|"DL"|[iI][lLfFdD]?)
     99                                //floating_suffix "_"?([fFdD]|[lL]|[D][L])|([iI][lLfFdD])|([lLfFdD][iI]))
    97100floating_constant (({real_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?
    98101
     
    233236long                    { KEYWORD_RETURN(LONG); }
    234237lvalue                  { KEYWORD_RETURN(LVALUE); }                             // CFA
    235 monitor                 { KEYWORD_RETURN(MONITOR); }                    // CFA
     238monitor         { KEYWORD_RETURN(MONITOR); }                    // CFA
    236239mutex                   { KEYWORD_RETURN(MUTEX); }                              // CFA
    237240_Noreturn               { KEYWORD_RETURN(NORETURN); }                   // C11
     
    375378"?"{op_binary_over}"?"  { IDENTIFIER_RETURN(); }                // binary
    376379        /*
    377           This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the string "*?*?"  can be
    378           lexed as "*?"/"*?" or "*"/"?*?". Since it is common practice to put a unary operator juxtaposed to an identifier,
    379           e.g., "*i", users will be annoyed if they cannot do this with respect to operator identifiers. Therefore, there is
    380           a lexical look-ahead for the second case, with backtracking to return the leading unary operator and then
    381           reparsing the trailing operator identifier.  Otherwise a space is needed between the unary operator and operator
    382           identifier to disambiguate this common case.
    383 
    384           A similar issue occurs with the dereference, *?(...), and routine-call, ?()(...) identifiers.  The ambiguity
    385           occurs when the dereference operator has no parameters, *?() and *?()(...), requiring arbitrary whitespace
    386           look-ahead for the routine-call parameter-list to disambiguate.  However, the dereference operator must have a
    387           parameter/argument to dereference *?(...).  Hence, always interpreting the string *?() as * ?() does not preclude
    388           any meaningful program.
    389 
    390           The remaining cases are with the increment/decrement operators and conditional expression:
    391 
    392           i++? ...(...);
    393           i?++ ...(...);
    394 
    395           requiring arbitrary whitespace look-ahead for the operator parameter-list, even though that interpretation is an
    396       incorrect expression (juxtaposed identifiers).  Therefore, it is necessary to disambiguate these cases with a
    397       space:
    398 
    399           i++ ? i : 0;
    400           i? ++i : 0;
     380          This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the string "*?*?"
     381          can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practice to put a unary operator juxtaposed
     382          to an identifier, e.g., "*i", users will be annoyed if they cannot do this with respect to operator
     383          identifiers. Even with this special hack, there are 5 general cases that cannot be handled. The first
     384          case is for the function-call identifier "?()":
     385
     386          int * ?()();  // declaration: space required after '*'
     387          * ?()();      // expression: space required after '*'
     388
     389          Without the space, the string "*?()" is ambiguous without N-character look-ahead; it requires scanning
     390          ahead to determine if there is a '(', which is the start of an argument/parameter list.
     391
     392          The 4 remaining cases occur in expressions:
     393
     394          i++?i:0;              // space required before '?'
     395          i--?i:0;              // space required before '?'
     396          i?++i:0;              // space required after '?'
     397          i?--i:0;              // space required after '?'
     398
     399          In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as "i"/"++?" or
     400          "i++"/"?"; it requires scanning ahead to determine if there is a '(', which is the start of an argument
     401          list.  In the second two cases, the string "?++i" is ambiguous, where this string can be lexed as
     402          "?++"/"i" or "?"/"++i"; it requires scanning ahead to determine if there is a '(', which is the start of
     403          an argument list.
    401404        */
    402 {op_unary}"?"({op_unary_pre_post}|"()"|"[?]"|{op_binary_over}"?") {
     405{op_unary}"?"({op_unary_pre_post}|"[?]"|{op_binary_over}"?") {
    403406        // 1 or 2 character unary operator ?
    404407        int i = yytext[1] == '?' ? 1 : 2;
Note: See TracChangeset for help on using the changeset viewer.