Changeset b1e63ac5 for src/Parser/lex.ll


Ignore:
Timestamp:
Jul 4, 2017, 9:40:16 AM (8 years ago)
Author:
Rob Schluntz <rschlunt@…>
Branches:
ADT, aaron-thesis, arm-eh, ast-experimental, cleanup-dtors, deferred_resn, demangler, enum, forall-pointer-decay, jacob/cs343-translation, jenkins-sandbox, master, new-ast, new-ast-unique-expr, new-env, no_list, persistent-indexer, pthread-emulation, qualifiedEnum, resolv-new, with_gc
Children:
208e5be
Parents:
9c951e3 (diff), f7cb0bc (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge branch 'master' into references

File:
1 edited

Legend:

Unmodified
Added
Removed
  • src/Parser/lex.ll

    r9c951e3 rb1e63ac5  
    55 * file "LICENCE" distributed with Cforall.
    66 *
    7  * lex.l --
     7 * lex.ll --
    88 *
    99 * Author           : Peter A. Buhr
    1010 * Created On       : Sat Sep 22 08:58:10 2001
    1111 * Last Modified By : Peter A. Buhr
    12  * Last Modified On : Mon Mar 13 08:36:17 2017
    13  * Update Count     : 506
     12 * Last Modified On : Wed Jun 28 21:03:45 2017
     13 * Update Count     : 529
    1414 */
    1515
     
    2727#include <cstdio>                                                                               // FILENAME_MAX
    2828
    29 #include "lex.h"
    30 #include "parser.h"                                                                             // YACC generated definitions based on C++ grammar
    3129#include "ParseNode.h"
    3230#include "TypedefTable.h"
     
    7775                                // numeric constants, CFA: '_' in constant
    7876hex_quad {hex}("_"?{hex}){3}
    79 integer_suffix "_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)
     77integer_suffix "_"?(([uU](("ll"|"LL"|[lL])[iI]|[iI]?("ll"|"LL"|[lL])?))|([iI](("ll"|"LL"|[lL])[uU]|[uU]?("ll"|"LL"|[lL])?))|(("ll"|"LL"|[lL])([iI][uU]|[uU]?[iI]?)))
    8078
    8179octal_digits ({octal})|({octal}({octal}|"_")*{octal})
     
    9189
    9290decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
    93 real_decimal {decimal_digits}"."
    94 real_fraction "."{decimal_digits}
    95 real_constant {decimal_digits}?{real_fraction}
     91real_decimal {decimal_digits}"."{exponent}?{floating_suffix}?
     92real_fraction "."{decimal_digits}{exponent}?{floating_suffix}?
     93real_constant {decimal_digits}{real_fraction}
    9694exponent "_"?[eE]"_"?[+-]?{decimal_digits}
    97                                 // GCC: D (double), DL (long double) and iI (imaginary) suffixes
    98 floating_suffix "_"?([fFdDlL][iI]?|"DL"|[iI][lLfFdD]?)
    99                                 //floating_suffix "_"?([fFdD]|[lL]|[D][L])|([iI][lLfFdD])|([lLfFdD][iI]))
     95                                // GCC: D (double) and iI (imaginary) suffixes, and DL (long double)
     96floating_suffix "_"?([fFdDlL][iI]?|[iI][lLfFdD]?|"DL")
    10097floating_constant (({real_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?
    10198
     
    236233long                    { KEYWORD_RETURN(LONG); }
    237234lvalue                  { KEYWORD_RETURN(LVALUE); }                             // CFA
    238 monitor         { KEYWORD_RETURN(MONITOR); }                    // CFA
     235monitor                 { KEYWORD_RETURN(MONITOR); }                    // CFA
    239236mutex                   { KEYWORD_RETURN(MUTEX); }                              // CFA
    240237_Noreturn               { KEYWORD_RETURN(NORETURN); }                   // C11
     
    378375"?"{op_binary_over}"?"  { IDENTIFIER_RETURN(); }                // binary
    379376        /*
    380           This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the string "*?*?"
    381           can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practise to put a unary operator juxtaposed
    382           to an identifier, e.g., "*i", users will be annoyed if they cannot do this with respect to operator
    383           identifiers. Even with this special hack, there are 5 general cases that cannot be handled. The first
    384           case is for the function-call identifier "?()":
    385 
    386           int * ?()();  // declaration: space required after '*'
    387           * ?()();      // expression: space required after '*'
    388 
    389           Without the space, the string "*?()" is ambiguous without N character look ahead; it requires scanning
    390           ahead to determine if there is a '(', which is the start of an argument/parameter list.
    391 
    392           The 4 remaining cases occur in expressions:
    393 
    394           i++?i:0;              // space required before '?'
    395           i--?i:0;              // space required before '?'
    396           i?++i:0;              // space required after '?'
    397           i?--i:0;              // space required after '?'
    398 
    399           In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as "i"/"++?" or
    400           "i++"/"?"; it requires scanning ahead to determine if there is a '(', which is the start of an argument
    401           list.  In the second two cases, the string "?++x" is ambiguous, where this string can be lexed as
    402           "?++"/"x" or "?"/"++x"; it requires scanning ahead to determine if there is a '(', which is the start of
    403           an argument list.
     377          This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the string "*?*?"  can be
     378          lexed as "*?"/"*?" or "*"/"?*?". Since it is common practise to put a unary operator juxtaposed to an identifier,
     379          e.g., "*i", users will be annoyed if they cannot do this with respect to operator identifiers. Therefore, there is
     380          a lexical look-ahead for the second case, with backtracking to return the leading unary operator and then
     381          reparsing the trailing operator identifier.  Otherwise a space is needed between the unary operator and operator
     382          identifier to disambiguate this common case.
     383
     384          A similar issue occurs with the dereference, *?(...), and routine-call, ?()(...) identifiers.  The ambiguity
     385          occurs when the dereference operator has no parameters, *?() and *?()(...), requiring arbitrary whitespace
     386          look-ahead for the routine-call parameter-list to disambiguate.  However, the dereference operator must have a
     387          parameter/argument to dereference *?(...).  Hence, always interpreting the string *?() as * ?() does not preclude
     388          any meaningful program.
     389
     390          The remaining cases are with the increment/decrement operators and conditional expression:
     391
     392          i++? ...(...);
     393          i?++ ...(...);
     394
     395          requiring arbitrary whitespace look-ahead for the operator parameter-list, even though that interpretation is an
     396      incorrect expression (juxtaposed identifiers).  Therefore, it is necessary to disambiguate these cases with a
     397      space:
     398
     399          i++ ? i : 0;
     400          i? ++i : 0;
    404401        */
    405 {op_unary}"?"({op_unary_pre_post}|"[?]"|{op_binary_over}"?") {
     402{op_unary}"?"({op_unary_pre_post}|"()"|"[?]"|{op_binary_over}"?") {
    406403        // 1 or 2 character unary operator ?
    407404        int i = yytext[1] == '?' ? 1 : 2;
Note: See TracChangeset for help on using the changeset viewer.