/*
 * Cforall Version 1.0.0 Copyright (C) 2015 University of Waterloo
 *
 * The contents of this file are covered under the licence agreement in the
 * file "LICENCE" distributed with Cforall.
 * 
 * lex.l -- 
 * 
 * Author           : Peter A. Buhr
 * Created On       : Sat Sep 22 08:58:10 2001
 * Last Modified By : Peter A. Buhr
 * Last Modified On : Thu Aug 18 22:17:30 2016
 * Update Count     : 472
 */

%option yylineno
%option nounput

%{
// This lexer assumes the program has been preprocessed by cpp. Hence, all user-level preprocessor directives have been
// performed and removed from the source. The only exceptions are preprocessor directives passed to the compiler (e.g.,
// line-number directives) and C/C++ style comments, which are ignored.

//**************************** Includes and Defines ****************************

#include <string>
#include <cstdio>										// FILENAME_MAX

#include "lex.h"
#include "parser.h"										// YACC generated definitions based on C++ grammar
#include "ParseNode.h"
#include "TypedefTable.h"

char *yyfilename;
std::string *strtext;									// accumulate parts of character and string constant value

// Token-return helpers used by the rule actions. Each expands to SEVERAL statements with no trailing semicolon, so it
// must be used as "MACRO( ... );" inside a braced block, never as the sole body of an unbraced if/else/loop.

// Stamp the current file name and line number into yylval.tok.loc, then return token x to the parser.
#define RETURN_LOCN(x)		yylval.tok.loc.file = yyfilename; yylval.tok.loc.line = yylineno; return( x )
// Token whose text matters: tok.str is a newly allocated copy of the matched text.
#define RETURN_VAL(x)		yylval.tok.str = new std::string( yytext ); RETURN_LOCN( x )
// Token fully identified by its kind: tok.str is set to null.
#define RETURN_CHAR(x)		yylval.tok.str = nullptr; RETURN_LOCN( x )
// Character/string constant: tok.str is the string accumulated in strtext across several rules.
#define RETURN_STR(x)		yylval.tok.str = strtext; RETURN_LOCN( x )

// White space and newlines expand to nothing, so they never produce a token.
#define WHITE_RETURN(x)		// do nothing
#define NEWLINE_RETURN()	WHITE_RETURN( '\n' )
#define ASCIIOP_RETURN()	RETURN_CHAR( (int)yytext[0] ) // single character operator
#define NAMEDOP_RETURN(x)	RETURN_CHAR( x )			// multichar operator, with a name
#define NUMERIC_RETURN(x)	rm_underscore(); RETURN_VAL( x ) // numeric constant
#define KEYWORD_RETURN(x)	RETURN_CHAR( x )			// keyword
// The token kind of an identifier is whatever typedefTable.isKind reports for the matched text.
#define IDENTIFIER_RETURN()	RETURN_VAL( typedefTable.isKind( yytext ) )
#define ATTRIBUTE_RETURN()	RETURN_VAL( ATTR_IDENTIFIER )

void rm_underscore() {
	// Squeeze every '_' out of the current token in place: walk the text with a read cursor and copy each character
	// that is kept down to a trailing write cursor, then re-terminate the text and update its recorded length.
	char *out = yytext;
	for ( const char *in = yytext; *in != '\0'; ++in ) {
		if ( *in == '_' ) continue;						// drop separator
		*out = *in;
		out += 1;
	} // for
	*out = '\0';
	yyleng = out - yytext;								// number of characters kept
}

%}

/* Basic digit classes used by the identifier, numeric-constant and escape-sequence definitions. */
octal [0-7]
nonzero [1-9]
decimal [0-9]
hex [0-9a-fA-F]
				// universal character name: \u followed by 4 hex digits or \U followed by 8 hex digits; hex_quad is
				// defined with the numeric constants and permits '_' between its digits
universal_char "\\"((u"_"?{hex_quad})|(U"_"?{hex_quad}{2}))

				// identifier, GCC: $ in identifier; a universal character name may appear wherever a letter may
identifier ([a-zA-Z_$]|{universal_char})([0-9a-zA-Z_$]|{universal_char})*

				// quoted identifier: an identifier wrapped in back quotes (the rules below handle back quotes through
				// the BKQUOTE start condition rather than through this definition)
quoted_identifier "`"{identifier}"`"

				// attribute identifier, GCC: $ in identifier; an identifier prefixed by '@'
attr_identifier "@"{identifier}

				// numeric constants, CFA: '_' in constant
				// exactly four hex digits, adjacent digits optionally separated by a single '_'
hex_quad {hex}("_"?{hex}){3}
				// optional '_', then unsigned (u/U) and/or long (l/L, ll/LL) markers in either order
integer_suffix "_"?(([uU][lL]?)|([uU]("ll"|"LL")?)|([lL][uU]?)|("ll"|"LL")[uU]?)

				// all *_digits forms: one digit, or a run of digits and '_' that begins and ends with a digit
octal_digits ({octal})|({octal}({octal}|"_")*{octal})
octal_prefix "0""_"?
				// a lone 0, or the 0 prefix followed by octal digits
octal_constant (("0")|({octal_prefix}{octal_digits})){integer_suffix}?

nonzero_digits ({nonzero})|({nonzero}({decimal}|"_")*{decimal})
decimal_constant {nonzero_digits}{integer_suffix}?

hex_digits ({hex})|({hex}({hex}|"_")*{hex})
hex_prefix "0"[xX]"_"?
hex_constant {hex_prefix}{hex_digits}{integer_suffix}?

decimal_digits ({decimal})|({decimal}({decimal}|"_")*{decimal})
				// digits on at least one side of the decimal point
fractional_constant ({decimal_digits}?"."{decimal_digits})|({decimal_digits}".")
exponent "_"?[eE]"_"?[+-]?{decimal_digits}
				// GCC: D (double), DL (long double) and iI (imaginary) suffixes
floating_suffix "_"?([fFdDlL][iI]?|"DL"|[iI][lLfFdD]?)
				//floating_suffix "_"?([fFdD]|[lL]|[D][L])|([iI][lLfFdD])|([lLfFdD][iI]))
				// the exponent is optional after a fractional part but required after a plain digit sequence
floating_constant (({fractional_constant}{exponent}?)|({decimal_digits}{exponent})){floating_suffix}?

binary_exponent "_"?[pP]"_"?[+-]?{decimal_digits}
hex_fractional_constant ({hex_digits}?"."{hex_digits})|({hex_digits}".")
				// unlike the decimal form, the (binary) exponent is required in both alternatives
hex_floating_constant {hex_prefix}(({hex_fractional_constant}{binary_exponent})|({hex_digits}{binary_exponent})){floating_suffix}?

				// character escape sequence, GCC: \e => esc character
simple_escape "\\"[abefnrtv'"?\\]
				// ' stop highlighting
				// backslash followed by one to three octal digits, CFA: optional '_' between digits
octal_escape "\\"{octal}("_"?{octal}){0,2}
				// backslash-x followed by any number of hex digits, CFA: optional '_' after the x
hex_escape "\\""x""_"?{hex_digits}
escape_seq {simple_escape}|{octal_escape}|{hex_escape}|{universal_char}
				// encoding prefixes accepted before a character constant (cwide) and a string constant (swide)
cwide_prefix "L"|"U"|"u"
swide_prefix {cwide_prefix}|"u8"

				// display/white-space characters, given as octal character codes
h_tab [\011]
form_feed [\014]
v_tab [\013]
c_return [\015]
				// horizontal white space: blank or horizontal tab
h_white [ ]|{h_tab}

				// overloadable operators, grouped by how they may be used; these definitions are combined with '?'
				// by the operator-identifier rules
op_unary_only "~"|"!"
op_unary_binary "+"|"-"|"*"
op_unary_pre_post "++"|"--"
op_unary {op_unary_only}|{op_unary_binary}|{op_unary_pre_post}

op_binary_only "/"|"%"|"^"|"&"|"|"|"<"|">"|"="|"=="|"!="|"<<"|">>"|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&="|"|="|"^="|"<<="|">>="
				// every operator usable in binary position: the unary/binary ones plus the binary-only ones
op_binary_over {op_unary_binary}|{op_binary_only}
				// op_binary_not_over "?"|"->"|"."|"&&"|"||"|"@="
				// operator {op_unary_pre_post}|{op_binary_over}|{op_binary_not_over}

/*
 * Exclusive start conditions: while one is active, only the rules explicitly tagged with it can match.
 *   COMMENT  inside a C style comment
 *   BKQUOTE  inside a back-quoted identifier
 *   QUOTE    inside a character constant
 *   STRING   inside a string constant
 */
%x COMMENT
%x BKQUOTE
%x QUOTE
%x STRING

%%
				   /* line directives */
^{h_white}*"#"{h_white}*[0-9]+{h_white}*["][^"\n]+["].*"\n" {
	/* " stop highlighting */
	// Line directive left by the preprocessor: a line number, a quoted file name, then optional trailing text.
	// Record the file name and line number so that subsequent tokens are stamped with the original source location.
	static char filename[FILENAME_MAX];					// character buffer for current source-file name; static because
														// yyfilename keeps pointing at it after this action returns
	char *end_num;
	char *begin_string, *end_string;
	long lineno, length;
	// The pattern permits white space before the #, so locate the # instead of assuming it is the first character.
	// The line number of a directive is decimal, hence base 10 (base 0 would read a leading zero as octal).
	lineno = strtol( strchr( yytext, '#' ) + 1, &end_num, 10 );
	begin_string = strchr( end_num, '"' );
	if ( begin_string ) {								// file name ?
		end_string = strchr( begin_string + 1, '"' );	// look for ending delimiter
		assert( end_string );							// closing quote ?
		length = end_string - begin_string - 1;			// file-name length without quotes or sentinel
		assert( length < FILENAME_MAX );				// room for sentinel ?
		memcpy( filename, begin_string + 1, length );	// copy file name from yytext
		filename[ length ] = '\0';						// terminate string with sentinel
		//std::cout << "file " << filename << " line " << lineno << std::endl;
		yylineno = lineno;								// directive gives the number of the line that follows it
		yyfilename = filename;							// publish the buffer itself, not its first element
	} // if
}

				/* ignore preprocessor directives (for now): any other line whose first non-blank character is # is
				   discarded up to and including its newline */
^{h_white}*"#"[^\n]*"\n" ;

				/* ignore C style comments (ALSO HANDLED BY CPP): the opener enters the exclusive COMMENT state,
				   where every character is discarded one at a time until the two-character terminator is seen */
"/*"			{ BEGIN COMMENT; }
<COMMENT>.|\n	;
<COMMENT>"*/"	{ BEGIN 0; }

				/* ignore C++ style comments (ALSO HANDLED BY CPP): discarded through the end-of-line newline */
"//"[^\n]*"\n"	;

				/* ignore whitespace: the WHITE_RETURN and NEWLINE_RETURN macros expand to nothing, so no token
				   is produced for blanks, tabs, other white-space characters or newlines */
{h_white}+		{ WHITE_RETURN(' '); }
({v_tab}|{c_return}|{form_feed})+ { WHITE_RETURN(' '); }
({h_white}|{v_tab}|{c_return}|{form_feed})*"\n" { NEWLINE_RETURN(); }

				/* keywords: these rules precede the identifier rule, so text that is exactly a keyword is returned
				   as the keyword (among equal-length matches flex chooses the earliest rule), while a longer
				   identifier that merely starts with a keyword still matches the identifier rule. Alternative
				   spellings (C11, C99, GCC, CFA as marked) map onto the same token. */
_Alignas		{ KEYWORD_RETURN(ALIGNAS); }			// C11
_Alignof		{ KEYWORD_RETURN(ALIGNOF); }			// C11
__alignof		{ KEYWORD_RETURN(ALIGNOF); }			// GCC
__alignof__		{ KEYWORD_RETURN(ALIGNOF); }			// GCC
asm				{ KEYWORD_RETURN(ASM); }
__asm			{ KEYWORD_RETURN(ASM); }				// GCC
__asm__			{ KEYWORD_RETURN(ASM); }				// GCC
_At				{ KEYWORD_RETURN(AT); }					// CFA
_Atomic			{ KEYWORD_RETURN(ATOMIC); }				// C11
__attribute		{ KEYWORD_RETURN(ATTRIBUTE); }			// GCC
__attribute__	{ KEYWORD_RETURN(ATTRIBUTE); }			// GCC
auto			{ KEYWORD_RETURN(AUTO); }
_Bool			{ KEYWORD_RETURN(BOOL); }				// C99
break			{ KEYWORD_RETURN(BREAK); }
case			{ KEYWORD_RETURN(CASE); }
catch			{ KEYWORD_RETURN(CATCH); }				// CFA
catchResume		{ KEYWORD_RETURN(CATCHRESUME); }		// CFA
char			{ KEYWORD_RETURN(CHAR); }
choose			{ KEYWORD_RETURN(CHOOSE); }				// CFA
_Complex		{ KEYWORD_RETURN(COMPLEX); }			// C99
__complex		{ KEYWORD_RETURN(COMPLEX); }			// GCC
__complex__		{ KEYWORD_RETURN(COMPLEX); }			// GCC
const			{ KEYWORD_RETURN(CONST); }
__const			{ KEYWORD_RETURN(CONST); }				// GCC
__const__		{ KEYWORD_RETURN(CONST); }				// GCC
continue		{ KEYWORD_RETURN(CONTINUE); }
default			{ KEYWORD_RETURN(DEFAULT); }
disable			{ KEYWORD_RETURN(DISABLE); }			// CFA
do				{ KEYWORD_RETURN(DO); }
double			{ KEYWORD_RETURN(DOUBLE); }
dtype			{ KEYWORD_RETURN(DTYPE); }				// CFA
else			{ KEYWORD_RETURN(ELSE); }
enable			{ KEYWORD_RETURN(ENABLE); }				// CFA
enum			{ KEYWORD_RETURN(ENUM); }
__extension__	{ KEYWORD_RETURN(EXTENSION); }			// GCC
extern			{ KEYWORD_RETURN(EXTERN); }
fallthrough		{ KEYWORD_RETURN(FALLTHRU); }			// CFA
fallthru		{ KEYWORD_RETURN(FALLTHRU); }			// CFA
finally			{ KEYWORD_RETURN(FINALLY); }			// CFA
float			{ KEYWORD_RETURN(FLOAT); }
__float128		{ KEYWORD_RETURN(FLOAT); }				// GCC
for				{ KEYWORD_RETURN(FOR); }
forall			{ KEYWORD_RETURN(FORALL); }				// CFA
fortran			{ KEYWORD_RETURN(FORTRAN); }
ftype			{ KEYWORD_RETURN(FTYPE); }				// CFA
_Generic		{ KEYWORD_RETURN(GENERIC); }			// C11
goto			{ KEYWORD_RETURN(GOTO); }
if				{ KEYWORD_RETURN(IF); }
_Imaginary		{ KEYWORD_RETURN(IMAGINARY); }			// C99
__imag			{ KEYWORD_RETURN(IMAGINARY); }			// GCC
__imag__		{ KEYWORD_RETURN(IMAGINARY); }			// GCC
inline			{ KEYWORD_RETURN(INLINE); }				// C99
__inline		{ KEYWORD_RETURN(INLINE); }				// GCC
__inline__		{ KEYWORD_RETURN(INLINE); }				// GCC
int				{ KEYWORD_RETURN(INT); }
__int128		{ KEYWORD_RETURN(INT); }				// GCC
__label__		{ KEYWORD_RETURN(LABEL); }				// GCC
long			{ KEYWORD_RETURN(LONG); }
lvalue			{ KEYWORD_RETURN(LVALUE); }				// CFA
_Noreturn		{ KEYWORD_RETURN(NORETURN); }			// C11
__builtin_offsetof { KEYWORD_RETURN(OFFSETOF); }		// GCC
otype			{ KEYWORD_RETURN(OTYPE); }				// CFA
register		{ KEYWORD_RETURN(REGISTER); }
restrict		{ KEYWORD_RETURN(RESTRICT); }			// C99
__restrict		{ KEYWORD_RETURN(RESTRICT); }			// GCC
__restrict__	{ KEYWORD_RETURN(RESTRICT); }			// GCC
return			{ KEYWORD_RETURN(RETURN); }
short			{ KEYWORD_RETURN(SHORT); }
signed			{ KEYWORD_RETURN(SIGNED); }
__signed		{ KEYWORD_RETURN(SIGNED); }				// GCC
__signed__		{ KEYWORD_RETURN(SIGNED); }				// GCC
sizeof			{ KEYWORD_RETURN(SIZEOF); }
static			{ KEYWORD_RETURN(STATIC); }
_Static_assert	{ KEYWORD_RETURN(STATICASSERT); }		// C11
struct			{ KEYWORD_RETURN(STRUCT); }
switch			{ KEYWORD_RETURN(SWITCH); }
_Thread_local	{ KEYWORD_RETURN(THREADLOCAL); }		// C11
throw			{ KEYWORD_RETURN(THROW); }				// CFA
throwResume		{ KEYWORD_RETURN(THROWRESUME); }		// CFA
trait			{ KEYWORD_RETURN(TRAIT); }				// CFA
try				{ KEYWORD_RETURN(TRY); }				// CFA
typedef			{ KEYWORD_RETURN(TYPEDEF); }
typeof			{ KEYWORD_RETURN(TYPEOF); }				// GCC
__typeof		{ KEYWORD_RETURN(TYPEOF); }				// GCC
__typeof__		{ KEYWORD_RETURN(TYPEOF); }				// GCC
union			{ KEYWORD_RETURN(UNION); }
unsigned		{ KEYWORD_RETURN(UNSIGNED); }
__builtin_va_list { KEYWORD_RETURN(VALIST); }			// GCC
void			{ KEYWORD_RETURN(VOID); }
volatile		{ KEYWORD_RETURN(VOLATILE); }
__volatile		{ KEYWORD_RETURN(VOLATILE); }			// GCC
__volatile__	{ KEYWORD_RETURN(VOLATILE); }			// GCC
while			{ KEYWORD_RETURN(WHILE); }

				/* identifier: the token kind comes from the typedef table lookup done by IDENTIFIER_RETURN */
{identifier}	{ IDENTIFIER_RETURN(); }
{attr_identifier} { ATTRIBUTE_RETURN(); }
				/* back-quoted identifier: the back quotes only switch the exclusive BKQUOTE state on and off and
				   yield no token; inside that state the keyword rules are inactive, so any identifier spelling,
				   including that of a keyword, is returned through IDENTIFIER_RETURN */
"`"				{ BEGIN BKQUOTE; }
<BKQUOTE>{identifier} { IDENTIFIER_RETURN(); }
<BKQUOTE>"`"	{ BEGIN 0; }

				/* numeric constants: NUMERIC_RETURN strips underscores from the matched text before copying it.
				   The rules for a lone 0 and a lone 1 come first so that those two literals get their own tokens
				   rather than INTEGERconstant (they tie in length with the octal and decimal rules, and the
				   earliest rule wins a tie). */
"0"				{ NUMERIC_RETURN(ZERO); }				// CFA
"1"				{ NUMERIC_RETURN(ONE); }				// CFA
{decimal_constant} { NUMERIC_RETURN(INTEGERconstant); }
{octal_constant} { NUMERIC_RETURN(INTEGERconstant); }
{hex_constant}	{ NUMERIC_RETURN(INTEGERconstant); }
{floating_constant}	{ NUMERIC_RETURN(FLOATINGconstant); }
{hex_floating_constant}	{ NUMERIC_RETURN(FLOATINGconstant); }

				/* character constant, allows empty value: the opening quote (with optional prefix, and optional
				   underscore after the prefix, which rm_underscore removes) starts a new accumulation string and
				   enters the QUOTE state; runs of ordinary characters are appended; the closing quote OR a newline
				   ends the constant and returns the accumulated text, delimiters included */
({cwide_prefix}[_]?)?['] { BEGIN QUOTE; rm_underscore(); strtext = new std::string; *strtext += std::string( yytext ); }
<QUOTE>[^'\\\n]* { *strtext += std::string( yytext ); }
<QUOTE>['\n]	{ BEGIN 0; *strtext += std::string( yytext); RETURN_STR(CHARACTERconstant); }
				/* ' stop highlighting */

				/* string constant: same scheme as the character constant, using the STRING state */
({swide_prefix}[_]?)?["] { BEGIN STRING; rm_underscore(); strtext = new std::string; *strtext += std::string( yytext ); }
<STRING>[^"\\\n]* { *strtext += std::string( yytext ); }
<STRING>["\n]	{ BEGIN 0; *strtext += std::string( yytext ); RETURN_STR(STRINGliteral); }
				/* " stop highlighting */

				/* common character/string constant: a recognized escape sequence is appended with its underscores
				   removed; a backslash-newline continuation is dropped; any other backslash is appended as is */
<QUOTE,STRING>{escape_seq} { rm_underscore(); *strtext += std::string( yytext ); }
<QUOTE,STRING>"\\"{h_white}*"\n" {}						// continuation (ALSO HANDLED BY CPP)
<QUOTE,STRING>"\\" { *strtext += std::string( yytext ); } // unknown escape character

				/* punctuation: a single-character token is returned as its own character code */
"["				{ ASCIIOP_RETURN(); }
"]"				{ ASCIIOP_RETURN(); }
"("				{ ASCIIOP_RETURN(); }
")"				{ ASCIIOP_RETURN(); }
"{"				{ ASCIIOP_RETURN(); }
"}"				{ ASCIIOP_RETURN(); }
","				{ ASCIIOP_RETURN(); }					// also operator
":"				{ ASCIIOP_RETURN(); }
";"				{ ASCIIOP_RETURN(); }
"."				{ ASCIIOP_RETURN(); }					// also operator
"..."			{ NAMEDOP_RETURN(ELLIPSIS); }

				/* alternative C99 brackets, "%:" & "%:%:" handled by preprocessor; each digraph is returned as the
				   bracket it stands for, with the digraph spelling kept as the token text */
"<:"			{ RETURN_VAL('['); }
":>"			{ RETURN_VAL(']'); }
"<%"			{ RETURN_VAL('{'); }
"%>"			{ RETURN_VAL('}'); }

				/* operators: single-character operators are returned as their own character code */
"!"				{ ASCIIOP_RETURN(); }
"+"				{ ASCIIOP_RETURN(); }
"-"				{ ASCIIOP_RETURN(); }
"*"				{ ASCIIOP_RETURN(); }
"/"				{ ASCIIOP_RETURN(); }
"%"				{ ASCIIOP_RETURN(); }
"^"				{ ASCIIOP_RETURN(); }
"~"				{ ASCIIOP_RETURN(); }
"&"				{ ASCIIOP_RETURN(); }
"|"				{ ASCIIOP_RETURN(); }
"<"				{ ASCIIOP_RETURN(); }
">"				{ ASCIIOP_RETURN(); }
"="				{ ASCIIOP_RETURN(); }
"?"				{ ASCIIOP_RETURN(); }

				/* multi-character operators are returned as named tokens */
"++"			{ NAMEDOP_RETURN(ICR); }
"--"			{ NAMEDOP_RETURN(DECR); }
"=="			{ NAMEDOP_RETURN(EQ); }
"!="			{ NAMEDOP_RETURN(NE); }
"<<"			{ NAMEDOP_RETURN(LS); }
">>"			{ NAMEDOP_RETURN(RS); }
"<="			{ NAMEDOP_RETURN(LE); }
">="			{ NAMEDOP_RETURN(GE); }
"&&"			{ NAMEDOP_RETURN(ANDAND); }
"||"			{ NAMEDOP_RETURN(OROR); }
"->"			{ NAMEDOP_RETURN(ARROW); }
"+="			{ NAMEDOP_RETURN(PLUSassign); }
"-="			{ NAMEDOP_RETURN(MINUSassign); }
"*="			{ NAMEDOP_RETURN(MULTassign); }
"/="			{ NAMEDOP_RETURN(DIVassign); }
"%="			{ NAMEDOP_RETURN(MODassign); }
"&="			{ NAMEDOP_RETURN(ANDassign); }
"|="			{ NAMEDOP_RETURN(ORassign); }
"^="			{ NAMEDOP_RETURN(ERassign); }
"<<="			{ NAMEDOP_RETURN(LSassign); }
">>="			{ NAMEDOP_RETURN(RSassign); }

"@="			{ NAMEDOP_RETURN(ATassign); }			// CFA

				/* CFA, operator identifier: an operator written with ? marking each operand position is lexed as
				   a single identifier and returned through IDENTIFIER_RETURN */
{op_unary}"?"	{ IDENTIFIER_RETURN(); }				// unary
"?"({op_unary_pre_post}|"()"|"[?]"|"{}") { IDENTIFIER_RETURN(); }
"^?{}" { IDENTIFIER_RETURN(); }
"?"{op_binary_over}"?"	{ IDENTIFIER_RETURN(); }		// binary
	/*
	  This rule handles ambiguous cases with operator identifiers, e.g., "int *?*?()", where the string "*?*?"
	  can be lexed as "*"/"?*?" or "*?"/"*?". Since it is common practise to put a unary operator juxtaposed
	  to an identifier, e.g., "*i", users will be annoyed if they cannot do this with respect to operator
	  identifiers. Even with this special hack, there are 5 general cases that cannot be handled. The first
	  case is for the function-call identifier "?()":

	  int * ?()();	// declaration: space required after '*'
	  * ?()();	// expression: space required after '*'

	  Without the space, the string "*?()" is ambiguous without N character look ahead; it requires scanning
	  ahead to determine if there is a '(', which is the start of an argument/parameter list.

	  The 4 remaining cases occur in expressions:

	  i++?i:0;		// space required before '?'
	  i--?i:0;		// space required before '?'
	  i?++i:0;		// space required after '?'
	  i?--i:0;		// space required after '?'

	  In the first two cases, the string "i++?" is ambiguous, where this string can be lexed as "i"/"++?" or
	  "i++"/"?"; it requires scanning ahead to determine if there is a '(', which is the start of an argument
	  list.  In the second two cases, the string "?++x" is ambiguous, where this string can be lexed as
	  "?++"/"x" or "?"/"++x"; it requires scanning ahead to determine if there is a '(', which is the start of
	  an argument list.
	*/
{op_unary}"?"({op_unary_pre_post}|"[?]"|{op_binary_over}"?") {
	// The match is a unary operator immediately followed by an operator identifier. Only the leading unary
	// operator is kept as this token; everything from the first ? onward is given back to be lexed again.
	// 1 or 2 character unary operator ?
	int i = yytext[1] == '?' ? 1 : 2;
	yyless( i );		// put back characters up to first '?'
	if ( i > 1 ) {
		// two-character unary operator: increment or decrement, told apart by the first character
		NAMEDOP_RETURN( yytext[0] == '+' ? ICR : DECR );
	} else {
		// one-character unary operator: returned as its own character code
		ASCIIOP_RETURN();
	} // if
}

				/* unknown characters: any single character (other than newline) that no earlier rule accepts is
				   reported on standard output with the current line number and then skipped; no token is returned */
.			{ printf("unknown character(s):\"%s\" on line %d\n", yytext, yylineno); }

%%

// Local Variables: //
// mode: c++ //
// tab-width: 4 //
// compile-command: "make install" //
// End: //
