/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * This file is part of J11.                                         *
 * See the file "J11-LICENSE" for Copyright information and the      *
 * terms and conditions for copying, distribution and                *
 * modification of J11.                                              *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

Package org.sablecc.java11.unicodepreprocessor;

Helpers

    /* Any character in the range 0 .. 0xffff. */
    any_char     = [0 .. 0xffff];

    /* A single hexadecimal digit, lower or upper case. */
    hex          = [['0' .. '9'] + [['a' .. 'f'] + ['A' .. 'F']]];

    /* A backslash followed by one or more 'u' characters: the start of */
    /* every unicode escape, well formed or not.                        */
    escape_intro = '\' 'u'+;

States

    normal,
    sub;

Tokens

/*************************************************************************
 * Token ORDER and LENGTH both matter here: the longest match wins, and  *
 * between matches of equal length the token defined first wins.         *
 *                                                                       *
 * Example: the input '\\u' produces two tokens, even_backslash('\\')    *
 * followed by raw_input_character('u'); it is NOT read as a backslash   *
 * followed by the start of a unicode escape.                            *
 *************************************************************************/

    /* Two consecutive backslashes.                                      */
    /* The preprocessor should return two unicode characters: '\' '\'.   */
    even_backslash =
        '\\';

    /* A complete escape: backslash, one or more 'u', four hex digits.   */
    /* The preprocessor should return a single unicode character.        */
    unicode_escape =
        escape_intro hex hex hex hex;

    /* A truncated escape: backslash, one or more 'u', and at most three */
    /* hex digits.                                                       */
    /* The preprocessor should issue an error.                           */
    erroneous_escape =
        escape_intro hex? hex? hex?;

    /* The SUB ASCII character (0x001a). It is recognized in both states */
    /* and moves the lexer from state normal to state sub.               */
    /*                                                                   */
    /* The preprocessor should discard a SUB character if it is the last */
    /* character on the input reader.                                    */
    /* This requires a customized lexer, derived from Lexer, that holds  */
    /* the TSub token in a buffer until the next token has been read:    */
    /*   - if the next token is EOF, EOF is returned and the state is    */
    /*     reset to normal;                                              */
    /*   - otherwise, the text of the next token is pushed back on the   */
    /*     input reader, the buffered TSub token is returned, and the    */
    /*     state is reset to normal.                                     */
    {normal->sub, sub}
    sub =
        0x001a;

    /* Catch-all for any single character not claimed by a token above.  */
    /* It must stay last so that sub wins the tie on 0x001a.             */
    /* The preprocessor should return a single unicode character.        */
    raw_input_character =
        any_char;