/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * This file is part of J11.                                       *
 * See the file "J11-LICENSE" for Copyright information and the    *
 * terms and conditions for copying, distribution and              *
 * modification of J11.                                            *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/* Java package for the classes generated from this grammar: the Unicode */
/* escape preprocessor that runs ahead of the main Java lexer.           */
Package org.sablecc.java11.unicodepreprocessor;

Helpers
    /* Any single 16-bit character value the input reader can deliver. */
    any_unicode_character = [0x0000..0xffff];

    /* One or more 'u' characters, as they may follow the backslash of an escape. */
    unicode_marker = 'u'+;

    /* A single hexadecimal digit, in either letter case. */
    hex_digit = [['0'..'9'] + [['a'..'f'] + ['A'..'F']]];

/* Lexer states.                                                              */
/*   normal - the first state listed; regular preprocessing.                  */
/*   sub    - entered from normal when a SUB (0x001a) character is matched,   */
/*            so that a customized lexer can decide whether that SUB was the  */
/*            last character of the input. The customized lexer is expected   */
/*            to reset the state to normal afterwards.                        */
States
    normal,
    sub;

Tokens
/*************************************************************************************
 * Definition order and match length both matter here: a longer match takes          *
 * precedence, and so does an earlier definition. The sequence '\\u' therefore       *
 * generates two tokens: even_backslash('\\') and raw_input_character('u'), and is   *
 * never read as the start of a unicode escape.                                      *
 *************************************************************************************/

    even_backslash = '\\';
        /* A pair of consecutive backslashes.                                */
        /* The preprocessor should return two unicode characters: '\' '\'    */

    unicode_escape = '\' unicode_marker hex_digit hex_digit hex_digit hex_digit;
        /* A backslash, one or more 'u', then exactly four hexadecimal digits. */
        /* The preprocessor should return a single unicode character.          */
    
    erroneous_escape = '\' unicode_marker hex_digit? hex_digit? hex_digit?;
        /* A backslash and one or more 'u' followed by at most three          */
        /* hexadecimal digits, i.e. an incomplete escape. A complete escape   */
        /* is longer and is therefore matched by unicode_escape instead.      */
        /* The preprocessor should issue an error.                            */

/* The state list below applies to the sub token: it is recognized in the       */
/* normal state (switching the lexer to the sub state) and in the sub state.    */
{normal->sub, sub}
    sub = 0x001a;
        /* The preprocessor should discard a SUB ASCII character if it is the last  */
        /* character on the input reader.                                           */
        /* This requires a customized lexer that derives from Lexer and that holds  */
        /* the TSub token in a buffer until the next token has been read.           */
        /* If the next token is EOF, EOF is returned and the state should be reset  */
        /* to normal. Otherwise, the text of the next token is pushed back onto the */
        /* input reader, the buffered TSub token is returned, and the state is      */
        /* reset to normal.                                                         */

    raw_input_character = any_unicode_character;
        /* Any single character that no longer or earlier token above matches. */
        /* The preprocessor should return a single unicode character.          */