/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * This file is part of SableCC.                             *
 * See the file "LICENSE" for copyright information and the  *
 * terms and conditions for copying, distribution and        *
 * modification of SableCC.                                  *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/* This grammar defines the SableCC 3.x input language. */

/* Maintenance note: keep one definition per physical line (or at least
   end every '//' comment with a real newline). As defined by the
   short_comment helper below, a '//' comment extends to the end of the
   line, so joining lines silently comments out whatever follows it. */

Package org.sablecc.sablecc; // Root Java package for generated files.

Helpers

/* These are character sets and regular expressions used in the
   definition of tokens. */

    // Character classes.
    all = [0 .. 0xFFFF];
    lowercase = ['a' .. 'z'];
    uppercase = ['A' .. 'Z'];
    digit = ['0' .. '9'];
    hex_digit = [digit + [['a' .. 'f'] + ['A' .. 'F']]];

    // Control characters, given by their decimal character codes.
    tab = 9;
    cr = 13;
    lf = 10;
    eol = cr lf | cr | lf; // This takes care of different platforms

    // Complement sets used to delimit comments, chars and strings.
    not_cr_lf = [all - [cr + lf]];
    not_star = [all - '*'];
    not_star_slash = [not_star - '/'];

    blank = (' ' | tab | eol)+;

    // A short comment runs up to and including the end of the line.
    short_comment = '//' not_cr_lf* eol;

    // A long comment ends at the first run of stars that is directly
    // followed by a slash; long comments therefore do not nest.
    long_comment =
        '/*' not_star* '*'+ (not_star_slash not_star* '*'+)* '/';

    comment = short_comment | long_comment;

    letter = lowercase | uppercase | '_' | '$';

    // One underscore-free segment of an identifier: a lowercase letter
    // followed by lowercase letters and digits.
    id_part = lowercase (lowercase | digit)*;

States

    normal, /* The first state is the initial state. */
    package;

Tokens

/* These are token definitions. It is allowed to use helper regular
   expressions in the body of a token definition.

   On a given input, the longest valid definition is chosen. In case
   of a tie, the definition that appears first is chosen.
   Example: on input -> 's' <- "char" will have precedence on
   "string", because it appears first.

   Per the token_def production below, a state list in braces belongs
   to the single token definition that follows it. Definitions without
   a state list are presumably active in every state; confirm against
   the SableCC documentation before relying on it. */

    {package} pkg_id = letter (letter | digit)*;

    // Section keywords. 'Package' switches the lexer from the normal
    // state to the package state.
    {normal->package} package = 'Package';
    states = 'States';
    helpers = 'Helpers';
    tokens = 'Tokens';
    ignored = 'Ignored';
    productions = 'Productions';

    abstract = 'Abstract';
    syntax = 'Syntax';
    tree = 'Tree';
    new = 'New';
    null = 'Null';

    token_specifier = 'T';
    production_specifier = 'P';

    // Punctuation and operators.
    dot = '.';
    d_dot = '..';

    // A semicolon seen in the package state switches back to normal.
    {normal, package->normal} semicolon = ';';

    equal = '=';
    l_bkt = '[';
    r_bkt = ']';
    l_par = '(';
    r_par = ')';
    l_brace = '{';
    r_brace = '}';
    plus = '+';
    minus = '-';
    q_mark = '?';
    star = '*';
    bar = '|';
    comma = ',';
    slash = '/';
    arrow = '->';
    colon = ':';

    // Identifiers: id_part segments joined by single underscores.
    id = id_part ('_' id_part)*;

    // Character literals, in quoted, decimal and hexadecimal form.
    // "char" must stay ahead of "string" (see the tie rule above).
    char = ''' not_cr_lf ''';
    dec_char = digit+;
    hex_char = '0' ('x' | 'X') hex_digit+;

    string = ''' [not_cr_lf - ''']+ ''';

    blank = blank;
    comment = comment;

Ignored Tokens

/* These tokens are simply ignored by the parser. */

    blank,
    comment;

Productions

/* These are the productions of the grammar. The first production is
   used by the implicit start production:
       start = (first production) EOF;
   ?, * and + have the same meaning as in a regular expression.
   In case a token and a production share the same name, the use of
   P. (for production) or T. (for token) is required.
   Each alternative can be explicitly named by preceding it with a
   name enclosed in braces.
   Each alternative element can be explicitly named by preceding it
   with a name enclosed in brackets and followed by a colon.

   A "{-> ...}" after a production name declares what the production
   is transformed into; a "{-> ...}" at the end of an alternative
   gives the transformation of that alternative. */

    /* ---- Top level -------------------------------------------------- */

    grammar =
        P.package? P.helpers? P.states? P.tokens? ign_tokens?
        P.productions? P.ast?
        {-> New grammar([P.package.list_pkg_id], P.helpers, P.states,
                        P.tokens, P.ign_tokens, P.productions, P.ast) };

    /* ---- Package declaration ---------------------------------------- */

    // The package production is flattened to its list of pkg_id tokens.
    package {-> [list_pkg_id]:pkg_id*} =
        T.package pkg_name
        {-> [pkg_name.pkg_id] };

    pkg_name {-> pkg_id*} =
        pkg_id [pkg_ids]:pkg_name_tail* semicolon
        {-> [pkg_id, pkg_ids.pkg_id] };

    pkg_name_tail {-> pkg_id } =
        dot pkg_id
        {-> pkg_id };

    /* ---- Helpers section -------------------------------------------- */

    helpers =
        T.helpers [helper_defs]:helper_def+
        {-> New helpers([helper_defs]) };

    helper_def =
        id equal reg_exp semicolon
        {-> New helper_def(id, reg_exp) };

    /* ---- States section --------------------------------------------- */

    states =
        T.states id_list semicolon
        {-> New states([id_list.id]) };

    // Comma-separated identifiers, flattened to a list of id tokens.
    id_list {-> id*} =
        id [ids]:id_list_tail*
        {-> [id, ids.id]};

    id_list_tail {-> id } =
        comma id
        {-> id};

    /* ---- Tokens section --------------------------------------------- */

    tokens =
        T.tokens [token_defs]:token_def+
        {-> New tokens([token_defs]) };

    // The optional look-ahead contributes two separate AST children:
    // its slash token and its regular expression.
    token_def =
        state_list? id equal reg_exp look_ahead? semicolon
        {-> New token_def(state_list, id, reg_exp,
                          look_ahead.slash, look_ahead.reg_exp) };

    state_list =
        l_brace id transition? [state_lists]:state_list_tail* r_brace
        {-> New state_list(id, transition, [state_lists])};

    state_list_tail =
        comma id transition?
        {-> New state_list_tail(id, transition) };

    transition =
        arrow id
        {-> New transition(id)};

    ign_tokens =
        ignored T.tokens id_list? semicolon
        {-> New ign_tokens([id_list.id]) };

    look_ahead {-> slash reg_exp} =
        slash reg_exp
        {-> slash reg_exp};

    /* ---- Regular expressions ---------------------------------------- */

    // Alternation: concatenations separated by bars.
    reg_exp =
        concat [concats]:reg_exp_tail*
        {-> New reg_exp([concat, concats.concat])};

    reg_exp_tail {-> concat } =
        bar concat
        {-> concat};

    // A concatenation may be empty (un_exp*).
    concat =
        [un_exps]:un_exp*
        {-> New concat([un_exps])};

    // No explicit transformation is given for this production.
    un_exp =
        basic un_op?;

    basic =
        {char} P.char
        {-> New basic.char(P.char)}
      | {set} set
        {-> New basic.set(set)}
      | {string} string
        {-> New basic.string(string)}
      | {id} id
        {-> New basic.id(id)}
      | {reg_exp} l_par reg_exp r_par
        {-> New basic.reg_exp(reg_exp)}
      ;

    // No explicit transformations are given for these alternatives.
    char =
        {char} T.char
      | {dec} dec_char
      | {hex} hex_char;

    set =
        {operation} l_bkt [left]:basic bin_op [right]:basic r_bkt
        {-> New set.operation(left, bin_op, right) }
      | {interval} l_bkt [left]:P.char d_dot [right]:P.char r_bkt
        {-> New set.interval(left, right) };

    // Unary operators keep their token in the AST node.
    un_op =
        {star} star
        {-> New un_op.star(star)}
      | {q_mark} q_mark
        {-> New un_op.q_mark(q_mark)}
      | {plus} plus
        {-> New un_op.plus(plus)}
      ;

    // Binary set operators drop their token; the AST node has no children.
    bin_op =
        {plus} plus
        {-> New bin_op.plus()}
      | {minus} minus
        {-> New bin_op.minus()}
      ;

    /* ---- Productions section ---------------------------------------- */

    productions =
        T.productions [prods]:prod+
        {-> New productions([prods]) };

    prod =
        id prod_transform? equal alts semicolon
        {-> New prod(id, prod_transform.arrow, [prod_transform.elem],
                     [alts.list_alt])};

    prod_transform {-> arrow elem*} =
        l_brace arrow [elems]:elem* r_brace
        {-> arrow [elems]};

    alts {-> [list_alt]:alt*} =
        alt [alts]:alts_tail*
        {-> [alt, alts.alt]};

    alts_tail {-> alt} =
        bar alt
        {-> alt};

    alt =
        alt_name? [elems]:elem* alt_transform?
        {-> New alt(alt_name.id, [elems], alt_transform)};

    // Both brace tokens are kept in the AST node; the arrow is dropped.
    alt_transform =
        l_brace arrow [terms]:term* r_brace
        {-> New alt_transform(l_brace, [terms], r_brace)};

    /* ---- Transformation terms --------------------------------------- */

    term =
        {new} new prod_name l_par params? r_par
        {-> New term.new(prod_name, l_par, [params.list_term]) }
      | {list} l_bkt list_of_list_term? r_bkt
        {-> New term.list(l_bkt, [list_of_list_term.list_terms])}
      | {simple} specifier? id simple_term_tail?
        {-> New term.simple(specifier, id, simple_term_tail.id)}
      | {null} null
        {-> New term.null()}
      ;

    list_of_list_term {-> [list_terms]:list_term* } =
        list_term [list_terms]:list_term_tail*
        {-> [list_term, list_terms.list_term] }
      ;

    // Like term, but without the {list} and {null} alternatives.
    list_term =
        {new} new prod_name l_par params? r_par
        {-> New list_term.new(prod_name, l_par, [params.list_term])}
      | {simple} specifier? id simple_term_tail?
        {-> New list_term.simple(specifier, id, simple_term_tail.id)};

    list_term_tail {-> list_term} =
        comma list_term
        {-> list_term}
      ;

    simple_term_tail {-> id} =
        dot id
        {-> id};

    prod_name =
        id prod_name_tail?
        {-> New prod_name(id, prod_name_tail.id)};

    prod_name_tail {-> id} =
        dot id
        {-> id};

    params {-> [list_term]:term*} =
        term [params]:params_tail*
        {-> [term, params.term]};

    params_tail {-> term} =
        comma term
        {-> term};

    /* ---- Alternative elements --------------------------------------- */

    alt_name {-> id} =
        l_brace id r_brace
        {-> id};

    elem =
        elem_name? specifier? id un_op?
        {-> New elem(elem_name.id, specifier, id, un_op) };

    elem_name {-> id} =
        l_bkt id r_bkt colon
        {-> id};

    specifier =
        {token} token_specifier dot
        {-> New specifier.token()}
      | {production} production_specifier dot
        {-> New specifier.production()}
      ;

    /* ---- Abstract Syntax Tree section of the input language --------- */

    ast =
        abstract syntax tree [prods]:ast_prod+
        {-> New ast([prods]) };

    ast_prod =
        id equal [alts]:ast_alts semicolon
        {-> New ast_prod(id, [alts.list_ast_alt])};

    ast_alts {-> [list_ast_alt]:ast_alt*} =
        ast_alt [ast_alts]:ast_alts_tail*
        {-> [ast_alt, ast_alts.ast_alt]};

    ast_alts_tail {-> ast_alt} =
        bar ast_alt
        {-> ast_alt};

    // An AST alternative is an alternative without a transformation.
    ast_alt =
        alt_name? [elems]:elem*
        {-> New ast_alt(alt_name.id, [elems])};

/*****************************************************************************/
/*                                                                           */
/*  Abstract syntax tree definition.                                         */
/*                                                                           */
/*  Every "New x(...)" / "New x.y(...)" used in the transformations of the   */
/*  Productions section above refers to a production or alternative          */
/*  declared here.                                                           */
/*                                                                           */
/*****************************************************************************/

Abstract Syntax Tree

    grammar =
        [package]:pkg_id* P.helpers? P.states? P.tokens? P.ign_tokens?
        P.productions? P.ast?;

    helpers =
        [helper_defs]:helper_def*;

    helper_def =
        id reg_exp;

    states =
        [list_id]:id*;

    tokens =
        [token_defs]:token_def*;

    // slash and look_ahead are filled from the optional look-ahead part
    // of the concrete token_def.
    token_def =
        state_list? id reg_exp slash? [look_ahead]:reg_exp?;

    state_list =
        id transition? [state_lists]:state_list_tail*;

    state_list_tail =
        id transition?;

    transition =
        id;

    ign_tokens =
        [list_id]:id*;

    reg_exp =
        [concats]:concat*;

    concat =
        [un_exps]:un_exp*;

    un_exp =
        basic un_op?;

    basic =
        {char} P.char
      | {set} set
      | {string} string
      | {id} id
      | {reg_exp} reg_exp;

    char =
        {char} T.char
      | {dec} dec_char
      | {hex} hex_char;

    set =
        {operation} [left]:basic bin_op [right]:basic
      | {interval} [left]:P.char [right]:P.char
      ;

    un_op =
        {star} star
      | {q_mark} q_mark
      | {plus} plus
      ;

    bin_op =
        {plus}
      | {minus};

    productions =
        [prods]:prod*;

    prod =
        id arrow? [prod_transform]:elem* [alts]:alt*;

    alt =
        [alt_name]:id? [elems]:elem* alt_transform?;

    alt_transform =
        l_brace [terms]:term* r_brace;

    term =
        {new} prod_name l_par [params]:term*
      | {list} l_bkt [list_terms]:list_term*
      | {simple} specifier? id [simple_term_tail]:id?
      | {null}
      ;

    list_term =
        {new} prod_name l_par [params]:term*
      | {simple} specifier? id [simple_term_tail]:id?
      ;

    prod_name =
        id [prod_name_tail]:id?
      ;

    elem =
        [elem_name]:id? specifier? id un_op?;

    specifier =
        {token}
      | {production}
      ;

    ast =
        [prods]:ast_prod*;

    ast_prod =
        id [alts]:ast_alt*;

    ast_alt =
        [alt_name]:id? [elems]:elem*;