/*
 * C grammar based on the ISO/IEC 9899:1999 standard
 * Copyright (C) 2003 Roger Keays
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Author: Roger Keays
 * Date: 12 May 2003
 * Revision: 1.0
 *
 * Known Bugs:
 * o Does not include any pre-processor stuff from the standard (we let the
 *   preprocessor worry about that!)
 * o Production 'typedef_name' is removed to make the grammar LALR(1)
 *   (the production itself is still declared, but the {typedef} alternative
 *   of 'type_specifier' is commented out, so typedef'd names are not
 *   recognised as type specifiers)
 * o Grammar suffers from hanging else problem: else statements require { }
 *
 * Change history:
 * o nondigit: upper-case range corrected from 'A'..'z' to 'A'..'Z'
 * o hex_constant: at least one hex digit is now required after the prefix
 * o universal_char_name: added the eight-digit \U form
 * o comment: a '//' comment no longer runs across line ends
 * o string_literal: the empty string "" is now accepted
 * o tok_colon_gt: digraph corrected from '>:' to ':>'
 * o enumerator: uses 'identifier', because the 'enumeration_constant' token
 *   can never be produced by the lexer (see the note on that token)
 */

/* regular expressions */
Helpers

    /* sensible stuff */
    all = [0 .. 127];
    cr = 13;
    lf = 10;
    eol = cr | lf | cr lf;
    tab = 9;
    /* any character that does not end a line (used by '//' comments) */
    not_eol = [all - [cr + lf]];
    not_star = [all - '*'];
    not_star_slash = [not_star - '/'];

    /* 6.4.2.1 digit, nondigit */
    digit = ['0' .. '9'];
    /* letters and underscore only; the upper-case range must stop at 'Z',
     * otherwise '[', '\', ']', '^' and '`' would be identifier characters */
    nondigit = ['_' + [['a' .. 'z'] + ['A' .. 'Z']]];

    /* 6.4.4.1 (various constants) */
    nonzero_digit = [digit - '0'];
    decimal_constant = nonzero_digit digit*;
    octal_digit = ['0' .. '7'];
    octal_constant = '0' octal_digit*;
    hex_prefix = '0x' | '0X';
    hex_digit = [digit + [['a' .. 'f'] + ['A' .. 'F']]];
    /* the standard requires at least one digit after the prefix */
    hex_constant = hex_prefix hex_digit+;
    unsigned_suffix = 'u' | 'U';
    long_suffix = 'l' | 'L';
    long_long_suffix = 'll' | 'LL';
    integer_suffix =
          unsigned_suffix long_suffix?
        | unsigned_suffix long_long_suffix
        | long_suffix unsigned_suffix?
        | long_long_suffix unsigned_suffix?;

    /* 6.4.3 universal-character-name, hex-quad */
    hex_quad = hex_digit hex_digit hex_digit hex_digit;
    universal_char_name =
          '\u' hex_quad
        | '\U' hex_quad hex_quad;

    /* 6.4.2.1 identifier-nondigit */
    identifier_nondigit = nondigit | universal_char_name;

    /* 6.4.4.3 enumeration-constant */
    enumeration_constant_helper =
        identifier_nondigit (digit | identifier_nondigit)*;

    /* 6.4.4.2 (various constants) */
    sign = '+' | '-';
    digit_sequence = digit+;
    hex_digit_sequence = hex_digit+;
    exponent_part = ('e' | 'E') sign? digit_sequence;
    binary_exponent_part = ('p' | 'P') sign? digit_sequence;
    floating_suffix = ('f' | 'l' | 'F' | 'L');
    fractional_constant =
          digit_sequence? '.' digit_sequence
        | digit_sequence '.';
    decimal_floating_constant =
          fractional_constant exponent_part? floating_suffix?
        | digit_sequence exponent_part floating_suffix?;
    hex_fractional_constant =
          hex_digit_sequence? '.' hex_digit_sequence
        | hex_digit_sequence '.';
    hex_floating_constant =
        hex_prefix (hex_fractional_constant | hex_digit_sequence)
        binary_exponent_part floating_suffix?;

    /* 6.4.4.1 (various constants) */
    integer_constant =
          decimal_constant integer_suffix?
        | octal_constant integer_suffix?
        | hex_constant integer_suffix?;

    /* 6.4.4.2 (various constants) */
    floating_constant = decimal_floating_constant | hex_floating_constant;

    /* 6.4.4.4 (various sequences) */
    simple_escape_seq =
          '\' ''' | '\"' | '\?' | '\\'
        | '\a' | '\b' | '\f' | '\n' | '\r' | '\t' | '\v';
    octal_escape_seq = '\' octal_digit octal_digit? octal_digit?;
    hex_escape_seq = '\x' hex_digit+;
    escape_seq =
          simple_escape_seq
        | octal_escape_seq
        | hex_escape_seq
        | universal_char_name;
    c_char = [all - [''' + ['\' + [cr + lf]]]] | escape_seq;
    c_char_seq = c_char+;
    character_constant = 'L'? ''' c_char_seq ''';

    /* 6.4.5 string sequences */
    s_char = [all - ['"' + ['\' + [cr + lf]]]] | escape_seq;
    s_char_seq = s_char+;

/*
 * Tokens and keywords. The ISO standard has very few tokens: just keywords,
 * identifiers, constants, string-literals and punctuators. All the other
 * lexical elements are best expressed as Helpers as they are never referenced
 * in the Productions section.
 */
Tokens

    /* sensible stuff */
    blank = (eol | tab | ' ')+;
    /* A line comment stops at the first end of line (the eol is optional so
     * that a comment on the last line of the input is still recognised);
     * a block comment stops at the first star-slash. */
    comment =
          ('//' not_eol* eol?)
        | ('/*' not_star* '*'+ (not_star_slash not_star* '*'+)* '/');

    /* 6.4 tokens */

    /* 6.4.1 keywords */
    kw_auto = 'auto';
    kw_break = 'break';
    kw_case = 'case';
    kw_char = 'char';
    kw_const = 'const';
    kw_continue = 'continue';
    kw_default = 'default';
    kw_do = 'do';
    kw_double = 'double';
    kw_else = 'else';
    kw_enum = 'enum';
    kw_extern = 'extern';
    kw_float = 'float';
    kw_for = 'for';
    kw_goto = 'goto';
    kw_if = 'if';
    kw_inline = 'inline';
    kw_int = 'int';
    kw_long = 'long';
    kw_register = 'register';
    kw_restrict = 'restrict';
    kw_return = 'return';
    kw_short = 'short';
    kw_signed = 'signed';
    kw_sizeof = 'sizeof';
    kw_static = 'static';
    kw_struct = 'struct';
    kw_switch = 'switch';
    kw_typedef = 'typedef';
    kw_union = 'union';
    kw_unsigned = 'unsigned';
    kw_void = 'void';
    kw_volatile = 'volatile';
    kw_while = 'while';
    kw_bool = '_Bool';            /* ?! hey it's in the standard! */
    kw_complex = '_Complex';      /* ?! hey it's in the standard! */
    kw_imaginary = '_Imaginary';  /* ?! hey it's in the standard! */

    /* 6.4.2.1 Identifiers */
    identifier = identifier_nondigit (digit | identifier_nondigit)*;

    /* 6.4.4.3 enumeration-constant
     * NOTE: this pattern is identical to 'identifier', which is listed
     * first, so the lexer always reports 'identifier' for such input and
     * this token is never produced. It is kept only so the token name stays
     * declared; the 'enumerator' production uses 'identifier' instead. */
    enumeration_constant = enumeration_constant_helper;

    /* 6.4.4 constant
     * (the enumeration_constant_helper alternative is shadowed by
     * 'identifier' for the same reason as above) */
    constant =
          integer_constant
        | floating_constant
        | enumeration_constant_helper
        | character_constant;

    /* 6.4.5 string-literal (the character sequence is optional: "" is valid) */
    string_literal = 'L'? '"' s_char_seq? '"';

    /* 6.4.6 punctuators */
    tok_lbracket = '[';
    tok_rbracket = ']';
    tok_lpar = '(';
    tok_rpar = ')';
    tok_lbrace = '{';
    tok_rbrace = '}';
    tok_dot = '.';
    tok_arrow = '->';
    tok_plus_plus = '++';
    tok_minus_minus = '--';
    tok_amp = '&';
    tok_star = '*';
    tok_plus = '+';
    tok_minus = '-';
    tok_tilde = '~';
    tok_exclamation = '!';
    tok_slash = '/';
    tok_percent = '%';
    tok_lshift = '<<';
    tok_rshift = '>>';
    tok_lt = '<';
    tok_gt = '>';
    tok_lt_eq = '<=';
    tok_gt_eq = '>=';
    tok_eq_eq = '==';
    tok_not_eq = '!=';
    tok_caret = '^';
    tok_bar = '|';
    tok_amp_amp = '&&';
    tok_bar_bar = '||';
    tok_question = '?';
    tok_colon = ':';
    tok_semicolon = ';';
    tok_elipsis = '...';
    tok_eq = '=';
    tok_star_eq = '*=';
    tok_slash_eq = '/=';
    tok_percent_eq = '%=';
    tok_plus_eq = '+=';
    tok_minus_eq = '-=';
    tok_lshift_eq = '<<=';
    tok_rshift_eq = '>>=';
    tok_amp_eq = '&=';
    tok_caret_eq = '^=';
    tok_bar_eq = '|=';
    tok_comma = ',';

    /* ?! hey, it's in the standard! */
    tok_hash = '#';
    tok_hash_hash = '##';
    tok_lt_colon = '<:';
    tok_colon_gt = ':>';
    tok_lt_percent = '<%';
    tok_percent_gt = '%>';
    tok_percent_colon = '%:';
    tok_percent_colon_percent_colon = '%:%:';

Ignored Tokens

    comment,
    blank;

/* Concrete Syntax Tree */
Productions

    /* 6.9 translation-unit */
    translation_unit =
        external_declaration+;

    /* 6.9 external-declaration */
    external_declaration =
          {defn} function_definition
        | {decl} declaration;

    /* 6.9.1 function-definition */
    function_definition =
        declaration_specifiers declarator declaration_list? compound_statement;

    /* 6.9.1 declaration-list */
    declaration_list =
        declaration+;

    /* 6.5.1 primary-expression */
    primary_expression =
          {identifier} identifier
        | {constant} constant
        | {string} string_literal
        | {expression} tok_lpar expression tok_rpar;

    /* 6.5.2 postfix-expression
     * (cast1/cast2 are the compound-literal forms, without and with a
     * trailing comma) */
    postfix_expression =
          {primary} primary_expression
        | {bracket} postfix_expression tok_lbracket expression tok_rbracket
        | {par} postfix_expression tok_lpar argument_expression_list? tok_rpar
        | {dot} postfix_expression tok_dot identifier
        | {arrow} postfix_expression tok_arrow identifier
        | {plus_plus} postfix_expression tok_plus_plus
        | {minus_minus} postfix_expression tok_minus_minus
        | {cast1} tok_lpar type_name tok_rpar tok_lbrace initializer_list
                  tok_rbrace
        | {cast2} tok_lpar type_name tok_rpar tok_lbrace initializer_list
                  tok_comma tok_rbrace;

    /* 6.5.2 argument-expression-list */
    argument_expression_list =
          {single} assignment_expression
        | {list} argument_expression_list tok_comma assignment_expression;

    /* 6.5.3 unary-expression */
    unary_expression =
          {postfix} postfix_expression
        | {plus_plus} tok_plus_plus unary_expression
        | {minus_minus} tok_minus_minus unary_expression
        | {cast} unary_operator cast_expression
        | {sizeof1} kw_sizeof unary_expression
        | {sizeof2} kw_sizeof tok_lpar type_name tok_rpar;

    /* 6.5.3 unary-operator */
    unary_operator =
          {amp} tok_amp
        | {star} tok_star
        | {plus} tok_plus
        | {minus} tok_minus
        | {tilde} tok_tilde
        | {exclamation} tok_exclamation;

    /* 6.5.4 cast-expression */
    cast_expression =
          {no} unary_expression
        | {cast} tok_lpar type_name tok_rpar cast_expression;

    /* 6.5.5 multiplicative-expression */
    multiplicative_expression =
          {no} cast_expression
        | {mult} multiplicative_expression tok_star cast_expression
        | {divide} multiplicative_expression tok_slash cast_expression
        | {mod} multiplicative_expression tok_percent cast_expression;

    /* 6.5.6 additive-expression */
    additive_expression =
          {no} multiplicative_expression
        | {plus} additive_expression tok_plus multiplicative_expression
        | {minus} additive_expression tok_minus multiplicative_expression;

    /* 6.5.7 shift-expression */
    shift_expression =
          {no} additive_expression
        | {lshift} shift_expression tok_lshift additive_expression
        | {rshift} shift_expression tok_rshift additive_expression;

    /* 6.5.8 relational-expression */
    relational_expression =
          {no} shift_expression
        | {lt} relational_expression tok_lt shift_expression
        | {gt} relational_expression tok_gt shift_expression
        | {lt_eq} relational_expression tok_lt_eq shift_expression
        | {gt_eq} relational_expression tok_gt_eq shift_expression;

    /* 6.5.9 equality-expression */
    equality_expression =
          {no} relational_expression
        | {eq_eq} equality_expression tok_eq_eq relational_expression
        | {not_eq} equality_expression tok_not_eq relational_expression;

    /* 6.5.10 AND-expression */
    and_expression =
          {no} equality_expression
        | {and} and_expression tok_amp equality_expression;

    /* 6.5.11 exclusive-OR-expression */
    exclusive_or_expression =
          {no} and_expression
        | {xor} exclusive_or_expression tok_caret and_expression;

    /* 6.5.12 inclusive-OR-expression */
    inclusive_or_expression =
          {no} exclusive_or_expression
        | {or} inclusive_or_expression tok_bar exclusive_or_expression;

    /* 6.5.13 logical-AND-expression */
    logical_and_expression =
          {no} inclusive_or_expression
        | {and} logical_and_expression tok_amp_amp inclusive_or_expression;

    /* 6.5.14 logical-OR-expression */
    logical_or_expression =
          {no} logical_and_expression
        | {or} logical_or_expression tok_bar_bar logical_and_expression;

    /* 6.5.15 conditional-expression */
    conditional_expression =
          {no} logical_or_expression
        | {cond} logical_or_expression tok_question expression tok_colon
                 conditional_expression;

    /* 6.5.16 assignment-expression */
    assignment_expression =
          {no} conditional_expression
        | {assign} unary_expression assignment_operator assignment_expression;

    /* 6.5.16 assignment-operator */
    assignment_operator =
          {eq} tok_eq
        | {star_eq} tok_star_eq
        | {slash_eq} tok_slash_eq
        | {percent_eq} tok_percent_eq
        | {plus_eq} tok_plus_eq
        | {minus_eq} tok_minus_eq
        | {lshift_eq} tok_lshift_eq
        | {rshift_eq} tok_rshift_eq
        | {amp_eq} tok_amp_eq
        | {caret_eq} tok_caret_eq
        | {bar_eq} tok_bar_eq;

    /* 6.5.17 expression */
    expression =
          {no} assignment_expression
        | {list} expression tok_comma assignment_expression;

    /* 6.6 constant-expression */
    constant_expression =
        conditional_expression;

    /* 6.7 declaration */
    declaration =
        declaration_specifiers init_declarator_list? tok_semicolon;

    /* 6.7 declaration-specifiers */
    declaration_specifiers =
          {storage} storage_class_specifier declaration_specifiers?
        | {type_spec} type_specifier declaration_specifiers?
        | {type_qual} type_qualifier declaration_specifiers?
        | {function} function_specifier declaration_specifiers?;

    /* 6.7 init-declarator-list */
    init_declarator_list =
          {single} init_declarator
        | {list} init_declarator_list tok_comma init_declarator;

    /* 6.7 init-declarator */
    init_declarator =
          {plain} declarator
        | {assign} declarator tok_eq initializer;

    /* 6.7.1 storage-class-specifier */
    storage_class_specifier =
          {typedef} kw_typedef
        | {extern} kw_extern
        | {static} kw_static
        | {auto} kw_auto
        | {register} kw_register;

    /* 6.7.2 type-specifier */
    type_specifier =
          {void} kw_void
        | {char} kw_char
        | {short} kw_short
        | {int} kw_int
        | {long} kw_long
        | {float} kw_float
        | {double} kw_double
        | {signed} kw_signed
        | {unsigned} kw_unsigned
        | {bool} kw_bool
        | {complex} kw_complex
        | {imaginary} kw_imaginary
        | {struct} struct_or_union_specifier
        | {enum} enum_specifier;
        /* | {typedef} typedef_name    REMOVED TO MAKE GRAMMAR LALR(1) */

    /* 6.7.2.1 struct-or-union-specifier */
    struct_or_union_specifier =
          {defined} struct_or_union identifier? tok_lbrace
                    struct_declaration_list tok_rbrace
        | {not_defined} struct_or_union identifier;

    /* 6.7.2.1 struct-or-union */
    struct_or_union =
          {struct} kw_struct
        | {union} kw_union;

    /* 6.7.2.1 struct-declaration-list */
    struct_declaration_list =
        struct_declaration+;

    /* 6.7.2.1 struct-declaration */
    struct_declaration =
        specifier_qualifier_list struct_declarator_list tok_semicolon;

    /* 6.7.2.1 specifier-qualifier-list */
    specifier_qualifier_list =
          {spec_first} type_specifier specifier_qualifier_list?
        | {qual_first} type_qualifier specifier_qualifier_list?;

    /* 6.7.2.1 struct-declarator-list */
    struct_declarator_list =
          {single} struct_declarator
        | {list} struct_declarator_list tok_comma struct_declarator;

    /* 6.7.2.1 struct-declarator */
    struct_declarator =
          {plain} declarator
        | {with_const} declarator? tok_colon constant_expression;

    /* 6.7.2.2 enum-specifier */
    enum_specifier =
          {values1} kw_enum identifier? tok_lbrace enumerator_list tok_rbrace
        | {values2} kw_enum identifier? tok_lbrace enumerator_list tok_comma
                    tok_rbrace
        | {novalues} kw_enum identifier;

    /* 6.7.2.2 enumerator-list */
    enumerator_list =
          {single} enumerator
        | {list} enumerator_list tok_comma enumerator;

    /* 6.7.2.2 enumerator
     * An enumeration constant is lexically an identifier; the lexer can only
     * ever deliver the 'identifier' token for it, so that is what is
     * accepted here. */
    enumerator =
          {plain} identifier
        | {assign} identifier tok_eq constant_expression;

    /* 6.7.3 type-qualifier */
    type_qualifier =
          {const} kw_const
        | {restrict} kw_restrict
        | {volatile} kw_volatile;

    /* 6.7.4 function-specifier */
    function_specifier =
        kw_inline;

    /* 6.7.5 declarator */
    declarator =
        pointer? direct_declarator;

    /* 6.7.5 direct-declarator */
    direct_declarator =
          {ident} identifier
        | {par} tok_lpar declarator tok_rpar
        | {qual1} direct_declarator tok_lbracket type_qualifier_list?
                  assignment_expression? tok_rbracket
        | {qual2} direct_declarator tok_lbracket kw_static type_qualifier_list?
                  assignment_expression tok_rbracket
        | {qual3} direct_declarator tok_lbracket type_qualifier_list kw_static
                  assignment_expression tok_rbracket
        | {qual4} direct_declarator tok_lbracket type_qualifier_list? tok_star
                  tok_rbracket
        | {param_types} direct_declarator tok_lpar parameter_type_list tok_rpar
        | {ident_list} direct_declarator tok_lpar identifier_list? tok_rpar;

    /* 6.7.5 pointer */
    pointer =
          {single} tok_star type_qualifier_list?
        | {recursive} tok_star type_qualifier_list? pointer;

    /* 6.7.5 type-qualifier-list */
    type_qualifier_list =
        type_qualifier+;

    /* 6.7.5 parameter-type-list */
    parameter_type_list =
          {plain} parameter_list
        | {elipsis} parameter_list tok_comma tok_elipsis;

    /* 6.7.5 parameter-list */
    parameter_list =
          {single} parameter_declaration
        | {list} parameter_list tok_comma parameter_declaration;

    /* 6.7.5 parameter-declaration */
    parameter_declaration =
          {plain} declaration_specifiers declarator
        | {abstract} declaration_specifiers abstract_declarator?;

    /* 6.7.5 identifier-list */
    identifier_list =
          {single} identifier
        | {list} identifier_list tok_comma identifier;

    /* 6.7.6 type-name */
    type_name =
        specifier_qualifier_list abstract_declarator?;

    /* 6.7.6 abstract-declarator */
    abstract_declarator =
          {pointer} pointer
        | {direct} pointer? direct_abstract_declarator;

    /* 6.7.6 direct-abstract-declarator */
    direct_abstract_declarator =
          {par} tok_lpar abstract_declarator tok_rpar
        | {bracket1} direct_abstract_declarator? tok_lbracket
                     assignment_expression? tok_rbracket
        | {bracket2} direct_abstract_declarator? tok_lbracket tok_star
                     tok_rbracket
        | {params} direct_abstract_declarator? tok_lpar parameter_type_list?
                   tok_rpar;

    /* 6.7.7 typedef-name
     * (declared, but not referenced by any other production in this file) */
    typedef_name =
        identifier;

    /* 6.7.8 initializer */
    initializer =
          {assign} assignment_expression
        | {list1} tok_lbrace initializer_list tok_rbrace
        | {list2} tok_lbrace initializer_list tok_comma tok_rbrace;

    /* 6.7.8 initializer-list */
    initializer_list =
          {single} designation? initializer
        | {list} initializer_list tok_comma designation? initializer;

    /* 6.7.8 designation */
    designation =
        designator_list tok_eq;

    /* 6.7.8 designator-list */
    designator_list =
        designator+;

    /* 6.7.8 designator */
    designator =
          {brackets} tok_lbracket constant_expression tok_rbracket
        | {dot} tok_dot identifier;

    /* 6.8 statement */
    statement =
          {labeled} labeled_statement
        | {compound} compound_statement
        | {expression} expression_statement
        | {selection} selection_statement
        | {iteration} iteration_statement
        | {jump} jump_statement;

    /* 6.8.1 labeled-statement */
    labeled_statement =
          {plain} identifier tok_colon statement
        | {case} kw_case constant_expression tok_colon statement
        | {default} kw_default tok_colon statement;

    /* 6.8.2 compound-statement */
    compound_statement =
        {alt} tok_lbrace block_item_list? tok_rbrace;

    /* 6.8.2 block-item-list */
    block_item_list =
        block_item+;

    /* 6.8.2 block-item */
    block_item =
          {decl} declaration
        | {statement} statement;

    /* 6.8.3 expression-statement */
    expression_statement =
        {exp} expression? tok_semicolon;

    /* 6.8.4 selection-statement
     * The if/else form only accepts brace-delimited bodies on both sides;
     * this is the "hanging else" restriction listed under Known Bugs. */
    selection_statement =
          {if} kw_if tok_lpar expression tok_rpar statement
        | {ifelse} kw_if tok_lpar expression tok_rpar compound_statement
                   kw_else [other]:compound_statement
        | {switch} kw_switch tok_lpar expression tok_rpar statement;

    /* 6.8.5 iteration-statement */
    iteration_statement =
          {while} kw_while tok_lpar expression tok_rpar statement
        | {do} kw_do statement kw_while tok_lpar expression tok_rpar
               tok_semicolon
        | {for1} kw_for tok_lpar [decl]:expression? [a]:tok_semicolon
                 [cond]:expression? [b]:tok_semicolon [iter]:expression?
                 tok_rpar statement
        | {for2} kw_for tok_lpar declaration [cond]:expression? tok_semicolon
                 [iter]:expression? tok_rpar statement;

    /* 6.8.6 jump-statement */
    jump_statement =
          {goto} kw_goto identifier tok_semicolon
        | {continue} kw_continue tok_semicolon
        | {break} kw_break tok_semicolon
        | {return} kw_return expression? tok_semicolon;

/* Abstract Syntax Tree
Abstract Syntax Tree
*/