From 84e4114f79f8fae408e3ef2a70109bfa22f330de Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 3 Aug 2017 15:18:29 -0700 Subject: [PATCH] Allow conflicts involving repeat rules to be whitelisted, via their parent rule --- .../build_tables/build_parse_table.cc | 37 ++++++--- .../expected_error.txt | 14 ++++ .../conflict_in_repeat_rule/grammar.json | 76 +++++++++++++++++++ .../conflict_in_repeat_rule/readme.md | 1 + 4 files changed, 119 insertions(+), 9 deletions(-) create mode 100644 test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt create mode 100644 test/fixtures/test_grammars/conflict_in_repeat_rule/grammar.json create mode 100644 test/fixtures/test_grammars/conflict_in_repeat_rule/readme.md diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index cfb40c2d..7621554d 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -44,6 +44,7 @@ class ParseTableBuilder { const LexicalGrammar lexical_grammar; unordered_map recovery_states; unordered_map parse_state_ids; + vector item_sets_by_state_id; deque parse_state_queue; ParseTable parse_table; set conflicts; @@ -168,19 +169,19 @@ class ParseTableBuilder { } ParseStateId add_parse_state(SymbolSequence &&preceding_symbols, const ParseItemSet &item_set) { - auto pair = parse_state_ids.find(item_set); - if (pair == parse_state_ids.end()) { - ParseStateId state_id = parse_table.states.size(); + ParseStateId new_state_id = parse_table.states.size(); + auto insertion = parse_state_ids.insert({move(item_set), new_state_id}); + if (insertion.second) { + item_sets_by_state_id.push_back(&insertion.first->first); parse_table.states.push_back(ParseState()); - parse_state_ids[item_set] = state_id; parse_state_queue.push_back({ move(preceding_symbols), - move(item_set), - state_id + insertion.first->first, + new_state_id }); - return state_id; + return new_state_id; } else { - return pair->second; + return insertion.first->second; } } @@ -603,7 +604,25 @@ class ParseTableBuilder { set actual_conflict; for (const ParseItem &item : conflicting_items) { - actual_conflict.insert(item.lhs()); + Symbol symbol = item.lhs(); + if (grammar.variables[symbol.index].type == VariableTypeAuxiliary) { + ParseStateId preceding_state_id = 1; + for (auto &preceding_symbol : preceding_symbols) { + ParseState &preceding_state = parse_table.states[preceding_state_id]; + if (preceding_state.nonterminal_entries.count(symbol.index)) break; + preceding_state_id = preceding_symbol.is_terminal() ? + preceding_state.terminal_entries[preceding_symbol].actions.back().state_index : + preceding_state.nonterminal_entries[preceding_symbol.index]; + } + const ParseItemSet &preceding_item_set = *item_sets_by_state_id[preceding_state_id]; + for (auto &preceding_entry : preceding_item_set.entries) { + if (preceding_entry.first.next_symbol() == symbol) { + actual_conflict.insert(preceding_entry.first.lhs()); + } + } + } else { + actual_conflict.insert(symbol); + } } for (const auto &expected_conflict : grammar.expected_conflicts) { diff --git a/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt b/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt new file mode 100644 index 00000000..2c710346 --- /dev/null +++ b/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt @@ -0,0 +1,14 @@ +Unresolved conflict for symbol sequence: + + '[' identifier • ']' … + +Possible interpretations: + + 1: '[' (array_repeat1 identifier) • ']' … + 2: '[' (array_type_repeat1 identifier) • ']' … + +Possible resolutions: + + 1: Specify a higher precedence in `array_repeat1` than in the other rules. + 2: Specify a higher precedence in `array_type_repeat1` than in the other rules. + 3: Add a conflict for these rules: `array` `array_type` diff --git a/test/fixtures/test_grammars/conflict_in_repeat_rule/grammar.json b/test/fixtures/test_grammars/conflict_in_repeat_rule/grammar.json new file mode 100644 index 00000000..67c05355 --- /dev/null +++ b/test/fixtures/test_grammars/conflict_in_repeat_rule/grammar.json @@ -0,0 +1,76 @@ +{ + "name": "conflict_in_repeat_rule", + + "rules": { + "statement": { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "array"}, + {"type": "STRING", "value": ";"} + ] + }, + { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "array_type"}, + {"type": "SYMBOL", "name": "identifier"}, + {"type": "STRING", "value": ";"} + ] + } + ] + }, + + "array": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "[" + }, + { + "type": "REPEAT", + "content": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "identifier"}, + {"type": "STRING", "value": "0"} + ] + } + }, + { + "type": "STRING", + "value": "]" + } + ] + }, + + "array_type": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "[" + }, + { + "type": "REPEAT", + "content": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "identifier"}, + {"type": "STRING", "value": "void"} + ] + } + }, + { + "type": "STRING", + "value": "]" + } + ] + }, + + "identifier": {"type": "PATTERN", "value": "\\a+"} + } +} \ No newline at end of file diff --git a/test/fixtures/test_grammars/conflict_in_repeat_rule/readme.md b/test/fixtures/test_grammars/conflict_in_repeat_rule/readme.md new file mode 100644 index 00000000..be4e65ed --- /dev/null +++ b/test/fixtures/test_grammars/conflict_in_repeat_rule/readme.md @@ -0,0 +1 @@ +This grammar has a conflict that involves *repeat rules*: auxiliary rules that are added by the parser generator in order to implement repetition. There is no way of referring to these rules in the grammar DSL, so these conflicts must be resolved by referring to their parent rules. \ No newline at end of file