Allow conflicts involving repeat rules to be whitelisted, via their parent rule

2017-08-03 15:18:29 -07:00 · 2017-08-03 15:18:29 -07:00 · 84e4114f79
commit 84e4114f79
parent 119c67dd78
4 changed files with 119 additions and 9 deletions
--- a/src/compiler/build_tables/build_parse_table.cc
+++ b/src/compiler/build_tables/build_parse_table.cc
@ -44,6 +44,7 @@ class ParseTableBuilder {
  const LexicalGrammar lexical_grammar;
  unordered_map<Symbol, ParseItemSet> recovery_states;
  unordered_map<ParseItemSet, ParseStateId> parse_state_ids;
+  vector<const ParseItemSet *> item_sets_by_state_id;
  deque<ParseStateQueueEntry> parse_state_queue;
  ParseTable parse_table;
  set<string> conflicts;
@ -168,19 +169,19 @@ class ParseTableBuilder {
  }

  ParseStateId add_parse_state(SymbolSequence &&preceding_symbols, const ParseItemSet &item_set) {
-    auto pair = parse_state_ids.find(item_set);
-    if (pair == parse_state_ids.end()) {
-      ParseStateId state_id = parse_table.states.size();
+    ParseStateId new_state_id = parse_table.states.size();
+    auto insertion = parse_state_ids.insert({move(item_set), new_state_id});
+    if (insertion.second) {
+      item_sets_by_state_id.push_back(&insertion.first->first);
      parse_table.states.push_back(ParseState());
-      parse_state_ids[item_set] = state_id;
      parse_state_queue.push_back({
        move(preceding_symbols),
-        move(item_set),
-        state_id
+        insertion.first->first,
+        new_state_id
      });
-      return state_id;
+      return new_state_id;
    } else {
-      return pair->second;
+      return insertion.first->second;
    }
  }

@ -603,7 +604,25 @@ class ParseTableBuilder {

    set<Symbol> actual_conflict;
    for (const ParseItem &item : conflicting_items) {
-      actual_conflict.insert(item.lhs());
+      Symbol symbol = item.lhs();
+      if (grammar.variables[symbol.index].type == VariableTypeAuxiliary) {
+        ParseStateId preceding_state_id = 1;
+        for (auto &preceding_symbol : preceding_symbols) {
+          ParseState &preceding_state = parse_table.states[preceding_state_id];
+          if (preceding_state.nonterminal_entries.count(symbol.index)) break;
+          preceding_state_id = preceding_symbol.is_terminal() ?
+            preceding_state.terminal_entries[preceding_symbol].actions.back().state_index :
+            preceding_state.nonterminal_entries[preceding_symbol.index];
+        }
+        const ParseItemSet &preceding_item_set = *item_sets_by_state_id[preceding_state_id];
+        for (auto &preceding_entry : preceding_item_set.entries) {
+          if (preceding_entry.first.next_symbol() == symbol) {
+            actual_conflict.insert(preceding_entry.first.lhs());
+          }
+        }
+      } else {
+        actual_conflict.insert(symbol);
+      }
    }

    for (const auto &expected_conflict : grammar.expected_conflicts) {
--- a/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt
+++ b/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt
@ -0,0 +1,14 @@
+Unresolved conflict for symbol sequence:
+
+  '['  identifier  •  ']'  …
+
+Possible interpretations:
+
+  1:  '['  (array_repeat1  identifier)  •  ']'  …
+  2:  '['  (array_type_repeat1  identifier)  •  ']'  …
+
+Possible resolutions:
+
+  1:  Specify a higher precedence in `array_repeat1` than in the other rules.
+  2:  Specify a higher precedence in `array_type_repeat1` than in the other rules.
+  3:  Add a conflict for these rules: `array` `array_type`
--- a/test/fixtures/test_grammars/conflict_in_repeat_rule/grammar.json
+++ b/test/fixtures/test_grammars/conflict_in_repeat_rule/grammar.json
@ -0,0 +1,76 @@
+{
+  "name": "conflict_in_repeat_rule",
+
+  "rules": {
+    "statement": {
+      "type": "CHOICE",
+      "members": [
+        {
+          "type": "SEQ",
+          "members": [
+            {"type": "SYMBOL", "name": "array"},
+            {"type": "STRING", "value": ";"}
+          ]
+        },
+        {
+          "type": "SEQ",
+          "members": [
+            {"type": "SYMBOL", "name": "array_type"},
+            {"type": "SYMBOL", "name": "identifier"},
+            {"type": "STRING", "value": ";"}
+          ]
+        }
+      ]
+    },
+
+    "array": {
+      "type": "SEQ",
+      "members": [
+        {
+          "type": "STRING",
+          "value": "["
+        },
+        {
+          "type": "REPEAT",
+          "content": {
+            "type": "CHOICE",
+            "members": [
+              {"type": "SYMBOL", "name": "identifier"},
+              {"type": "STRING", "value": "0"}
+            ]
+          }
+        },
+        {
+          "type": "STRING",
+          "value": "]"
+        }
+      ]
+    },
+
+    "array_type": {
+      "type": "SEQ",
+      "members": [
+        {
+          "type": "STRING",
+          "value": "["
+        },
+        {
+          "type": "REPEAT",
+          "content": {
+            "type": "CHOICE",
+            "members": [
+              {"type": "SYMBOL", "name": "identifier"},
+              {"type": "STRING", "value": "void"}
+            ]
+          }
+        },
+        {
+          "type": "STRING",
+          "value": "]"
+        }
+      ]
+    },
+
+    "identifier": {"type": "PATTERN", "value": "\\a+"}
+  }
+}
--- a/test/fixtures/test_grammars/conflict_in_repeat_rule/readme.md
+++ b/test/fixtures/test_grammars/conflict_in_repeat_rule/readme.md
@ -0,0 +1 @@
+This grammar has a conflict that involves *repeat rules*: auxiliary rules that are added by the parser generator in order to implement repetition. There is no way of referring to these rules in the grammar DSL, so these conflicts must be resolved by referring to their parent rules.
				`@ -0,0 +1 @@`
				`This grammar has a conflict that involves repeat rules: auxiliary rules that are added by the parser generator in order to implement repetition. There is no way of referring to these rules in the grammar DSL, so these conflicts must be resolved by referring to their parent rules.`