From c9a482bbf375676a087e044920c9d5834106f563 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@github.com>
Date: Fri, 26 Jun 2015 16:14:08 -0500
Subject: [PATCH] Add expected_conflicts field to grammar

---
 include/tree_sitter/compiler.h                |  3 +
 .../build_tables/build_parse_table_spec.cc    |  2 +-
 .../build_tables/item_set_transitions_spec.cc |  2 +-
 .../parse_conflict_manager_spec.cc            |  2 +-
 .../build_tables/rule_can_be_blank_spec.cc    |  2 +-
 .../prepare_grammar/expand_repeats_spec.cc    | 12 +--
 .../prepare_grammar/extract_tokens_spec.cc    | 84 ++++++++++++-------
 .../prepare_grammar/intern_symbols_spec.cc    |  6 +-
 src/compiler/grammar.cc                       |  9 ++
 .../prepare_grammar/expand_repeats.cc         |  3 +-
 .../prepare_grammar/extract_tokens.cc         | 16 +++-
 src/compiler/prepare_grammar/extract_tokens.h |  3 +-
 .../prepare_grammar/intern_symbols.cc         | 35 +++++---
 src/compiler/prepare_grammar/intern_symbols.h |  3 +-
 .../prepare_grammar/interned_grammar.h        | 24 ++++++
 src/compiler/syntax_grammar.cc                |  6 +-
 src/compiler/syntax_grammar.h                 |  6 +-
 17 files changed, 151 insertions(+), 67 deletions(-)
 create mode 100644 src/compiler/prepare_grammar/interned_grammar.h
diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h
index 71f11e0a..0a9b2245 100644
--- a/include/tree_sitter/compiler.h
+++ b/include/tree_sitter/compiler.h
@@ -38,6 +38,7 @@ std::ostream &operator<<(std::ostream &stream, const rules::rule_ptr &rule);
 class Grammar {
   const std::vector<std::pair<std::string, rules::rule_ptr>> rules_;
   std::set<rules::rule_ptr> ubiquitous_tokens_;
+  std::set<std::set<std::string>> expected_conflicts_;
 
  public:
   explicit Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &);
@@ -47,6 +48,8 @@ class Grammar {
   const std::vector<std::pair<std::string, rules::rule_ptr>> &rules() const;
   const std::set<rules::rule_ptr> &ubiquitous_tokens() const;
   Grammar &ubiquitous_tokens(const std::set<rules::rule_ptr> &);
+  const std::set<std::set<std::string>> &expected_conflicts() const;
+  Grammar &expected_conflicts(const std::set<std::set<std::string>> &);
 };
 
 enum GrammarErrorType {
diff --git a/spec/compiler/build_tables/build_parse_table_spec.cc b/spec/compiler/build_tables/build_parse_table_spec.cc
index 3057f3e5..1a45b37e 100644
--- a/spec/compiler/build_tables/build_parse_table_spec.cc
+++ b/spec/compiler/build_tables/build_parse_table_spec.cc
@@ -15,7 +15,7 @@ describe("build_parse_table", []() {
       { "rule0", choice({ i_sym(1), i_sym(2) }) },
       { "rule1", i_token(0) },
       { "rule2", i_token(1) },
-  }, {}, { Symbol(2, SymbolOptionToken) });
+  }, {}, { Symbol(2, SymbolOptionToken) }, set<set<Symbol>>());
 
   LexicalGrammar lex_grammar({
       { "token0", pattern("[a-c]") },
diff --git a/spec/compiler/build_tables/item_set_transitions_spec.cc b/spec/compiler/build_tables/item_set_transitions_spec.cc
index 78a78a16..e67d72b0 100644
--- a/spec/compiler/build_tables/item_set_transitions_spec.cc
+++ b/spec/compiler/build_tables/item_set_transitions_spec.cc
@@ -45,7 +45,7 @@ describe("sym_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
   SyntaxGrammar grammar({
       { "A", blank() },
       { "B", i_token(21) },
-  }, {}, set<Symbol>());
+  }, {}, set<Symbol>(), set<set<Symbol>>());
 
   it("computes the closure of the new item sets", [&]() {
     ParseItemSet set1({
diff --git a/spec/compiler/build_tables/parse_conflict_manager_spec.cc b/spec/compiler/build_tables/parse_conflict_manager_spec.cc
index 34b8e506..de9375c8 100644
--- a/spec/compiler/build_tables/parse_conflict_manager_spec.cc
+++ b/spec/compiler/build_tables/parse_conflict_manager_spec.cc
@@ -16,7 +16,7 @@ describe("ParseConflictManager", []() {
     { "reduced_rule", i_token(0) },
     { "other_rule1", i_token(0) },
     { "other_rule2", i_token(0) },
-  }, {}, { Symbol(2, SymbolOptionToken) });
+  }, {}, { Symbol(2, SymbolOptionToken) }, set<set<Symbol>>());
 
   LexicalGrammar lexical_grammar({
     { "other_token", pattern("[a-b]") },
diff --git a/spec/compiler/build_tables/rule_can_be_blank_spec.cc b/spec/compiler/build_tables/rule_can_be_blank_spec.cc
index 4c0d03fd..1e9716f1 100644
--- a/spec/compiler/build_tables/rule_can_be_blank_spec.cc
+++ b/spec/compiler/build_tables/rule_can_be_blank_spec.cc
@@ -63,7 +63,7 @@ describe("rule_can_be_blank", [&]() {
         { "B", choice({
             seq({ i_sym(1), i_token(12) }),
             i_token(13) }) },
-    }, {}, set<Symbol>());
+    }, {}, set<Symbol>(), set<set<Symbol>>());
 
     it("terminates for left-recursive rules that can be blank", [&]() {
       rule = i_sym(0);
diff --git a/spec/compiler/prepare_grammar/expand_repeats_spec.cc b/spec/compiler/prepare_grammar/expand_repeats_spec.cc
index b54049fc..ed993f11 100644
--- a/spec/compiler/prepare_grammar/expand_repeats_spec.cc
+++ b/spec/compiler/prepare_grammar/expand_repeats_spec.cc
@@ -12,7 +12,7 @@ describe("expand_repeats", []() {
   it("replaces repeat rules with pairs of recursive rules", [&]() {
     SyntaxGrammar grammar({
         { "rule0", repeat(i_token(0)) },
-    }, {}, set<Symbol>());
+    }, {}, set<Symbol>(), set<set<Symbol>>());
 
     auto match = expand_repeats(grammar);
 
@@ -32,7 +32,7 @@ describe("expand_repeats", []() {
         { "rule0", seq({
             i_token(10),
             repeat(i_token(11)) }) },
-    }, {}, set<Symbol>());
+    }, {}, set<Symbol>(), set<set<Symbol>>());
 
     auto match = expand_repeats(grammar);
 
@@ -52,7 +52,7 @@ describe("expand_repeats", []() {
   it("replaces repeats inside of choices", [&]() {
     SyntaxGrammar grammar({
         { "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
-    }, {}, set<Symbol>());
+    }, {}, set<Symbol>(), set<set<Symbol>>());
 
     auto match = expand_repeats(grammar);
 
@@ -73,7 +73,7 @@ describe("expand_repeats", []() {
             seq({ i_token(1), repeat(i_token(4)) }),
             seq({ i_token(2), repeat(i_token(4)) }) }) },
         { "rule1", seq({ i_token(3), repeat(i_token(4)) }) },
-    }, {}, set<Symbol>());
+    }, {}, set<Symbol>(), set<set<Symbol>>());
 
     auto match = expand_repeats(grammar);
 
@@ -96,7 +96,7 @@ describe("expand_repeats", []() {
         { "rule0", seq({
             repeat(i_token(10)),
             repeat(i_token(11)) }) },
-    }, {}, set<Symbol>());
+    }, {}, set<Symbol>(), set<set<Symbol>>());
 
     auto match = expand_repeats(grammar);
 
@@ -120,7 +120,7 @@ describe("expand_repeats", []() {
     SyntaxGrammar grammar({
         { "rule0", repeat(i_token(10)) },
         { "rule1", repeat(i_token(11)) },
-    }, {}, set<Symbol>());
+    }, {}, set<Symbol>(), set<set<Symbol>>());
 
     auto match = expand_repeats(grammar);
 
diff --git a/spec/compiler/prepare_grammar/extract_tokens_spec.cc b/spec/compiler/prepare_grammar/extract_tokens_spec.cc
index 40d614d4..1258e864 100644
--- a/spec/compiler/prepare_grammar/extract_tokens_spec.cc
+++ b/spec/compiler/prepare_grammar/extract_tokens_spec.cc
@@ -1,6 +1,7 @@
 #include "compiler/compiler_spec_helper.h"
 #include "compiler/lexical_grammar.h"
 #include "compiler/syntax_grammar.h"
+#include "compiler/prepare_grammar/interned_grammar.h"
 #include "compiler/prepare_grammar/extract_tokens.h"
 #include "compiler/helpers/containers.h"
 
@@ -8,13 +9,16 @@ START_TEST
 
 using namespace rules;
 using prepare_grammar::extract_tokens;
+using prepare_grammar::InternedGrammar;
 
 describe("extract_tokens", []() {
+  const set<rules::rule_ptr> no_ubiquitous_tokens;
+  const set<set<rules::Symbol>> no_expected_conflicts;
+
   it("moves string rules into the lexical grammar", [&]() {
-    tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> result =
-        extract_tokens(Grammar({
-            { "rule_A", seq({ str("ab"), i_sym(0) }) }
-        }));
+    auto result = extract_tokens(InternedGrammar{{
+        { "rule_A", seq({ str("ab"), i_sym(0) }) }
+    }, no_ubiquitous_tokens, no_expected_conflicts});
 
     AssertThat(get<0>(result).rules, Equals(rule_list({
         { "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
@@ -28,9 +32,9 @@ describe("extract_tokens", []() {
   });
 
   it("moves pattern rules into the lexical grammar", [&]() {
-    auto result = extract_tokens(Grammar({
+    auto result = extract_tokens(InternedGrammar{{
         { "rule_A", seq({ pattern("a+"), i_sym(0) }) }
-    }));
+    }, no_ubiquitous_tokens, no_expected_conflicts});
 
     AssertThat(get<0>(result).rules, Equals(rule_list({
         { "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
@@ -44,11 +48,11 @@ describe("extract_tokens", []() {
   });
 
   it("moves other rules marked as tokens into the lexical grammar", [&]() {
-    auto result = extract_tokens(Grammar({
+    auto result = extract_tokens(InternedGrammar{{
         { "rule_A", seq({
             token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
             i_sym(0) }) }
-    }));
+    }, no_ubiquitous_tokens, no_expected_conflicts});
 
     AssertThat(get<0>(result).rules, Equals(rule_list({
         { "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
@@ -62,9 +66,9 @@ describe("extract_tokens", []() {
   });
 
   it("does not move blank rules", [&]() {
-    auto result = extract_tokens(Grammar({
+    auto result = extract_tokens(InternedGrammar{{
         { "rule_A", choice({ i_sym(0), blank() }) },
-    }));
+    }, no_ubiquitous_tokens, no_expected_conflicts});
 
     AssertThat(get<0>(result).rules, Equals(rule_list({
         { "rule_A", choice({ i_sym(0), blank() }) },
@@ -76,9 +80,9 @@ describe("extract_tokens", []() {
   });
 
   it("does not create duplicate tokens in the lexical grammar", [&]() {
-    auto result = extract_tokens(Grammar({
+    auto result = extract_tokens(InternedGrammar{{
         { "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
-    }));
+    }, no_ubiquitous_tokens, no_expected_conflicts});
 
     AssertThat(get<0>(result).rules, Equals(rule_list({
         { "rule_A", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) }
@@ -91,13 +95,31 @@ describe("extract_tokens", []() {
     })))
   });
 
+  it("updates the grammar's expected conflict symbols", [&]() {
+    auto result = extract_tokens(InternedGrammar{
+      {
+        { "rule_A", str("ok") },  // index 0; a bare string rule (the single lexical rule asserted below)
+        { "rule_B", repeat(i_sym(0)) },  // index 1 before extraction
+        { "rule_C", repeat(seq({ i_sym(0), i_sym(0) })) },  // index 2 before extraction
+      },
+      { str(" ") },
+      { { Symbol(1), Symbol(2) } }  // conflict between rule_B and rule_C, by pre-extraction index
+    });
+
+    AssertThat(get<0>(result).rules.size(), Equals<size_t>(2));  // only rule_B and rule_C stay syntactic
+    AssertThat(get<1>(result).rules.size(), Equals<size_t>(1));
+    AssertThat(get<0>(result).expected_conflicts, Equals(set<set<Symbol>>({
+      { Symbol(0), Symbol(1) },  // same two rules, each index shifted down by one
+    })));
+  });
+
   describe("when an entire rule can be extracted", [&]() {
     it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() {
-      auto result = extract_tokens(Grammar({
+      auto result = extract_tokens(InternedGrammar{{
           { "rule_A", i_sym(1) },
           { "rule_B", pattern("a|b") },
           { "rule_C", token(seq({ str("a"), str("b") })) },
-      }));
+      }, no_ubiquitous_tokens, no_expected_conflicts});
 
       AssertThat(get<0>(result).rules, Equals(rule_list({
           { "rule_A", i_token(0) }
@@ -112,11 +134,11 @@ describe("extract_tokens", []() {
     });
 
     it("updates symbols whose indices need to change due to deleted rules", [&]() {
-      auto result = extract_tokens(Grammar({
+      auto result = extract_tokens(InternedGrammar{{
           { "rule_A", str("ab") },
           { "rule_B", i_sym(0) },
           { "rule_C", i_sym(1) },
-      }));
+      }, no_ubiquitous_tokens, no_expected_conflicts});
 
       AssertThat(get<0>(result).rules, Equals(rule_list({
           { "rule_B", i_token(0) },
@@ -134,12 +156,12 @@ describe("extract_tokens", []() {
   describe("handling ubiquitous tokens", [&]() {
     describe("ubiquitous tokens that are not symbols", [&]() {
       it("adds them to the lexical grammar's separators", [&]() {
-        auto result = extract_tokens(Grammar({
+        auto result = extract_tokens(InternedGrammar{{
             { "rule_A", str("x") },
-        }).ubiquitous_tokens({
+        }, {
             pattern("\\s+"),
             str("y"),
-        }));
+        }, no_expected_conflicts});
 
         AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
 
@@ -154,13 +176,13 @@ describe("extract_tokens", []() {
 
     describe("ubiquitous tokens that point to moved rules", [&]() {
       it("updates them according to the new symbol numbers", [&]() {
-        auto result = extract_tokens(Grammar( {
+        auto result = extract_tokens(InternedGrammar{ {
             { "rule_A", seq({ str("w"), i_sym(1) }) },
             { "rule_B", str("x") },
             { "rule_C", str("y") },
-        }).ubiquitous_tokens({
+        }, {
             i_sym(2),
-        }));
+        }, no_expected_conflicts});
 
         AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
 
@@ -174,10 +196,10 @@ describe("extract_tokens", []() {
 
     describe("ubiquitous tokens that are visible", [&]() {
       it("preserves them in the syntactic grammar", [&]() {
-        auto result = extract_tokens(Grammar({
+        auto result = extract_tokens(InternedGrammar{{
             { "rule_A", str("ab") },
             { "rule_B", str("bc") },
-        }).ubiquitous_tokens({ i_sym(1) }));
+        }, { i_sym(1) }, no_expected_conflicts});
 
         AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
 
@@ -191,10 +213,10 @@ describe("extract_tokens", []() {
 
     describe("ubiquitous tokens that are used in other grammar rules", [&]() {
       it("preserves them in the syntactic grammar", [&]() {
-        auto result = extract_tokens(Grammar({
+        auto result = extract_tokens(InternedGrammar{{
             { "rule_A", seq({ i_sym(1), str("ab") }) },
             { "_rule_B", str("bc") },
-        }).ubiquitous_tokens({ i_sym(1) }));
+        }, { i_sym(1) }, no_expected_conflicts});
 
         AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
 
@@ -208,10 +230,10 @@ describe("extract_tokens", []() {
 
     describe("ubiquitous tokens that are non-token symbols", [&]() {
       it("returns an error", [&]() {
-        auto result = extract_tokens(Grammar({
+        auto result = extract_tokens(InternedGrammar{{
             { "rule_A", seq({ str("x"), i_sym(1) }), },
             { "rule_B", seq({ str("y"), str("z") }) },
-        }).ubiquitous_tokens({ i_sym(1) }));
+        }, { i_sym(1) }, no_expected_conflicts});
 
         AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
         AssertThat(get<2>(result), EqualsPointer(
@@ -220,12 +242,12 @@ describe("extract_tokens", []() {
       });
     });
 
-    describe("ubiquitous tokens that are non-token symbols", [&]() {
+    describe("ubiquitous tokens that are not symbols", [&]() {
       it("returns an error", [&]() {
-        auto result = extract_tokens(Grammar({
+        auto result = extract_tokens(InternedGrammar{{
             { "rule_A", str("x") },
             { "rule_B", str("y") },
-        }).ubiquitous_tokens({ choice({ i_sym(1), blank() }) }));
+        }, { choice({ i_sym(1), blank() }) }, no_expected_conflicts});
 
         AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
         AssertThat(get<2>(result), EqualsPointer(
diff --git a/spec/compiler/prepare_grammar/intern_symbols_spec.cc b/spec/compiler/prepare_grammar/intern_symbols_spec.cc
index 0c486182..5b70d9b6 100644
--- a/spec/compiler/prepare_grammar/intern_symbols_spec.cc
+++ b/spec/compiler/prepare_grammar/intern_symbols_spec.cc
@@ -20,7 +20,7 @@ describe("intern_symbols", []() {
     auto result = intern_symbols(grammar);
 
     AssertThat(result.second, Equals((GrammarError *)nullptr));
-    AssertThat(result.first.rules(), Equals(rule_list({
+    AssertThat(result.first.rules, Equals(rule_list({
         { "x", choice({ i_sym(1), i_sym(2) }) },
         { "y", i_sym(2) },
         { "z", str("stuff") },
@@ -49,8 +49,8 @@ describe("intern_symbols", []() {
     auto result = intern_symbols(grammar);
 
     AssertThat(result.second, Equals((GrammarError *)nullptr));
-    AssertThat(result.first.ubiquitous_tokens().size(), Equals<size_t>(1));
-    AssertThat(*result.first.ubiquitous_tokens().begin(), EqualsPointer(i_sym(2)));
+    AssertThat(result.first.ubiquitous_tokens.size(), Equals<size_t>(1));
+    AssertThat(*result.first.ubiquitous_tokens.begin(), EqualsPointer(i_sym(2)));
   });
 });
 
diff --git a/src/compiler/grammar.cc b/src/compiler/grammar.cc
index 715c4277..0376d77d 100644
--- a/src/compiler/grammar.cc
+++ b/src/compiler/grammar.cc
@@ -69,6 +69,15 @@ Grammar &Grammar::ubiquitous_tokens(const set<rule_ptr> &ubiquitous_tokens) {
   return *this;
 }
 
+const set<set<string>> &Grammar::expected_conflicts() const {
+  return expected_conflicts_;  // const reference to the stored sets of rule names
+}
+
+Grammar &Grammar::expected_conflicts(const set<set<string>> &expected_conflicts) {
+  expected_conflicts_ = expected_conflicts;  // overwrites whatever was stored before
+  return *this;  // hands back this Grammar so the call can be chained
+}
+
 const vector<pair<string, rule_ptr>> &Grammar::rules() const { return rules_; }
 
 }  // namespace tree_sitter
diff --git a/src/compiler/prepare_grammar/expand_repeats.cc b/src/compiler/prepare_grammar/expand_repeats.cc
index 695fd0ea..2ef1a191 100644
--- a/src/compiler/prepare_grammar/expand_repeats.cc
+++ b/src/compiler/prepare_grammar/expand_repeats.cc
@@ -78,7 +78,8 @@ SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
   aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(),
                    expander.aux_rules.end());
 
-  return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
+  return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens,
+                       grammar.expected_conflicts);
 }
 
 }  // namespace prepare_grammar
diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc
index 4dbf854b..510e320f 100644
--- a/src/compiler/prepare_grammar/extract_tokens.cc
+++ b/src/compiler/prepare_grammar/extract_tokens.cc
@@ -100,7 +100,7 @@ static tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> ubiq_token_err
 }
 
 tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
-    const Grammar &grammar) {
+    const InternedGrammar &grammar) {
   vector<pair<string, rule_ptr>> rules, tokens;
   vector<rule_ptr> separators;
   set<Symbol> ubiquitous_tokens;
@@ -109,7 +109,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
   TokenExtractor extractor;
 
   size_t i = 0;
-  for (auto &pair : grammar.rules()) {
+  for (auto &pair : grammar.rules) {
     if (is_token(pair.second)) {
       tokens.push_back(pair);
       symbol_replacer.replacements.insert(
@@ -123,7 +123,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
   for (auto &pair : rules)
     pair.second = symbol_replacer.apply(pair.second);
 
-  for (auto &rule : grammar.ubiquitous_tokens()) {
+  for (auto &rule : grammar.ubiquitous_tokens) {
     if (is_token(rule)) {
       separators.push_back(rule);
     } else {
@@ -139,7 +139,15 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
     }
   }
 
-  return make_tuple(SyntaxGrammar(rules, {}, ubiquitous_tokens),
+  set<set<rules::Symbol>> expected_conflicts;
+  for (auto &symbol_set : grammar.expected_conflicts) {
+    set<Symbol> new_symbol_set;
+    for (const Symbol &symbol : symbol_set)
+      new_symbol_set.insert(symbol_replacer.replace_symbol(symbol));
+    expected_conflicts.insert(new_symbol_set);
+  }
+
+  return make_tuple(SyntaxGrammar(rules, {}, ubiquitous_tokens, expected_conflicts),
                     LexicalGrammar(tokens, extractor.tokens, separators),
                     nullptr);
 }
diff --git a/src/compiler/prepare_grammar/extract_tokens.h b/src/compiler/prepare_grammar/extract_tokens.h
index 1f3b3413..8dfa0c6a 100644
--- a/src/compiler/prepare_grammar/extract_tokens.h
+++ b/src/compiler/prepare_grammar/extract_tokens.h
@@ -3,6 +3,7 @@
 
 #include <utility>
 #include "tree_sitter/compiler.h"
+#include "compiler/prepare_grammar/interned_grammar.h"
 
 namespace tree_sitter {
 
@@ -13,7 +14,7 @@ class LexicalGrammar;
 namespace prepare_grammar {
 
 std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
-    const Grammar &);
+    const InternedGrammar &);
 
 }  // namespace prepare_grammar
 }  // namespace tree_sitter
diff --git a/src/compiler/prepare_grammar/intern_symbols.cc b/src/compiler/prepare_grammar/intern_symbols.cc
index c5364cf3..657f84c0 100644
--- a/src/compiler/prepare_grammar/intern_symbols.cc
+++ b/src/compiler/prepare_grammar/intern_symbols.cc
@@ -22,8 +22,10 @@ class InternSymbols : public rules::IdentityRuleFn {
 
   rule_ptr apply_to(const rules::NamedSymbol *rule) {
     auto result = symbol_for_rule_name(rule->name);
-    if (!result.get())
+    if (!result.get()) {  // lookup produced a null pointer for this name
       missing_rule_name = rule->name;
+      return rules::blank();  // placeholder result; intern_symbols() inspects missing_rule_name after apply()
+    }
     return result;
   }
 
@@ -40,31 +42,40 @@ class InternSymbols : public rules::IdentityRuleFn {
   string missing_rule_name;
 };
 
-pair<Grammar, const GrammarError *> missing_rule_error(string rule_name) {
-  return { Grammar({}), new GrammarError(GrammarErrorTypeUndefinedSymbol,
-                                         "Undefined rule '" + rule_name + "'") };
+const GrammarError * missing_rule_error(string rule_name) {  // now yields only the error, not a pair
+  return new GrammarError(GrammarErrorTypeUndefinedSymbol,  // raw `new` -- NOTE(review): confirm who deletes it
+                          "Undefined rule '" + rule_name + "'");
 }
 
-pair<Grammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
+pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
+  InternedGrammar result;  // filled section by section; returned partially filled next to any error
   InternSymbols interner(grammar);
-  vector<pair<string, rule_ptr>> rules;
 
   for (auto &pair : grammar.rules()) {
     auto new_rule = interner.apply(pair.second);
     if (!interner.missing_rule_name.empty())
-      return missing_rule_error(interner.missing_rule_name);
-    rules.push_back({ pair.first, new_rule });
+      return {result, missing_rule_error(interner.missing_rule_name)};
+    result.rules.push_back({ pair.first, new_rule });  // keeps the rule's name and position
   }
 
-  set<rules::rule_ptr> ubiquitous_tokens;
   for (auto &rule : grammar.ubiquitous_tokens()) {
     auto new_rule = interner.apply(rule);
     if (!interner.missing_rule_name.empty())
-      return missing_rule_error(interner.missing_rule_name);
-    ubiquitous_tokens.insert(new_rule);
+      return {result, missing_rule_error(interner.missing_rule_name)};
+    result.ubiquitous_tokens.insert(new_rule);
   }
 
-  return { Grammar(rules).ubiquitous_tokens(ubiquitous_tokens), nullptr };
+  for (auto &names : grammar.expected_conflicts()) {  // turn each set of rule names into Symbols
+    set<rules::Symbol> entry;
+    for (auto &name : names) {
+      auto symbol = interner.symbol_for_rule_name(name);  // null pointer when the lookup fails
+      if (!symbol.get()) return {result, missing_rule_error(name)};  // same error as the loops above
+      entry.insert(*symbol);
+    }
+    result.expected_conflicts.insert(entry);
+  }
+
+  return { result, nullptr };  // nullptr error: every name was resolved
 }
 
 }  // namespace prepare_grammar
diff --git a/src/compiler/prepare_grammar/intern_symbols.h b/src/compiler/prepare_grammar/intern_symbols.h
index 9530f90d..1818ce91 100644
--- a/src/compiler/prepare_grammar/intern_symbols.h
+++ b/src/compiler/prepare_grammar/intern_symbols.h
@@ -4,6 +4,7 @@
 #include <utility>
 #include <string>
 #include "tree_sitter/compiler.h"
+#include "compiler/prepare_grammar/interned_grammar.h"
 
 namespace tree_sitter {
 
@@ -11,7 +12,7 @@ class Grammar;
 
 namespace prepare_grammar {
 
-std::pair<Grammar, const GrammarError *> intern_symbols(const Grammar &);
+std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
 
 }  // namespace prepare_grammar
 }  // namespace tree_sitter
diff --git a/src/compiler/prepare_grammar/interned_grammar.h b/src/compiler/prepare_grammar/interned_grammar.h
new file mode 100644
index 00000000..c29cd1b5
--- /dev/null
+++ b/src/compiler/prepare_grammar/interned_grammar.h
@@ -0,0 +1,24 @@
+#ifndef COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
+#define COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
+
+#include <string>
+#include <set>
+#include <utility>
+#include <vector>
+#include "tree_sitter/compiler.h"
+#include "compiler/rules/symbol.h"
+
+namespace tree_sitter {
+namespace prepare_grammar {
+
+struct InternedGrammar {  // produced by intern_symbols(), consumed by extract_tokens()
+  std::vector<std::pair<std::string, rules::rule_ptr>> rules;  // (rule name, rule) pairs in grammar order
+  std::set<rules::rule_ptr> ubiquitous_tokens;  // still rule_ptrs here; SyntaxGrammar stores Symbols
+  std::set<std::set<rules::Symbol>> expected_conflicts;  // built from Grammar::expected_conflicts() names
+};
+
+}  // namespace prepare_grammar
+}  // namespace tree_sitter
+
+
+#endif  // COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
diff --git a/src/compiler/syntax_grammar.cc b/src/compiler/syntax_grammar.cc
index 88192065..a58282ce 100644
--- a/src/compiler/syntax_grammar.cc
+++ b/src/compiler/syntax_grammar.cc
@@ -19,8 +19,10 @@ SyntaxGrammar::SyntaxGrammar(const vector<pair<string, rules::rule_ptr>> &rules,
 
 SyntaxGrammar::SyntaxGrammar(const vector<pair<string, rules::rule_ptr>> &rules,
                              const vector<pair<string, rules::rule_ptr>> &aux_rules,
-                             const set<rules::Symbol> &ubiquitous_tokens)
-    : rules(rules), aux_rules(aux_rules), ubiquitous_tokens(ubiquitous_tokens) {}
+                             const set<rules::Symbol> &ubiquitous_tokens,
+                             const set<set<rules::Symbol>> &expected_conflicts)
+    : rules(rules), aux_rules(aux_rules),  // each member is copy-initialized from the same-named parameter
+      ubiquitous_tokens(ubiquitous_tokens), expected_conflicts(expected_conflicts) {}
 
 const rules::rule_ptr &SyntaxGrammar::rule(const rules::Symbol &symbol) const {
   return symbol.is_auxiliary() ? aux_rules[symbol.index].second
diff --git a/src/compiler/syntax_grammar.h b/src/compiler/syntax_grammar.h
index 101ca78f..379bd6e1 100644
--- a/src/compiler/syntax_grammar.h
+++ b/src/compiler/syntax_grammar.h
@@ -19,14 +19,16 @@ class SyntaxGrammar {
   SyntaxGrammar(
       const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
       const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
-      const std::set<rules::Symbol> &ubiquitous_tokens);
+      const std::set<rules::Symbol> &ubiquitous_tokens,
+      const std::set<std::set<rules::Symbol>> &expected_conflicts);
 
   const std::string &rule_name(const rules::Symbol &symbol) const;
   const rules::rule_ptr &rule(const rules::Symbol &symbol) const;
-  
+
   const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
   const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
   std::set<rules::Symbol> ubiquitous_tokens;
+  std::set<std::set<rules::Symbol>> expected_conflicts;
 };
 
 }  // namespace tree_sitter