From 65bf1389e1e63f04f8f4b47753d79580130a0beb Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Tue, 11 Jul 2017 21:17:27 -0700
Subject: [PATCH 1/4] Add a way to automatically inline rules

---
 src/compiler/build_tables/parse_item.cc       |   4 +-
 .../build_tables/parse_item_set_builder.cc    | 103 ++++++++++++++++--
 .../build_tables/parse_item_set_builder.h     |   9 ++
 src/compiler/grammar.h                        |   1 +
 src/compiler/parse_grammar.cc                 |  20 +++-
 src/compiler/parse_grammar.h                  |   1 +
 .../prepare_grammar/expand_repeats.cc         |   1 +
 .../prepare_grammar/extract_tokens.cc         |   4 +
 .../prepare_grammar/flatten_grammar.cc        |   1 +
 .../prepare_grammar/initial_syntax_grammar.h  |   1 +
 .../prepare_grammar/intern_symbols.cc         |   7 ++
 .../prepare_grammar/interned_grammar.h        |   1 +
 src/compiler/syntax_grammar.h                 |   1 +
 .../test_grammars/inline_rules/corpus.txt     |  11 ++
 .../test_grammars/inline_rules/grammar.json   |  68 ++++++++++++
 15 files changed, 219 insertions(+), 14 deletions(-)
 create mode 100644 test/fixtures/test_grammars/inline_rules/corpus.txt
 create mode 100644 test/fixtures/test_grammars/inline_rules/grammar.json
diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc
index 0ef56e79..2d4257b6 100644
--- a/src/compiler/build_tables/parse_item.cc
+++ b/src/compiler/build_tables/parse_item.cc
@@ -156,14 +156,14 @@ struct hash<ParseItem> {
     if (item.is_done()) {
       if (!item.production->empty()) {
         hash_combine(&result, item.production->back().precedence);
-        hash_combine(&result, item.production->back().associativity);
+        hash_combine<unsigned>(&result, item.production->back().associativity);
       }
     } else {
       for (size_t i = 0, n = item.production->size(); i < n; i++) {
         auto &step = item.production->at(i);
         hash_combine(&result, step.symbol);
         hash_combine(&result, step.precedence);
-        hash_combine(&result, step.associativity);
+        hash_combine<unsigned>(&result, step.associativity);
       }
     }
     return result;
diff --git a/src/compiler/build_tables/parse_item_set_builder.cc b/src/compiler/build_tables/parse_item_set_builder.cc
index 236be6f0..3b119157 100644
--- a/src/compiler/build_tables/parse_item_set_builder.cc
+++ b/src/compiler/build_tables/parse_item_set_builder.cc
@@ -1,4 +1,5 @@
 #include "compiler/build_tables/parse_item_set_builder.h"
+#include <algorithm>
 #include <cassert>
 #include <set>
 #include <unordered_map>
@@ -11,8 +12,10 @@
 namespace tree_sitter {
 namespace build_tables {
 
+using std::move;
 using std::vector;
 using std::set;
+using std::find;
 using std::get;
 using std::pair;
 using std::tuple;
@@ -21,8 +24,36 @@ using std::make_tuple;
 using rules::Symbol;
 using rules::NONE;
 
+// Returns the productions formed by replacing the symbol at `item`'s current
+// step with each production of that symbol's variable. The last spliced-in step
+// is given `item`'s precedence and associativity; duplicates are dropped.
+static vector<Production> inline_production(const ParseItem &item, const SyntaxGrammar &grammar) {
+  auto begin = item.production->steps.begin();
+  auto end = item.production->steps.end();
+  auto step = begin + item.step_index;
+
+  vector<Production> result;
+  for (const Production &production_to_insert : grammar.variables[item.next_symbol().index].productions) {
+    const auto &inserted_steps = production_to_insert.steps;
+    Production production{{begin, step}, item.production->dynamic_precedence};
+    production.steps.insert(production.steps.end(), inserted_steps.begin(), inserted_steps.end());
+
+    // With nothing spliced in, back() would be a preceding step or not exist.
+    if (!inserted_steps.empty()) {
+      production.back().precedence = item.precedence();
+      production.back().associativity = item.associativity();
+    }
+    production.steps.insert(production.steps.end(), step + 1, end);
+
+    if (find(result.begin(), result.end(), production) == result.end()) {
+      result.push_back(move(production));
+    }
+  }
+  return result;
+}
+
 ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
-                                         const LexicalGrammar &lexical_grammar) {
+                                         const LexicalGrammar &lexical_grammar) : grammar{grammar} {
   vector<Symbol> symbols_to_process;
   set<Symbol::Index> processed_non_terminals;
 
@@ -145,24 +176,56 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
 
     for (auto &pair : cached_lookaheads_by_non_terminal) {
       for (const Production &production : grammar.variables[pair.first].productions) {
-        component_cache[i].push_back({
-          ParseItem(Symbol::non_terminal(pair.first), production, 0),
-          pair.second.first,
-          pair.second.second
-        });
+        Symbol lhs = Symbol::non_terminal(pair.first);
+        ParseItem item(lhs, production, 0);
+
+        if (grammar.variables_to_inline.count(item.next_symbol())) {
+          vector<Production> &inlined_productions = inlined_productions_by_original_production[item];
+          if (inlined_productions.empty()) {
+            inlined_productions = inline_production(item, grammar);
+          }
+
+          for (const Production &inlined_production : inlined_productions) {
+            ParseItemSetComponent component{
+              ParseItem(lhs, inlined_production, 0),
+              pair.second.first,
+              pair.second.second
+            };
+
+            if (find(component_cache[i].begin(), component_cache[i].end(), component) == component_cache[i].end()) {
+              component_cache[i].push_back(component);
+            }
+          }
+        } else if (!grammar.variables_to_inline.count(lhs)) {
+          ParseItemSetComponent component{
+            ParseItem(lhs, production, 0),
+            pair.second.first,
+            pair.second.second
+          };
+
+          if (find(component_cache[i].begin(), component_cache[i].end(), component) == component_cache[i].end()) {
+            component_cache[i].push_back(component);
+          }
+        }
       }
     }
   }
 }
 
 void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
-  for (const auto &pair : item_set->entries) {
-    const ParseItem &item = pair.first;
-    const LookaheadSet &lookaheads = pair.second;
-    if (item.lhs() != rules::START() && item.step_index == 0) continue;
+  for (auto iter = item_set->entries.begin(), end = item_set->entries.end(); iter != end;) {
+    const ParseItem &item = iter->first;
+    const LookaheadSet &lookaheads = iter->second;
+    if (item.lhs() != rules::START() && item.step_index == 0) {
+      ++iter;
+      continue;
+    }
 
     const Symbol &next_symbol = item.next_symbol();
-    if (!next_symbol.is_non_terminal() || next_symbol.is_built_in()) continue;
+    if (!next_symbol.is_non_terminal() || next_symbol.is_built_in()) {
+      ++iter;
+      continue;
+    }
 
     LookaheadSet next_lookaheads;
     size_t next_step = item.step_index + 1;
@@ -178,6 +241,24 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
       current_lookaheads.insert_all(component.lookaheads);
       if (component.propagates_lookaheads) current_lookaheads.insert_all(next_lookaheads);
     }
+
+    if (grammar.variables_to_inline.count(next_symbol)) {
+      vector<Production> &inlined_productions = inlined_productions_by_original_production[item];
+      if (inlined_productions.empty()) {
+        inlined_productions = inline_production(item, grammar);
+      }
+
+      for (const Production &inlined_production : inlined_productions) {
+        item_set->entries.insert({
+          ParseItem(item.lhs(), inlined_production, item.step_index),
+          lookaheads
+        });
+      }
+
+      iter = item_set->entries.erase(iter);
+    } else {
+      ++iter;
+    }
   }
 }
 
diff --git a/src/compiler/build_tables/parse_item_set_builder.h b/src/compiler/build_tables/parse_item_set_builder.h
index 5357a385..c180f38b 100644
--- a/src/compiler/build_tables/parse_item_set_builder.h
+++ b/src/compiler/build_tables/parse_item_set_builder.h
@@ -4,6 +4,7 @@
 #include "compiler/build_tables/parse_item.h"
 #include "compiler/rule.h"
 #include <map>
+#include <vector>
 
 namespace tree_sitter {
 
@@ -17,11 +18,19 @@ class ParseItemSetBuilder {
     ParseItem item;
     LookaheadSet lookaheads;
     bool propagates_lookaheads;
+
+    // Member-wise equality; const so it also applies to const components.
+    inline bool operator==(const ParseItemSetComponent &other) const {
+      return item == other.item && lookaheads == other.lookaheads &&
+        propagates_lookaheads == other.propagates_lookaheads;
+    }
   };
 
+  const SyntaxGrammar &grammar;
   std::map<rules::Symbol, LookaheadSet> first_sets;
   std::map<rules::Symbol, LookaheadSet> last_sets;
   std::map<rules::Symbol::Index, std::vector<ParseItemSetComponent>> component_cache;
+  std::map<ParseItem, std::vector<Production>> inlined_productions_by_original_production;
 
  public:
   ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &);
diff --git a/src/compiler/grammar.h b/src/compiler/grammar.h
index 6d16524b..54fe69e9 100644
--- a/src/compiler/grammar.h
+++ b/src/compiler/grammar.h
@@ -31,6 +31,7 @@ struct InputGrammar {
   std::vector<rules::Rule> extra_tokens;
   std::vector<std::unordered_set<rules::NamedSymbol>> expected_conflicts;
   std::vector<Variable> external_tokens;
+  std::unordered_set<rules::NamedSymbol> variables_to_inline;
 };
 
 }  // namespace tree_sitter
diff --git a/src/compiler/parse_grammar.cc b/src/compiler/parse_grammar.cc
index 7589904c..43ceed51 100644
--- a/src/compiler/parse_grammar.cc
+++ b/src/compiler/parse_grammar.cc
@@ -205,7 +205,7 @@ ParseGrammarResult parse_grammar(const string &input) {
   string error_message;
   string name;
   InputGrammar grammar;
-  json_value name_json, rules_json, extras_json, conflicts_json, external_tokens_json;
+  json_value name_json, rules_json, extras_json, conflicts_json, external_tokens_json, inline_rules_json;
 
   json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
   char parse_error[json_error_max];
@@ -299,6 +299,24 @@ ParseGrammarResult parse_grammar(const string &input) {
     }
   }
 
+  inline_rules_json = grammar_json->operator[]("inline");
+  if (inline_rules_json.type != json_none) {
+    if (inline_rules_json.type != json_array) {
+      error_message = "Inline rules must be an array";
+      goto error;
+    }
+
+    for (size_t i = 0, length = inline_rules_json.u.array.length; i < length; i++) {
+      json_value *inline_rule_json = inline_rules_json.u.array.values[i];
+      if (inline_rule_json->type != json_string) {
+        error_message = "Inline rules must be an array of rule names";
+        goto error;
+      }
+
+      grammar.variables_to_inline.insert(rules::NamedSymbol{string(inline_rule_json->u.string.ptr)});
+    }
+  }
+
   external_tokens_json = grammar_json->operator[]("externals");
   if (external_tokens_json.type != json_none) {
     if (external_tokens_json.type != json_array) {
diff --git a/src/compiler/parse_grammar.h b/src/compiler/parse_grammar.h
index 04e7672b..c24cd9ca 100644
--- a/src/compiler/parse_grammar.h
+++ b/src/compiler/parse_grammar.h
@@ -2,6 +2,7 @@
 #define COMPILER_GRAMMAR_JSON_H_
 
 #include <string>
+#include <unordered_set>
 #include "tree_sitter/compiler.h"
 #include "compiler/grammar.h"
 
diff --git a/src/compiler/prepare_grammar/expand_repeats.cc b/src/compiler/prepare_grammar/expand_repeats.cc
index 39b2075d..c4a25634 100644
--- a/src/compiler/prepare_grammar/expand_repeats.cc
+++ b/src/compiler/prepare_grammar/expand_repeats.cc
@@ -94,6 +94,7 @@ InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
   result.extra_tokens = grammar.extra_tokens;
   result.expected_conflicts = grammar.expected_conflicts;
   result.external_tokens = grammar.external_tokens;
+  result.variables_to_inline = grammar.variables_to_inline;
 
   ExpandRepeats expander(result.variables.size());
   for (auto &variable : result.variables) {
diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc
index 6893cde4..73d3d866 100644
--- a/src/compiler/prepare_grammar/extract_tokens.cc
+++ b/src/compiler/prepare_grammar/extract_tokens.cc
@@ -235,6 +235,10 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
     syntax_grammar.expected_conflicts.insert(new_conflict_set);
   }
 
+  for (const Symbol &symbol : grammar.variables_to_inline) {
+    syntax_grammar.variables_to_inline.insert(symbol_replacer.replace_symbol(symbol));
+  }
+
   // The grammar's extra tokens can be either token rules or symbols
   // pointing to token rules. If they are symbols, then they'll be handled by
   // the parser; add them to the syntax grammar's extra tokens. If they
diff --git a/src/compiler/prepare_grammar/flatten_grammar.cc b/src/compiler/prepare_grammar/flatten_grammar.cc
index 86c76a86..e41e8398 100644
--- a/src/compiler/prepare_grammar/flatten_grammar.cc
+++ b/src/compiler/prepare_grammar/flatten_grammar.cc
@@ -111,6 +111,7 @@ SyntaxVariable flatten_rule(const Variable &variable) {
 pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &grammar) {
   SyntaxGrammar result;
   result.external_tokens = grammar.external_tokens;
+  result.variables_to_inline = grammar.variables_to_inline;
 
   for (const auto &expected_conflict : grammar.expected_conflicts) {
     result.expected_conflicts.insert({
diff --git a/src/compiler/prepare_grammar/initial_syntax_grammar.h b/src/compiler/prepare_grammar/initial_syntax_grammar.h
index 55eb2b7e..881c6396 100644
--- a/src/compiler/prepare_grammar/initial_syntax_grammar.h
+++ b/src/compiler/prepare_grammar/initial_syntax_grammar.h
@@ -16,6 +16,7 @@ struct InitialSyntaxGrammar {
   std::set<rules::Symbol> extra_tokens;
   std::set<std::set<rules::Symbol>> expected_conflicts;
   std::vector<ExternalToken> external_tokens;
+  std::set<rules::Symbol> variables_to_inline;
 };
 
 }  // namespace prepare_grammar
diff --git a/src/compiler/prepare_grammar/intern_symbols.cc b/src/compiler/prepare_grammar/intern_symbols.cc
index deaeb122..7bb2a80b 100644
--- a/src/compiler/prepare_grammar/intern_symbols.cc
+++ b/src/compiler/prepare_grammar/intern_symbols.cc
@@ -142,6 +142,13 @@ pair<InternedGrammar, CompileError> intern_symbols(const InputGrammar &grammar)
     result.expected_conflicts.insert(entry);
   }
 
+  for (auto &named_symbol : grammar.variables_to_inline) {
+    auto symbol = interner.intern_symbol(named_symbol);
+    if (symbol != rules::NONE()) {
+      result.variables_to_inline.insert(symbol);
+    }
+  }
+
   return {result, CompileError::none()};
 }
 
diff --git a/src/compiler/prepare_grammar/interned_grammar.h b/src/compiler/prepare_grammar/interned_grammar.h
index 99987f42..c96dfa66 100644
--- a/src/compiler/prepare_grammar/interned_grammar.h
+++ b/src/compiler/prepare_grammar/interned_grammar.h
@@ -15,6 +15,7 @@ struct InternedGrammar {
   std::vector<rules::Rule> extra_tokens;
   std::set<std::set<rules::Symbol>> expected_conflicts;
   std::vector<Variable> external_tokens;
+  std::set<rules::Symbol> variables_to_inline;
 };
 
 }  // namespace prepare_grammar
diff --git a/src/compiler/syntax_grammar.h b/src/compiler/syntax_grammar.h
index 55e55568..3c3d3b66 100644
--- a/src/compiler/syntax_grammar.h
+++ b/src/compiler/syntax_grammar.h
@@ -74,6 +74,7 @@ struct SyntaxGrammar {
   std::set<rules::Symbol> extra_tokens;
   std::set<ConflictSet> expected_conflicts;
   std::vector<ExternalToken> external_tokens;
+  std::set<rules::Symbol> variables_to_inline;
 };
 
 }  // namespace tree_sitter
diff --git a/test/fixtures/test_grammars/inline_rules/corpus.txt b/test/fixtures/test_grammars/inline_rules/corpus.txt
new file mode 100644
index 00000000..af5e496e
--- /dev/null
+++ b/test/fixtures/test_grammars/inline_rules/corpus.txt
@@ -0,0 +1,11 @@
+==================================
+Expressions
+==================================
+
+1 + 2 * 3;
+
+---
+
+(statement (sum
+  (number)
+  (product (number) (number))))
diff --git a/test/fixtures/test_grammars/inline_rules/grammar.json b/test/fixtures/test_grammars/inline_rules/grammar.json
new file mode 100644
index 00000000..4438004f
--- /dev/null
+++ b/test/fixtures/test_grammars/inline_rules/grammar.json
@@ -0,0 +1,68 @@
+{
+  "name": "inline_rules",
+
+  "extras": [
+    {"type": "PATTERN", "value": "\\s"}
+  ],
+
+  "inline": [
+    "expression"
+  ],
+
+  "rules": {
+    "statement": {
+      "type": "SEQ",
+      "members": [
+        {"type": "SYMBOL", "name": "expression"},
+        {"type": "STRING", "value": ";"}
+      ]
+    },
+
+    "expression": {
+      "type": "CHOICE",
+      "members": [
+        {"type": "SYMBOL", "name": "sum"},
+        {"type": "SYMBOL", "name": "product"},
+        {"type": "SYMBOL", "name": "number"},
+        {"type": "SYMBOL", "name": "parenthesized_expression"}
+      ]
+    },
+
+    "parenthesized_expression": {
+      "type": "SEQ",
+      "members": [
+        {"type": "STRING", "value": "("},
+        {"type": "SYMBOL", "name": "expression"},
+        {"type": "STRING", "value": ")"}
+      ]
+    },
+
+    "sum": {
+      "type": "PREC_LEFT",
+      "value": 0,
+      "content": {
+        "type": "SEQ",
+        "members": [
+          {"type": "SYMBOL", "name": "expression"},
+          {"type": "STRING", "value": "+"},
+          {"type": "SYMBOL", "name": "expression"}
+        ]
+      }
+    },
+
+    "product": {
+      "type": "PREC_LEFT",
+      "value": 2,
+      "content": {
+        "type": "SEQ",
+        "members": [
+          {"type": "SYMBOL", "name": "expression"},
+          {"type": "STRING", "value": "*"},
+          {"type": "SYMBOL", "name": "expression"}
+        ]
+      }
+    },
+
+    "number": {"type": "PATTERN", "value": "\\d+"}
+  }
+}

From 5c8f7c035e7cfa8b65f119114a3a0f181ecf418a Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Wed, 12 Jul 2017 09:42:56 -0700
Subject: [PATCH 2/4] Add stream operator for ParseItemSet

---
 test/helpers/stream_methods.cc | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/test/helpers/stream_methods.cc b/test/helpers/stream_methods.cc
index f88ccaee..c8c4eb30 100644
--- a/test/helpers/stream_methods.cc
+++ b/test/helpers/stream_methods.cc
@@ -180,6 +180,35 @@ ostream &operator<<(ostream &stream, const LexItemSet &item_set) {
   return stream << item_set.entries;
 }
 
+// Writes `item` as "(ParseItem <lhs> -> <symbol> <precedence> <associativity> ...)",
+// with a bullet before the step at `step_index`, or at the end for a finished item.
+ostream &operator<<(ostream &stream, const ParseItem &item) {
+  const size_t step_count = item.production->size();
+  stream << "(ParseItem " << item.lhs() << " ->";
+  for (size_t index = 0; index < step_count; index++) {
+    const auto &step = item.production->at(index);
+    if (index == item.step_index) stream << " •";
+    stream << " " << step.symbol << " " << step.precedence << " "
+           << static_cast<int>(step.associativity);
+  }
+
+  if (item.step_index == step_count) stream << " • ";
+  stream << ")";
+  return stream;
+}
+
+ostream &operator<<(ostream &stream, const ParseItemSet &item_set) {
+  return stream << item_set.entries;  // streams the item set's `entries` member
+}
+
+ostream &operator<<(ostream &stream, const LookaheadSet &lookaheads) {
+  if (lookaheads.entries.get()) {
+    return stream << *lookaheads.entries;
+  } else {
+    return stream << "()";
+  }
+}
+
 ostream &operator<<(ostream &stream, const LexItemSet::Transition &transition) {
   return stream << "(Transition " << transition.destination << " prec:" << transition.precedence << ")";
 }

From e4f57d6fee2ce6c7cc68a1502d1e181ee8c0179f Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Wed, 12 Jul 2017 10:12:42 -0700
Subject: [PATCH 3/4] Test more cases in fixture grammar with inline rules

---
 .../test_grammars/inline_rules/corpus.txt         | 15 ++++++++++++---
 .../test_grammars/inline_rules/grammar.json       |  7 +++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/test/fixtures/test_grammars/inline_rules/corpus.txt b/test/fixtures/test_grammars/inline_rules/corpus.txt
index af5e496e..df37566f 100644
--- a/test/fixtures/test_grammars/inline_rules/corpus.txt
+++ b/test/fixtures/test_grammars/inline_rules/corpus.txt
@@ -3,9 +3,18 @@ Expressions
 ==================================
 
 1 + 2 * 3;
+4 * 5 + 6;
+7 * (8 + 9);
 
 ---
 
-(statement (sum
-  (number)
-  (product (number) (number))))
+(program
+  (statement (sum
+    (number)
+    (product (number) (number))))
+  (statement (sum
+    (product (number) (number))
+    (number)))
+  (statement (product
+    (number)
+    (parenthesized_expression (sum (number) (number))))))
diff --git a/test/fixtures/test_grammars/inline_rules/grammar.json b/test/fixtures/test_grammars/inline_rules/grammar.json
index 4438004f..7825314b 100644
--- a/test/fixtures/test_grammars/inline_rules/grammar.json
+++ b/test/fixtures/test_grammars/inline_rules/grammar.json
@@ -10,6 +10,13 @@
   ],
 
   "rules": {
+    "program": {
+      "type": "REPEAT1",
+      "content": {
+        "type": "SYMBOL",
+        "name": "statement"
+      }
+    },
     "statement": {
       "type": "SEQ",
       "members": [

From a3006bc2b58c361295c9786186a38cc782c5b080 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Wed, 12 Jul 2017 16:02:01 -0700
Subject: [PATCH 4/4] Represent LookaheadSet using vectors of bool

---
 .../build_tables/build_parse_table.cc         | 12 +--
 src/compiler/build_tables/lookahead_set.cc    | 98 +++++++++++++++----
 src/compiler/build_tables/lookahead_set.h     | 37 ++++++-
 src/compiler/build_tables/parse_item.cc       |  7 +-
 test/helpers/stream_methods.cc                | 10 +-
 5 files changed, 128 insertions(+), 36 deletions(-)

diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc
index 872177bf..f61e1271 100644
--- a/src/compiler/build_tables/build_parse_table.cc
+++ b/src/compiler/build_tables/build_parse_table.cc
@@ -196,7 +196,7 @@ class ParseTableBuilder {
           ParseAction::Reduce(item.lhs(), item.step_index, *item.production);
 
         int precedence = item.precedence();
-        for (Symbol lookahead : *lookahead_symbols.entries) {
+        lookahead_symbols.for_each([&](Symbol lookahead) {
           ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
 
           // Only add the highest-precedence Reduce actions to the parse table.
@@ -223,7 +223,7 @@ class ParseTableBuilder {
               }
             }
           }
-        }
+        });
 
       // If the item is unfinished, create a new item by advancing one symbol.
       // Add that new item to a successor item set.
@@ -694,15 +694,15 @@ class ParseTableBuilder {
       const LookaheadSet &right_tokens = item_set_builder.get_first_set(symbol);
 
       if (!left_tokens.empty() && !right_tokens.empty()) {
-        for (const Symbol &left_symbol : *left_tokens.entries) {
+        left_tokens.for_each([&](Symbol left_symbol) {
           if (left_symbol.is_terminal() && !left_symbol.is_built_in()) {
-            for (const Symbol &right_symbol : *right_tokens.entries) {
+            right_tokens.for_each([&](Symbol right_symbol) {
               if (right_symbol.is_terminal() && !right_symbol.is_built_in()) {
                 following_terminals_by_terminal_index[left_symbol.index].insert(right_symbol.index);
               }
-            }
+            });
           }
-        }
+        });
       }
     }
 
diff --git a/src/compiler/build_tables/lookahead_set.cc b/src/compiler/build_tables/lookahead_set.cc
index b9604c24..443ba0cd 100644
--- a/src/compiler/build_tables/lookahead_set.cc
+++ b/src/compiler/build_tables/lookahead_set.cc
@@ -6,41 +6,95 @@
 namespace tree_sitter {
 namespace build_tables {
 
-using std::set;
-using std::make_shared;
+using std::vector;
 using rules::Symbol;
 
-LookaheadSet::LookaheadSet() : entries(nullptr) {}
+// Sets in `target` every bit that is set in `source`, growing `target` when
+// `source` is longer. Returns true if any bit of `target` changed.
+static bool merge_bits(vector<bool> *target, const vector<bool> &source) {
+  if (source.size() > target->size()) {
+    target->resize(source.size());
+  }
+
+  bool changed = false;
+  for (size_t i = 0, n = source.size(); i < n; i++) {
+    if (source[i] && !(*target)[i]) {
+      (*target)[i] = true;
+      changed = true;
+    }
+  }
+  return changed;
+}
+
+// Creates an empty set.
+LookaheadSet::LookaheadSet() {}
 
-LookaheadSet::LookaheadSet(const set<Symbol> &symbols)
-    : entries(make_shared<set<Symbol>>(symbols)) {}
+// Creates a set containing every symbol in `symbols`.
+LookaheadSet::LookaheadSet(const vector<Symbol> &symbols) {
+  for (const Symbol &symbol : symbols) insert(symbol);
+}
 
 bool LookaheadSet::empty() const {
-  return !entries.get() || entries->empty();
+  // A bit vector is only ever grown in order to set a bit, so a non-empty
+  // vector always holds at least one member.
+  return terminal_bits.empty() && external_bits.empty() && !eof;
 }
 
 bool LookaheadSet::operator==(const LookaheadSet &other) const {
-  return *entries == *other.entries;
+  return
+    eof == other.eof &&
+    external_bits == other.external_bits &&
+    terminal_bits == other.terminal_bits;
 }
 
 bool LookaheadSet::contains(const Symbol &symbol) const {
-  return entries->find(symbol) != entries->end();
+  if (symbol == rules::END_OF_INPUT()) return eof;
+  auto &bits = symbol.is_external() ? external_bits : terminal_bits;
+  return bits.size() > symbol.index && bits[symbol.index];
+}
+
+// Returns the number of symbols in the set.
+size_t LookaheadSet::size() const {
+  size_t result = 0;
+  for (bool bit : external_bits) if (bit) result++;
+  for (bool bit : terminal_bits) if (bit) result++;
+  if (eof) result++;
+  return result;
 }
 
 bool LookaheadSet::insert_all(const LookaheadSet &other) {
-  if (!other.entries.get())
-    return false;
-  if (!entries.get())
-    entries = make_shared<set<Symbol>>();
-  size_t previous_size = entries->size();
-  entries->insert(other.entries->begin(), other.entries->end());
-  return entries->size() > previous_size;
+  bool result = false;
+
+  // End-of-input is tracked by a flag of its own rather than a bit.
+  if (other.eof && !eof) {
+    eof = true;
+    result = true;
+  }
+
+  // Both merges must run, so they are not combined with `||`.
+  if (merge_bits(&external_bits, other.external_bits)) result = true;
+  if (merge_bits(&terminal_bits, other.terminal_bits)) result = true;
+  return result;
 }
 
 bool LookaheadSet::insert(const Symbol &symbol) {
-  if (!entries.get())
-    entries = make_shared<set<Symbol>>();
-  return entries->insert(symbol).second;
+  if (symbol == rules::END_OF_INPUT()) {
+    if (!eof) {
+      eof = true;
+      return true;
+    }
+    return false;
+  }
+
+  auto &bits = symbol.is_external() ? external_bits : terminal_bits;
+  if (bits.size() <= symbol.index) {
+    bits.resize(symbol.index + 1);
+  }
+  if (!bits[symbol.index]) {
+    bits[symbol.index] = true;
+    return true;
+  }
+  return false;
 }
 
 }  // namespace build_tables
diff --git a/src/compiler/build_tables/lookahead_set.h b/src/compiler/build_tables/lookahead_set.h
index 74cd63e2..d0aa9ee7 100644
--- a/src/compiler/build_tables/lookahead_set.h
+++ b/src/compiler/build_tables/lookahead_set.h
@@ -1,25 +1,51 @@
 #ifndef COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_
 #define COMPILER_BUILD_TABLES_LOOKAHEAD_SET_H_
 
-#include <set>
-#include <memory>
+#include <vector>
 #include "compiler/rule.h"
 
 namespace tree_sitter {
 namespace build_tables {
 
+// A set of lookahead symbols stored as bit vectors indexed by symbol index.
 class LookaheadSet {
+  // Bit i is set when Symbol::terminal(i) / Symbol::external(i) is a member.
+  std::vector<bool> terminal_bits;
+  std::vector<bool> external_bits;
+  // Whether END_OF_INPUT is a member; it is a flag rather than a bit.
+  bool eof = false;
+
  public:
   LookaheadSet();
-  explicit LookaheadSet(const std::set<rules::Symbol> &);
+  explicit LookaheadSet(const std::vector<rules::Symbol> &);
 
   bool empty() const;
+  size_t size() const;
   bool operator==(const LookaheadSet &) const;
   bool contains(const rules::Symbol &) const;
   bool insert_all(const LookaheadSet &);
   bool insert(const rules::Symbol &);
 
-  std::shared_ptr<std::set<rules::Symbol>> entries;
+  // Calls `callback(symbol)` once per member: external tokens by ascending
+  // index, then END_OF_INPUT if present, then terminals by ascending index.
+  template <typename Callback>
+  void for_each(const Callback &callback) const {
+    for (size_t i = 0, n = external_bits.size(); i < n; i++) {
+      if (external_bits[i]) {
+        callback(rules::Symbol::external(static_cast<rules::Symbol::Index>(i)));
+      }
+    }
+
+    if (eof) {
+      callback(rules::END_OF_INPUT());
+    }
+
+    for (size_t i = 0, n = terminal_bits.size(); i < n; i++) {
+      if (terminal_bits[i]) {
+        callback(rules::Symbol::terminal(static_cast<rules::Symbol::Index>(i)));
+      }
+    }
+  }
 };
 
 }  // namespace build_tables
diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc
index 2d4257b6..55db646c 100644
--- a/src/compiler/build_tables/parse_item.cc
+++ b/src/compiler/build_tables/parse_item.cc
@@ -178,9 +178,10 @@ size_t hash<ParseItemSet>::operator()(const ParseItemSet &item_set) const {
     const auto &lookahead_set = pair.second;
 
     hash_combine(&result, item);
-    hash_combine(&result, lookahead_set.entries->size());
-    for (auto index : *pair.second.entries)
-      hash_combine(&result, index);
+    hash_combine(&result, lookahead_set.size());
+    lookahead_set.for_each([&result](Symbol symbol) {
+      hash_combine(&result, symbol);
+    });
   }
   return result;
 }
diff --git a/test/helpers/stream_methods.cc b/test/helpers/stream_methods.cc
index c8c4eb30..9b13303c 100644
--- a/test/helpers/stream_methods.cc
+++ b/test/helpers/stream_methods.cc
@@ -202,11 +202,11 @@ ostream &operator<<(ostream &stream, const ParseItemSet &item_set) {
 }
 
 ostream &operator<<(ostream &stream, const LookaheadSet &lookaheads) {
-  if (lookaheads.entries.get()) {
-    return stream << *lookaheads.entries;
-  } else {
-    return stream << "()";
-  }
+  // Output shape: "(LookaheadSet" + " <symbol>" per member + ")".
+  stream << "(LookaheadSet";
+  lookaheads.for_each([&](Symbol entry) { stream << " " << entry; });
+  stream << ")";
+  return stream;
 }
 
 ostream &operator<<(ostream &stream, const LexItemSet::Transition &transition) {