Store productions' end rule ids in the vector

2015-01-27 19:56:49 -08:00 · 2015-01-27 19:56:49 -08:00 · 8ac4b9fc17
commit 8ac4b9fc17
parent 1ba8701ada
12 changed files with 74 additions and 81 deletions
--- a/src/compiler/build_tables/build_parse_table.cc
+++ b/src/compiler/build_tables/build_parse_table.cc
@@ -176,11 +176,12 @@ class ParseTableBuilder {
  }

  bool item_is_done(const ParseItem &item) {
-    return item.consumed_symbol_count == grammar.productions(item.lhs)[item.production_index].size();
+    return item.consumed_symbol_count ==
+      grammar.productions(item.lhs)[item.production_index].symbol_count();
  }

  int item_precedence(const ParseItem &item) {
-    return grammar.productions(item.lhs)[item.production_index].precedence_at(item.consumed_symbol_count - 1);
+    return grammar.productions(item.lhs)[item.production_index][item.consumed_symbol_count - 1].precedence;
  }

  void record_conflict(const Symbol &sym, const ParseAction &left,
--- a/src/compiler/build_tables/item_set_closure.cc
+++ b/src/compiler/build_tables/item_set_closure.cc
@@ -35,18 +35,18 @@ void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) {
      continue;

    const Production &item_production = grammar.productions(item.lhs)[item.production_index];
-    if (item_production.size() <= item.consumed_symbol_count)
+    if (item.consumed_symbol_count >= item_production.symbol_count())
      continue;

-    Symbol symbol = item_production.symbol_at(item.consumed_symbol_count);
+    Symbol symbol = item_production[item.consumed_symbol_count].symbol;
    if (symbol.is_token() || symbol.is_built_in())
      continue;

    set<Symbol> next_lookahead_symbols;
-    if (item.consumed_symbol_count + 1 >= item_production.size()) {
+    if (item.consumed_symbol_count + 1 >= item_production.symbol_count()) {
      next_lookahead_symbols = lookahead_symbols;
    } else {
-      vector<Symbol> symbols_to_process({ item_production.symbol_at(item.consumed_symbol_count + 1) });
+      vector<Symbol> symbols_to_process({ item_production[item.consumed_symbol_count + 1].symbol });

      while (!symbols_to_process.empty()) {
        Symbol following_symbol = symbols_to_process.back();
@@ -55,14 +55,14 @@ void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) {
          continue;

        for (const auto &production : grammar.productions(following_symbol))
-          symbols_to_process.push_back(production.symbol_at(0));
+          symbols_to_process.push_back(production[0].symbol);
      }
    }

    size_t i = 0;
    for (const Production &production : grammar.productions(symbol)) {
      items_to_process.push_back({
-        ParseItem(symbol, i, production.rule_id_at(0), 0),
+        ParseItem(symbol, i, production[0].rule_id, 0),
        next_lookahead_symbols
      });
      i++;
--- a/src/compiler/build_tables/item_set_transitions.cc
+++ b/src/compiler/build_tables/item_set_transitions.cc
@@ -23,11 +23,11 @@ map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set,
    const ParseItem &item = pair.first;
    const set<Symbol> &lookahead_symbols = pair.second;
    const Production &production = grammar.productions(item.lhs)[item.production_index];
-    if (production.size() <= item.consumed_symbol_count)
+    if (item.consumed_symbol_count >= production.symbol_count())
      continue;

-    const Symbol &symbol = production.symbol_at(item.consumed_symbol_count);
-    int rule_id = production.rule_id_at(item.consumed_symbol_count + 1);
+    const Symbol &symbol = production[item.consumed_symbol_count].symbol;
+    int rule_id = production[item.consumed_symbol_count + 1].rule_id;
    ParseItem new_item(item.lhs, item.production_index, rule_id, item.consumed_symbol_count + 1);

    result[symbol][new_item].insert(lookahead_symbols.begin(), lookahead_symbols.end());
--- a/src/compiler/prepare_grammar/flatten_grammar.cc
+++ b/src/compiler/prepare_grammar/flatten_grammar.cc
@@ -5,6 +5,7 @@
 #include "compiler/rules/seq.h"
 #include "compiler/rules/symbol.h"
 #include "compiler/rules/metadata.h"
+#include "compiler/rules/built_in_symbols.h"
 #include <string>
 #include <algorithm>

@@ -63,16 +64,16 @@ class FlattenRule : public rules::RuleFn<void> {
 Production flatten_rule(const rule_ptr &rule) {
  FlattenRule flattener;
  flattener.apply(rule);
-  return Production(flattener.entries, 0);
+  int end_precedence = flattener.entries.back().precedence;
+  flattener.entries.push_back({ rules::NONE(), end_precedence, 0 });
+  return Production(flattener.entries);
 }

 struct ProductionSlice {
  vector<ProductionEntry>::const_iterator start;
  vector<ProductionEntry>::const_iterator end;
-  int end_precedence;

  bool operator==(const ProductionSlice &other) const {
-    if (end_precedence != other.end_precedence) return false;
    if (end - start != other.end - other.start) return false;
    for (auto iter1 = start, iter2 = other.start; iter1 != end; ++iter1, ++iter2)
      if (!(iter1->symbol == iter2->symbol) || iter1->precedence != iter2->precedence)
@@ -82,11 +83,10 @@ struct ProductionSlice {
 };

 void assign_rule_ids(Production *production, vector<ProductionSlice> *unique_slices) {
-  auto &entries = production->entries;
-  auto end = entries.end();
+  auto end = production->entries.end();

-  for (auto iter = entries.begin(); iter != end; ++iter) {
-    ProductionSlice slice{iter, end, 0};
+  for (auto iter = production->entries.begin(); iter != end; ++iter) {
+    ProductionSlice slice{iter, end};
    auto existing_id = find(unique_slices->cbegin(), unique_slices->cend(), slice);
    if (existing_id == unique_slices->end()) {
      unique_slices->push_back(slice);
@@ -95,15 +95,6 @@ void assign_rule_ids(Production *production, vector<ProductionSlice> *unique_sli
      iter->rule_id = existing_id - unique_slices->cbegin();
    }
  }
-
-  ProductionSlice slice{end, end, production->precedence_at(production->size() - 1)};
-  auto existing_id = find(unique_slices->cbegin(), unique_slices->cend(), slice);
-  if (existing_id == unique_slices->end()) {
-    unique_slices->push_back(slice);
-    production->end_rule_id = unique_slices->size() - 1;
-  } else {
-    production->end_rule_id = existing_id - unique_slices->cbegin();
-  }
 }

 SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
@@ -126,7 +117,7 @@ SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
  if (rules.empty()) {
    rules.push_back({
      "START",
-      { Production({ {rules::Symbol(0, rules::SymbolOptionToken), 0, 0} }, 0) }
+      { Production({ {rules::Symbol(0, rules::SymbolOptionToken), 0, 0} }) }
    });
  }

--- a/src/compiler/rules/built_in_symbols.cc
+++ b/src/compiler/rules/built_in_symbols.cc
@@ -7,6 +7,7 @@ Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); }
 Symbol ERROR() { return Symbol(-2, SymbolOptionToken); }
 Symbol START() { return Symbol(-3); }
 Symbol DOCUMENT() { return Symbol(-4); }
+Symbol NONE() { return Symbol(-5); }

 }  // namespace rules
 }  // namespace tree_sitter
--- a/src/compiler/rules/built_in_symbols.h
+++ b/src/compiler/rules/built_in_symbols.h
@@ -10,6 +10,7 @@ Symbol ERROR();
 Symbol START();
 Symbol END_OF_INPUT();
 Symbol DOCUMENT();
+Symbol NONE();

 }  // namespace rules
 }  // namespace tree_sitter
--- a/src/compiler/syntax_grammar.cc
+++ b/src/compiler/syntax_grammar.cc
@@ -14,7 +14,7 @@ using std::vector;
 using std::set;

 static const vector<Production> START_PRODUCTIONS({
-  Production({ {rules::Symbol(0), 0, -1} }, 2)
+  Production({ {rules::Symbol(0), 0, -1}, { rules::NONE(), 0, -2} })
 });

 static const vector<Production> NO_PRODUCTIONS({});
@@ -24,29 +24,14 @@ bool ProductionEntry::operator==(const ProductionEntry &other) const {
    rule_id == other.rule_id;
 }

-Production::Production(const vector<ProductionEntry> &entries, int last_rule_id) :
-  entries(entries), end_rule_id(last_rule_id) {}
+Production::Production(const vector<ProductionEntry> &entries) : entries(entries) {}

-int Production::precedence_at(size_t index) const {
-  if (index >= size())
-    return 0;
-  else
-    return entries[index].precedence;
+size_t Production::symbol_count() const {
+  return entries.size() - 1;
 }

-int Production::rule_id_at(size_t index) const {
-  if (index >= size())
-    return end_rule_id;
-  else
-    return entries[index].rule_id;
-}
-
-const rules::Symbol &Production::symbol_at(size_t index) const {
-  return entries[index].symbol;
-}
-
-size_t Production::size() const {
-  return entries.size();
+const ProductionEntry &Production::operator[](int i) const {
+  return entries[i];
 }

 SyntaxGrammar::SyntaxGrammar() {}
@@ -87,8 +72,7 @@ std::ostream &operator<<(std::ostream &stream, const Production &production) {
    stream << entry;
    started = true;
  }
-  return stream << string(") end_rule_id: ") <<
-    to_string(production.end_rule_id) << string(")");
+  return stream << string(")");
 }

 }  // namespace tree_sitter
--- a/src/compiler/syntax_grammar.h
+++ b/src/compiler/syntax_grammar.h
@@ -18,15 +18,12 @@ struct ProductionEntry {
  bool operator==(const ProductionEntry &) const;
 };

-class Production {
-public:
+struct Production {
+  Production();
+  Production(const std::vector<ProductionEntry> &);
+  size_t symbol_count() const;
+  const ProductionEntry &operator[](int) const;
  std::vector<ProductionEntry> entries;
-  int end_rule_id;
-  Production(const std::vector<ProductionEntry> &, int);
-  size_t size() const;
-  const rules::Symbol &symbol_at(size_t) const;
-  int precedence_at(size_t) const;
-  int rule_id_at(size_t) const;
 };

 std::ostream &operator<<(std::ostream &, const ProductionEntry &);