From 26a25278cd02641520a9da2e671fdb1464566f25 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Jul 2017 17:17:09 -0700 Subject: [PATCH] When comparing parse items, ignore consumed part of their productions This speeds up parser generation by increasing the likelihood that we'll recognize parse item sets as equivalent in advance, rather than having to merge their states after the fact. --- src/compiler/build_tables/parse_item.cc | 61 +++++++++++++++---- src/compiler/syntax_grammar.h | 12 ++++ .../parse_item_set_builder_test.cc | 9 +-- 3 files changed, 66 insertions(+), 16 deletions(-) diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index 99acb3fc..0ef56e79 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -24,20 +24,43 @@ ParseItem::ParseItem(const Symbol &lhs, const Production &production, step_index(step_index) {} bool ParseItem::operator==(const ParseItem &other) const { - return ((variable_index == other.variable_index) && - (step_index == other.step_index) && (production == other.production)); + if (step_index != other.step_index) return false; + if (variable_index != other.variable_index) return false; + if (production->size() != other.production->size()) return false; + if (is_done()) { + if (!production->empty()) { + if (production->back().precedence != other.production->back().precedence) return false; + if (production->back().associativity != other.production->back().associativity) return false; + } + } else { + for (size_t i = step_index, n = production->size(); i < n; i++) { + if (production->at(i) != other.production->at(i)) return false; + } + } + return true; } bool ParseItem::operator<(const ParseItem &other) const { - if (step_index < other.step_index) - return true; - if (step_index > other.step_index) - return false; - if (variable_index < other.variable_index) - return true; - if (variable_index > other.variable_index) - return false; - 
return production < other.production; + if (step_index < other.step_index) return true; + if (other.step_index < step_index) return false; + if (variable_index < other.variable_index) return true; + if (other.variable_index < variable_index) return false; + if (production->size() < other.production->size()) return true; + if (other.production->size() < production->size()) return false; + if (is_done()) { + if (!production->empty()) { + if (production->back().precedence < other.production->back().precedence) return true; + if (other.production->back().precedence < production->back().precedence) return false; + if (production->back().associativity < other.production->back().associativity) return true; + if (other.production->back().associativity < production->back().associativity) return false; + } + } else { + for (size_t i = step_index, n = production->size(); i < n; i++) { + if (production->at(i) < other.production->at(i)) return true; + if (other.production->at(i) < production->at(i)) return false; + } + } + return false; } Symbol ParseItem::lhs() const { @@ -128,7 +151,21 @@ struct hash { size_t result = 0; hash_combine(&result, item.variable_index); hash_combine(&result, item.step_index); - hash_combine(&result, item.production); + hash_combine(&result, item.production->dynamic_precedence); + hash_combine(&result, item.production->size()); + if (item.is_done()) { + if (!item.production->empty()) { + hash_combine(&result, item.production->back().precedence); + hash_combine(&result, item.production->back().associativity); + } + } else { + for (size_t i = item.step_index, n = item.production->size(); i < n; i++) { /* hash only the unconsumed steps, matching the range operator== compares */ + auto &step = item.production->at(i); + hash_combine(&result, step.symbol); + hash_combine(&result, step.precedence); + hash_combine(&result, step.associativity); + } + } return result; } }; diff --git a/src/compiler/syntax_grammar.h b/src/compiler/syntax_grammar.h index 4eeba90c..55e55568 100644 --- a/src/compiler/syntax_grammar.h +++ b/src/compiler/syntax_grammar.h 
@@ -16,6 +16,18 @@ struct ProductionStep { associativity == other.associativity; } + inline bool operator!=(const ProductionStep &other) const { + return !operator==(other); + } + + inline bool operator<(const ProductionStep &other) const { + if (symbol < other.symbol) return true; + if (other.symbol < symbol) return false; + if (precedence < other.precedence) return true; + if (other.precedence < precedence) return false; + return associativity < other.associativity; + } + rules::Symbol symbol; int precedence; rules::Associativity associativity; diff --git a/test/compiler/build_tables/parse_item_set_builder_test.cc b/test/compiler/build_tables/parse_item_set_builder_test.cc index ab1efed2..2884c523 100644 --- a/test/compiler/build_tables/parse_item_set_builder_test.cc +++ b/test/compiler/build_tables/parse_item_set_builder_test.cc @@ -1,4 +1,5 @@ #include "test_helper.h" +#include "helpers/stream_methods.h" #include "compiler/syntax_grammar.h" #include "compiler/lexical_grammar.h" #include "compiler/build_tables/parse_item_set_builder.h" @@ -53,7 +54,7 @@ describe("ParseItemSetBuilder", []() { ParseItemSet item_set({ { - ParseItem(Symbol::non_terminal(0), production(0, 0), 0), + ParseItem(rules::START(), production(0, 0), 0), LookaheadSet({ Symbol::terminal(10) }), } }); @@ -63,7 +64,7 @@ describe("ParseItemSetBuilder", []() { AssertThat(item_set, Equals(ParseItemSet({ { - ParseItem(Symbol::non_terminal(0), production(0, 0), 0), + ParseItem(rules::START(), production(0, 0), 0), LookaheadSet({ Symbol::terminal(10) }) }, { @@ -104,7 +105,7 @@ describe("ParseItemSetBuilder", []() { ParseItemSet item_set({ { - ParseItem(Symbol::non_terminal(0), production(0, 0), 0), + ParseItem(rules::START(), production(0, 0), 0), LookaheadSet({ Symbol::terminal(10) }), } }); @@ -114,7 +115,7 @@ describe("ParseItemSetBuilder", []() { AssertThat(item_set, Equals(ParseItemSet({ { - ParseItem(Symbol::non_terminal(0), production(0, 0), 0), + ParseItem(rules::START(), production(0, 0), 
0), LookaheadSet({ Symbol::terminal(10) }) }, {