From 33d781f4927fc0b94bd49d25f88deb4ece44e035 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Apr 2014 13:20:43 -0700 Subject: [PATCH] Refactor bookkeeping of token starts in lexical rules - Move lex items and parse items into their own files --- examples/parsers/javascript.c | 1 - .../build_tables/conflict_manager_spec.cc | 28 ++++---- .../build_tables/get_metadata_spec.cc | 43 ------------- spec/compiler/build_tables/lex_item_spec.cc | 43 +++++++++++++ .../build_tables/rule_transitions_spec.cc | 4 +- src/compiler/build_tables/build_tables.cc | 6 +- src/compiler/build_tables/conflict_manager.cc | 6 +- src/compiler/build_tables/first_set.h | 2 +- src/compiler/build_tables/follow_sets.h | 2 +- src/compiler/build_tables/get_metadata.cc | 45 ++++--------- src/compiler/build_tables/item.cc | 64 +------------------ src/compiler/build_tables/item.h | 63 ------------------ src/compiler/build_tables/item_set_closure.h | 2 +- .../build_tables/item_set_transitions.h | 3 +- src/compiler/build_tables/lex_item.cc | 51 +++++++++++++++ src/compiler/build_tables/lex_item.h | 44 +++++++++++++ src/compiler/build_tables/parse_item.cc | 45 +++++++++++++ src/compiler/build_tables/parse_item.h | 54 ++++++++++++++++ src/compiler/util/string_helpers.cc | 6 +- 19 files changed, 280 insertions(+), 232 deletions(-) create mode 100644 spec/compiler/build_tables/lex_item_spec.cc create mode 100644 src/compiler/build_tables/lex_item.cc create mode 100644 src/compiler/build_tables/lex_item.h create mode 100644 src/compiler/build_tables/parse_item.cc create mode 100644 src/compiler/build_tables/parse_item.h diff --git a/examples/parsers/javascript.c b/examples/parsers/javascript.c index 88fdea19..da4e2ddb 100644 --- a/examples/parsers/javascript.c +++ b/examples/parsers/javascript.c @@ -661,7 +661,6 @@ LEX_FN() { ADVANCE(53); LEX_ERROR(); case 52: - START_TOKEN(); if ((lookahead == '\t') || (lookahead == '\r') || (lookahead == ' ')) diff --git a/spec/compiler/build_tables/conflict_manager_spec.cc b/spec/compiler/build_tables/conflict_manager_spec.cc index cd569f7b..a5370288 100644 --- a/spec/compiler/build_tables/conflict_manager_spec.cc +++ b/spec/compiler/build_tables/conflict_manager_spec.cc @@ -70,35 +70,35 @@ describe("resolving parse conflicts", []() { describe("when the shift has higher precedence", [&]() { ParseAction shift = ParseAction::Shift(2, { 3 }); ParseAction reduce = ParseAction::Reduce(sym2, 1, 1); - + it("does not record a conflict", [&]() { manager->resolve_parse_action(sym1, shift, reduce); manager->resolve_parse_action(sym1, reduce, shift); AssertThat(manager->conflicts(), IsEmpty()); }); - + it("favors the shift", [&]() { AssertThat(manager->resolve_parse_action(sym1, shift, reduce), IsFalse()); AssertThat(manager->resolve_parse_action(sym1, reduce, shift), IsTrue()); }); }); - + describe("when the reduce has higher precedence", [&]() { ParseAction shift = ParseAction::Shift(2, { 1 }); ParseAction reduce = ParseAction::Reduce(sym2, 1, 3); - + it("does not record a conflict", [&]() { manager->resolve_parse_action(sym1, reduce, shift); manager->resolve_parse_action(sym1, shift, reduce); AssertThat(manager->conflicts(), IsEmpty()); }); - + it("favors the reduce", [&]() { AssertThat(manager->resolve_parse_action(sym1, reduce, shift), IsFalse()); AssertThat(manager->resolve_parse_action(sym1, shift, reduce), IsTrue()); }); }); - + describe("when the precedences are equal", [&]() { ParseAction shift = ParseAction::Shift(2, { 0 }); ParseAction reduce = ParseAction::Reduce(sym2, 1, 0); @@ -110,17 +110,17 @@ describe("resolving parse conflicts", []() { Conflict("rule1: shift (precedence 0) / reduce rule2 (precedence 0)") }))); }); - + it("favors the shift", [&]() { AssertThat(manager->resolve_parse_action(sym1, shift, reduce), IsFalse()); AssertThat(manager->resolve_parse_action(sym1, reduce, shift), IsTrue()); }); }); - + describe("when the shift has conflicting precedences compared to the reduce", [&]() { ParseAction shift = ParseAction::Shift(2, { 0, 1, 3 }); ParseAction reduce = ParseAction::Reduce(sym2, 1, 2); - + it("records a conflict", [&]() { manager->resolve_parse_action(sym1, reduce, shift); manager->resolve_parse_action(sym1, shift, reduce); @@ -128,7 +128,7 @@ describe("resolving parse conflicts", []() { Conflict("rule1: shift (precedence 0, 1, 3) / reduce rule2 (precedence 2)") }))); }); - + it("favors the shift", [&]() { AssertThat(manager->resolve_parse_action(sym1, shift, reduce), IsFalse()); AssertThat(manager->resolve_parse_action(sym1, reduce, shift), IsTrue()); @@ -140,23 +140,23 @@ describe("resolving parse conflicts", []() { describe("when one action has higher precedence", [&]() { ParseAction left = ParseAction::Reduce(sym2, 1, 0); ParseAction right = ParseAction::Reduce(sym2, 1, 3); - + it("favors that action", [&]() { AssertThat(manager->resolve_parse_action(sym1, left, right), IsTrue()); AssertThat(manager->resolve_parse_action(sym1, right, left), IsFalse()); }); - + it("does not record a conflict", [&]() { manager->resolve_parse_action(sym1, left, right); manager->resolve_parse_action(sym1, right, left); AssertThat(manager->conflicts(), IsEmpty()); }); }); - + describe("when the actions have the same precedence", [&]() { ParseAction left = ParseAction::Reduce(sym1, 1, 0); ParseAction right = ParseAction::Reduce(sym2, 1, 0); - + it("favors the symbol listed earlier in the grammar", [&]() { AssertThat(manager->resolve_parse_action(sym1, right, left), IsTrue()); AssertThat(manager->resolve_parse_action(sym1, left, right), IsFalse()); diff --git a/spec/compiler/build_tables/get_metadata_spec.cc b/spec/compiler/build_tables/get_metadata_spec.cc index 4f806b6c..988c4c11 100644 --- a/spec/compiler/build_tables/get_metadata_spec.cc +++ b/spec/compiler/build_tables/get_metadata_spec.cc @@ -33,49 +33,6 @@ describe("getting metadata for rules", []() { AssertThat(get_metadata(rule, key1), Equals(0)); }); }); - - it("works for metadata rules preceded by other rules that can be blank", [&]() { - auto rule = seq({ - repeat(sym("x")), - make_shared(sym("x"), map({ - { key1, 1 }, - { key2, 2 }, - })), - }); - - AssertThat(get_metadata(rule, key2), Equals(2)); - }); - - it("works for choices containing metadata rule", [&]() { - auto rule = choice({ - sym("x"), - make_shared(sym("x"), map({ - { key1, 1 }, - { key2, 2 }, - })), - }); - - AssertThat(get_metadata(rule, key2), Equals(1)); - }); - - it("works for repetitions containing metadata rules", [&]() { - auto rule = repeat(make_shared(sym("x"), map({ - { key1, 1 }, - { key2, 2 }, - }))); - AssertThat(get_metadata(rule, key2), Equals(2)); - }); - - it("returns 0 for metadata rules preceded by rules that can't be blank", [&]() { - auto rule = seq({ - sym("x"), - make_shared(sym("y"), map({ - { key1, 1 }, - { key2, 2 }, - })), - }); - AssertThat(get_metadata(rule, key2), Equals(0)); - }); }); END_TEST diff --git a/spec/compiler/build_tables/lex_item_spec.cc b/spec/compiler/build_tables/lex_item_spec.cc new file mode 100644 index 00000000..28c1dcff --- /dev/null +++ b/spec/compiler/build_tables/lex_item_spec.cc @@ -0,0 +1,43 @@ +#include "compiler_spec_helper.h" +#include "compiler/build_tables/item_set_transitions.h" +#include "compiler/prepared_grammar.h" + +using namespace rules; +using namespace build_tables; + +START_TEST + +describe("lex items", []() { + describe("determining if an item is the start of a token", [&]() { + Symbol sym("x"); + rule_ptr token_start = make_shared(str("a"), map({ + { START_TOKEN, 1 } + })); + + it("returns true for rules designated as token starts", [&]() { + LexItem item(sym, token_start); + AssertThat(item.is_token_start(), IsTrue()); + }); + + it("returns false for rules not designated as token starts", [&]() { + AssertThat(LexItem(sym, make_shared(str("a"), map({ + { START_TOKEN, 0 } + }))).is_token_start(), IsFalse()); + AssertThat(LexItem(sym, str("a")).is_token_start(), IsFalse()); + }); + + describe("when given a sequence containing a token start", [&]() { + it("returns true when the rule before the token start may be blank", [&]() { + LexItem item(sym, seq({ repeat(str("a")), token_start })); + AssertThat(item.is_token_start(), IsTrue()); + }); + + it("returns false when the rule before the token start cannot be blank", [&]() { + LexItem item(sym, seq({ str("a"), token_start })); + AssertThat(item.is_token_start(), IsFalse()); + }); + }); + }); +}); + +END_TEST \ No newline at end of file diff --git a/spec/compiler/build_tables/rule_transitions_spec.cc b/spec/compiler/build_tables/rule_transitions_spec.cc index 22c3834f..479ac610 100644 --- a/spec/compiler/build_tables/rule_transitions_spec.cc +++ b/spec/compiler/build_tables/rule_transitions_spec.cc @@ -183,12 +183,12 @@ describe("rule transitions", []() { { CharacterSet({ 'a' }), rule } }))); }); - + it("preserves metadata", [&]() { map metadata_value({ { PRECEDENCE, 5 } }); - + rule_ptr rule = make_shared(seq({ sym("x"), sym("y") }), metadata_value); AssertThat( sym_transitions(rule), diff --git a/src/compiler/build_tables/build_tables.cc b/src/compiler/build_tables/build_tables.cc index bf867fa2..d16b27dd 100644 --- a/src/compiler/build_tables/build_tables.cc +++ b/src/compiler/build_tables/build_tables.cc @@ -42,7 +42,7 @@ namespace tree_sitter { result.insert(item.precedence()); return result; } - + void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) { for (auto &transition : sym_transitions(item_set, grammar)) { const Symbol &symbol = transition.first; @@ -51,7 +51,7 @@ namespace tree_sitter { auto current_actions = parse_table.states[state_id].actions; auto current_action = current_actions.find(symbol); - + if (current_action == current_actions.end() || conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0, precedence_values))) { ParseStateId new_state_id = add_parse_state(item_set); @@ -95,7 +95,7 @@ namespace tree_sitter { ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence()); auto current_actions = parse_table.states[state_id].actions; auto current_action = current_actions.find(item.lookahead_sym); - + if (current_action == current_actions.end() || conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action)) { parse_table.add_action(state_id, item.lookahead_sym, action); diff --git a/src/compiler/build_tables/conflict_manager.cc b/src/compiler/build_tables/conflict_manager.cc index 7ffc3b1a..f79e0b7f 100644 --- a/src/compiler/build_tables/conflict_manager.cc +++ b/src/compiler/build_tables/conflict_manager.cc @@ -97,7 +97,7 @@ namespace tree_sitter { result.insert(result.end(), conflicts_.begin(), conflicts_.end()); return result; } - + string precedence_string(const ParseAction &action) { string precedences = "(precedence "; bool started = false; @@ -108,7 +108,7 @@ namespace tree_sitter { } return precedences + ")"; } - + string message_for_action(const ParseAction &action, const map &rule_names) { switch (action.type) { case ParseActionTypeShift: @@ -126,7 +126,7 @@ namespace tree_sitter { return "error"; } } - + void ConflictManager::record_conflict(const rules::Symbol &symbol, const ParseAction &left, const ParseAction &right) { diff --git a/src/compiler/build_tables/first_set.h b/src/compiler/build_tables/first_set.h index 346ef821..b816e8d8 100644 --- a/src/compiler/build_tables/first_set.h +++ b/src/compiler/build_tables/first_set.h @@ -2,7 +2,7 @@ #define COMPILER_BUILD_TABLES_FIRST_SET_H_ #include -#include "compiler/build_tables/item.h" +#include "compiler/build_tables/parse_item.h" #include "compiler/rules/symbol.h" namespace tree_sitter { diff --git a/src/compiler/build_tables/follow_sets.h b/src/compiler/build_tables/follow_sets.h index 53dfb516..772b16f8 100644 --- a/src/compiler/build_tables/follow_sets.h +++ b/src/compiler/build_tables/follow_sets.h @@ -3,7 +3,7 @@ #include #include -#include "compiler/build_tables/item.h" +#include "compiler/build_tables/parse_item.h" #include "compiler/rules/symbol.h" namespace tree_sitter { diff --git a/src/compiler/build_tables/get_metadata.cc b/src/compiler/build_tables/get_metadata.cc index 6fa52983..ca616458 100644 --- a/src/compiler/build_tables/get_metadata.cc +++ b/src/compiler/build_tables/get_metadata.cc @@ -1,42 +1,21 @@ #include "compiler/build_tables/get_metadata.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/repeat.h" #include "compiler/rules/visitor.h" -#include "compiler/build_tables/rule_can_be_blank.h" namespace tree_sitter { namespace build_tables { - class GetMetadata : public rules::RuleFn { - rules::MetadataKey metadata_key; - public: - GetMetadata(rules::MetadataKey key) : metadata_key(key) {} - - int apply_to(const rules::Choice *rule) { - return apply(rule->left) || apply(rule->right); - } - - int apply_to(const rules::Repeat *rule) { - return apply(rule->content); - } - - int apply_to(const rules::Seq *rule) { - int result = apply(rule->left); - if (rule_can_be_blank(rule->left) && result == 0) - result = apply(rule->right); - return result; - } - - int apply_to(const rules::Metadata *rule) { - auto pair = rule->value.find(metadata_key); - if (pair != rule->value.end()) - return pair->second; - else - return 0; - } - }; - int get_metadata(const rules::rule_ptr &rule, rules::MetadataKey key) { + class GetMetadata : public rules::RuleFn { + rules::MetadataKey metadata_key; + + int apply_to(const rules::Metadata *rule) { + auto pair = rule->value.find(metadata_key); + return (pair != rule->value.end()) ? pair->second : 0; + } + + public: + GetMetadata(rules::MetadataKey key) : metadata_key(key) {} + }; + return GetMetadata(key).apply(rule); } } diff --git a/src/compiler/build_tables/item.cc b/src/compiler/build_tables/item.cc index 5e50df96..a421290b 100644 --- a/src/compiler/build_tables/item.cc +++ b/src/compiler/build_tables/item.cc @@ -1,78 +1,16 @@ #include "compiler/build_tables/item.h" #include "compiler/build_tables/rule_can_be_blank.h" -#include "compiler/build_tables/get_metadata.h" #include "tree_sitter/compiler.h" namespace tree_sitter { - using std::string; - using std::to_string; - using std::ostream; - using std::vector; - using rules::Symbol; - using rules::rule_ptr; - namespace build_tables { - Item::Item(const Symbol &lhs, const rule_ptr rule) : + Item::Item(const rules::Symbol &lhs, const rules::rule_ptr rule) : lhs(lhs), rule(rule) {} bool Item::is_done() const { return rule_can_be_blank(rule); } - - ostream& operator<<(ostream &stream, const LexItem &item) { - return stream << - string("#"); - } - - ostream& operator<<(ostream &stream, const ParseItem &item) { - return stream << - string("#"); - } - - LexItem::LexItem(const Symbol &lhs, const rule_ptr rule) : Item(lhs, rule) {} - - bool LexItem::operator==(const LexItem &other) const { - bool lhs_eq = other.lhs == lhs; - bool rules_eq = (*other.rule == *rule); - return lhs_eq && rules_eq; - } - - bool LexItem::is_token_start() const { - return get_metadata(rule, rules::START_TOKEN) != 0; - } - - ParseItem::ParseItem(const Symbol &lhs, - const rule_ptr rule, - size_t consumed_symbol_count, - const Symbol &lookahead_sym) : - Item(lhs, rule), - consumed_symbol_count(consumed_symbol_count), - lookahead_sym(lookahead_sym) {} - - bool ParseItem::operator==(const ParseItem &other) const { - bool lhs_eq = other.lhs == lhs; - bool rules_eq = (*other.rule == *rule); - bool consumed_sym_counts_eq = (other.consumed_symbol_count == consumed_symbol_count); - bool lookaheads_eq = other.lookahead_sym == lookahead_sym; - return lhs_eq && rules_eq && consumed_sym_counts_eq && lookaheads_eq; - } - - int ParseItem::precedence() const { - return get_metadata(rule, rules::PRECEDENCE); - } } } diff --git a/src/compiler/build_tables/item.h b/src/compiler/build_tables/item.h index 0659c21a..b533b7e2 100644 --- a/src/compiler/build_tables/item.h +++ b/src/compiler/build_tables/item.h @@ -2,14 +2,9 @@ #define COMPILER_BUILD_TABLES_ITEM_H_ #include -#include -#include #include "compiler/rules/symbol.h" -#include "compiler/rules/metadata.h" namespace tree_sitter { - class Grammar; - namespace build_tables { class Item { public: @@ -19,65 +14,7 @@ namespace tree_sitter { rules::Symbol lhs; rules::rule_ptr rule; }; - - class LexItem : public Item { - public: - LexItem(const rules::Symbol &lhs, rules::rule_ptr rule); - bool operator==(const LexItem &other) const; - bool is_token_start() const; - }; - - class ParseItem : public Item { - public: - ParseItem(const rules::Symbol &lhs, - rules::rule_ptr rule, - const size_t consumed_symbol_count, - const rules::Symbol &lookahead_sym); - bool operator==(const ParseItem &other) const; - int precedence() const; - - const size_t consumed_symbol_count; - const rules::Symbol lookahead_sym; - }; - - typedef std::unordered_set ParseItemSet; - typedef std::unordered_set LexItemSet; - - std::ostream& operator<<(std::ostream &stream, const LexItem &item); - std::ostream& operator<<(std::ostream &stream, const ParseItem &item); } } -namespace std { - template<> - struct hash { - size_t operator()(const tree_sitter::build_tables::Item &item) const { - return - hash()(item.lhs) ^ - hash()(item.rule); - } - }; - - template<> - struct hash { - size_t operator()(const tree_sitter::build_tables::ParseItem &item) const { - return - hash()(item.lhs.name) ^ - hash()(item.rule) ^ - hash()(item.consumed_symbol_count) ^ - hash()(item.lookahead_sym.name); - } - }; - - template - struct hash> { - size_t operator()(const unordered_set &set) const { - size_t result = hash()(set.size()); - for (auto item : set) - result ^= hash()(item); - return result; - } - }; -} - #endif // COMPILER_BUILD_TABLES_ITEM_H_ diff --git a/src/compiler/build_tables/item_set_closure.h b/src/compiler/build_tables/item_set_closure.h index d55e4727..3c2abc26 100644 --- a/src/compiler/build_tables/item_set_closure.h +++ b/src/compiler/build_tables/item_set_closure.h @@ -1,7 +1,7 @@ #ifndef COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_ #define COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_ -#include "compiler/build_tables/item.h" +#include "compiler/build_tables/parse_item.h" namespace tree_sitter { class PreparedGrammar; diff --git a/src/compiler/build_tables/item_set_transitions.h b/src/compiler/build_tables/item_set_transitions.h index 40e23009..8eea288c 100644 --- a/src/compiler/build_tables/item_set_transitions.h +++ b/src/compiler/build_tables/item_set_transitions.h @@ -2,7 +2,8 @@ #define COMPILER_BUILD_TABLES_ITEM_SET_TRANSITIONS_H_ #include -#include "compiler/build_tables/item.h" +#include "compiler/build_tables/lex_item.h" +#include "compiler/build_tables/parse_item.h" namespace tree_sitter { class PreparedGrammar; diff --git a/src/compiler/build_tables/lex_item.cc b/src/compiler/build_tables/lex_item.cc new file mode 100644 index 00000000..6d30c8d8 --- /dev/null +++ b/src/compiler/build_tables/lex_item.cc @@ -0,0 +1,51 @@ +#include "compiler/build_tables/lex_item.h" +#include "compiler/build_tables/rule_can_be_blank.h" +#include "compiler/rules/symbol.h" +#include "compiler/rules/metadata.h" +#include "compiler/rules/seq.h" +#include "compiler/rules/visitor.h" + +namespace tree_sitter { + using std::string; + using std::ostream; + using std::vector; + + namespace build_tables { + LexItem::LexItem(const rules::Symbol &lhs, const rules::rule_ptr rule) : + Item(lhs, rule) {} + + bool LexItem::operator==(const LexItem &other) const { + bool lhs_eq = other.lhs == lhs; + bool rules_eq = (*other.rule == *rule); + return lhs_eq && rules_eq; + } + + bool LexItem::is_token_start() const { + class IsTokenStart : public rules::RuleFn { + bool apply_to(const rules::Seq *rule) { + bool result = apply(rule->left); + if (!result && rule_can_be_blank(rule->left)) + result = apply(rule->right); + return result; + } + + bool apply_to(const rules::Metadata *rule) { + auto pair = rule->value.find(rules::START_TOKEN); + return (pair != rule->value.end()) && pair->second; + } + }; + + return IsTokenStart().apply(rule); + } + + ostream& operator<<(ostream &stream, const LexItem &item) { + return stream << + string("#"); + } + } +} + diff --git a/src/compiler/build_tables/lex_item.h b/src/compiler/build_tables/lex_item.h new file mode 100644 index 00000000..7ee51993 --- /dev/null +++ b/src/compiler/build_tables/lex_item.h @@ -0,0 +1,44 @@ +#ifndef COMPILER_BUILD_TABLES_LEX_ITEM_H_ +#define COMPILER_BUILD_TABLES_LEX_ITEM_H_ + +#include +#include +#include "compiler/build_tables/item.h" + +namespace tree_sitter { + namespace build_tables { + class LexItem : public Item { + public: + LexItem(const rules::Symbol &lhs, rules::rule_ptr rule); + bool operator==(const LexItem &other) const; + bool is_token_start() const; + }; + + std::ostream& operator<<(std::ostream &stream, const LexItem &item); + + typedef std::unordered_set LexItemSet; + } +} + +namespace std { + template<> + struct hash { + size_t operator()(const tree_sitter::build_tables::Item &item) const { + return + hash()(item.lhs) ^ + hash()(item.rule); + } + }; + + template<> + struct hash { + size_t operator()(const tree_sitter::build_tables::LexItemSet &set) const { + size_t result = hash()(set.size()); + for (auto item : set) + result ^= hash()(item); + return result; + } + }; +} + +#endif // COMPILER_BUILD_TABLES_LEX_ITEM_H_ diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc new file mode 100644 index 00000000..54c88e91 --- /dev/null +++ b/src/compiler/build_tables/parse_item.cc @@ -0,0 +1,45 @@ +#include "compiler/build_tables/parse_item.h" +#include "compiler/build_tables/get_metadata.h" +#include "tree_sitter/compiler.h" + +namespace tree_sitter { + using std::string; + using std::to_string; + using std::ostream; + + namespace build_tables { + ParseItem::ParseItem(const rules::Symbol &lhs, + const rules::rule_ptr rule, + size_t consumed_symbol_count, + const rules::Symbol &lookahead_sym) : + Item(lhs, rule), + consumed_symbol_count(consumed_symbol_count), + lookahead_sym(lookahead_sym) {} + + bool ParseItem::operator==(const ParseItem &other) const { + bool lhs_eq = other.lhs == lhs; + bool rules_eq = (*other.rule == *rule); + bool consumed_sym_counts_eq = (other.consumed_symbol_count == consumed_symbol_count); + bool lookaheads_eq = other.lookahead_sym == lookahead_sym; + return lhs_eq && rules_eq && consumed_sym_counts_eq && lookaheads_eq; + } + + int ParseItem::precedence() const { + return get_metadata(rule, rules::PRECEDENCE); + } + + ostream& operator<<(ostream &stream, const ParseItem &item) { + return stream << + string("#"); + } + } +} + diff --git a/src/compiler/build_tables/parse_item.h b/src/compiler/build_tables/parse_item.h new file mode 100644 index 00000000..6b13caa5 --- /dev/null +++ b/src/compiler/build_tables/parse_item.h @@ -0,0 +1,54 @@ +#ifndef COMPILER_BUILD_TABLES_PARSE_ITEM_H_ +#define COMPILER_BUILD_TABLES_PARSE_ITEM_H_ + +#include +#include +#include "compiler/rules/symbol.h" +#include "compiler/build_tables/item.h" +#include "compiler/rules/metadata.h" + +namespace tree_sitter { + namespace build_tables { + class ParseItem : public Item { + public: + ParseItem(const rules::Symbol &lhs, + rules::rule_ptr rule, + const size_t consumed_symbol_count, + const rules::Symbol &lookahead_sym); + bool operator==(const ParseItem &other) const; + int precedence() const; + + const size_t consumed_symbol_count; + const rules::Symbol lookahead_sym; + }; + + std::ostream& operator<<(std::ostream &stream, const ParseItem &item); + + typedef std::unordered_set ParseItemSet; + } +} + +namespace std { + template<> + struct hash { + size_t operator()(const tree_sitter::build_tables::ParseItem &item) const { + return + hash()(item.lhs.name) ^ + hash()(item.rule) ^ + hash()(item.consumed_symbol_count) ^ + hash()(item.lookahead_sym.name); + } + }; + + template<> + struct hash { + size_t operator()(const tree_sitter::build_tables::ParseItemSet &set) const { + size_t result = hash()(set.size()); + for (auto item : set) + result ^= hash()(item); + return result; + } + }; +} + +#endif // COMPILER_BUILD_TABLES_PARSE_ITEM_H_ diff --git a/src/compiler/util/string_helpers.cc b/src/compiler/util/string_helpers.cc index c54c75dd..ace3da2b 100644 --- a/src/compiler/util/string_helpers.cc +++ b/src/compiler/util/string_helpers.cc @@ -23,7 +23,7 @@ namespace tree_sitter { str_replace(&input, "\n", "\\n"); return input; } - + string join(vector lines, string separator) { string result; bool started = false; @@ -38,13 +38,13 @@ namespace tree_sitter { string join(vector lines) { return join(lines, "\n"); } - + string indent(string input) { string tab = " "; util::str_replace(&input, "\n", "\n" + tab); return tab + input; } - + string character_code(char character) { switch (character) { case '\0':