From db9966b57c78d87a5b08ef8a8bd8d8f5f3a5da17 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 10 Oct 2015 12:33:12 -0700 Subject: [PATCH] Simplify lex item set transitions code --- project.gyp | 2 +- spec/compiler/build_tables/lex_item_spec.cc | 254 ++++++++++++++++-- .../build_tables/merge_transitions_spec.cc | 90 ------- .../build_tables/rule_transitions_spec.cc | 173 ------------ src/compiler/build_tables/build_lex_table.cc | 4 +- src/compiler/build_tables/item_set_closure.cc | 2 - src/compiler/build_tables/lex_item.cc | 15 +- .../build_tables/lex_item_transitions.cc | 127 +++++++++ ...e_transitions.h => lex_item_transitions.h} | 5 +- src/compiler/build_tables/merge_transitions.h | 54 ---- src/compiler/build_tables/rule_transitions.cc | 74 ----- src/compiler/lex_table.cc | 3 +- .../prepare_grammar/prepare_grammar.cc | 3 +- 13 files changed, 378 insertions(+), 428 deletions(-) delete mode 100644 spec/compiler/build_tables/merge_transitions_spec.cc delete mode 100644 spec/compiler/build_tables/rule_transitions_spec.cc create mode 100644 src/compiler/build_tables/lex_item_transitions.cc rename src/compiler/build_tables/{rule_transitions.h => lex_item_transitions.h} (65%) delete mode 100644 src/compiler/build_tables/merge_transitions.h delete mode 100644 src/compiler/build_tables/rule_transitions.cc diff --git a/project.gyp b/project.gyp index 488f513e..5e79eba6 100644 --- a/project.gyp +++ b/project.gyp @@ -17,12 +17,12 @@ 'src/compiler/build_tables/get_metadata.cc', 'src/compiler/build_tables/item_set_closure.cc', 'src/compiler/build_tables/lex_item.cc', + 'src/compiler/build_tables/lex_item_transitions.cc', 'src/compiler/build_tables/lex_conflict_manager.cc', 'src/compiler/build_tables/lookahead_set.cc', 'src/compiler/build_tables/parse_item.cc', 'src/compiler/build_tables/parse_conflict_manager.cc', 'src/compiler/build_tables/rule_can_be_blank.cc', - 'src/compiler/build_tables/rule_transitions.cc', 'src/compiler/compile.cc', 'src/compiler/generate_code/c_code.cc', 'src/compiler/grammar.cc', diff --git a/spec/compiler/build_tables/lex_item_spec.cc b/spec/compiler/build_tables/lex_item_spec.cc index 21f5f41a..8dc963f0 100644 --- a/spec/compiler/build_tables/lex_item_spec.cc +++ b/spec/compiler/build_tables/lex_item_spec.cc @@ -40,36 +40,256 @@ describe("LexItem", []() { }); }); -describe("lex_item_set_transitions", [&]() { - describe("when two items in the set have transitions on the same character", [&]() { - it("merges the transitions by computing the union of the two item sets", [&]() { - LexItemSet set1({ - LexItem(Symbol(1), CharacterSet().include('a', 'f').copy()), - LexItem(Symbol(2), CharacterSet().include('e', 'x').copy()) - }); +describe("LexItemSet::transitions()", [&]() { + it("handles single characters", [&]() { + LexItemSet item_set({ + LexItem(Symbol(1), character({ 'x' })), + }); - AssertThat(set1.transitions(), Equals(map({ + AssertThat( + item_set.transitions(), + Equals(map({ { - CharacterSet().include('a', 'd'), + CharacterSet().include('x'), LexItemSet({ LexItem(Symbol(1), blank()), }) + } + }))); + }); + + it("handles sequences", [&]() { + LexItemSet item_set({ + LexItem(Symbol(1), seq({ + character({ 'w' }), + character({ 'x' }), + character({ 'y' }), + character({ 'z' }), + })), + }); + + AssertThat( + item_set.transitions(), + Equals(map({ + { + CharacterSet().include('w'), + LexItemSet({ + LexItem(Symbol(1), seq({ + character({ 'x' }), + character({ 'y' }), + character({ 'z' }), + })), + }) + } + }))); + }); + + it("handles sequences where the left hand side can be blank", [&]() { + LexItemSet item_set({ + LexItem(Symbol(1), seq({ + choice({ + character({ 'x' }), + blank(), + }), + character({ 'y' }), + character({ 'z' }), + })), + }); + + AssertThat( + item_set.transitions(), + Equals(map({ + { + CharacterSet().include('x'), + LexItemSet({ + LexItem(Symbol(1), seq({ + character({ 'y' }), + character({ 'z' }), + })), + }) + }, + { + CharacterSet().include('y'), + LexItemSet({ + LexItem(Symbol(1), character({ 'z' })), + }) + } + }))); + }); + + it("handles blanks", [&]() { + LexItemSet item_set({ + LexItem(Symbol(1), blank()), + }); + + AssertThat(item_set.transitions(), IsEmpty()); + }); + + it("handles repeats", [&]() { + LexItemSet item_set({ + LexItem(Symbol(1), repeat(seq({ + character({ 'a' }), + character({ 'b' }), + }))), + LexItem(Symbol(2), repeat(character({ 'c' }))), + }); + + AssertThat( + item_set.transitions(), + Equals(map({ + { + CharacterSet().include('a'), + LexItemSet({ + LexItem(Symbol(1), seq({ + character({ 'b' }), + repeat(seq({ + character({ 'a' }), + character({ 'b' }), + })) + })) + }) + }, + { + CharacterSet().include('c'), + LexItemSet({ + LexItem(Symbol(2), repeat(character({ 'c' }))), + }) + } + }))); + }); + + it("handles choices between overlapping character sets", [&]() { + LexItemSet item_set({ + LexItem(Symbol(1), choice({ + seq({ + character({ 'a', 'b', 'c', 'd' }), + character({ 'x' }), + }), + seq({ + character({ 'c', 'd', 'e', 'f' }), + character({ 'y' }), + }), + })) + }); + + AssertThat( + item_set.transitions(), + Equals(map({ + { + CharacterSet().include('a', 'b'), + LexItemSet({ + LexItem(Symbol(1), character({ 'x' })), + }) + }, + { + CharacterSet().include('c', 'd'), + LexItemSet({ + LexItem(Symbol(1), character({ 'x' })), + LexItem(Symbol(1), character({ 'y' })), + }) }, { CharacterSet().include('e', 'f'), LexItemSet({ - LexItem(Symbol(1), blank()), - LexItem(Symbol(2), blank()), - }) - }, - { - CharacterSet().include('g', 'x'), - LexItemSet({ - LexItem(Symbol(2), blank()), + LexItem(Symbol(1), character({ 'y' })), }) }, }))); + }); + + it("handles choices between a subset and a superset of characters", [&]() { + LexItemSet item_set({ + LexItem(Symbol(1), choice({ + seq({ + character({ 'b', 'c', 'd' }), + character({ 'x' }), + }), + seq({ + character({ 'a', 'b', 'c', 'd', 'e', 'f' }), + character({ 'y' }), + }), + })), }); + + AssertThat( + item_set.transitions(), + Equals(map({ + { + CharacterSet().include('a').include('e', 'f'), + LexItemSet({ + LexItem(Symbol(1), character({ 'y' })), + }) + }, + { + CharacterSet().include('b', 'd'), + LexItemSet({ + LexItem(Symbol(1), character({ 'x' })), + LexItem(Symbol(1), character({ 'y' })), + }) + }, + }))); + }); + + it("handles choices between whitelisted and blacklisted character sets", [&]() { + LexItemSet item_set({ + LexItem(Symbol(1), seq({ + choice({ + character({ '/' }, false), + seq({ + character({ '\\' }), + character({ '/' }), + }), + }), + character({ '/' }), + })) + }); + + AssertThat( + item_set.transitions(), + Equals(map({ + { + CharacterSet().include_all().exclude('/').exclude('\\'), + LexItemSet({ + LexItem(Symbol(1), character({ '/' })), + }) + }, + { + CharacterSet().include('\\'), + LexItemSet({ + LexItem(Symbol(1), character({ '/' })), + LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })), + }) + }, + }))); + }); + + it("handles different items with overlapping character sets", [&]() { + LexItemSet set1({ + LexItem(Symbol(1), character({ 'a', 'b', 'c', 'd', 'e', 'f' })), + LexItem(Symbol(2), character({ 'e', 'f', 'g', 'h', 'i' })) + }); + + AssertThat(set1.transitions(), Equals(map({ + { + CharacterSet().include('a', 'd'), + LexItemSet({ + LexItem(Symbol(1), blank()), + }) + }, + { + CharacterSet().include('e', 'f'), + LexItemSet({ + LexItem(Symbol(1), blank()), + LexItem(Symbol(2), blank()), + }) + }, + { + CharacterSet().include('g', 'i'), + LexItemSet({ + LexItem(Symbol(2), blank()), + }) + }, + }))); }); }); diff --git a/spec/compiler/build_tables/merge_transitions_spec.cc b/spec/compiler/build_tables/merge_transitions_spec.cc deleted file mode 100644 index 58f4fad4..00000000 --- a/spec/compiler/build_tables/merge_transitions_spec.cc +++ /dev/null @@ -1,90 +0,0 @@ -#include "compiler/compiler_spec_helper.h" -#include "compiler/build_tables/merge_transitions.h" - -using namespace rules; -using namespace build_tables; - -START_TEST - -describe("merge_transition", []() { - typedef map int_map; - - auto do_merge = [&](int_map *left, const pair &new_pair) { - merge_transition(left, new_pair, [](int *l, const int *r) { - *l = *l | *r; - }); - }; - - describe("when none of the transitions intersect", [&]() { - it("returns the union of the two sets of transitions", [&]() { - int_map map({ - { CharacterSet().include('a').include('c'), 1 }, - { CharacterSet().include('x').include('y'), 2 }, - { CharacterSet().include('1').include('9'), 4 }, - }); - - do_merge(&map, { CharacterSet().include(' '), 8 }); - do_merge(&map, { CharacterSet().include('\t'), 16 }); - - AssertThat(map, Equals(int_map({ - { CharacterSet().include('a').include('c'), 1 }, - { CharacterSet().include('x').include('y'), 2 }, - { CharacterSet().include('1').include('9'), 4 }, - { CharacterSet().include(' '), 8 }, - { CharacterSet().include('\t'), 16 }, - }))); - }); - }); - - describe("when transitions intersect", [&]() { - it("merges the intersecting transitions using the provided function", [&]() { - int_map map({ - { CharacterSet().include('a', 'f').include('A', 'F'), 1 }, - { CharacterSet().include('0', '9'), 2 }, - }); - - do_merge(&map, { CharacterSet().include('c'), 4 }); - do_merge(&map, { CharacterSet().include('3'), 8 }); - - AssertThat(map, Equals(int_map({ - { - CharacterSet() - .include('a', 'b') - .include('d', 'f') - .include('A', 'F'), - 1 - }, - { - CharacterSet().include('c'), - 5 - }, - { - CharacterSet().include('0', '2').include('4', '9'), - 2 - }, - { - CharacterSet().include('3'), - 10 - }, - }))); - }); - }); - - describe("when two of the right transitions intersect the same left transition", [&]() { - it("splits the left-hand transition correctly", [&]() { - int_map map1({ - { CharacterSet().include('a').include('c'), 1 }, - }); - - do_merge(&map1, { CharacterSet().include('a'), 2 }); - do_merge(&map1, { CharacterSet().include('c'), 4 }); - - AssertThat(map1, Equals(int_map({ - { CharacterSet().include('a'), 3 }, - { CharacterSet().include('c'), 5 }, - }))); - }); - }); -}); - -END_TEST diff --git a/spec/compiler/build_tables/rule_transitions_spec.cc b/spec/compiler/build_tables/rule_transitions_spec.cc deleted file mode 100644 index 57e6fb47..00000000 --- a/spec/compiler/build_tables/rule_transitions_spec.cc +++ /dev/null @@ -1,173 +0,0 @@ -#include "compiler/compiler_spec_helper.h" -#include "compiler/build_tables/rule_transitions.h" -#include "compiler/rules/metadata.h" - -using namespace rules; -using namespace build_tables; - -class transition_map : public std::map { - public: - bool operator==(const std::map &other) const { - if (this->size() != other.size()) return false; - for (const auto &pair : *this) { - auto other_pair = other.find(pair.first); - if (other_pair == other.end()) return false; - if (!pair.second->operator==(*other_pair->second)) return false; - } - return true; - } - - transition_map(const std::initializer_list> &list) : - std::map(list) {} -}; - -START_TEST - -describe("rule_transitions", []() { - it("handles single characters", [&]() { - AssertThat( - rule_transitions(character({ '1' })), - Equals(transition_map({ - { CharacterSet().include('1'), blank() } - }))); - }); - - it("handles sequences", [&]() { - AssertThat( - rule_transitions(seq({ character({ '1' }), character({ '2' }) })), - Equals(transition_map({ - { CharacterSet().include('1'), character({ '2' }) } - }))); - }); - - it("handles long sequences", [&]() { - AssertThat( - rule_transitions(seq({ - character({ '1' }), - character({ '2' }), - character({ '3' }), - character({ '4' }) - })), - Equals(transition_map({ - { - CharacterSet().include('1'), - seq({ character({ '2' }), character({ '3' }), character({ '4' }) }), - } - }))); - }); - - it("handles sequences whose left sides can be blank", [&]() { - AssertThat( - rule_transitions(seq({ - choice({ - character({ '1' }), - blank() }), - seq({ - character({ '1' }), - character({ '2' }) }) - })), Equals(transition_map({ - { - CharacterSet().include('1'), - choice({ seq({ character({ '1' }), character({ '2' }) }), character({ '2' }), }), - } - }))); - }); - - it("handles choices between overlapping character sets", [&]() { - AssertThat( - rule_transitions(choice({ - seq({ - character({ 'a', 'b', 'c', 'd' }), - sym("x") }), - seq({ - character({ 'c', 'd', 'e', 'f' }), - sym("y") }) })), - Equals(transition_map({ - { CharacterSet().include('a', 'b'), sym("x") }, - { CharacterSet().include('c', 'd'), choice({ sym("x"), sym("y") }) }, - { CharacterSet().include('e', 'f'), sym("y") }, - }))); - }); - - it("handles choices between whitelisted and blacklisted character sets", [&]() { - AssertThat( - rule_transitions(seq({ - choice({ - character({ '/' }, false), - seq({ - character({ '\\' }), - character({ '/' }) }) }), - character({ '/' }) })), - - Equals(transition_map({ - { CharacterSet() - .include_all() - .exclude('/') - .exclude('\\'), - character({ '/' }) }, - { CharacterSet() - .include('\\'), - seq({ - choice({ - blank(), - character({ '/' }) }), - character({ '/' }) }) }, - }))); - }); - - it("handles choices between a subset and a superset of characters", [&]() { - AssertThat( - rule_transitions(choice({ - seq({ - character({ 'b', 'c', 'd' }), - sym("x") }), - seq({ - character({ 'a', 'b', 'c', 'd', 'e', 'f' }), - sym("y") }) })), - Equals(transition_map({ - { CharacterSet().include('b', 'd'), choice({ sym("x"), sym("y") }) }, - { CharacterSet().include('a').include('e', 'f'), sym("y") }, - }))); - - AssertThat( - rule_transitions(choice({ - seq({ - character({ 'a', 'b', 'c', 'd', 'e', 'f' }), - sym("x") }), - seq({ - character({ 'b', 'c', 'd' }), - sym("y") }) })), - Equals(transition_map({ - { CharacterSet().include('b', 'd'), choice({ sym("x"), sym("y") }) }, - { CharacterSet().include('a').include('e', 'f'), sym("x") }, - }))); - }); - - it("handles blanks", [&]() { - AssertThat(rule_transitions(blank()), Equals(transition_map({}))); - }); - - it("handles repeats", [&]() { - rule_ptr rule = repeat(seq({ character({ 'a' }), character({ 'b' }) })); - - AssertThat( - rule_transitions(rule), - Equals(transition_map({ - { - CharacterSet().include('a'), - seq({ - character({ 'b' }), - rule }) - }}))); - - rule = repeat(character({ 'a' })); - - AssertThat( - rule_transitions(rule), - Equals(transition_map({ - { CharacterSet().include('a'), rule } - }))); - }); -}); - -END_TEST diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index b9a98615..b74a1a7d 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -130,8 +130,8 @@ class LexTableBuilder { return rules::Seq::build({ make_shared( separator_rule, map({ - { rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 }, - })), + { rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 }, + })), rule, }); } diff --git a/src/compiler/build_tables/item_set_closure.cc b/src/compiler/build_tables/item_set_closure.cc index 6e138798..d23d552a 100644 --- a/src/compiler/build_tables/item_set_closure.cc +++ b/src/compiler/build_tables/item_set_closure.cc @@ -3,8 +3,6 @@ #include #include #include "tree_sitter/compiler.h" -#include "compiler/build_tables/rule_transitions.h" -#include "compiler/build_tables/rule_can_be_blank.h" #include "compiler/syntax_grammar.h" namespace tree_sitter { diff --git a/src/compiler/build_tables/lex_item.cc b/src/compiler/build_tables/lex_item.cc index d8f7b464..dae87fd4 100644 --- a/src/compiler/build_tables/lex_item.cc +++ b/src/compiler/build_tables/lex_item.cc @@ -1,7 +1,6 @@ #include "compiler/build_tables/lex_item.h" #include "compiler/build_tables/get_metadata.h" -#include "compiler/build_tables/rule_transitions.h" -#include "compiler/build_tables/merge_transitions.h" +#include "compiler/build_tables/lex_item_transitions.h" #include "compiler/rules/symbol.h" #include @@ -48,16 +47,8 @@ bool LexItemSet::operator==(const LexItemSet &other) const { map LexItemSet::transitions() const { map result; - for (const LexItem &item : entries) { - for (auto &transition : rule_transitions(item.rule)) { - LexItem next_item(item.lhs, transition.second); - merge_transition( - &result, { transition.first, LexItemSet({ next_item }) }, - [](LexItemSet *left, const LexItemSet *right) { - left->entries.insert(right->entries.begin(), right->entries.end()); - }); - } - } + for (const LexItem &item : entries) + lex_item_transitions(&result, item); return result; } diff --git a/src/compiler/build_tables/lex_item_transitions.cc b/src/compiler/build_tables/lex_item_transitions.cc new file mode 100644 index 00000000..2fc6e7e6 --- /dev/null +++ b/src/compiler/build_tables/lex_item_transitions.cc @@ -0,0 +1,127 @@ +#include "compiler/build_tables/lex_item_transitions.h" +#include +#include +#include +#include "compiler/build_tables/rule_can_be_blank.h" +#include "compiler/rules/blank.h" +#include "compiler/rules/choice.h" +#include "compiler/rules/seq.h" +#include "compiler/rules/repeat.h" +#include "compiler/rules/metadata.h" +#include "compiler/rules/symbol.h" +#include "compiler/rules/character_set.h" +#include "compiler/rules/visitor.h" +#include "compiler/build_tables/lex_item.h" + +namespace tree_sitter { +namespace build_tables { + +using std::function; +using std::make_shared; +using std::map; +using std::pair; +using std::vector; +using rules::CharacterSet; + +class LexItemTransitions : public rules::RuleFn { + map *transitions; + const rules::Symbol &item_lhs; + + LexItemSet transform_item_set(const LexItemSet &item_set, + function transform) { + LexItemSet new_set; + for (const LexItem &item : item_set.entries) + new_set.entries.insert(LexItem(item.lhs, transform(item.rule))); + return new_set; + } + + void merge_transition(map *transitions, + CharacterSet new_char_set, LexItemSet new_item_set) { + vector> new_entries; + + auto iter = transitions->begin(); + while (iter != transitions->end()) { + CharacterSet existing_char_set = iter->first; + LexItemSet &existing_item_set = iter->second; + + CharacterSet intersection = existing_char_set.remove_set(new_char_set); + if (!intersection.is_empty()) { + new_char_set.remove_set(intersection); + if (!existing_char_set.is_empty()) + new_entries.push_back({ existing_char_set, existing_item_set }); + existing_item_set.entries.insert(new_item_set.entries.begin(), + new_item_set.entries.end()); + new_entries.push_back({ intersection, existing_item_set }); + transitions->erase(iter++); + } else { + iter++; + } + } + + transitions->insert(new_entries.begin(), new_entries.end()); + + if (!new_char_set.is_empty()) + transitions->insert({ new_char_set, new_item_set }); + } + + void apply_to(const CharacterSet *rule) { + merge_transition(transitions, *rule, + LexItemSet({ + LexItem(item_lhs, rules::Blank::build()), + })); + } + + void apply_to(const rules::Choice *rule) { + for (const rule_ptr &element : rule->elements) + apply(element); + } + + void apply_to(const rules::Seq *rule) { + map left_transitions; + LexItemTransitions(&left_transitions, item_lhs).apply(rule->left); + for (auto &pair : left_transitions) + merge_transition( + transitions, pair.first, + transform_item_set(pair.second, [&rule](rule_ptr item_rule) { + return rules::Seq::build({ item_rule, rule->right }); + })); + + if (rule_can_be_blank(rule->left)) + apply(rule->right); + } + + void apply_to(const rules::Repeat *rule) { + map content_transitions; + LexItemTransitions(&content_transitions, item_lhs).apply(rule->content); + for (auto &pair : content_transitions) + merge_transition( + transitions, pair.first, + transform_item_set(pair.second, [&rule](rule_ptr item_rule) { + return rules::Seq::build({ item_rule, rule->copy() }); + })); + } + + void apply_to(const rules::Metadata *rule) { + map content_transitions; + LexItemTransitions(&content_transitions, item_lhs).apply(rule->rule); + for (auto &pair : content_transitions) + merge_transition( + transitions, pair.first, + transform_item_set(pair.second, [&rule](rule_ptr item_rule) { + return make_shared(item_rule, rule->value); + })); + } + + public: + LexItemTransitions(map *transitions, + const rules::Symbol &item_lhs) + : transitions(transitions), item_lhs(item_lhs) {} +}; + +void lex_item_transitions(map *transitions, + const LexItem &item) { + LexItemTransitions(transitions, item.lhs).apply(item.rule); +} + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/rule_transitions.h b/src/compiler/build_tables/lex_item_transitions.h similarity index 65% rename from src/compiler/build_tables/rule_transitions.h rename to src/compiler/build_tables/lex_item_transitions.h index b97ded09..b6f24b0a 100644 --- a/src/compiler/build_tables/rule_transitions.h +++ b/src/compiler/build_tables/lex_item_transitions.h @@ -2,13 +2,16 @@ #define COMPILER_BUILD_TABLES_RULE_TRANSITIONS_H_ #include +#include #include "compiler/rules/character_set.h" #include "compiler/rules/symbol.h" +#include "compiler/build_tables/lex_item.h" namespace tree_sitter { namespace build_tables { -std::map rule_transitions(const rule_ptr &); +void lex_item_transitions(std::map *transitions, + const LexItem &); } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/build_tables/merge_transitions.h b/src/compiler/build_tables/merge_transitions.h deleted file mode 100644 index 0e2ecf7c..00000000 --- a/src/compiler/build_tables/merge_transitions.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_ -#define COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_ - -#include -#include -#include "compiler/rules/character_set.h" -#include "compiler/rules/symbol.h" - -namespace tree_sitter { -namespace build_tables { - -/* - * Merges two transition maps with character set keys. If the - * two maps contain values for overlapping character sets, the - * new value for the two sets' intersection will be computed by - * merging the old and new values using the given function. - */ -template -void merge_transition(std::map *left, - const std::pair &new_pair, - std::function merge_fn) { - rules::CharacterSet new_char_set = new_pair.first; - T new_value = new_pair.second; - - std::map pairs_to_insert; - - auto iter = left->begin(); - while (iter != left->end()) { - rules::CharacterSet char_set = iter->first; - T value = iter->second; - - rules::CharacterSet intersection = char_set.remove_set(new_char_set); - if (!intersection.is_empty()) { - new_char_set.remove_set(intersection); - if (!char_set.is_empty()) - pairs_to_insert.insert({ char_set, value }); - merge_fn(&value, &new_value); - pairs_to_insert.insert({ intersection, value }); - left->erase(iter++); - } else { - ++iter; - } - } - - left->insert(pairs_to_insert.begin(), pairs_to_insert.end()); - - if (!new_char_set.is_empty()) - left->insert({ new_char_set, new_value }); -} - -} // namespace build_tables -} // namespace tree_sitter - -#endif // COMPILER_BUILD_TABLES_MERGE_TRANSITIONS_H_ diff --git a/src/compiler/build_tables/rule_transitions.cc b/src/compiler/build_tables/rule_transitions.cc deleted file mode 100644 index c0c1d3ad..00000000 --- a/src/compiler/build_tables/rule_transitions.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include "compiler/build_tables/rule_transitions.h" -#include "compiler/build_tables/rule_can_be_blank.h" -#include "compiler/build_tables/merge_transitions.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/seq.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/symbol.h" -#include "compiler/rules/character_set.h" -#include "compiler/rules/visitor.h" - -namespace tree_sitter { -namespace build_tables { - -using std::map; -using std::make_shared; -using rules::CharacterSet; -using rules::Choice; -using rules::Symbol; - -class RuleTransitions : public rules::RuleFn> { - private: - void merge_transitions(map *left, - const map &right) { - for (auto &pair : right) - merge_transition(left, pair, - [](rule_ptr *left, const rule_ptr *right) { - *left = Choice::build({ *left, *right }); - }); - } - - map apply_to(const CharacterSet *rule) { - return map( - { { *rule, make_shared() } }); - } - - map apply_to(const rules::Choice *rule) { - map result; - for (const auto &el : rule->elements) - merge_transitions(&result, this->apply(el)); - return result; - } - - map apply_to(const rules::Seq *rule) { - auto result = this->apply(rule->left); - for (auto &pair : result) - pair.second = rules::Seq::build({ pair.second, rule->right }); - if (rule_can_be_blank(rule->left)) - merge_transitions(&result, this->apply(rule->right)); - return result; - } - - map apply_to(const rules::Repeat *rule) { - auto result = this->apply(rule->content); - for (auto &pair : result) - pair.second = rules::Seq::build({ pair.second, rule->copy() }); - return result; - } - - map apply_to(const rules::Metadata *rule) { - auto result = this->apply(rule->rule); - for (auto &pair : result) - pair.second = make_shared(pair.second, rule->value); - return result; - } -}; - -map rule_transitions(const rule_ptr &rule) { - return RuleTransitions().apply(rule); -} - -} // namespace build_tables -} // namespace tree_sitter diff --git a/src/compiler/lex_table.cc b/src/compiler/lex_table.cc index d50ecef0..255c24f4 100644 --- a/src/compiler/lex_table.cc +++ b/src/compiler/lex_table.cc @@ -27,7 +27,8 @@ LexAction LexAction::Error() { return LexAction(LexActionTypeError, -1, Symbol(-1), { 0, 0 }); } -LexAction LexAction::Advance(size_t state_index, PrecedenceRange precedence_range) { +LexAction LexAction::Advance(size_t state_index, + PrecedenceRange precedence_range) { return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), precedence_range); } diff --git a/src/compiler/prepare_grammar/prepare_grammar.cc b/src/compiler/prepare_grammar/prepare_grammar.cc index ad80aaa7..f8ff3a59 100644 --- a/src/compiler/prepare_grammar/prepare_grammar.cc +++ b/src/compiler/prepare_grammar/prepare_grammar.cc @@ -40,7 +40,8 @@ tuple prepare_grammar( if (error) return make_tuple(SyntaxGrammar(), LexicalGrammar(), error); - return make_tuple(flatten_grammar(syntax_grammar), normalize_rules(lex_grammar), nullptr); + return make_tuple(flatten_grammar(syntax_grammar), + normalize_rules(lex_grammar), nullptr); } } // namespace prepare_grammar