From 73b3280fbb4a80675d1cd2e2d87e1aeeb46062d8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 30 Oct 2015 16:07:29 -0700 Subject: [PATCH] Include precedence calculation in LexItemSet::transitions --- spec/compiler/build_tables/lex_item_spec.cc | 262 ++++++++++++------ src/compiler/build_tables/build_lex_table.cc | 34 +-- .../build_tables/build_parse_table.cc | 7 +- src/compiler/build_tables/lex_item.cc | 4 +- src/compiler/build_tables/lex_item.h | 7 +- .../build_tables/lex_item_transitions.cc | 84 ++++-- .../build_tables/lex_item_transitions.h | 4 +- src/compiler/build_tables/parse_item.cc | 3 +- src/compiler/build_tables/parse_item.h | 3 +- src/compiler/precedence_range.cc | 10 + src/compiler/precedence_range.h | 2 + 11 files changed, 278 insertions(+), 142 deletions(-) diff --git a/spec/compiler/build_tables/lex_item_spec.cc b/spec/compiler/build_tables/lex_item_spec.cc index db35b659..f80ace52 100644 --- a/spec/compiler/build_tables/lex_item_spec.cc +++ b/spec/compiler/build_tables/lex_item_spec.cc @@ -48,12 +48,15 @@ describe("LexItemSet::transitions()", [&]() { AssertThat( item_set.transitions(), - Equals(map({ + Equals(LexItemSet::TransitionMap({ { CharacterSet().include('x'), - LexItemSet({ - LexItem(Symbol(1), blank()), - }) + { + LexItemSet({ + LexItem(Symbol(1), blank()), + }), + PrecedenceRange() + } } }))); }); @@ -70,16 +73,77 @@ describe("LexItemSet::transitions()", [&]() { AssertThat( item_set.transitions(), - Equals(map({ + Equals(LexItemSet::TransitionMap({ { CharacterSet().include('w'), - LexItemSet({ - LexItem(Symbol(1), seq({ - character({ 'x' }), - character({ 'y' }), - character({ 'z' }), - })), - }) + { + LexItemSet({ + LexItem(Symbol(1), seq({ + character({ 'x' }), + character({ 'y' }), + character({ 'z' }), + })), + }), + PrecedenceRange() + } + } + }))); + }); + + it("handles sequences with nested precedence", [&]() { + LexItemSet item_set({ + LexItem(Symbol(1), seq({ + prec(3, seq({ + prec(4, seq({ + character({ 'w' }), + character({ 'x' }) })), + character({ 'y' }) })), + character({ 'z' }), + })), + }); + + AssertThat( + item_set.transitions(), + Equals(LexItemSet::TransitionMap({ + { + CharacterSet().include('w'), + { + LexItemSet({ + LexItem(Symbol(1), seq({ + prec(3, seq({ + prec(4, character({ 'x' })), + character({ 'y' }) })), + character({ 'z' }), + })), + }), + PrecedenceRange(4) + } + } + }))); + + LexItemSet item_set2({ + LexItem(Symbol(1), seq({ + prec(3, seq({ + prec(4, character({ 'x' })), + character({ 'y' }) })), + character({ 'z' }), + })), + }); + + AssertThat( + item_set2.transitions(), + Equals(LexItemSet::TransitionMap({ + { + CharacterSet().include('x'), + { + LexItemSet({ + LexItem(Symbol(1), seq({ + prec(3, character({ 'y' })), + character({ 'z' }), + })), + }), + PrecedenceRange(3) + } } }))); }); @@ -98,21 +162,27 @@ describe("LexItemSet::transitions()", [&]() { AssertThat( item_set.transitions(), - Equals(map({ + Equals(LexItemSet::TransitionMap({ { CharacterSet().include('x'), - LexItemSet({ - LexItem(Symbol(1), seq({ - character({ 'y' }), - character({ 'z' }), - })), - }) + { + LexItemSet({ + LexItem(Symbol(1), seq({ + character({ 'y' }), + character({ 'z' }), + })), + }), + PrecedenceRange() + } }, { CharacterSet().include('y'), - LexItemSet({ - LexItem(Symbol(1), character({ 'z' })), - }) + { + LexItemSet({ + LexItem(Symbol(1), character({ 'z' })), + }), + PrecedenceRange() + } } }))); }); @@ -136,26 +206,32 @@ describe("LexItemSet::transitions()", [&]() { AssertThat( item_set.transitions(), - Equals(map({ + Equals(LexItemSet::TransitionMap({ { CharacterSet().include('a'), - LexItemSet({ - LexItem(Symbol(1), seq({ - character({ 'b' }), - repeat1(seq({ - character({ 'a' }), + { + LexItemSet({ + LexItem(Symbol(1), seq({ character({ 'b' }), - })) - })), - LexItem(Symbol(1), character({ 'b' })), - }) + repeat1(seq({ + character({ 'a' }), + character({ 'b' }), + })) + })), + LexItem(Symbol(1), character({ 'b' })), + }), + PrecedenceRange() + } }, { CharacterSet().include('c'), - LexItemSet({ - LexItem(Symbol(2), repeat1(character({ 'c' }))), - LexItem(Symbol(2), blank()), - }) + { + LexItemSet({ + LexItem(Symbol(2), repeat1(character({ 'c' }))), + LexItem(Symbol(2), blank()), + }), + PrecedenceRange() + } } }))); }); @@ -163,38 +239,47 @@ describe("LexItemSet::transitions()", [&]() { it("handles choices between overlapping character sets", [&]() { LexItemSet item_set({ LexItem(Symbol(1), choice({ - seq({ + prec(2, seq({ character({ 'a', 'b', 'c', 'd' }), character({ 'x' }), - }), - seq({ + })), + prec(3, seq({ character({ 'c', 'd', 'e', 'f' }), character({ 'y' }), - }), + })), })) }); AssertThat( item_set.transitions(), - Equals(map({ + Equals(LexItemSet::TransitionMap({ { CharacterSet().include('a', 'b'), - LexItemSet({ - LexItem(Symbol(1), character({ 'x' })), - }) + { + LexItemSet({ + LexItem(Symbol(1), prec(2, character({ 'x' }))), + }), + PrecedenceRange(2) + } }, { CharacterSet().include('c', 'd'), - LexItemSet({ - LexItem(Symbol(1), character({ 'x' })), - LexItem(Symbol(1), character({ 'y' })), - }) + { + LexItemSet({ + LexItem(Symbol(1), prec(2, character({ 'x' }))), + LexItem(Symbol(1), prec(3, character({ 'y' }))), + }), + PrecedenceRange(2, 3) + } }, { CharacterSet().include('e', 'f'), - LexItemSet({ - LexItem(Symbol(1), character({ 'y' })), - }) + { + LexItemSet({ + LexItem(Symbol(1), prec(3, character({ 'y' }))), + }), + PrecedenceRange(3) + } }, }))); }); @@ -215,19 +300,25 @@ describe("LexItemSet::transitions()", [&]() { AssertThat( item_set.transitions(), - Equals(map({ + Equals(LexItemSet::TransitionMap({ { CharacterSet().include('a').include('e', 'f'), - LexItemSet({ - LexItem(Symbol(1), character({ 'y' })), - }) + { + LexItemSet({ + LexItem(Symbol(1), character({ 'y' })), + }), + PrecedenceRange() + } }, { CharacterSet().include('b', 'd'), - LexItemSet({ - LexItem(Symbol(1), character({ 'x' })), - LexItem(Symbol(1), character({ 'y' })), - }) + { + LexItemSet({ + LexItem(Symbol(1), character({ 'x' })), + LexItem(Symbol(1), character({ 'y' })), + }), + PrecedenceRange() + } }, }))); }); @@ -248,19 +339,25 @@ describe("LexItemSet::transitions()", [&]() { AssertThat( item_set.transitions(), - Equals(map({ + Equals(LexItemSet::TransitionMap({ { CharacterSet().include_all().exclude('/').exclude('\\'), - LexItemSet({ - LexItem(Symbol(1), character({ '/' })), - }) + { + LexItemSet({ + LexItem(Symbol(1), character({ '/' })), + }), + PrecedenceRange() + } }, { CharacterSet().include('\\'), - LexItemSet({ - LexItem(Symbol(1), character({ '/' })), - LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })), - }) + { + LexItemSet({ + LexItem(Symbol(1), character({ '/' })), + LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })), + }), + PrecedenceRange() + } }, }))); }); @@ -271,25 +368,34 @@ describe("LexItemSet::transitions()", [&]() { LexItem(Symbol(2), character({ 'e', 'f', 'g', 'h', 'i' })) }); - AssertThat(set1.transitions(), Equals(map({ + AssertThat(set1.transitions(), Equals(LexItemSet::TransitionMap({ { CharacterSet().include('a', 'd'), - LexItemSet({ - LexItem(Symbol(1), blank()), - }) + { + LexItemSet({ + LexItem(Symbol(1), blank()), + }), + PrecedenceRange() + } }, { CharacterSet().include('e', 'f'), - LexItemSet({ - LexItem(Symbol(1), blank()), - LexItem(Symbol(2), blank()), - }) + { + LexItemSet({ + LexItem(Symbol(1), blank()), + LexItem(Symbol(2), blank()), + }), + PrecedenceRange() + } }, { CharacterSet().include('g', 'i'), - LexItemSet({ - LexItem(Symbol(2), blank()), - }) + { + LexItemSet({ + LexItem(Symbol(2), blank()), + }), + PrecedenceRange() + } }, }))); }); diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index 49396137..583892f0 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -7,7 +7,6 @@ #include #include "compiler/build_tables/lex_conflict_manager.h" #include "compiler/build_tables/get_completion_status.h" -#include "compiler/build_tables/get_metadata.h" #include "compiler/build_tables/lex_item.h" #include "compiler/build_tables/does_match_any_line.h" #include "compiler/parse_table.h" @@ -51,7 +50,8 @@ class LexTableBuilder { LexTable build() { for (ParseState &parse_state : parse_table->states) { - LexItemSet item_set = build_lex_item_set(parse_state.expected_inputs(), false); + LexItemSet item_set = + build_lex_item_set(parse_state.expected_inputs(), false); parse_state.lex_state_id = add_lex_state(item_set); } @@ -120,13 +120,14 @@ class LexTableBuilder { void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) { for (const auto &transition : item_set.transitions()) { const CharacterSet &rule = transition.first; - const LexItemSet &new_item_set = transition.second; - LexStateId new_state_id = add_lex_state(new_item_set); - auto action = LexAction::Advance( - new_state_id, precedence_range_for_item_set(new_item_set)); - if (conflict_manager.resolve(action, - lex_table.state(state_id).default_action)) + const LexItemSet &new_item_set = transition.second.first; + const PrecedenceRange &precedence = transition.second.second; + auto current_action = lex_table.state(state_id).default_action; + auto action = LexAction::Advance(-1, precedence); + if (conflict_manager.resolve(action, current_action)) { + action.state_index = add_lex_state(new_item_set); lex_table.state(state_id).actions[rule] = action; + } } } @@ -135,10 +136,9 @@ class LexTableBuilder { CompletionStatus completion_status = get_completion_status(item.rule); if (completion_status.is_done) { auto current_action = lex_table.state(state_id).default_action; - auto new_action = - LexAction::Accept(item.lhs, completion_status.precedence); - if (conflict_manager.resolve(new_action, current_action)) - lex_table.state(state_id).default_action = new_action; + auto action = LexAction::Accept(item.lhs, completion_status.precedence); + if (conflict_manager.resolve(action, current_action)) + lex_table.state(state_id).default_action = action; } } } @@ -148,16 +148,6 @@ class LexTableBuilder { if (item.is_token_start()) lex_table.state(state_id).is_token_start = true; } - - PrecedenceRange precedence_range_for_item_set(const LexItemSet &item_set) const { - PrecedenceRange result; - for (const auto &item : item_set.entries) { - auto precedence_range = get_metadata(item.rule, rules::PRECEDENCE); - result.add(precedence_range.min); - result.add(precedence_range.max); - } - return result; - } }; LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) { diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 2277d025..55cfcd31 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -102,9 +102,7 @@ class ParseTableBuilder { const PrecedenceRange &precedence = transition.second.second; ParseAction *new_action = add_action( - state_id, symbol, - ParseAction::Shift(0, precedence), - item_set); + state_id, symbol, ParseAction::Shift(0, precedence), item_set); if (new_action) new_action->state_index = add_parse_state(next_item_set); } @@ -121,8 +119,7 @@ class ParseTableBuilder { (item.lhs() == rules::START()) ? ParseAction::Accept() : ParseAction::Reduce(Symbol(item.variable_index), item.step_index, - status.precedence, - status.associativity, + status.precedence, status.associativity, *item.production); for (const auto &lookahead_sym : *lookahead_symbols.entries) diff --git a/src/compiler/build_tables/lex_item.cc b/src/compiler/build_tables/lex_item.cc index ddb41996..95fcb496 100644 --- a/src/compiler/build_tables/lex_item.cc +++ b/src/compiler/build_tables/lex_item.cc @@ -45,8 +45,8 @@ bool LexItemSet::operator==(const LexItemSet &other) const { return entries == other.entries; } -map LexItemSet::transitions() const { - map result; +LexItemSet::TransitionMap LexItemSet::transitions() const { + TransitionMap result; for (const LexItem &item : entries) lex_item_transitions(&result, item); return result; diff --git a/src/compiler/build_tables/lex_item.h b/src/compiler/build_tables/lex_item.h index 0ff4d0e7..f941de18 100644 --- a/src/compiler/build_tables/lex_item.h +++ b/src/compiler/build_tables/lex_item.h @@ -3,9 +3,11 @@ #include #include +#include #include #include "compiler/rules/character_set.h" #include "compiler/rules/symbol.h" +#include "compiler/precedence_range.h" namespace tree_sitter { namespace build_tables { @@ -29,8 +31,11 @@ class LexItemSet { LexItemSet(); explicit LexItemSet(const std::unordered_set &); + typedef std::map> + TransitionMap; + bool operator==(const LexItemSet &) const; - std::map transitions() const; + TransitionMap transitions() const; std::unordered_set entries; diff --git a/src/compiler/build_tables/lex_item_transitions.cc b/src/compiler/build_tables/lex_item_transitions.cc index 099205c7..48293e6e 100644 --- a/src/compiler/build_tables/lex_item_transitions.cc +++ b/src/compiler/build_tables/lex_item_transitions.cc @@ -25,8 +25,9 @@ using std::vector; using rules::CharacterSet; class LexItemTransitions : public rules::RuleFn { - map *transitions; + LexItemSet::TransitionMap *transitions; const rules::Symbol &item_lhs; + vector *precedence_stack; LexItemSet transform_item_set(const LexItemSet &item_set, function callback) { @@ -36,23 +37,29 @@ class LexItemTransitions : public rules::RuleFn { return new_set; } - void merge_transition(map *transitions, - CharacterSet new_char_set, LexItemSet new_item_set) { - vector> new_entries; + void merge_transition(LexItemSet::TransitionMap *transitions, + CharacterSet new_char_set, LexItemSet new_item_set, + PrecedenceRange new_precedence_range) { + vector>> new_entries; auto iter = transitions->begin(); while (iter != transitions->end()) { CharacterSet existing_char_set = iter->first; - LexItemSet &existing_item_set = iter->second; + LexItemSet &existing_item_set = iter->second.first; + PrecedenceRange &existing_precedence_range = iter->second.second; CharacterSet intersection = existing_char_set.remove_set(new_char_set); if (!intersection.is_empty()) { new_char_set.remove_set(intersection); if (!existing_char_set.is_empty()) - new_entries.push_back({ existing_char_set, existing_item_set }); + new_entries.push_back( + { existing_char_set, + { existing_item_set, existing_precedence_range } }); existing_item_set.entries.insert(new_item_set.entries.begin(), new_item_set.entries.end()); - new_entries.push_back({ intersection, existing_item_set }); + existing_precedence_range.add(new_precedence_range); + new_entries.push_back( + { intersection, { existing_item_set, existing_precedence_range } }); transitions->erase(iter++); } else { iter++; @@ -62,14 +69,15 @@ class LexItemTransitions : public rules::RuleFn { transitions->insert(new_entries.begin(), new_entries.end()); if (!new_char_set.is_empty()) - transitions->insert({ new_char_set, new_item_set }); + transitions->insert( + { new_char_set, { new_item_set, new_precedence_range } }); } void apply_to(const CharacterSet *rule) { merge_transition(transitions, *rule, LexItemSet({ LexItem(item_lhs, rules::Blank::build()), - })); + }), PrecedenceRange()); } void apply_to(const rules::Choice *rule) { @@ -78,52 +86,72 @@ class LexItemTransitions : public rules::RuleFn { } void apply_to(const rules::Seq *rule) { - map left_transitions; - LexItemTransitions(&left_transitions, item_lhs).apply(rule->left); - for (const auto &pair : left_transitions) + LexItemSet::TransitionMap left_transitions; + LexItemTransitions(&left_transitions, this).apply(rule->left); + for (const auto &pair : left_transitions) { + PrecedenceRange precedence(pair.second.second); + if (precedence.empty && !precedence_stack->empty()) + precedence.add(precedence_stack->back()); + merge_transition( transitions, pair.first, - transform_item_set(pair.second, [&rule](rule_ptr item_rule) { + transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) { return rules::Seq::build({ item_rule, rule->right }); - })); + }), precedence); + } if (rule_can_be_blank(rule->left)) apply(rule->right); } void apply_to(const rules::Repeat *rule) { - map content_transitions; - LexItemTransitions(&content_transitions, item_lhs).apply(rule->content); + LexItemSet::TransitionMap content_transitions; + LexItemTransitions(&content_transitions, this).apply(rule->content); for (const auto &pair : content_transitions) { - merge_transition(transitions, pair.first, pair.second); + merge_transition(transitions, pair.first, pair.second.first, + pair.second.second); merge_transition( transitions, pair.first, - transform_item_set(pair.second, [&rule](rule_ptr item_rule) { + transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) { return rules::Seq::build({ item_rule, rule->copy() }); - })); + }), pair.second.second); } } void apply_to(const rules::Metadata *rule) { - map content_transitions; - LexItemTransitions(&content_transitions, item_lhs).apply(rule->rule); + LexItemSet::TransitionMap content_transitions; + precedence_stack->push_back(rule->value_for(rules::PRECEDENCE)); + + LexItemTransitions(&content_transitions, this).apply(rule->rule); for (const auto &pair : content_transitions) merge_transition( transitions, pair.first, - transform_item_set(pair.second, [&rule](rule_ptr item_rule) { + transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) { return rules::Metadata::build(item_rule, rule->value); - })); + }), pair.second.second); + + precedence_stack->pop_back(); } public: - LexItemTransitions(map *transitions, - const rules::Symbol &item_lhs) - : transitions(transitions), item_lhs(item_lhs) {} + LexItemTransitions(LexItemSet::TransitionMap *transitions, + const rules::Symbol &item_lhs, + vector *precedence_stack) + : transitions(transitions), + item_lhs(item_lhs), + precedence_stack(precedence_stack) {} + + LexItemTransitions(LexItemSet::TransitionMap *transitions, + LexItemTransitions *other) + : transitions(transitions), + item_lhs(other->item_lhs), + precedence_stack(other->precedence_stack) {} }; -void lex_item_transitions(map *transitions, +void lex_item_transitions(LexItemSet::TransitionMap *transitions, const LexItem &item) { - LexItemTransitions(transitions, item.lhs).apply(item.rule); + vector precedence_stack; + LexItemTransitions(transitions, item.lhs, &precedence_stack).apply(item.rule); } } // namespace build_tables diff --git a/src/compiler/build_tables/lex_item_transitions.h b/src/compiler/build_tables/lex_item_transitions.h index 342df991..b9dc25d0 100644 --- a/src/compiler/build_tables/lex_item_transitions.h +++ b/src/compiler/build_tables/lex_item_transitions.h @@ -1,8 +1,6 @@ #ifndef COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_ #define COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_ -#include -#include #include "compiler/rules/character_set.h" #include "compiler/rules/symbol.h" #include "compiler/build_tables/lex_item.h" @@ -10,7 +8,7 @@ namespace tree_sitter { namespace build_tables { -void lex_item_transitions(std::map *transitions, +void lex_item_transitions(LexItemSet::TransitionMap *transitions, const LexItem &); } // namespace build_tables diff --git a/src/compiler/build_tables/parse_item.cc b/src/compiler/build_tables/parse_item.cc index d9b9153b..89d42cf1 100644 --- a/src/compiler/build_tables/parse_item.cc +++ b/src/compiler/build_tables/parse_item.cc @@ -46,8 +46,7 @@ ParseItem::CompletionStatus ParseItem::completion_status() const { if (step_index == production->size()) { result.is_done = true; if (step_index > 0) { - const ProductionStep &last_step = - production->at(step_index - 1); + const ProductionStep &last_step = production->at(step_index - 1); result.precedence = last_step.precedence; result.associativity = last_step.associativity; } diff --git a/src/compiler/build_tables/parse_item.h b/src/compiler/build_tables/parse_item.h index 78eaa1bf..f26f05a0 100644 --- a/src/compiler/build_tables/parse_item.h +++ b/src/compiler/build_tables/parse_item.h @@ -44,7 +44,8 @@ class ParseItemSet { ParseItemSet(); explicit ParseItemSet(const std::map &); - typedef std::map> TransitionMap; + typedef std::map> + TransitionMap; TransitionMap transitions() const; bool operator==(const ParseItemSet &) const; diff --git a/src/compiler/precedence_range.cc b/src/compiler/precedence_range.cc index c5a228bc..2f6b7ecf 100644 --- a/src/compiler/precedence_range.cc +++ b/src/compiler/precedence_range.cc @@ -7,6 +7,9 @@ PrecedenceRange::PrecedenceRange() : min(0), max(0), empty(true) {} PrecedenceRange::PrecedenceRange(int min, int max) : min(min), max(max), empty(false) {} +PrecedenceRange::PrecedenceRange(int value) + : min(value), max(value), empty(false) {} + void PrecedenceRange::add(int new_value) { if (empty) { min = new_value; @@ -20,6 +23,13 @@ void PrecedenceRange::add(int new_value) { } } +void PrecedenceRange::add(const PrecedenceRange &other) { + if (!other.empty) { + add(other.min); + add(other.max); + } +} + bool PrecedenceRange::operator<(const PrecedenceRange &other) const { if (empty) return !other.empty; diff --git a/src/compiler/precedence_range.h b/src/compiler/precedence_range.h index 5e7903c2..f2f52de4 100644 --- a/src/compiler/precedence_range.h +++ b/src/compiler/precedence_range.h @@ -5,9 +5,11 @@ namespace tree_sitter { struct PrecedenceRange { PrecedenceRange(); + explicit PrecedenceRange(int value); PrecedenceRange(int min, int max); void add(int value); + void add(const PrecedenceRange &); bool operator==(const PrecedenceRange &other) const; bool operator<(const PrecedenceRange &other) const;