From 59712ec492b96e8d37f08e1fd4e886a96a2e4793 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 19 May 2016 09:56:49 -0700 Subject: [PATCH] Clean up lex table generation --- spec/compiler/build_tables/lex_item_spec.cc | 9 +- spec/helpers/stream_methods.cc | 4 + spec/helpers/stream_methods.h | 2 + src/compiler/build_tables/build_lex_table.cc | 108 +++++----- src/compiler/build_tables/lex_item.cc | 4 + src/compiler/build_tables/lex_item.h | 14 +- .../build_tables/lex_item_transitions.cc | 185 ++++++++++-------- 7 files changed, 182 insertions(+), 144 deletions(-) diff --git a/spec/compiler/build_tables/lex_item_spec.cc b/spec/compiler/build_tables/lex_item_spec.cc index c304fa43..b129be1d 100644 --- a/spec/compiler/build_tables/lex_item_spec.cc +++ b/spec/compiler/build_tables/lex_item_spec.cc @@ -3,6 +3,7 @@ #include "compiler/rules/metadata.h" #include "compiler/rules.h" #include "helpers/rule_helpers.h" +#include "helpers/stream_methods.h" using namespace rules; using namespace build_tables; @@ -13,7 +14,7 @@ describe("LexItem", []() { describe("is_token_start()", [&]() { Symbol sym(1); rule_ptr token_start = make_shared(str("a"), map({ - { START_TOKEN, 1 } + { START_TOKEN, true } })); it("returns true for rules designated as token starts", [&]() { @@ -155,7 +156,7 @@ describe("LexItemSet::transitions()", [&]() { } }))); - LexItemSet item_set2 = transitions[CharacterSet().include('v')].first; + LexItemSet item_set2 = transitions[CharacterSet().include('v')].destination; transitions = item_set2.transitions(); AssertThat( @@ -180,7 +181,7 @@ describe("LexItemSet::transitions()", [&]() { } }))); - LexItemSet item_set3 = transitions[CharacterSet().include('w')].first; + LexItemSet item_set3 = transitions[CharacterSet().include('w')].destination; transitions = item_set3.transitions(); AssertThat( @@ -202,7 +203,7 @@ describe("LexItemSet::transitions()", [&]() { } }))); - LexItemSet item_set4 = transitions[CharacterSet().include('x')].first; + LexItemSet item_set4 = transitions[CharacterSet().include('x')].destination; transitions = item_set4.transitions(); AssertThat( diff --git a/spec/helpers/stream_methods.cc b/spec/helpers/stream_methods.cc index d6c6c1e3..a2a364a3 100644 --- a/spec/helpers/stream_methods.cc +++ b/spec/helpers/stream_methods.cc @@ -114,6 +114,10 @@ ostream &operator<<(ostream &stream, const LexItemSet &item_set) { return stream << item_set.entries; } +ostream &operator<<(ostream &stream, const LexItemSet::Transition &transition) { + return stream << "{dest: " << transition.destination << ", prec: " << transition.precedence << "}"; +} + ostream &operator<<(ostream &stream, const ParseItem &item) { return stream << string("(item variable:") << to_string(item.variable_index) << string(" production:") << to_string((size_t)item.production % 1000) diff --git a/spec/helpers/stream_methods.h b/spec/helpers/stream_methods.h index e3ecbc15..515060eb 100644 --- a/spec/helpers/stream_methods.h +++ b/spec/helpers/stream_methods.h @@ -9,6 +9,7 @@ #include #include "compiler/grammar.h" #include "compiler/compile_error.h" +#include "compiler/build_tables/lex_item.h" using std::cout; @@ -122,6 +123,7 @@ class LookaheadSet; ostream &operator<<(ostream &, const LexItem &); ostream &operator<<(ostream &, const LexItemSet &); +ostream &operator<<(ostream &, const LexItemSet::Transition &); ostream &operator<<(ostream &, const ParseItem &); ostream &operator<<(ostream &, const ParseItemSet &); ostream &operator<<(ostream &, const LookaheadSet &); diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index c4abe367..174f8459 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -21,29 +21,37 @@ namespace tree_sitter { namespace build_tables { -using std::make_shared; using std::map; using std::set; using std::string; -using std::unordered_map; using std::vector; +using std::make_shared; +using std::unordered_map; +using rules::Blank; +using rules::Choice; using rules::CharacterSet; +using rules::Repeat; using rules::Symbol; +using rules::Metadata; +using rules::Seq; +using rules::START_TOKEN; +using rules::PRECEDENCE; +using rules::IS_ACTIVE; class LexTableBuilder { - const LexicalGrammar lex_grammar; - LexConflictManager conflict_manager; - ParseTable *parse_table; - unordered_map lex_state_ids; LexTable lex_table; + ParseTable *parse_table; + const LexicalGrammar lex_grammar; vector separator_rules; + LexConflictManager conflict_manager; + unordered_map lex_state_ids; public: LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) - : lex_grammar(lex_grammar), parse_table(parse_table) { + : parse_table(parse_table), lex_grammar(lex_grammar) { for (const rule_ptr &rule : lex_grammar.separators) - separator_rules.push_back(rules::Repeat::build(rule)); - separator_rules.push_back(rules::Blank::build()); + separator_rules.push_back(Repeat::build(rule)); + separator_rules.push_back(Blank::build()); } LexTable build() { @@ -59,44 +67,9 @@ class LexTableBuilder { } private: - LexItemSet build_lex_item_set(const set &symbols) { - LexItemSet result; - for (const Symbol &symbol : symbols) { - vector rules; - if (symbol == rules::END_OF_INPUT()) { - rules.push_back(CharacterSet().include(0).copy()); - } else if (symbol.is_token) { - rule_ptr rule = lex_grammar.variables[symbol.index].rule; - - auto choice = rule->as(); - if (choice) - for (const rule_ptr &element : choice->elements) - rules.push_back(element); - else - rules.push_back(rule); - } - - for (const rule_ptr &rule : rules) - for (const rule_ptr &separator_rule : separator_rules) - result.entries.insert(LexItem( - symbol, - rules::Metadata::build( - rules::Seq::build({ - rules::Metadata::build(separator_rule, - { { rules::START_TOKEN, 1 } }), - rules::Metadata::build(rule, { { rules::PRECEDENCE, 0 } }), - }), - { - { rules::PRECEDENCE, INT_MIN }, { rules::IS_ACTIVE, true }, - }))); - } - - return result; - } - void add_lex_state_for_parse_state(ParseState *parse_state) { parse_state->lex_state_id = - add_lex_state(build_lex_item_set(parse_state->expected_inputs())); + add_lex_state(item_set_for_tokens(parse_state->expected_inputs())); } LexStateId add_lex_state(const LexItemSet &item_set) { @@ -114,16 +87,15 @@ class LexTableBuilder { } void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) { - for (const auto &transition : item_set.transitions()) { - const CharacterSet &rule = transition.first; - const LexItemSet &new_item_set = transition.second.first; - const PrecedenceRange &precedence = transition.second.second; - AdvanceAction action(-1, precedence); + for (const auto &pair : item_set.transitions()) { + const CharacterSet &characters = pair.first; + const LexItemSet::Transition &transition = pair.second; + AdvanceAction action(-1, transition.precedence); auto current_action = lex_table.state(state_id).accept_action; if (conflict_manager.resolve(action, current_action)) { - action.state_index = add_lex_state(new_item_set); - lex_table.state(state_id).advance_actions[rule] = action; + action.state_index = add_lex_state(transition.destination); + lex_table.state(state_id).advance_actions[characters] = action; } } } @@ -170,6 +142,38 @@ class LexTableBuilder { parse_state.lex_state_id = replacement->second; } } + + LexItemSet item_set_for_tokens(const set &symbols) { + LexItemSet result; + for (const Symbol &symbol : symbols) + for (const rule_ptr &rule : rules_for_symbol(symbol)) + for (const rule_ptr &separator_rule : separator_rules) + result.entries.insert(LexItem( + symbol, + Metadata::build( + Seq::build({ + Metadata::build(separator_rule, { { START_TOKEN, true } }), + Metadata::build(rule, { { PRECEDENCE, 0 } }), + }), + { { PRECEDENCE, INT_MIN }, { IS_ACTIVE, true } }))); + return result; + } + + vector rules_for_symbol(const rules::Symbol &symbol) { + if (!symbol.is_token) + return {}; + + if (symbol == rules::END_OF_INPUT()) + return { CharacterSet().include(0).copy() }; + + rule_ptr rule = lex_grammar.variables[symbol.index].rule; + + auto choice = rule->as(); + if (choice) + return choice->elements; + else + return { rule }; + } }; LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) { diff --git a/src/compiler/build_tables/lex_item.cc b/src/compiler/build_tables/lex_item.cc index d97e82e8..7f5f3a45 100644 --- a/src/compiler/build_tables/lex_item.cc +++ b/src/compiler/build_tables/lex_item.cc @@ -118,5 +118,9 @@ LexItemSet::TransitionMap LexItemSet::transitions() const { return result; } +bool LexItemSet::Transition::operator==(const LexItemSet::Transition &other) const { + return destination == other.destination && precedence == other.precedence; +} + } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/build_tables/lex_item.h b/src/compiler/build_tables/lex_item.h index 55daa80f..7c72849e 100644 --- a/src/compiler/build_tables/lex_item.h +++ b/src/compiler/build_tables/lex_item.h @@ -39,19 +39,27 @@ class LexItemSet { LexItemSet(); explicit LexItemSet(const std::unordered_set &); - typedef std::map> - TransitionMap; + bool operator==(const LexItemSet &) const; struct Hash { size_t operator()(const LexItemSet &) const; }; - bool operator==(const LexItemSet &) const; + struct Transition; + typedef std::map TransitionMap; + TransitionMap transitions() const; std::unordered_set entries; }; +struct LexItemSet::Transition { + LexItemSet destination; + PrecedenceRange precedence; + + bool operator==(const LexItemSet::Transition &) const; +}; + } // namespace build_tables } // namespace tree_sitter diff --git a/src/compiler/build_tables/lex_item_transitions.cc b/src/compiler/build_tables/lex_item_transitions.cc index 9b7500e7..d95acc66 100644 --- a/src/compiler/build_tables/lex_item_transitions.cc +++ b/src/compiler/build_tables/lex_item_transitions.cc @@ -23,147 +23,162 @@ using std::map; using std::pair; using std::vector; using rules::CharacterSet; +using rules::Symbol; +using rules::Blank; +using rules::MetadataKey; +using rules::Choice; +using rules::Seq; +using rules::Repeat; +using rules::Metadata; +using rules::PRECEDENCE; +using rules::IS_ACTIVE; +typedef LexItemSet::Transition Transition; +typedef LexItemSet::TransitionMap TransitionMap; -class LexItemTransitions : public rules::RuleFn { - LexItemSet::TransitionMap *transitions; - const rules::Symbol &item_lhs; +class TransitionBuilder : public rules::RuleFn { + TransitionMap *transitions; + const Symbol &item_lhs; vector *precedence_stack; - LexItemSet transform_item_set(const LexItemSet &item_set, - function callback) { - LexItemSet new_set; - for (const LexItem &item : item_set.entries) - new_set.entries.insert(LexItem(item.lhs, callback(item.rule))); - return new_set; + Transition transform_transition(const Transition &transition, + function callback) { + LexItemSet destination; + for (const LexItem &item : transition.destination.entries) + destination.entries.insert(LexItem(item.lhs, callback(item.rule))); + return Transition{ destination, transition.precedence }; } - void merge_transition(LexItemSet::TransitionMap *transitions, - CharacterSet new_char_set, LexItemSet new_item_set, - PrecedenceRange new_precedence_range) { - vector>> new_entries; + void add_transition(TransitionMap *transitions, CharacterSet new_characters, + Transition new_transition) { + vector> new_entries; auto iter = transitions->begin(); while (iter != transitions->end()) { - CharacterSet existing_char_set = iter->first; - LexItemSet &existing_item_set = iter->second.first; - PrecedenceRange &existing_precedence_range = iter->second.second; + CharacterSet existing_characters = iter->first; + Transition &existing_transition = iter->second; + LexItemSet &existing_item_set = existing_transition.destination; + PrecedenceRange &existing_precedence = existing_transition.precedence; - CharacterSet intersection = existing_char_set.remove_set(new_char_set); - if (!intersection.is_empty()) { - new_char_set.remove_set(intersection); - if (!existing_char_set.is_empty()) - new_entries.push_back( - { existing_char_set, - { existing_item_set, existing_precedence_range } }); - existing_item_set.entries.insert(new_item_set.entries.begin(), - new_item_set.entries.end()); - existing_precedence_range.add(new_precedence_range); - new_entries.push_back( - { intersection, { existing_item_set, existing_precedence_range } }); - transitions->erase(iter++); - } else { + CharacterSet intersecting_characters = + existing_characters.remove_set(new_characters); + if (intersecting_characters.is_empty()) { iter++; + continue; } + + new_characters.remove_set(intersecting_characters); + + if (!existing_characters.is_empty()) + new_entries.push_back({ + existing_characters, + Transition{ existing_item_set, existing_precedence }, + }); + + existing_item_set.entries.insert( + new_transition.destination.entries.begin(), + new_transition.destination.entries.end()); + existing_precedence.add(new_transition.precedence); + new_entries.push_back({ + intersecting_characters, + Transition{ existing_item_set, existing_precedence }, + }); + + transitions->erase(iter++); } transitions->insert(new_entries.begin(), new_entries.end()); - if (!new_char_set.is_empty()) - transitions->insert( - { new_char_set, { new_item_set, new_precedence_range } }); + if (!new_characters.is_empty()) + transitions->insert({ new_characters, new_transition }); } - map activate_precedence( - map metadata) { - if (metadata.count(rules::PRECEDENCE)) - metadata.insert({ rules::IS_ACTIVE, 1 }); - return metadata; - } - - void apply_to(const CharacterSet *rule) { + void apply_to(const CharacterSet *character_set) { PrecedenceRange precedence; if (!precedence_stack->empty()) precedence.add(precedence_stack->back()); - merge_transition(transitions, *rule, - LexItemSet({ LexItem(item_lhs, rules::Blank::build()) }), - precedence); + add_transition( + transitions, *character_set, + Transition{ + LexItemSet({ LexItem(item_lhs, Blank::build()) }), precedence, + }); } - void apply_to(const rules::Choice *rule) { - for (const rule_ptr &element : rule->elements) + void apply_to(const Choice *choice) { + for (const rule_ptr &element : choice->elements) apply(element); } - void apply_to(const rules::Seq *rule) { - LexItemSet::TransitionMap left_transitions; - LexItemTransitions(&left_transitions, this).apply(rule->left); + void apply_to(const Seq *sequence) { + TransitionMap left_transitions; + TransitionBuilder(&left_transitions, this).apply(sequence->left); + for (const auto &pair : left_transitions) { - merge_transition( + add_transition( transitions, pair.first, - transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) { - return rules::Seq::build({ item_rule, rule->right }); - }), pair.second.second); + transform_transition(pair.second, [&sequence](rule_ptr rule) { + return Seq::build({ rule, sequence->right }); + })); } - if (rule_can_be_blank(rule->left)) - apply(rule->right); + if (rule_can_be_blank(sequence->left)) + apply(sequence->right); } - void apply_to(const rules::Repeat *rule) { - LexItemSet::TransitionMap content_transitions; - LexItemTransitions(&content_transitions, this).apply(rule->content); + void apply_to(const Repeat *repeat) { + TransitionMap content_transitions; + TransitionBuilder(&content_transitions, this).apply(repeat->content); + for (const auto &pair : content_transitions) { - merge_transition(transitions, pair.first, pair.second.first, - pair.second.second); - merge_transition( + add_transition(transitions, pair.first, pair.second); + add_transition( transitions, pair.first, - transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) { - return rules::Seq::build({ item_rule, rule->copy() }); - }), pair.second.second); + transform_transition(pair.second, [&repeat](rule_ptr item_rule) { + return Seq::build({ item_rule, repeat->copy() }); + })); } } - void apply_to(const rules::Metadata *rule) { - LexItemSet::TransitionMap content_transitions; - auto precedence = rule->value_for(rules::PRECEDENCE); - bool has_active_precedence = - precedence.second && rule->value_for(rules::IS_ACTIVE).second; + void apply_to(const Metadata *metadata) { + bool has_active_precedence = metadata->value_for(IS_ACTIVE).second; if (has_active_precedence) - precedence_stack->push_back(precedence.first); + precedence_stack->push_back(metadata->value_for(PRECEDENCE).first); - LexItemTransitions(&content_transitions, this).apply(rule->rule); - for (const auto &pair : content_transitions) - merge_transition( + auto metadata_value = metadata->value; + if (metadata_value.count(PRECEDENCE)) + metadata_value.insert({ IS_ACTIVE, true }); + + TransitionMap content_transitions; + TransitionBuilder(&content_transitions, this).apply(metadata->rule); + + for (const auto &pair : content_transitions) { + add_transition( transitions, pair.first, - transform_item_set(pair.second.first, [this, &rule](rule_ptr item_rule) { - return rules::Metadata::build(item_rule, - activate_precedence(rule->value)); - }), pair.second.second); + transform_transition(pair.second, [&metadata_value](rule_ptr rule) { + return Metadata::build(rule, metadata_value); + })); + } if (has_active_precedence) precedence_stack->pop_back(); } public: - LexItemTransitions(LexItemSet::TransitionMap *transitions, - const rules::Symbol &item_lhs, - vector *precedence_stack) + TransitionBuilder(TransitionMap *transitions, const Symbol &item_lhs, + vector *precedence_stack) : transitions(transitions), item_lhs(item_lhs), precedence_stack(precedence_stack) {} - LexItemTransitions(LexItemSet::TransitionMap *transitions, - LexItemTransitions *other) + TransitionBuilder(TransitionMap *transitions, TransitionBuilder *other) : transitions(transitions), item_lhs(other->item_lhs), precedence_stack(other->precedence_stack) {} }; -void lex_item_transitions(LexItemSet::TransitionMap *transitions, - const LexItem &item) { +void lex_item_transitions(TransitionMap *transitions, const LexItem &item) { vector precedence_stack; - LexItemTransitions(transitions, item.lhs, &precedence_stack).apply(item.rule); + TransitionBuilder(transitions, item.lhs, &precedence_stack).apply(item.rule); } } // namespace build_tables