From 1fb52eacab1332536e205c1f7e76c9016f4238ee Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 12 Oct 2014 12:44:16 -0700 Subject: [PATCH] Tidy up build_lex_table function --- src/compiler/build_tables/build_lex_table.cc | 106 +++++++++---------- src/compiler/build_tables/build_lex_table.h | 3 +- 2 files changed, 54 insertions(+), 55 deletions(-) diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index b1c86b5c..babb8098 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -1,33 +1,32 @@ -#include "compiler/build_tables/build_tables.h" -#include -#include #include #include -#include +#include #include -#include "compiler/prepared_grammar.h" -#include "compiler/rules/built_in_symbols.h" -#include "compiler/rules/metadata.h" -#include "compiler/rules/choice.h" -#include "compiler/rules/repeat.h" -#include "compiler/rules/blank.h" -#include "compiler/rules/seq.h" +#include +#include +#include "compiler/build_tables/item_set_transitions.h" #include "compiler/build_tables/lex_conflict_manager.h" #include "compiler/build_tables/lex_item.h" -#include "compiler/build_tables/item_set_transitions.h" +#include "compiler/parse_table.h" +#include "compiler/prepared_grammar.h" +#include "compiler/rules/built_in_symbols.h" +#include "compiler/rules/choice.h" +#include "compiler/rules/metadata.h" +#include "compiler/rules/repeat.h" +#include "compiler/rules/seq.h" namespace tree_sitter { namespace build_tables { -using std::string; -using std::map; -using std::unordered_map; -using std::set; -using std::make_shared; -using std::vector; using std::dynamic_pointer_cast; -using rules::Symbol; +using std::make_shared; +using std::map; +using std::set; +using std::string; +using std::unordered_map; +using std::vector; using rules::CharacterSet; +using rules::Symbol; class LexTableBuilder { const LexicalGrammar lex_grammar; @@ -36,14 +35,32 @@ class LexTableBuilder { unordered_map lex_state_ids; LexTable lex_table; + public: + LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) + : lex_grammar(lex_grammar), + parse_table(parse_table), + conflict_manager(LexConflictManager(lex_grammar)) {} + + LexTable build() { + for (auto &parse_state : parse_table->states) { + LexItemSet item_set = build_lex_item_set(parse_state.expected_inputs()); + parse_state.lex_state_id = add_lex_state(item_set); + } + add_error_lex_state(); + return lex_table; + } + + private: LexItemSet build_lex_item_set(const set &symbols) { LexItemSet result; for (const auto &symbol : symbols) { if (symbol == rules::ERROR()) continue; - else if (symbol == rules::END_OF_INPUT()) + + if (symbol == rules::END_OF_INPUT()) result.insert(LexItem( symbol, after_separators(CharacterSet().include(0).copy()))); + else if (symbol.is_token()) result.insert( LexItem(symbol, after_separators(lex_grammar.rule(symbol)))); @@ -52,7 +69,7 @@ class LexTableBuilder { } LexStateId add_lex_state(const LexItemSet &item_set) { - auto pair = lex_state_ids.find(item_set); + const auto &pair = lex_state_ids.find(item_set); if (pair == lex_state_ids.end()) { LexStateId state_id = lex_table.add_state(); lex_state_ids[item_set] = state_id; @@ -88,14 +105,13 @@ class LexTableBuilder { } void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) { - for (const LexItem &item : item_set) { + for (const LexItem &item : item_set) if (item.is_done()) { auto current_action = lex_table.state(state_id).default_action; auto new_action = LexAction::Accept(item.lhs, item.precedence()); if (conflict_manager.resolve_lex_action(current_action, new_action)) lex_table.state(state_id).default_action = new_action; } - } } void add_token_start(const LexItemSet &item_set, LexStateId state_id) { @@ -104,11 +120,20 @@ class LexTableBuilder { lex_table.state(state_id).is_token_start = true; } + rules::rule_ptr after_separators(rules::rule_ptr rule) { + return rules::Seq::Build( + { make_shared( + separator_rule(), + map( + { { rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 }, })), + rule, }); + } + // TODO - remove this hack. right now, nested repeats cause // item sets which are equivalent to appear unequal. - rules::rule_ptr separators() const { - std::vector separators; - for (auto &rule : lex_grammar.separators) { + rules::rule_ptr separator_rule() const { + vector separators; + for (const auto &rule : lex_grammar.separators) { auto repeat = dynamic_pointer_cast(rule); if (repeat.get()) separators.push_back(repeat->content); @@ -118,41 +143,16 @@ class LexTableBuilder { return rules::repeat(rules::choice(separators)); } - rules::rule_ptr after_separators(rules::rule_ptr rule) { - return rules::Seq::Build( - { make_shared( - separators(), - map( - { { rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 }, })), - rule, }); - } - set precedence_values_for_item_set(const LexItemSet &item_set) const { set result; for (const auto &item : item_set) result.insert(item.precedence()); return result; } - - public: - LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) - : lex_grammar(lex_grammar), - parse_table(parse_table), - conflict_manager(LexConflictManager(lex_grammar)) {} - - LexTable build() { - for (auto &parse_state : parse_table->states) { - LexItemSet item_set = build_lex_item_set(parse_state.expected_inputs()); - parse_state.lex_state_id = add_lex_state(item_set); - } - add_error_lex_state(); - return lex_table; - } }; -LexTable build_lex_table(ParseTable *parse_table, - const LexicalGrammar &lex_grammar) { - return LexTableBuilder(parse_table, lex_grammar).build(); +LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) { + return LexTableBuilder(table, grammar).build(); } } // namespace build_tables diff --git a/src/compiler/build_tables/build_lex_table.h b/src/compiler/build_tables/build_lex_table.h index acf35595..0f75fff6 100644 --- a/src/compiler/build_tables/build_lex_table.h +++ b/src/compiler/build_tables/build_lex_table.h @@ -11,8 +11,7 @@ class ParseTable; namespace build_tables { -LexTable build_lex_table(ParseTable *parse_table, - const LexicalGrammar &lex_grammar); +LexTable build_lex_table(ParseTable *, const LexicalGrammar &); } // namespace build_tables } // namespace tree_sitter