Clean up table builder functions
parent 1d314d71c2
commit 63c0e27501
5 changed files with 98 additions and 107 deletions
@@ -13,12 +13,12 @@
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/build_tables/first_set.h"

namespace tree_sitter {
using std::string;
using std::map;
using std::unordered_map;
using std::set;
using std::make_shared;
using rules::Symbol;
using rules::CharacterSet;
@@ -30,10 +30,43 @@ namespace tree_sitter {
LexConflictManager conflict_manager;
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
LexTable lex_table;

LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
LexItemSet result;
for (const auto &symbol : symbols) {
if (symbol == rules::ERROR())
continue;
else if (symbol == rules::END_OF_INPUT())
result.insert(LexItem(symbol, after_separators(CharacterSet({ 0 }).copy())));
else if (symbol.is_token())
result.insert(LexItem(symbol, after_separators(lex_grammar.rule(symbol))));
}
return result;
}

LexStateId add_lex_state(const LexItemSet &item_set) {
auto pair = lex_state_ids.find(item_set);
if (pair == lex_state_ids.end()) {
LexStateId state_id = lex_table.add_state();
lex_state_ids[item_set] = state_id;
add_advance_actions(item_set, state_id);
add_accept_token_actions(item_set, state_id);
add_token_start(item_set, state_id);
return state_id;
} else {
return pair->second;
}
}

void add_error_lex_state() {
LexItemSet item_set = build_lex_item_set(parse_table->symbols);
add_advance_actions(item_set, LexTable::ERROR_STATE_ID);
add_accept_token_actions(item_set, LexTable::ERROR_STATE_ID);
}

void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
auto transitions = char_transitions(item_set, lex_grammar);
for (auto transition : transitions) {
for (const auto &transition : transitions) {
CharacterSet rule = transition.first;
LexItemSet new_item_set = transition.second;
LexStateId new_state_id = add_lex_state(new_item_set);
@@ -41,14 +74,8 @@ namespace tree_sitter {
}
}

void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
for (auto &item : item_set)
if (item.is_token_start())
lex_table.state(state_id).is_token_start = true;
}

void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
for (LexItem item : item_set) {
for (const LexItem &item : item_set) {
if (item.is_done()) {
auto current_action = lex_table.state(state_id).default_action;
auto new_action = LexAction::Accept(item.lhs, item.precedence());
@@ -57,6 +84,12 @@ namespace tree_sitter {
}
}
}

void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
for (const auto &item : item_set)
if (item.is_token_start())
lex_table.state(state_id).is_token_start = true;
}

rules::rule_ptr after_separators(rules::rule_ptr rule) {
return rules::Seq::Build({
@@ -68,50 +101,6 @@ namespace tree_sitter {
});
}

LexItemSet lex_item_set_for_parse_state(const ParseState &state) {
LexItemSet result;
for (auto &symbol : state.expected_inputs()) {
if (symbol.is_token() && !symbol.is_built_in())
result.insert(LexItem(symbol, after_separators(lex_grammar.rule(symbol))));
if (symbol == rules::END_OF_INPUT())
result.insert(LexItem(symbol, after_separators(CharacterSet({ 0 }).copy())));
}
return result;
}

void assign_lex_state(ParseState *state) {
state->lex_state_id = add_lex_state(lex_item_set_for_parse_state(*state));
}

LexStateId add_lex_state(const LexItemSet &item_set) {
auto pair = lex_state_ids.find(item_set);
if (pair == lex_state_ids.end()) {
LexStateId state_id = lex_table.add_state();
lex_state_ids[item_set] = state_id;
add_token_start(item_set, state_id);
add_advance_actions(item_set, state_id);
add_accept_token_actions(item_set, state_id);
return state_id;
} else {
return pair->second;
}
}

void add_error_lex_state() {
LexItemSet error_item_set;
for (size_t i = 0; i < lex_grammar.rules.size(); i++) {
LexItem item(Symbol(i, rules::SymbolOptionToken), after_separators(lex_grammar.rules[i].second));
error_item_set.insert(item);
}
for (size_t i = 0; i < lex_grammar.aux_rules.size(); i++) {
LexItem item(Symbol(i, rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary)), after_separators(lex_grammar.aux_rules[i].second));
error_item_set.insert(item);
}
error_item_set.insert(LexItem(rules::END_OF_INPUT(), after_separators(CharacterSet({ 0 }).copy())));
add_advance_actions(error_item_set, LexTable::ERROR_STATE_ID);
add_accept_token_actions(error_item_set, LexTable::ERROR_STATE_ID);
}

public:
LexTableBuilder(ParseTable *parse_table, const PreparedGrammar &lex_grammar) :
lex_grammar(lex_grammar),
@@ -119,8 +108,10 @@ namespace tree_sitter {
conflict_manager(LexConflictManager(lex_grammar)) {}

LexTable build() {
for (auto &parse_state : parse_table->states)
assign_lex_state(&parse_state);
for (auto &parse_state : parse_table->states) {
LexItemSet item_set = build_lex_item_set(parse_state.expected_inputs());
parse_state.lex_state_id = add_lex_state(item_set);
}
add_error_lex_state();
return lex_table;
}

@@ -9,7 +9,8 @@ namespace tree_sitter {
class ParseTable;

namespace build_tables {
LexTable build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar);
LexTable
build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar);
}
}

@@ -11,7 +11,6 @@
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/build_tables/first_set.h"

namespace tree_sitter {
using std::pair;
@@ -29,49 +28,7 @@ namespace tree_sitter {
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
SymTransitions sym_transitions;
ParseTable parse_table;

set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
set<int> result;
for (const auto &item : item_set)
if (item.consumed_symbol_count > 0)
result.insert(item.precedence());
return result;
}

void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (auto &transition : sym_transitions(item_set, grammar)) {
const Symbol &symbol = transition.first;
const ParseItemSet &item_set = transition.second;
set<int> precedence_values = precedence_values_for_item_set(item_set);

auto current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(symbol);

if (current_action == current_actions.end() ||
conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0, precedence_values))) {
ParseStateId new_state_id = add_parse_state(item_set);
parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id, precedence_values));
}
}
}

void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (ParseItem item : item_set) {
if (item.is_done()) {
ParseAction action = (item.lhs == rules::START()) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence());
auto current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(item.lookahead_sym);

if (current_action == current_actions.end() ||
conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action)) {
parse_table.add_action(state_id, item.lookahead_sym, action);
}
}
}
}

ParseStateId add_parse_state(const ParseItemSet &item_set) {
auto pair = parse_state_ids.find(item_set);
if (pair == parse_state_ids.end()) {
@@ -85,6 +42,47 @@ namespace tree_sitter {
}
}

void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &transition : sym_transitions(item_set, grammar)) {
const Symbol &symbol = transition.first;
const ParseItemSet &item_set = transition.second;
auto current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(symbol);

set<int> precedence_values = precedence_values_for_item_set(item_set);
if (current_action == current_actions.end() ||
conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0, precedence_values))) {
ParseStateId new_state_id = add_parse_state(item_set);
parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id, precedence_values));
}
}
}

void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const ParseItem &item : item_set) {
if (item.is_done()) {
ParseAction action = (item.lhs == rules::START()) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence());
auto current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(item.lookahead_sym);

if (current_action == current_actions.end() ||
conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action)) {
parse_table.add_action(state_id, item.lookahead_sym, action);
}
}
}
}

set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
set<int> result;
for (const auto &item : item_set)
if (item.consumed_symbol_count > 0)
result.insert(item.precedence());
return result;
}

public:
ParseTableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) :
grammar(grammar),

@@ -14,7 +14,7 @@ namespace tree_sitter {
auto parse_table_result = build_parse_table(grammar, lex_grammar);
ParseTable parse_table = parse_table_result.first;
vector<Conflict> conflicts = parse_table_result.second;
auto lex_table = build_lex_table(&parse_table, lex_grammar);
LexTable lex_table = build_lex_table(&parse_table, lex_grammar);
return make_tuple(parse_table, lex_table, conflicts);
}
}

@@ -21,15 +21,16 @@ namespace tree_sitter {
bool LexItem::is_token_start() const {
class IsTokenStart : public rules::RuleFn<bool> {
bool apply_to(const rules::Seq *rule) {
bool result = apply(rule->left);
if (!result && rule_can_be_blank(rule->left))
result = apply(rule->right);
return result;
if (apply(rule->left))
return true;
else if (rule_can_be_blank(rule->left))
return apply(rule->right);
else
return false;
}

bool apply_to(const rules::Metadata *rule) {
auto pair = rule->value.find(rules::START_TOKEN);
return (pair != rule->value.end()) && pair->second;
return rule->value_for(rules::START_TOKEN);
}
};