Refactor construction of out-of-context states
This commit is contained in:
parent
e99a3925e0
commit
31f6b2e24a
7 changed files with 51 additions and 41 deletions
|
|
@ -9,7 +9,6 @@
|
|||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/build_tables/remove_duplicate_states.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/build_tables/does_match_any_line.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
|
@ -48,11 +47,10 @@ class LexTableBuilder {
|
|||
}
|
||||
|
||||
LexTable build() {
|
||||
add_lex_state(build_lex_item_set(parse_table->all_symbols(), true));
|
||||
add_lex_state_for_parse_state(&parse_table->error_state);
|
||||
|
||||
for (ParseState &parse_state : parse_table->states)
|
||||
parse_state.lex_state_id =
|
||||
add_lex_state(build_lex_item_set(parse_state.expected_inputs(), false));
|
||||
add_lex_state_for_parse_state(&parse_state);
|
||||
|
||||
mark_fragile_tokens();
|
||||
remove_duplicate_lex_states();
|
||||
|
|
@ -61,7 +59,7 @@ class LexTableBuilder {
|
|||
}
|
||||
|
||||
private:
|
||||
LexItemSet build_lex_item_set(const set<Symbol> &symbols, bool error) {
|
||||
LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
|
||||
LexItemSet result;
|
||||
for (const Symbol &symbol : symbols) {
|
||||
vector<rule_ptr> rules;
|
||||
|
|
@ -69,8 +67,6 @@ class LexTableBuilder {
|
|||
rules.push_back(CharacterSet().include(0).copy());
|
||||
} else if (symbol.is_token) {
|
||||
rule_ptr rule = lex_grammar.variables[symbol.index].rule;
|
||||
if (error && does_match_any_line(rule))
|
||||
continue;
|
||||
|
||||
auto choice = rule->as<rules::Choice>();
|
||||
if (choice)
|
||||
|
|
@ -98,6 +94,11 @@ class LexTableBuilder {
|
|||
return result;
|
||||
}
|
||||
|
||||
void add_lex_state_for_parse_state(ParseState *parse_state) {
|
||||
parse_state->lex_state_id =
|
||||
add_lex_state(build_lex_item_set(parse_state->expected_inputs()));
|
||||
}
|
||||
|
||||
LexStateId add_lex_state(const LexItemSet &item_set) {
|
||||
const auto &pair = lex_state_ids.find(item_set);
|
||||
if (pair == lex_state_ids.end()) {
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/build_tables/does_match_any_line.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -105,17 +106,20 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
void add_out_of_context_parse_states() {
|
||||
map<Symbol, set<Symbol>> symbols_by_first = symbols_by_first_symbol(grammar);
|
||||
auto symbols_by_first = symbols_by_first_symbol(grammar);
|
||||
|
||||
for (size_t i = 0; i < lexical_grammar.variables.size(); i++) {
|
||||
Symbol symbol(i, true);
|
||||
if (!grammar.extra_tokens.count(symbol))
|
||||
if (!does_match_any_line(lexical_grammar.variables[i].rule))
|
||||
add_out_of_context_parse_state(symbol, symbols_by_first[symbol]);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < grammar.variables.size(); i++) {
|
||||
Symbol symbol(i, false);
|
||||
add_out_of_context_parse_state(Symbol(i, false), symbols_by_first[symbol]);
|
||||
add_out_of_context_parse_state(symbol, symbols_by_first[symbol]);
|
||||
}
|
||||
|
||||
parse_table.error_state.actions[rules::END_OF_INPUT()].clear();
|
||||
}
|
||||
|
||||
void add_out_of_context_parse_state(const rules::Symbol &symbol,
|
||||
|
|
@ -133,8 +137,11 @@ class ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
ParseStateId state = add_parse_state(item_set);
|
||||
parse_table.out_of_context_state_indices[symbol] = state;
|
||||
if (!item_set.entries.empty()) {
|
||||
ParseStateId state = add_parse_state(item_set);
|
||||
parse_table.error_state.actions[symbol].push_back(
|
||||
ParseAction::Shift(state, PrecedenceRange()));
|
||||
}
|
||||
}
|
||||
|
||||
ParseStateId add_parse_state(const ParseItemSet &item_set) {
|
||||
|
|
@ -265,11 +272,12 @@ class ParseTableBuilder {
|
|||
auto replacements =
|
||||
remove_duplicate_states<ParseState, ParseAction>(&parse_table.states);
|
||||
|
||||
for (auto &pair : parse_table.out_of_context_state_indices) {
|
||||
auto replacement = replacements.find(pair.second);
|
||||
if (replacement != replacements.end())
|
||||
pair.second = replacement->second;
|
||||
}
|
||||
parse_table.error_state.each_advance_action(
|
||||
[&replacements](ParseAction *action) {
|
||||
auto replacement = replacements.find(action->state_index);
|
||||
if (replacement != replacements.end())
|
||||
action->state_index = replacement->second;
|
||||
});
|
||||
}
|
||||
|
||||
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,
|
||||
|
|
|
|||
|
|
@ -46,12 +46,11 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
|
|||
}
|
||||
|
||||
for (StateType &state : *states)
|
||||
state.each_advance_action(
|
||||
[&duplicates, &new_replacements](ActionType *action) {
|
||||
auto new_replacement = new_replacements.find(action->state_index);
|
||||
if (new_replacement != new_replacements.end())
|
||||
action->state_index = new_replacement->second;
|
||||
});
|
||||
state.each_advance_action([&new_replacements](ActionType *action) {
|
||||
auto new_replacement = new_replacements.find(action->state_index);
|
||||
if (new_replacement != new_replacements.end())
|
||||
action->state_index = new_replacement->second;
|
||||
});
|
||||
|
||||
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)
|
||||
states->erase(states->begin() + i->first);
|
||||
|
|
|
|||
|
|
@ -223,15 +223,12 @@ class CCodeGenerator {
|
|||
void add_out_of_context_parse_states_list() {
|
||||
line("static TSStateId ts_out_of_context_states[SYMBOL_COUNT] = {");
|
||||
indent([&]() {
|
||||
for (const auto &entry : parse_table.symbols) {
|
||||
for (const auto &entry : parse_table.error_state.actions) {
|
||||
const rules::Symbol &symbol = entry.first;
|
||||
if (symbol.is_built_in())
|
||||
continue;
|
||||
auto iter = parse_table.out_of_context_state_indices.find(symbol);
|
||||
string state = (iter != parse_table.out_of_context_state_indices.end())
|
||||
? to_string(iter->second)
|
||||
: "ts_parse_state_error";
|
||||
line("[" + symbol_id(symbol) + "] = " + state + ",");
|
||||
if (!entry.second.empty()) {
|
||||
ParseStateId state = entry.second[0].state_index;
|
||||
line("[" + symbol_id(symbol) + "] = " + to_string(state) + ",");
|
||||
}
|
||||
}
|
||||
});
|
||||
line("};");
|
||||
|
|
|
|||
|
|
@ -102,8 +102,8 @@ class ParseTable {
|
|||
ParseAction action);
|
||||
|
||||
std::vector<ParseState> states;
|
||||
ParseState error_state;
|
||||
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
|
||||
std::map<rules::Symbol, size_t> out_of_context_state_indices;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue