Refactor construction of out-of-context states

This commit is contained in:
Max Brunsfeld 2016-04-25 21:59:40 -07:00
parent e99a3925e0
commit 31f6b2e24a
7 changed files with 51 additions and 41 deletions

View file

@ -9,7 +9,6 @@
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/build_tables/remove_duplicate_states.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/does_match_any_line.h"
#include "compiler/parse_table.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rules/built_in_symbols.h"
@ -48,11 +47,10 @@ class LexTableBuilder {
}
LexTable build() {
add_lex_state(build_lex_item_set(parse_table->all_symbols(), true));
add_lex_state_for_parse_state(&parse_table->error_state);
for (ParseState &parse_state : parse_table->states)
parse_state.lex_state_id =
add_lex_state(build_lex_item_set(parse_state.expected_inputs(), false));
add_lex_state_for_parse_state(&parse_state);
mark_fragile_tokens();
remove_duplicate_lex_states();
@ -61,7 +59,7 @@ class LexTableBuilder {
}
private:
LexItemSet build_lex_item_set(const set<Symbol> &symbols, bool error) {
LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
LexItemSet result;
for (const Symbol &symbol : symbols) {
vector<rule_ptr> rules;
@ -69,8 +67,6 @@ class LexTableBuilder {
rules.push_back(CharacterSet().include(0).copy());
} else if (symbol.is_token) {
rule_ptr rule = lex_grammar.variables[symbol.index].rule;
if (error && does_match_any_line(rule))
continue;
auto choice = rule->as<rules::Choice>();
if (choice)
@ -98,6 +94,11 @@ class LexTableBuilder {
return result;
}
// Computes the lex item set for the inputs that `parse_state` expects, passes
// it to add_lex_state, and stores the returned id in parse_state->lex_state_id.
void add_lex_state_for_parse_state(ParseState *parse_state) {
  const auto &expected_symbols = parse_state->expected_inputs();
  const auto &lex_items = build_lex_item_set(expected_symbols);
  parse_state->lex_state_id = add_lex_state(lex_items);
}
LexStateId add_lex_state(const LexItemSet &item_set) {
const auto &pair = lex_state_ids.find(item_set);
if (pair == lex_state_ids.end()) {

View file

@ -15,6 +15,7 @@
#include "compiler/syntax_grammar.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/build_tables/does_match_any_line.h"
namespace tree_sitter {
namespace build_tables {
@ -105,17 +106,20 @@ class ParseTableBuilder {
}
// Calls add_out_of_context_parse_state for variables of the lexical grammar
// (subject to the checks in the first loop) and for every variable of the
// syntax grammar, passing each symbol together with its entry in the map
// returned by symbols_by_first_symbol(grammar). Finally clears the error
// state's actions for END_OF_INPUT.
// NOTE(review): this text is a diff rendering without +/- markers; adjacent
// near-duplicate lines (the two `symbols_by_first` declarations, the two
// add_out_of_context_parse_state calls in the second loop) look like the
// before/after versions of one statement -- confirm against the real file.
void add_out_of_context_parse_states() {
map<Symbol, set<Symbol>> symbols_by_first = symbols_by_first_symbol(grammar);
auto symbols_by_first = symbols_by_first_symbol(grammar);
// First pass: one Symbol(i, true) per lexical-grammar variable.
for (size_t i = 0; i < lexical_grammar.variables.size(); i++) {
Symbol symbol(i, true);
// NOTE(review): presumably only one of the two conditions below belongs to
// the current version of the file -- verify which.
if (!grammar.extra_tokens.count(symbol))
if (!does_match_any_line(lexical_grammar.variables[i].rule))
add_out_of_context_parse_state(symbol, symbols_by_first[symbol]);
}
// Second pass: one Symbol(i, false) per syntax-grammar variable.
for (size_t i = 0; i < grammar.variables.size(); i++) {
Symbol symbol(i, false);
add_out_of_context_parse_state(Symbol(i, false), symbols_by_first[symbol]);
add_out_of_context_parse_state(symbol, symbols_by_first[symbol]);
}
// Empty the action list stored on the error state under END_OF_INPUT.
parse_table.error_state.actions[rules::END_OF_INPUT()].clear();
}
void add_out_of_context_parse_state(const rules::Symbol &symbol,
@ -133,8 +137,11 @@ class ParseTableBuilder {
}
}
ParseStateId state = add_parse_state(item_set);
parse_table.out_of_context_state_indices[symbol] = state;
if (!item_set.entries.empty()) {
ParseStateId state = add_parse_state(item_set);
parse_table.error_state.actions[symbol].push_back(
ParseAction::Shift(state, PrecedenceRange()));
}
}
ParseStateId add_parse_state(const ParseItemSet &item_set) {
@ -265,11 +272,12 @@ class ParseTableBuilder {
auto replacements =
remove_duplicate_states<ParseState, ParseAction>(&parse_table.states);
for (auto &pair : parse_table.out_of_context_state_indices) {
auto replacement = replacements.find(pair.second);
if (replacement != replacements.end())
pair.second = replacement->second;
}
parse_table.error_state.each_advance_action(
[&replacements](ParseAction *action) {
auto replacement = replacements.find(action->state_index);
if (replacement != replacements.end())
action->state_index = replacement->second;
});
}
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,

View file

@ -46,12 +46,11 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
}
for (StateType &state : *states)
state.each_advance_action(
[&duplicates, &new_replacements](ActionType *action) {
auto new_replacement = new_replacements.find(action->state_index);
if (new_replacement != new_replacements.end())
action->state_index = new_replacement->second;
});
state.each_advance_action([&new_replacements](ActionType *action) {
auto new_replacement = new_replacements.find(action->state_index);
if (new_replacement != new_replacements.end())
action->state_index = new_replacement->second;
});
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)
states->erase(states->begin() + i->first);

View file

@ -223,15 +223,12 @@ class CCodeGenerator {
void add_out_of_context_parse_states_list() {
line("static TSStateId ts_out_of_context_states[SYMBOL_COUNT] = {");
indent([&]() {
for (const auto &entry : parse_table.symbols) {
for (const auto &entry : parse_table.error_state.actions) {
const rules::Symbol &symbol = entry.first;
if (symbol.is_built_in())
continue;
auto iter = parse_table.out_of_context_state_indices.find(symbol);
string state = (iter != parse_table.out_of_context_state_indices.end())
? to_string(iter->second)
: "ts_parse_state_error";
line("[" + symbol_id(symbol) + "] = " + state + ",");
if (!entry.second.empty()) {
ParseStateId state = entry.second[0].state_index;
line("[" + symbol_id(symbol) + "] = " + to_string(state) + ",");
}
}
});
line("};");

View file

@ -102,8 +102,8 @@ class ParseTable {
ParseAction action);
std::vector<ParseState> states;
ParseState error_state;
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
std::map<rules::Symbol, size_t> out_of_context_state_indices;
};
} // namespace tree_sitter