Store shift states for non-terminals directly in the main parse table

This commit is contained in:
Max Brunsfeld 2016-11-14 08:36:06 -08:00
parent 8d9c261e3a
commit fad7294ba4
20 changed files with 204 additions and 195 deletions

View file

@ -114,14 +114,11 @@ class LexTableBuilder {
void mark_fragile_tokens() {
for (ParseState &state : parse_table->states) {
for (auto &entry : state.entries) {
if (!entry.first.is_token)
continue;
for (auto &entry : state.terminal_entries) {
auto homonyms = conflict_manager.possible_homonyms.find(entry.first);
if (homonyms != conflict_manager.possible_homonyms.end())
for (const Symbol &homonym : homonyms->second)
if (state.entries.count(homonym)) {
for (int homonym : homonyms->second)
if (state.terminal_entries.count(homonym)) {
entry.second.reusable = false;
break;
}
@ -131,8 +128,8 @@ class LexTableBuilder {
auto extensions = conflict_manager.possible_extensions.find(entry.first);
if (extensions != conflict_manager.possible_extensions.end())
for (const Symbol &extension : extensions->second)
if (state.entries.count(extension)) {
for (int extension : extensions->second)
if (state.terminal_entries.count(extension)) {
entry.second.depends_on_lookahead = true;
break;
}
@ -147,7 +144,7 @@ class LexTableBuilder {
}
auto replacements =
remove_duplicate_states<LexTable, AdvanceAction>(&lex_table);
remove_duplicate_states<LexTable>(&lex_table);
for (ParseState &parse_state : parse_table->states) {
auto replacement = replacements.find(parse_state.lex_state_id);

View file

@ -116,17 +116,16 @@ class ParseTableBuilder {
}
for (const Symbol &symbol : grammar.extra_tokens) {
if (!error_state.entries.count(symbol)) {
error_state.entries[symbol].actions.push_back(ParseAction::ShiftExtra());
if (!error_state.terminal_entries.count(symbol.index)) {
error_state.terminal_entries[symbol.index].actions.push_back(ParseAction::ShiftExtra());
}
}
for (size_t i = 0; i < grammar.variables.size(); i++) {
Symbol symbol(i, false);
add_out_of_context_parse_state(&error_state, symbol);
add_out_of_context_parse_state(&error_state, Symbol(i, false));
}
error_state.entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0));
error_state.terminal_entries[END_OF_INPUT().index].actions.push_back(ParseAction::Recover(0));
parse_table.states[0] = error_state;
}
@ -135,7 +134,11 @@ class ParseTableBuilder {
const ParseItemSet &item_set = recovery_states[symbol];
if (!item_set.entries.empty()) {
ParseStateId state = add_parse_state(item_set);
error_state->entries[symbol].actions.push_back(ParseAction::Recover(state));
if (symbol.is_token) {
error_state->terminal_entries[symbol.index].actions.assign({ ParseAction::Recover(state) });
} else {
error_state->nonterminal_entries[symbol.index] = state;
}
}
}
@ -158,14 +161,19 @@ class ParseTableBuilder {
const ParseItemSet &next_item_set = transition.second.first;
const PrecedenceRange &precedence = transition.second.second;
ParseAction *new_action = add_action(
state_id, symbol, ParseAction::Shift(0, precedence), item_set);
if (!allow_any_conflict)
if (!allow_any_conflict) {
recovery_states[symbol].add(next_item_set);
}
if (new_action)
new_action->state_index = add_parse_state(next_item_set);
if (symbol.is_token) {
ParseAction *new_action = add_terminal_action(
state_id, symbol, ParseAction::Shift(0, precedence), item_set);
if (new_action) {
new_action->state_index = add_parse_state(next_item_set);
}
} else {
parse_table.set_nonterminal_action(state_id, symbol.index, add_parse_state(next_item_set));
}
}
}
@ -185,8 +193,9 @@ class ParseTableBuilder {
status.associativity, *item.production);
}
for (const auto &lookahead_sym : *lookahead_symbols.entries)
add_action(state_id, lookahead_sym, action, item_set);
for (const Symbol &lookahead : *lookahead_symbols.entries) {
add_terminal_action(state_id, lookahead, action, item_set);
}
}
}
}
@ -195,24 +204,25 @@ class ParseTableBuilder {
ParseAction action = ParseAction::ShiftExtra();
ParseState &state = parse_table.states[state_id];
for (const Symbol &extra_symbol : grammar.extra_tokens)
if (!state.entries.count(extra_symbol) || state.has_shift_action() ||
allow_any_conflict)
parse_table.add_action(state_id, extra_symbol, action);
if (!state.terminal_entries.count(extra_symbol.index) ||
state.has_shift_action() || allow_any_conflict)
parse_table.add_terminal_action(state_id, extra_symbol.index, action);
}
void mark_fragile_actions() {
for (ParseState &state : parse_table.states) {
set<Symbol> symbols_with_multiple_actions;
for (auto &entry : state.entries) {
const Symbol &symbol = entry.first;
for (auto &entry : state.terminal_entries) {
const Symbol symbol(entry.first, true);
auto &actions = entry.second.actions;
if (actions.size() > 1)
if (actions.size() > 1) {
symbols_with_multiple_actions.insert(symbol);
}
for (ParseAction &action : actions) {
if (action.type == ParseActionTypeReduce && !action.extra) {
if (action.type == ParseActionTypeReduce) {
if (has_fragile_production(action.production))
action.fragile = true;
@ -231,21 +241,8 @@ class ParseTableBuilder {
break;
}
}
if (!erased)
if (!erased) {
++i;
}
}
if (!symbols_with_multiple_actions.empty()) {
for (auto &entry : state.entries) {
if (!entry.first.is_token) {
set<Symbol> first_set = get_first_set(entry.first);
for (const Symbol &symbol : symbols_with_multiple_actions) {
if (first_set.count(symbol)) {
entry.second.reusable = false;
break;
}
}
}
}
}
@ -253,33 +250,34 @@ class ParseTableBuilder {
}
// Runs the generic duplicate-state removal pass over the parse table.
// The map of replacements returned by remove_duplicate_states is not needed
// here and is discarded.
void remove_duplicate_parse_states() {
  // Only the table type is a template argument: the pass rewrites state
  // references through each_referenced_state rather than through a
  // table-specific action type.
  remove_duplicate_states<ParseTable>(&parse_table);
}
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,
const ParseAction &new_action,
const ParseItemSet &item_set) {
ParseAction *add_terminal_action(ParseStateId state_id, Symbol lookahead,
const ParseAction &new_action,
const ParseItemSet &item_set) {
const ParseState &state = parse_table.states[state_id];
const auto &current_entry = state.entries.find(lookahead);
if (current_entry == state.entries.end())
return &parse_table.set_action(state_id, lookahead, new_action);
const auto &current_entry = state.terminal_entries.find(lookahead.index);
if (current_entry == state.terminal_entries.end())
return &parse_table.set_terminal_action(state_id, lookahead.index, new_action);
if (allow_any_conflict)
return &parse_table.add_action(state_id, lookahead, new_action);
return &parse_table.add_terminal_action(state_id, lookahead.index, new_action);
const ParseAction old_action = current_entry->second.actions[0];
auto resolution = conflict_manager.resolve(new_action, old_action);
switch (resolution.second) {
case ConflictTypeNone:
if (resolution.first)
return &parse_table.set_action(state_id, lookahead, new_action);
if (resolution.first) {
return &parse_table.set_terminal_action(state_id, lookahead.index, new_action);
}
break;
case ConflictTypeResolved: {
if (resolution.first) {
if (old_action.type == ParseActionTypeReduce)
fragile_productions.insert(old_action.production);
return &parse_table.set_action(state_id, lookahead, new_action);
return &parse_table.set_terminal_action(state_id, lookahead.index, new_action);
} else {
if (new_action.type == ParseActionTypeReduce)
fragile_productions.insert(new_action.production);
@ -293,7 +291,7 @@ class ParseTableBuilder {
fragile_productions.insert(old_action.production);
if (new_action.type == ParseActionTypeReduce)
fragile_productions.insert(new_action.production);
return &parse_table.add_action(state_id, lookahead, new_action);
return &parse_table.add_terminal_action(state_id, lookahead.index, new_action);
}
break;
}

View file

@ -14,7 +14,7 @@ bool LexConflictManager::resolve(const LexItemSet &item_set,
return true;
if (new_action.precedence_range.max >= old_action.precedence) {
for (const LexItem &item : item_set.entries)
possible_extensions[old_action.symbol].insert(item.lhs);
possible_extensions[old_action.symbol.index].insert(item.lhs.index);
return true;
} else {
return false;
@ -44,9 +44,9 @@ bool LexConflictManager::resolve(const AcceptTokenAction &new_action,
result = false;
if (result)
possible_homonyms[old_action.symbol].insert(new_action.symbol);
possible_homonyms[old_action.symbol.index].insert(new_action.symbol.index);
else
possible_homonyms[new_action.symbol].insert(old_action.symbol);
possible_homonyms[new_action.symbol.index].insert(old_action.symbol.index);
return result;
}

View file

@ -21,8 +21,8 @@ class LexConflictManager {
const AcceptTokenAction &);
bool resolve(const AcceptTokenAction &, const AcceptTokenAction &);
std::map<rules::Symbol, std::set<rules::Symbol>> possible_homonyms;
std::map<rules::Symbol, std::set<rules::Symbol>> possible_extensions;
std::map<int, std::set<int>> possible_homonyms;
std::map<int, std::set<int>> possible_extensions;
};
} // namespace build_tables

View file

@ -7,7 +7,7 @@
namespace tree_sitter {
namespace build_tables {
template <typename TableType, typename ActionType>
template <typename TableType>
std::map<size_t, size_t> remove_duplicate_states(TableType *table) {
std::map<size_t, size_t> replacements;
@ -46,10 +46,10 @@ std::map<size_t, size_t> remove_duplicate_states(TableType *table) {
}
for (auto &state : table->states)
state.each_advance_action([&new_replacements](ActionType *action) {
auto new_replacement = new_replacements.find(action->state_index);
state.each_referenced_state([&new_replacements](int64_t *state_index) {
auto new_replacement = new_replacements.find(*state_index);
if (new_replacement != new_replacements.end())
action->state_index = new_replacement->second;
*state_index = new_replacement->second;
});
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)

View file

@ -115,6 +115,7 @@ class CCodeGenerator {
// Emits the STATE_COUNT, SYMBOL_COUNT and TOKEN_COUNT #define lines,
// followed by one empty line.
void add_state_and_symbol_counts() {
  const auto state_count = parse_table.states.size();
  const auto symbol_count = parse_table.symbols.size();
  // NOTE(review): the extra 1 presumably accounts for a built-in token that
  // is not part of lexical_grammar.variables - confirm.
  const auto token_count = lexical_grammar.variables.size() + 1;

  line("#define STATE_COUNT " + to_string(state_count));
  line("#define SYMBOL_COUNT " + to_string(symbol_count));
  line("#define TOKEN_COUNT " + to_string(token_count));
  line();
}
@ -222,10 +223,15 @@ class CCodeGenerator {
for (const auto &state : parse_table.states) {
line("[" + to_string(state_id++) + "] = {");
indent([&]() {
for (const auto &entry : state.entries) {
line("[" + symbol_id(entry.first) + "] = ");
for (const auto &entry : state.nonterminal_entries) {
line("[" + symbol_id(rules::Symbol(entry.first)) + "] = STATE(");
add(to_string(entry.second));
add("),");
}
for (const auto &entry : state.terminal_entries) {
line("[" + symbol_id(rules::Symbol(entry.first, true)) + "] = ACTIONS(");
add(to_string(add_parse_action_list_id(entry.second)));
add(",");
add("),");
}
});
line("},");

View file

@ -57,9 +57,9 @@ bool LexState::operator==(const LexState &other) const {
is_token_start == other.is_token_start;
}
void LexState::each_advance_action(function<void(AdvanceAction *)> fn) {
void LexState::each_referenced_state(function<void(LexStateId *)> fn) {
for (auto &entry : advance_actions)
fn(&entry.second);
fn(&entry.second.state_index);
}
LexStateId LexTable::add_state() {

View file

@ -11,6 +11,8 @@
namespace tree_sitter {
typedef int64_t LexStateId;
typedef enum {
LexActionTypeError,
LexActionTypeAccept,
@ -24,7 +26,7 @@ struct AdvanceAction {
bool operator==(const AdvanceAction &other) const;
size_t state_index;
LexStateId state_index;
PrecedenceRange precedence_range;
bool in_main_token;
};
@ -52,15 +54,13 @@ class LexState {
LexState();
std::set<rules::CharacterSet> expected_inputs() const;
bool operator==(const LexState &) const;
void each_advance_action(std::function<void(AdvanceAction *)>);
void each_referenced_state(std::function<void(LexStateId *)>);
std::map<rules::CharacterSet, AdvanceAction> advance_actions;
AcceptTokenAction accept_action;
bool is_token_start;
};
typedef int64_t LexStateId;
class LexTable {
public:
LexStateId add_state();

View file

@ -125,29 +125,34 @@ bool ParseTableEntry::operator==(const ParseTableEntry &other) const {
// Constructs a parse state with no entries and lex_state_id set to -1.
// NOTE(review): -1 is presumably a "no lex state assigned yet" sentinel -
// confirm against the code that assigns lex_state_id.
ParseState::ParseState() : lex_state_id(-1) {}
// Reports whether this state can shift anything.
//
// Returns true when the last action recorded for some terminal is a shift,
// or when the state has at least one non-terminal entry (every non-terminal
// entry is a transition to another state).
bool ParseState::has_shift_action() const {
  for (const auto &pair : terminal_entries) {
    const auto &actions = pair.second.actions;
    if (!actions.empty() && actions.back().type == ParseActionTypeShift)
      return true;
  }
  return !nonterminal_entries.empty();
}
// Returns every symbol that has an entry in this state: each key of
// terminal_entries as a token symbol and each key of nonterminal_entries as
// a non-token symbol.
set<Symbol> ParseState::expected_inputs() const {
  set<Symbol> result;
  for (const auto &entry : terminal_entries)
    result.insert(Symbol(entry.first, true));
  for (const auto &entry : nonterminal_entries)
    result.insert(Symbol(entry.first, false));
  return result;
}
void ParseState::each_advance_action(function<void(ParseAction *)> fn) {
for (auto &entry : entries)
void ParseState::each_referenced_state(function<void(ParseStateId *)> fn) {
for (auto &entry : terminal_entries)
for (ParseAction &action : entry.second.actions)
if (action.type == ParseActionTypeShift || ParseActionTypeRecover)
fn(&action);
fn(&action.state_index);
for (auto &entry : nonterminal_entries)
fn(&entry.second);
}
// Two parse states are equal when both their terminal entries and their
// non-terminal entries are equal. lex_state_id is not part of the comparison.
bool ParseState::operator==(const ParseState &other) const {
  return terminal_entries == other.terminal_entries &&
         nonterminal_entries == other.nonterminal_entries;
}
set<Symbol> ParseTable::all_symbols() const {
@ -162,35 +167,34 @@ ParseStateId ParseTable::add_state() {
return states.size() - 1;
}
// Replaces whatever actions state `state_id` has for the terminal `index`
// with the single action `action`.
//
// Returns a reference to the stored copy of the action. The existing list is
// cleared first and the insertion is delegated to add_terminal_action, so
// the symbol bookkeeping done there also applies here.
ParseAction &ParseTable::set_terminal_action(ParseStateId state_id, int index,
                                             ParseAction action) {
  states[state_id].terminal_entries[index].actions.clear();
  return add_terminal_action(state_id, index, action);
}
// Appends `action` to the actions state `state_id` has for the terminal
// `index`, creating the entry if it does not exist yet.
//
// Also records how the terminal is used: a shift flagged `extra` marks the
// symbol as extra, any other action marks it as structural.
//
// Returns a reference to the newly stored action (the last element of the
// entry's action list).
ParseAction &ParseTable::add_terminal_action(ParseStateId state_id, int index,
                                             ParseAction action) {
  Symbol symbol(index, true);
  if (action.type == ParseActionTypeShift && action.extra)
    symbols[symbol].extra = true;
  else
    symbols[symbol].structural = true;

  ParseTableEntry &entry = states[state_id].terminal_entries[index];
  entry.actions.push_back(action);
  return entry.actions.back();
}
// Records that, in state `state_id`, the non-terminal `index` leads to
// `next_state_id`, overwriting any previous target. The non-terminal is also
// flagged as structural in the table's symbol metadata.
void ParseTable::set_nonterminal_action(ParseStateId state_id, int index,
                                        ParseStateId next_state_id) {
  symbols[Symbol(index, false)].structural = true;

  ParseState &state = states[state_id];
  state.nonterminal_entries[index] = next_state_id;
}
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
for (const auto &pair : state.entries)
for (const auto &pair : state.terminal_entries)
if (pair.second == entry)
return true;
return false;
@ -200,13 +204,16 @@ bool ParseTable::merge_state(size_t i, size_t j) {
ParseState &state = states[i];
ParseState &other = states[j];
for (auto &entry : state.entries) {
const Symbol &symbol = entry.first;
if (state.nonterminal_entries != other.nonterminal_entries)
return false;
for (auto &entry : state.terminal_entries) {
Symbol symbol(entry.first, true);
const vector<ParseAction> &actions = entry.second.actions;
const auto &other_entry = other.entries.find(symbol);
if (other_entry == other.entries.end()) {
if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in() && symbol.is_token)
const auto &other_entry = other.terminal_entries.find(symbol.index);
if (other_entry == other.terminal_entries.end()) {
if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in())
return false;
if (actions.back().type != ParseActionTypeReduce)
return false;
@ -219,12 +226,12 @@ bool ParseTable::merge_state(size_t i, size_t j) {
set<Symbol> symbols_to_merge;
for (auto &entry : other.entries) {
const Symbol &symbol = entry.first;
for (auto &entry : other.terminal_entries) {
Symbol symbol(entry.first, true);
const vector<ParseAction> &actions = entry.second.actions;
if (!state.entries.count(symbol)) {
if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in() && symbol.is_token)
if (!state.terminal_entries.count(symbol.index)) {
if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in())
return false;
if (actions.back().type != ParseActionTypeReduce)
return false;
@ -235,7 +242,7 @@ bool ParseTable::merge_state(size_t i, size_t j) {
}
for (const Symbol &symbol : symbols_to_merge)
state.entries[symbol] = other.entries.find(symbol)->second;
state.terminal_entries[symbol.index] = other.terminal_entries.find(symbol.index)->second;
return true;
}

View file

@ -1,6 +1,7 @@
#ifndef COMPILER_PARSE_TABLE_H_
#define COMPILER_PARSE_TABLE_H_
#include <memory>
#include <map>
#include <set>
#include <utility>
@ -13,7 +14,7 @@
namespace tree_sitter {
// Identifier of a state in the parse table. It has to be int64_t: the
// duplicate-state removal pass hands each_referenced_state a callback taking
// int64_t *, which only converts to a function taking ParseStateId * when
// the two are the same type.
typedef int64_t ParseStateId;
enum ParseActionType {
ParseActionTypeError,
@ -72,10 +73,11 @@ class ParseState {
std::set<rules::Symbol> expected_inputs() const;
bool operator==(const ParseState &) const;
bool merge(const ParseState &);
void each_advance_action(std::function<void(ParseAction *)>);
void each_referenced_state(std::function<void(ParseStateId *)>);
bool has_shift_action() const;
std::map<rules::Symbol, ParseTableEntry> entries;
std::map<int, ParseTableEntry> terminal_entries;
std::map<int, ParseStateId> nonterminal_entries;
LexStateId lex_state_id;
};
@ -88,10 +90,9 @@ class ParseTable {
public:
std::set<rules::Symbol> all_symbols() const;
ParseStateId add_state();
ParseAction &set_action(ParseStateId state_id, rules::Symbol symbol,
ParseAction action);
ParseAction &add_action(ParseStateId state_id, rules::Symbol symbol,
ParseAction action);
ParseAction &add_terminal_action(ParseStateId state_id, int, ParseAction);
ParseAction &set_terminal_action(ParseStateId state_id, int index, ParseAction);
void set_nonterminal_action(ParseStateId state_id, int index, ParseStateId);
bool merge_state(size_t i, size_t j);
std::vector<ParseState> states;

View file

@ -37,9 +37,9 @@ string Symbol::to_string() const {
}
// Strict ordering over symbols: every token sorts before every non-token,
// and symbols of the same kind are ordered by index.
//
// The two mixed-kind cases must give opposite answers. If both returned
// true, `a < b` and `b < a` could hold together, which violates the ordering
// that std::set<Symbol> and std::map<Symbol, ...> rely on.
bool Symbol::operator<(const Symbol &other) const {
  if (is_token && !other.is_token)
    return true;
  if (!is_token && other.is_token)
    return false;
  return (index < other.index);
}