Ensure that there are no duplicate lex states

This commit is contained in:
Max Brunsfeld 2015-12-20 15:26:35 -08:00
parent c9db5499e9
commit 386b124866
11 changed files with 418 additions and 401 deletions

View file

@ -102,20 +102,14 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
(lookahead == ' '))
ADVANCE(14);
if (lookahead == '\n')
ADVANCE(15);
ADVANCE(2);
if (lookahead == '\r')
ADVANCE(16);
ADVANCE(3);
if (lookahead == '\"')
ADVANCE(4);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(11);
LEX_ERROR();
case 15:
START_TOKEN();
ACCEPT_TOKEN(anon_sym_LF);
case 16:
START_TOKEN();
ACCEPT_TOKEN(anon_sym_CR);
case ts_lex_state_error:
START_TOKEN();
if (lookahead == 0)

File diff suppressed because it is too large Load diff

View file

@ -7,6 +7,7 @@
#include <utility>
#include <vector>
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/build_tables/remove_duplicate_states.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/does_match_any_line.h"
#include "compiler/parse_table.h"
@ -40,8 +41,7 @@ class LexTableBuilder {
public:
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar)
: lex_grammar(lex_grammar),
parse_table(parse_table) {
: lex_grammar(lex_grammar), parse_table(parse_table) {
for (const rule_ptr &rule : lex_grammar.separators)
separator_rules.push_back(rules::Repeat::build(rule));
separator_rules.push_back(rules::Blank::build());
@ -59,6 +59,7 @@ class LexTableBuilder {
populate_lex_state(error_item_set, LexTable::ERROR_STATE_ID);
mark_fragile_tokens();
remove_duplicate_lex_states();
return lex_table;
}
@ -161,6 +162,18 @@ class LexTableBuilder {
if (conflict_manager.fragile_tokens.count(state.default_action.symbol))
state.default_action.type = LexActionTypeAcceptFragile;
}
// Collapses identical lex states, then repoints every parse state whose
// lex_state_id appears in the index map returned by the shared helper.
void remove_duplicate_lex_states() {
  const auto renumbering =
    remove_duplicate_states<LexState, LexAction, LexActionTypeAdvance>(
      &lex_table.states);

  for (ParseState &parse_state : parse_table->states) {
    const auto entry = renumbering.find(parse_state.lex_state_id);
    if (entry == renumbering.end())
      continue;  // no entry for this id: leave it as it is
    parse_state.lex_state_id = entry->second;
  }
}
};
LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) {

View file

@ -7,6 +7,7 @@
#include <utility>
#include "compiler/parse_table.h"
#include "compiler/build_tables/parse_conflict_manager.h"
#include "compiler/build_tables/remove_duplicate_states.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/lexical_grammar.h"
@ -42,8 +43,7 @@ class ParseTableBuilder {
public:
ParseTableBuilder(const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar)
: grammar(grammar),
lexical_grammar(lex_grammar) {}
: grammar(grammar), lexical_grammar(lex_grammar) {}
pair<ParseTable, const GrammarError *> build() {
Symbol start_symbol = Symbol(0, grammar.variables.empty());
@ -79,7 +79,7 @@ class ParseTableBuilder {
}
mark_fragile_actions();
remove_duplicate_states();
remove_duplicate_parse_states();
parse_table.symbols.insert({ rules::ERROR(), {} });
@ -192,49 +192,9 @@ class ParseTableBuilder {
}
}
void remove_duplicate_states() {
bool done = false;
while (!done) {
done = true;
map<ParseStateId, ParseStateId> replacements;
for (size_t i = 0, size = parse_table.states.size(); i < size; i++) {
for (size_t j = 0; j < i; j++) {
if (parse_table.states[i].actions == parse_table.states[j].actions) {
replacements.insert({ i, j });
done = false;
break;
}
}
}
for (ParseState &state : parse_table.states) {
for (auto &entry : state.actions) {
for (ParseAction &action : entry.second) {
if (action.type == ParseActionTypeShift) {
ParseStateId state_index = action.state_index;
auto replacement = replacements.find(action.state_index);
if (replacement != replacements.end()) {
state_index = replacement->second;
}
size_t prior_removed = 0;
for (const auto &replacement : replacements) {
if (replacement.first >= state_index)
break;
prior_removed++;
}
state_index -= prior_removed;
action.state_index = state_index;
}
}
}
}
for (auto i = replacements.rbegin(); i != replacements.rend(); ++i)
parse_table.states.erase(parse_table.states.begin() + i->first);
}
// Collapses identical parse states. The shared helper rewrites the
// state_index of shift actions; the index map it returns is not used here.
void remove_duplicate_parse_states() {
  auto *states = &parse_table.states;
  remove_duplicate_states<ParseState, ParseAction, ParseActionTypeShift>(
    states);
}
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,

View file

@ -22,9 +22,9 @@ pair<bool, ConflictType> ParseConflictManager::resolve(
case ParseActionTypeShift:
if (new_action.extra) {
return {false, ConflictTypeNone};
return { false, ConflictTypeNone };
} else if (old_action.extra) {
return {true, ConflictTypeNone};
return { true, ConflictTypeNone };
} else if (new_action.type == ParseActionTypeReduce) {
int min_precedence = old_action.precedence_range.min;
int max_precedence = old_action.precedence_range.max;

View file

@ -23,7 +23,8 @@ enum ConflictType {
// Decides what to do when two parse actions compete.
class ParseConflictManager {
public:
// Takes two parse actions and returns a (bool, ConflictType) pair.
// NOTE(review): presumably the bool says whether the new action should
// replace the old one - confirm against the definition.
std::pair<bool, ConflictType> resolve(const ParseAction &, const ParseAction &) const;
std::pair<bool, ConflictType> resolve(const ParseAction &,
const ParseAction &) const;
};
} // namespace build_tables

View file

@ -0,0 +1,58 @@
#ifndef COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_
#define COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_
#include <map>
#include <vector>
namespace tree_sitter {
namespace build_tables {
template <typename StateType, typename ActionType, int advance_action>
std::map<size_t, size_t> remove_duplicate_states(
std::vector<StateType> *states) {
std::map<size_t, size_t> replacements;
while (true) {
std::map<size_t, size_t> duplicates;
for (size_t i = 0, size = states->size(); i < size; i++)
for (size_t j = 0; j < i; j++)
if (states->at(i) == states->at(j)) {
duplicates.insert({ i, j });
break;
}
if (duplicates.empty())
break;
for (StateType &state : *states)
state.each_action([&duplicates, &replacements](ActionType *action) {
if (action->type == advance_action) {
size_t state_index = action->state_index;
auto replacement = duplicates.find(action->state_index);
if (replacement != duplicates.end())
state_index = replacement->second;
size_t prior_removed = 0;
for (const auto &replacement : duplicates) {
if (replacement.first >= state_index)
break;
prior_removed++;
}
state_index -= prior_removed;
replacements.insert({ action->state_index, state_index });
action->state_index = state_index;
}
});
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)
states->erase(states->begin() + i->first);
}
return replacements;
}
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_

View file

@ -3,6 +3,7 @@
namespace tree_sitter {
using std::function;
using std::string;
using std::to_string;
using std::map;
@ -53,6 +54,16 @@ set<CharacterSet> LexState::expected_inputs() const {
return result;
}
// Two lex states are equal when their action maps, their default actions and
// their is_token_start flags all match (checked in that order).
bool LexState::operator==(const LexState &other) const {
  if (!(actions == other.actions))
    return false;
  if (!(default_action == other.default_action))
    return false;
  return is_token_start == other.is_token_start;
}
// Calls `fn` with a pointer to every action stored in `actions`, in map
// order. `default_action` is not visited.
void LexState::each_action(function<void(LexAction *)> fn) {
  for (auto entry = actions.begin(), last = actions.end(); entry != last;
       ++entry) {
    LexAction *action = &entry->second;
    fn(action);
  }
}
LexStateId LexTable::add_state() {
states.push_back(LexState());
return states.size() - 1;

View file

@ -56,9 +56,12 @@ namespace tree_sitter {
// One state of the lex table: a map from character sets to actions, a
// default action, and an is_token_start flag.
class LexState {
public:
LexState();
std::set<rules::CharacterSet> expected_inputs() const;
// Equality test; the duplicate-state removal pass uses it to find identical
// states.
bool operator==(const LexState &) const;
// Calls the callback with a pointer to each action held in `actions`
// (`default_action` is not visited).
void each_action(std::function<void(LexAction *)>);
std::map<rules::CharacterSet, LexAction> actions;
LexAction default_action;
std::set<rules::CharacterSet> expected_inputs() const;
bool is_token_start;
};

View file

@ -9,6 +9,7 @@ using std::ostream;
using std::to_string;
using std::set;
using std::vector;
using std::function;
using rules::Symbol;
ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
@ -126,6 +127,16 @@ set<Symbol> ParseState::expected_inputs() const {
return result;
}
// Calls `fn` with a pointer to every parse action of this state: symbols are
// visited in map order and, for each symbol, its actions in vector order.
void ParseState::each_action(function<void(ParseAction *)> fn) {
  for (auto entry = actions.begin(), last_entry = actions.end();
       entry != last_entry; ++entry) {
    auto &symbol_actions = entry->second;
    for (auto action = symbol_actions.begin(), last = symbol_actions.end();
         action != last; ++action)
      fn(&*action);
  }
}
// Parse states compare equal when their action tables match; no other member
// takes part in the comparison.
bool ParseState::operator==(const ParseState &other) const {
  const auto &their_actions = other.actions;
  return actions == their_actions;
}
set<Symbol> ParseTable::all_symbols() const {
set<Symbol> result;
for (auto &pair : symbols)

View file

@ -79,8 +79,11 @@ namespace tree_sitter {
// One state of the parse table: for each symbol, the list of parse actions
// that apply, plus the id of a lex state.
class ParseState {
public:
ParseState();
std::map<rules::Symbol, std::vector<ParseAction>> actions;
std::set<rules::Symbol> expected_inputs() const;
// Equality test based on `actions` only; the duplicate-state removal pass
// uses it to find identical states.
bool operator==(const ParseState &) const;
// Calls the callback with a pointer to every action in `actions`.
void each_action(std::function<void(ParseAction *)>);
std::map<rules::Symbol, std::vector<ParseAction>> actions;
// Id of a lex state; LexTableBuilder rewrites it after duplicate lex states
// have been removed.
LexStateId lex_state_id;
};