Ensure that there are no duplicate lex states
This commit is contained in:
parent
c9db5499e9
commit
386b124866
11 changed files with 418 additions and 401 deletions
10
spec/fixtures/parsers/anonymous_tokens.c
vendored
10
spec/fixtures/parsers/anonymous_tokens.c
vendored
|
|
@ -102,20 +102,14 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
|
|||
(lookahead == ' '))
|
||||
ADVANCE(14);
|
||||
if (lookahead == '\n')
|
||||
ADVANCE(15);
|
||||
ADVANCE(2);
|
||||
if (lookahead == '\r')
|
||||
ADVANCE(16);
|
||||
ADVANCE(3);
|
||||
if (lookahead == '\"')
|
||||
ADVANCE(4);
|
||||
if ('0' <= lookahead && lookahead <= '9')
|
||||
ADVANCE(11);
|
||||
LEX_ERROR();
|
||||
case 15:
|
||||
START_TOKEN();
|
||||
ACCEPT_TOKEN(anon_sym_LF);
|
||||
case 16:
|
||||
START_TOKEN();
|
||||
ACCEPT_TOKEN(anon_sym_CR);
|
||||
case ts_lex_state_error:
|
||||
START_TOKEN();
|
||||
if (lookahead == 0)
|
||||
|
|
|
|||
643
spec/fixtures/parsers/c.c
vendored
643
spec/fixtures/parsers/c.c
vendored
File diff suppressed because it is too large
Load diff
|
|
@ -7,6 +7,7 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/build_tables/remove_duplicate_states.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/build_tables/does_match_any_line.h"
|
||||
#include "compiler/parse_table.h"
|
||||
|
|
@ -40,8 +41,7 @@ class LexTableBuilder {
|
|||
|
||||
public:
|
||||
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar)
|
||||
: lex_grammar(lex_grammar),
|
||||
parse_table(parse_table) {
|
||||
: lex_grammar(lex_grammar), parse_table(parse_table) {
|
||||
for (const rule_ptr &rule : lex_grammar.separators)
|
||||
separator_rules.push_back(rules::Repeat::build(rule));
|
||||
separator_rules.push_back(rules::Blank::build());
|
||||
|
|
@ -59,6 +59,7 @@ class LexTableBuilder {
|
|||
populate_lex_state(error_item_set, LexTable::ERROR_STATE_ID);
|
||||
|
||||
mark_fragile_tokens();
|
||||
remove_duplicate_lex_states();
|
||||
|
||||
return lex_table;
|
||||
}
|
||||
|
|
@ -161,6 +162,18 @@ class LexTableBuilder {
|
|||
if (conflict_manager.fragile_tokens.count(state.default_action.symbol))
|
||||
state.default_action.type = LexActionTypeAcceptFragile;
|
||||
}
|
||||
|
||||
void remove_duplicate_lex_states() {
|
||||
auto replacements =
|
||||
remove_duplicate_states<LexState, LexAction, LexActionTypeAdvance>(
|
||||
&lex_table.states);
|
||||
|
||||
for (ParseState &parse_state : parse_table->states) {
|
||||
auto replacement = replacements.find(parse_state.lex_state_id);
|
||||
if (replacement != replacements.end())
|
||||
parse_state.lex_state_id = replacement->second;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) {
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include <utility>
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/parse_conflict_manager.h"
|
||||
#include "compiler/build_tables/remove_duplicate_states.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
|
|
@ -42,8 +43,7 @@ class ParseTableBuilder {
|
|||
public:
|
||||
ParseTableBuilder(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lex_grammar)
|
||||
: grammar(grammar),
|
||||
lexical_grammar(lex_grammar) {}
|
||||
: grammar(grammar), lexical_grammar(lex_grammar) {}
|
||||
|
||||
pair<ParseTable, const GrammarError *> build() {
|
||||
Symbol start_symbol = Symbol(0, grammar.variables.empty());
|
||||
|
|
@ -79,7 +79,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
mark_fragile_actions();
|
||||
remove_duplicate_states();
|
||||
remove_duplicate_parse_states();
|
||||
|
||||
parse_table.symbols.insert({ rules::ERROR(), {} });
|
||||
|
||||
|
|
@ -192,49 +192,9 @@ class ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
void remove_duplicate_states() {
|
||||
bool done = false;
|
||||
while (!done) {
|
||||
done = true;
|
||||
|
||||
map<ParseStateId, ParseStateId> replacements;
|
||||
for (size_t i = 0, size = parse_table.states.size(); i < size; i++) {
|
||||
for (size_t j = 0; j < i; j++) {
|
||||
if (parse_table.states[i].actions == parse_table.states[j].actions) {
|
||||
replacements.insert({ i, j });
|
||||
done = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (ParseState &state : parse_table.states) {
|
||||
for (auto &entry : state.actions) {
|
||||
for (ParseAction &action : entry.second) {
|
||||
if (action.type == ParseActionTypeShift) {
|
||||
ParseStateId state_index = action.state_index;
|
||||
auto replacement = replacements.find(action.state_index);
|
||||
if (replacement != replacements.end()) {
|
||||
state_index = replacement->second;
|
||||
}
|
||||
|
||||
size_t prior_removed = 0;
|
||||
for (const auto &replacement : replacements) {
|
||||
if (replacement.first >= state_index)
|
||||
break;
|
||||
prior_removed++;
|
||||
}
|
||||
|
||||
state_index -= prior_removed;
|
||||
action.state_index = state_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto i = replacements.rbegin(); i != replacements.rend(); ++i)
|
||||
parse_table.states.erase(parse_table.states.begin() + i->first);
|
||||
}
|
||||
// Merges parse states that compare equal and renumbers the targets of shift
// actions accordingly. The index map returned by the helper is not needed
// here and is discarded.
void remove_duplicate_parse_states() {
  auto *all_states = &parse_table.states;
  remove_duplicate_states<ParseState, ParseAction, ParseActionTypeShift>(
    all_states);
}
|
||||
|
||||
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,
|
||||
|
|
|
|||
|
|
@ -22,9 +22,9 @@ pair<bool, ConflictType> ParseConflictManager::resolve(
|
|||
|
||||
case ParseActionTypeShift:
|
||||
if (new_action.extra) {
|
||||
return {false, ConflictTypeNone};
|
||||
return { false, ConflictTypeNone };
|
||||
} else if (old_action.extra) {
|
||||
return {true, ConflictTypeNone};
|
||||
return { true, ConflictTypeNone };
|
||||
} else if (new_action.type == ParseActionTypeReduce) {
|
||||
int min_precedence = old_action.precedence_range.min;
|
||||
int max_precedence = old_action.precedence_range.max;
|
||||
|
|
|
|||
|
|
@ -23,7 +23,8 @@ enum ConflictType {
|
|||
|
||||
class ParseConflictManager {
|
||||
public:
|
||||
std::pair<bool, ConflictType> resolve(const ParseAction &, const ParseAction &) const;
|
||||
std::pair<bool, ConflictType> resolve(const ParseAction &,
|
||||
const ParseAction &) const;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
58
src/compiler/build_tables/remove_duplicate_states.h
Normal file
58
src/compiler/build_tables/remove_duplicate_states.h
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_
|
||||
#define COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
template <typename StateType, typename ActionType, int advance_action>
|
||||
std::map<size_t, size_t> remove_duplicate_states(
|
||||
std::vector<StateType> *states) {
|
||||
std::map<size_t, size_t> replacements;
|
||||
|
||||
while (true) {
|
||||
std::map<size_t, size_t> duplicates;
|
||||
for (size_t i = 0, size = states->size(); i < size; i++)
|
||||
for (size_t j = 0; j < i; j++)
|
||||
if (states->at(i) == states->at(j)) {
|
||||
duplicates.insert({ i, j });
|
||||
break;
|
||||
}
|
||||
|
||||
if (duplicates.empty())
|
||||
break;
|
||||
|
||||
for (StateType &state : *states)
|
||||
state.each_action([&duplicates, &replacements](ActionType *action) {
|
||||
if (action->type == advance_action) {
|
||||
size_t state_index = action->state_index;
|
||||
auto replacement = duplicates.find(action->state_index);
|
||||
if (replacement != duplicates.end())
|
||||
state_index = replacement->second;
|
||||
|
||||
size_t prior_removed = 0;
|
||||
for (const auto &replacement : duplicates) {
|
||||
if (replacement.first >= state_index)
|
||||
break;
|
||||
prior_removed++;
|
||||
}
|
||||
|
||||
state_index -= prior_removed;
|
||||
replacements.insert({ action->state_index, state_index });
|
||||
action->state_index = state_index;
|
||||
}
|
||||
});
|
||||
|
||||
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)
|
||||
states->erase(states->begin() + i->first);
|
||||
}
|
||||
|
||||
return replacements;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_
|
||||
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::function;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::map;
|
||||
|
|
@ -53,6 +54,16 @@ set<CharacterSet> LexState::expected_inputs() const {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Two lex states are equal when their per-character-set actions, their
// default action, and their token-start flag all match.
bool LexState::operator==(const LexState &other) const {
  if (!(actions == other.actions))
    return false;
  if (!(default_action == other.default_action))
    return false;
  return is_token_start == other.is_token_start;
}
|
||||
|
||||
// Invokes `fn` with a mutable pointer to every action stored in `actions`.
// The default action is not visited.
void LexState::each_action(function<void(LexAction *)> fn) {
  for (auto it = actions.begin(); it != actions.end(); ++it)
    fn(&it->second);
}
|
||||
|
||||
LexStateId LexTable::add_state() {
|
||||
states.push_back(LexState());
|
||||
return states.size() - 1;
|
||||
|
|
|
|||
|
|
@ -56,9 +56,12 @@ namespace tree_sitter {
|
|||
class LexState {
|
||||
public:
|
||||
LexState();
|
||||
std::set<rules::CharacterSet> expected_inputs() const;
|
||||
bool operator==(const LexState &) const;
|
||||
void each_action(std::function<void(LexAction *)>);
|
||||
|
||||
std::map<rules::CharacterSet, LexAction> actions;
|
||||
LexAction default_action;
|
||||
std::set<rules::CharacterSet> expected_inputs() const;
|
||||
bool is_token_start;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ using std::ostream;
|
|||
using std::to_string;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
using std::function;
|
||||
using rules::Symbol;
|
||||
|
||||
ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
|
||||
|
|
@ -126,6 +127,16 @@ set<Symbol> ParseState::expected_inputs() const {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Invokes `fn` with a mutable pointer to every action of this state, walking
// the symbols in map order and each symbol's actions in vector order.
void ParseState::each_action(function<void(ParseAction *)> fn) {
  for (auto it = actions.begin(); it != actions.end(); ++it) {
    auto &symbol_actions = it->second;
    for (auto action = symbol_actions.begin(); action != symbol_actions.end();
         ++action)
      fn(&*action);
  }
}
|
||||
|
||||
// Parse states compare equal when their action tables match; `lex_state_id`
// does not take part in the comparison.
bool ParseState::operator==(const ParseState &other) const {
  const bool same_actions = (actions == other.actions);
  return same_actions;
}
|
||||
|
||||
set<Symbol> ParseTable::all_symbols() const {
|
||||
set<Symbol> result;
|
||||
for (auto &pair : symbols)
|
||||
|
|
|
|||
|
|
@ -79,8 +79,11 @@ namespace tree_sitter {
|
|||
class ParseState {
|
||||
public:
|
||||
ParseState();
|
||||
std::map<rules::Symbol, std::vector<ParseAction>> actions;
|
||||
std::set<rules::Symbol> expected_inputs() const;
|
||||
bool operator==(const ParseState &) const;
|
||||
void each_action(std::function<void(ParseAction *)>);
|
||||
|
||||
std::map<rules::Symbol, std::vector<ParseAction>> actions;
|
||||
LexStateId lex_state_id;
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue