Restructure parse state merging logic
* Remove remnants of the templatized remove_duplicate_states function * Rename the recovery_tokens function to get_compatible_tokens and augment it to also compute pairs of tokens which could potentially be incompatible
This commit is contained in:
parent
8d3b72e1d9
commit
3c8e6f9987
13 changed files with 274 additions and 252 deletions
|
|
@ -14,7 +14,7 @@
|
|||
'src/compiler/build_tables/build_lex_table.cc',
|
||||
'src/compiler/build_tables/build_parse_table.cc',
|
||||
'src/compiler/build_tables/build_tables.cc',
|
||||
'src/compiler/build_tables/recovery_tokens.cc',
|
||||
'src/compiler/build_tables/compatible_tokens.cc',
|
||||
'src/compiler/build_tables/lex_item.cc',
|
||||
'src/compiler/build_tables/lex_item_transitions.cc',
|
||||
'src/compiler/build_tables/lex_conflict_manager.cc',
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include "spec_helper.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/build_tables/recovery_tokens.h"
|
||||
#include "compiler/build_tables/compatible_tokens.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
|
@ -27,7 +27,7 @@ describe("recovery_tokens(rule)", []() {
|
|||
})),
|
||||
};
|
||||
|
||||
AssertThat(recovery_tokens(grammar), Equals<set<Symbol>>({ Symbol(1, Symbol::Terminal) }));
|
||||
AssertThat(get_compatible_tokens(grammar).recovery_tokens, Equals<set<Symbol>>({ Symbol(1, Symbol::Terminal) }));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -7,7 +7,6 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/build_tables/remove_duplicate_states.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
|
|
@ -143,13 +142,64 @@ class LexTableBuilder {
|
|||
state.accept_action.precedence = 0;
|
||||
}
|
||||
|
||||
auto replacements =
|
||||
remove_duplicate_states<LexTable>(&lex_table);
|
||||
map<LexStateId, LexStateId> replacements;
|
||||
|
||||
while (true) {
|
||||
map<LexStateId, LexStateId> duplicates;
|
||||
for (LexStateId i = 0, size = lex_table.states.size(); i < size; i++) {
|
||||
for (LexStateId j = 0; j < i; j++) {
|
||||
if (!duplicates.count(j) && lex_table.states[j] == lex_table.states[i]) {
|
||||
duplicates.insert({ i, j });
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (duplicates.empty()) break;
|
||||
|
||||
map<size_t, size_t> new_replacements;
|
||||
for (LexStateId i = 0, size = lex_table.states.size(); i < size; i++) {
|
||||
LexStateId new_state_index = i;
|
||||
auto duplicate = duplicates.find(i);
|
||||
if (duplicate != duplicates.end()) {
|
||||
new_state_index = duplicate->second;
|
||||
}
|
||||
|
||||
size_t prior_removed = 0;
|
||||
for (const auto &duplicate : duplicates) {
|
||||
if (duplicate.first >= new_state_index) break;
|
||||
prior_removed++;
|
||||
}
|
||||
|
||||
new_state_index -= prior_removed;
|
||||
new_replacements.insert({ i, new_state_index });
|
||||
replacements.insert({ i, new_state_index });
|
||||
for (auto &replacement : replacements) {
|
||||
if (replacement.second == i) {
|
||||
replacement.second = new_state_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &state : lex_table.states) {
|
||||
for (auto &entry : state.advance_actions) {
|
||||
auto new_replacement = new_replacements.find(entry.second.state_index);
|
||||
if (new_replacement != new_replacements.end()) {
|
||||
entry.second.state_index = new_replacement->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i) {
|
||||
lex_table.states.erase(lex_table.states.begin() + i->first);
|
||||
}
|
||||
}
|
||||
|
||||
for (ParseState &parse_state : parse_table->states) {
|
||||
auto replacement = replacements.find(parse_state.lex_state_id);
|
||||
if (replacement != replacements.end())
|
||||
if (replacement != replacements.end()) {
|
||||
parse_state.lex_state_id = replacement->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,14 +6,13 @@
|
|||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/remove_duplicate_states.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/build_tables/recovery_tokens.h"
|
||||
#include "compiler/build_tables/compatible_tokens.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -41,6 +40,7 @@ class ParseTableBuilder {
|
|||
set<string> conflicts;
|
||||
ParseItemSetBuilder item_set_builder;
|
||||
set<const Production *> fragile_productions;
|
||||
CompatibleTokensResult compatible_tokens;
|
||||
bool allow_any_conflict;
|
||||
|
||||
public:
|
||||
|
|
@ -49,6 +49,7 @@ class ParseTableBuilder {
|
|||
: grammar(grammar),
|
||||
lexical_grammar(lex_grammar),
|
||||
item_set_builder(grammar, lex_grammar),
|
||||
compatible_tokens(get_compatible_tokens(lex_grammar)),
|
||||
allow_any_conflict(false) {}
|
||||
|
||||
pair<ParseTable, CompileError> build() {
|
||||
|
|
@ -74,7 +75,7 @@ class ParseTableBuilder {
|
|||
if (error.type != TSCompileErrorTypeNone)
|
||||
return { parse_table, error };
|
||||
|
||||
parse_table.mergeable_symbols = recovery_tokens(lexical_grammar);
|
||||
parse_table.mergeable_symbols = compatible_tokens.recovery_tokens;
|
||||
|
||||
build_error_parse_state();
|
||||
|
||||
|
|
@ -302,7 +303,7 @@ class ParseTableBuilder {
|
|||
set<ParseStateId> deleted_states;
|
||||
|
||||
while (true) {
|
||||
std::map<ParseStateId, ParseStateId> state_replacements;
|
||||
map<ParseStateId, ParseStateId> state_replacements;
|
||||
|
||||
for (auto &pair : state_indices_by_signature) {
|
||||
auto &state_group = pair.second;
|
||||
|
|
@ -310,7 +311,7 @@ class ParseTableBuilder {
|
|||
for (ParseStateId i : state_group) {
|
||||
for (ParseStateId j : state_group) {
|
||||
if (j == i) break;
|
||||
if (!state_replacements.count(j) && parse_table.merge_state(j, i)) {
|
||||
if (!state_replacements.count(j) && merge_parse_state(j, i)) {
|
||||
state_replacements.insert({ i, j });
|
||||
deleted_states.insert(i);
|
||||
break;
|
||||
|
|
@ -364,6 +365,60 @@ class ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
|
||||
for (const auto &pair : state.terminal_entries)
|
||||
if (pair.second == entry)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Attempts to merge parse state `j` into parse state `i`. Returns true on
// success, in which case states[i] has absorbed every terminal entry that
// was unique to states[j], and the caller may discard states[j].
// The merge is refused unless:
//   - both states have identical nonterminal (GOTO) entries, and
//   - each terminal entry present in only one of the two states is a
//     reduction on a mergeable token (a recovery token or a built-in
//     symbol) whose actions already appear in the other state.
// NOTE(review): compatible_tokens.unmergeable_pairs is computed by
// get_compatible_tokens but never consulted here — confirm whether it
// should also block merging.
bool merge_parse_state(size_t i, size_t j) {
  ParseState &state = parse_table.states[i];
  ParseState &other = parse_table.states[j];

  // States with differing GOTO tables can never be merged.
  if (state.nonterminal_entries != other.nonterminal_entries)
    return false;

  // Pass 1: every terminal entry of `state` must be compatible with `other`.
  for (auto &entry : state.terminal_entries) {
    Symbol lookahead = entry.first;
    const vector<ParseAction> &actions = entry.second.actions;

    const auto &other_entry = other.terminal_entries.find(lookahead);
    if (other_entry == other.terminal_entries.end()) {
      // `other` lacks this lookahead: only tolerable when the extra entry
      // is a reduce on a mergeable token whose actions already exist
      // somewhere in `other`.
      if (compatible_tokens.recovery_tokens.count(lookahead) == 0 && !lookahead.is_built_in())
        return false;
      if (actions.back().type != ParseActionTypeReduce)
        return false;
      if (!has_entry(other, entry.second))
        return false;
    } else if (entry.second != other_entry->second) {
      // Both states know this lookahead but disagree on the actions.
      return false;
    }
  }

  // Pass 2: symmetric check for entries that exist only in `other`,
  // remembering them so they can be copied into `state` afterwards.
  set<Symbol> symbols_to_merge;

  for (auto &entry : other.terminal_entries) {
    Symbol lookahead = entry.first;
    const vector<ParseAction> &actions = entry.second.actions;

    if (!state.terminal_entries.count(lookahead)) {
      if (compatible_tokens.recovery_tokens.count(lookahead) == 0 && !lookahead.is_built_in())
        return false;
      if (actions.back().type != ParseActionTypeReduce)
        return false;
      if (!has_entry(state, entry.second))
        return false;
      symbols_to_merge.insert(lookahead);
    }
  }

  // The merge is valid: copy over the entries unique to `other`.
  for (const Symbol &lookahead : symbols_to_merge)
    state.terminal_entries[lookahead] = other.terminal_entries.find(lookahead)->second;

  return true;
}
|
||||
|
||||
string handle_conflict(const ParseItemSet &item_set, ParseStateId state_id,
|
||||
Symbol lookahead) {
|
||||
ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
|
||||
|
|
|
|||
132
src/compiler/build_tables/compatible_tokens.cc
Normal file
132
src/compiler/build_tables/compatible_tokens.cc
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
#include "compiler/build_tables/compatible_tokens.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using rules::Symbol;
|
||||
using std::set;
|
||||
|
||||
// Rule visitor that accumulates the union of characters a rule can match.
// The `left`/`right` template flags select which sides of a Seq are
// visited: <true, false> collects a rule's first characters,
// <false, true> its last characters, and <true, true> all characters.
// NOTE(review): <true, false> assumes a Seq's first characters come only
// from its left side — TODO confirm the left side cannot match empty.
template <bool left, bool right>
class CharacterAggregator : public rules::RuleFn<void> {
  void apply_to(const rules::Seq *rule) {
    if (left) apply(rule->left);
    if (right) apply(rule->right);
  }

  // Every branch of a choice can contribute characters.
  void apply_to(const rules::Choice *rule) {
    for (const rule_ptr &element : rule->elements) {
      apply(element);
    }
  }

  void apply_to(const rules::Repeat *rule) {
    apply(rule->content);
  }

  // Metadata wrappers are transparent; recurse into the wrapped rule.
  void apply_to(const rules::Metadata *rule) {
    apply(rule->rule);
  }

  // Leaf case: union this character set into the accumulated result.
  void apply_to(const rules::CharacterSet *rule) {
    result.add_set(*rule);
  }

 public:
  // Union of all character sets reached by the selected traversal.
  rules::CharacterSet result;
};
|
||||
|
||||
// Rule visitor that reports whether a rule can match any character in the
// target `character_set`. As with CharacterAggregator, the `left`/`right`
// flags select which sides of a Seq are considered, so <true, false>
// tests only a rule's first characters against the set.
template <bool left, bool right>
class CharacterIntersector : public rules::RuleFn<bool> {
  bool apply_to(const rules::Seq *rule) {
    bool result = false;
    if (left) result = apply(rule->left);
    // Short-circuit: skip the right side once an intersection is found.
    if (right && !result) result = apply(rule->right);
    return result;
  }

  bool apply_to(const rules::Choice *rule) {
    for (const rule_ptr &element : rule->elements) {
      if (apply(element)) return true;
    }
    return false;
  }

  bool apply_to(const rules::Repeat *rule) {
    return apply(rule->content);
  }

  bool apply_to(const rules::Metadata *rule) {
    return apply(rule->rule);
  }

  // Leaf case: does this character set overlap the target set?
  bool apply_to(const rules::CharacterSet *rule) {
    return character_set->intersects(*rule);
  }

 public:
  // Target set to intersect against; not owned, must outlive the visitor.
  rules::CharacterSet *character_set;

  CharacterIntersector(rules::CharacterSet *set) : character_set {set} {}
};
|
||||
|
||||
using FirstCharacters = CharacterAggregator<true, false>;
|
||||
using LastCharacters = CharacterAggregator<false, true>;
|
||||
using AllCharacters = CharacterAggregator<true, true>;
|
||||
using FirstCharactersIntersector = CharacterIntersector<true, false>;
|
||||
|
||||
// Analyzes every token in the lexical grammar and returns:
//   - recovery_tokens: tokens whose boundaries are detectable reliably —
//     either both their first and last characters are disjoint from all
//     separator characters, or they contain no separator characters at
//     all;
//   - unmergeable_pairs: for each token, the other tokens whose rules can
//     begin with one of this token's first characters, and which could
//     therefore be incompatible with it.
CompatibleTokensResult get_compatible_tokens(const LexicalGrammar &grammar) {
  CompatibleTokensResult result;

  // Gather every character that any separator rule can match.
  AllCharacters separator_characters;
  for (const rule_ptr &separator : grammar.separators)
    separator_characters.apply(separator);

  for (size_t index = 0; index < grammar.variables.size(); index++) {
    Symbol symbol(index, Symbol::Terminal);
    rule_ptr rule = grammar.variables[index].rule;

    // Character sets for the token's start, end, and entire body.
    FirstCharacters starting_characters;
    starting_characters.apply(rule);

    LastCharacters ending_characters;
    ending_characters.apply(rule);

    AllCharacters token_characters;
    token_characters.apply(rule);

    bool starts_distinctly =
      !starting_characters.result.includes_all &&
      !starting_characters.result.intersects(separator_characters.result);

    bool ends_distinctly =
      !ending_characters.result.includes_all &&
      !ending_characters.result.intersects(separator_characters.result);

    bool free_of_separators =
      !token_characters.result.intersects(separator_characters.result);

    // A token qualifies for recovery when its boundaries are unambiguous.
    if (free_of_separators || (starts_distinctly && ends_distinctly))
      result.recovery_tokens.insert(symbol);

    // Record every other token whose rule can start with one of this
    // token's starting characters.
    for (size_t other_index = 0; other_index < grammar.variables.size(); other_index++) {
      if (other_index == index) continue;
      FirstCharactersIntersector intersector(&starting_characters.result);
      if (intersector.apply(grammar.variables[other_index].rule)) {
        result.unmergeable_pairs[symbol].insert(Symbol(other_index, Symbol::Terminal));
      }
    }
  }

  return result;
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
25
src/compiler/build_tables/compatible_tokens.h
Normal file
25
src/compiler/build_tables/compatible_tokens.h
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_COMPATIBLE_TOKENS_H_
|
||||
#define COMPILER_BUILD_TABLES_COMPATIBLE_TOKENS_H_
|
||||
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
// Result of analyzing the tokens of a lexical grammar.
struct CompatibleTokensResult {
  // Tokens whose boundaries are distinguishable from the separator
  // characters; safe to use as synchronization points in error recovery.
  std::set<rules::Symbol> recovery_tokens;

  // For each token, the set of other tokens it could conflict with
  // (their leading characters overlap).
  std::map<rules::Symbol, std::set<rules::Symbol>> unmergeable_pairs;
};
|
||||
|
||||
CompatibleTokensResult get_compatible_tokens(const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_COMPATIBLE_TOKENS_H_
|
||||
|
|
@ -1,89 +0,0 @@
|
|||
#include "compiler/build_tables/recovery_tokens.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using rules::Symbol;
|
||||
using std::set;
|
||||
|
||||
template <bool left, bool right>
|
||||
class CharacterAggregator : public rules::RuleFn<void> {
|
||||
void apply_to(const rules::Seq *rule) {
|
||||
if (left)
|
||||
apply(rule->left);
|
||||
if (right)
|
||||
apply(rule->right);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Choice *rule) {
|
||||
for (const rule_ptr &element : rule->elements)
|
||||
apply(element);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Repeat *rule) {
|
||||
apply(rule->content);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Metadata *rule) {
|
||||
apply(rule->rule);
|
||||
}
|
||||
|
||||
void apply_to(const rules::CharacterSet *rule) {
|
||||
result.add_set(*rule);
|
||||
}
|
||||
|
||||
public:
|
||||
rules::CharacterSet result;
|
||||
};
|
||||
|
||||
class FirstCharacters : public CharacterAggregator<true, false> {};
|
||||
class LastCharacters : public CharacterAggregator<false, true> {};
|
||||
class AllCharacters : public CharacterAggregator<true, true> {};
|
||||
|
||||
set<Symbol> recovery_tokens(const LexicalGrammar &grammar) {
|
||||
set<Symbol> result;
|
||||
|
||||
AllCharacters all_separator_characters;
|
||||
for (const rule_ptr &separator : grammar.separators)
|
||||
all_separator_characters.apply(separator);
|
||||
|
||||
for (size_t i = 0; i < grammar.variables.size(); i++) {
|
||||
const Variable &variable = grammar.variables[i];
|
||||
rule_ptr rule = variable.rule;
|
||||
|
||||
FirstCharacters first_characters;
|
||||
first_characters.apply(variable.rule);
|
||||
|
||||
LastCharacters last_characters;
|
||||
last_characters.apply(variable.rule);
|
||||
|
||||
AllCharacters all_characters;
|
||||
all_characters.apply(variable.rule);
|
||||
|
||||
bool has_distinct_start =
|
||||
!first_characters.result.includes_all &&
|
||||
!first_characters.result.intersects(all_separator_characters.result);
|
||||
|
||||
bool has_distinct_end =
|
||||
!last_characters.result.includes_all &&
|
||||
!last_characters.result.intersects(all_separator_characters.result);
|
||||
|
||||
bool has_no_separators =
|
||||
!all_characters.result.intersects(all_separator_characters.result);
|
||||
|
||||
if ((has_distinct_start && has_distinct_end) || has_no_separators)
|
||||
result.insert(Symbol(i, Symbol::Terminal));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
|
||||
#define COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
|
||||
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include <set>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
std::set<rules::Symbol> recovery_tokens(const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_
|
||||
#define COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
template <typename TableType>
|
||||
std::map<size_t, size_t> remove_duplicate_states(TableType *table) {
|
||||
std::map<size_t, size_t> replacements;
|
||||
|
||||
while (true) {
|
||||
std::map<size_t, size_t> duplicates;
|
||||
for (size_t i = 0, size = table->states.size(); i < size; i++)
|
||||
for (size_t j = 0; j < i; j++)
|
||||
if (!duplicates.count(j) && table->merge_state(j, i)) {
|
||||
duplicates.insert({ i, j });
|
||||
break;
|
||||
}
|
||||
|
||||
if (duplicates.empty())
|
||||
break;
|
||||
|
||||
std::map<size_t, size_t> new_replacements;
|
||||
for (size_t i = 0, size = table->states.size(); i < size; i++) {
|
||||
size_t new_state_index = i;
|
||||
auto duplicate = duplicates.find(i);
|
||||
if (duplicate != duplicates.end())
|
||||
new_state_index = duplicate->second;
|
||||
|
||||
size_t prior_removed = 0;
|
||||
for (const auto &duplicate : duplicates) {
|
||||
if (duplicate.first >= new_state_index)
|
||||
break;
|
||||
prior_removed++;
|
||||
}
|
||||
|
||||
new_state_index -= prior_removed;
|
||||
new_replacements.insert({ i, new_state_index });
|
||||
replacements.insert({ i, new_state_index });
|
||||
for (auto &replacement : replacements)
|
||||
if (replacement.second == i)
|
||||
replacement.second = new_state_index;
|
||||
}
|
||||
|
||||
for (auto &state : table->states)
|
||||
state.each_referenced_state([&new_replacements](int64_t *state_index) {
|
||||
auto new_replacement = new_replacements.find(*state_index);
|
||||
if (new_replacement != new_replacements.end())
|
||||
*state_index = new_replacement->second;
|
||||
});
|
||||
|
||||
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)
|
||||
table->states.erase(table->states.begin() + i->first);
|
||||
}
|
||||
|
||||
return replacements;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_
|
||||
|
|
@ -57,11 +57,6 @@ bool LexState::operator==(const LexState &other) const {
|
|||
is_token_start == other.is_token_start;
|
||||
}
|
||||
|
||||
void LexState::each_referenced_state(function<void(LexStateId *)> fn) {
|
||||
for (auto &entry : advance_actions)
|
||||
fn(&entry.second.state_index);
|
||||
}
|
||||
|
||||
LexStateId LexTable::add_state() {
|
||||
states.push_back(LexState());
|
||||
return states.size() - 1;
|
||||
|
|
@ -71,8 +66,4 @@ LexState &LexTable::state(LexStateId id) {
|
|||
return states[id];
|
||||
}
|
||||
|
||||
bool LexTable::merge_state(size_t i, size_t j) {
|
||||
return states[i] == states[j];
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -54,7 +54,6 @@ class LexState {
|
|||
LexState();
|
||||
std::set<rules::CharacterSet> expected_inputs() const;
|
||||
bool operator==(const LexState &) const;
|
||||
void each_referenced_state(std::function<void(LexStateId *)>);
|
||||
|
||||
std::map<rules::CharacterSet, AdvanceAction> advance_actions;
|
||||
AcceptTokenAction accept_action;
|
||||
|
|
@ -66,8 +65,6 @@ class LexTable {
|
|||
LexStateId add_state();
|
||||
LexState &state(LexStateId state_id);
|
||||
std::vector<LexState> states;
|
||||
|
||||
bool merge_state(size_t i, size_t j);
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -201,58 +201,4 @@ void ParseTable::set_nonterminal_action(ParseStateId state_id,
|
|||
states[state_id].nonterminal_entries[lookahead] = next_state_id;
|
||||
}
|
||||
|
||||
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
|
||||
for (const auto &pair : state.terminal_entries)
|
||||
if (pair.second == entry)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ParseTable::merge_state(size_t i, size_t j) {
|
||||
ParseState &state = states[i];
|
||||
ParseState &other = states[j];
|
||||
|
||||
if (state.nonterminal_entries != other.nonterminal_entries)
|
||||
return false;
|
||||
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
const auto &other_entry = other.terminal_entries.find(lookahead);
|
||||
if (other_entry == other.terminal_entries.end()) {
|
||||
if (mergeable_symbols.count(lookahead) == 0 && !lookahead.is_built_in())
|
||||
return false;
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(other, entry.second))
|
||||
return false;
|
||||
} else if (entry.second != other_entry->second) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
set<Symbol> symbols_to_merge;
|
||||
|
||||
for (auto &entry : other.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
if (!state.terminal_entries.count(lookahead)) {
|
||||
if (mergeable_symbols.count(lookahead) == 0 && !lookahead.is_built_in())
|
||||
return false;
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(state, entry.second))
|
||||
return false;
|
||||
symbols_to_merge.insert(lookahead);
|
||||
}
|
||||
}
|
||||
|
||||
for (const Symbol &lookahead : symbols_to_merge)
|
||||
state.terminal_entries[lookahead] = other.terminal_entries.find(lookahead)->second;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -93,7 +93,6 @@ class ParseTable {
|
|||
ParseStateId add_state();
|
||||
ParseAction &add_terminal_action(ParseStateId state_id, rules::Symbol, ParseAction);
|
||||
void set_nonterminal_action(ParseStateId, rules::Symbol::Index, ParseStateId);
|
||||
bool merge_state(size_t i, size_t j);
|
||||
|
||||
std::vector<ParseState> states;
|
||||
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue