Introduce Symbol::Index type alias
This commit is contained in:
parent
1fddb124b3
commit
1118a9142a
18 changed files with 130 additions and 97 deletions
|
|
@ -27,9 +27,7 @@ describe("recovery_tokens(rule)", []() {
|
|||
})),
|
||||
};
|
||||
|
||||
AssertThat(recovery_tokens(grammar), Equals<set<Symbol>>({
|
||||
Symbol(1, true),
|
||||
}));
|
||||
AssertThat(recovery_tokens(grammar), Equals<set<Symbol::Index>>({ 1 }));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
#include "spec_helper.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
|
||||
using namespace build_tables;
|
||||
using namespace rules;
|
||||
|
|
@ -10,6 +12,17 @@ using namespace rules;
|
|||
START_TEST
|
||||
|
||||
describe("ParseItemSetBuilder", []() {
|
||||
vector<Variable> lexical_variables;
|
||||
for (size_t i = 0; i < 20; i++) {
|
||||
lexical_variables.push_back(Variable{
|
||||
"token_" + to_string(i),
|
||||
VariableTypeNamed,
|
||||
blank(),
|
||||
});
|
||||
}
|
||||
|
||||
LexicalGrammar lexical_grammar{lexical_variables, {}};
|
||||
|
||||
it("adds items at the beginnings of referenced rules", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("rule0", VariableTypeNamed, {
|
||||
|
|
@ -42,29 +55,29 @@ describe("ParseItemSetBuilder", []() {
|
|||
ParseItemSet item_set({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, true) }),
|
||||
LookaheadSet({ 10 }),
|
||||
}
|
||||
});
|
||||
|
||||
ParseItemSetBuilder item_set_builder(grammar);
|
||||
ParseItemSetBuilder item_set_builder(grammar, lexical_grammar);
|
||||
item_set_builder.apply_transitive_closure(&item_set);
|
||||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, true) })
|
||||
LookaheadSet({ 10 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol(11, true) })
|
||||
LookaheadSet({ 11 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 1), 0),
|
||||
LookaheadSet({ Symbol(11, true) })
|
||||
LookaheadSet({ 11 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(2), production(2, 0), 0),
|
||||
LookaheadSet({ Symbol(11, true) })
|
||||
LookaheadSet({ 11 })
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
|
@ -93,25 +106,25 @@ describe("ParseItemSetBuilder", []() {
|
|||
ParseItemSet item_set({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, true) }),
|
||||
LookaheadSet({ 10 }),
|
||||
}
|
||||
});
|
||||
|
||||
ParseItemSetBuilder item_set_builder(grammar);
|
||||
ParseItemSetBuilder item_set_builder(grammar, lexical_grammar);
|
||||
item_set_builder.apply_transitive_closure(&item_set);
|
||||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, true) })
|
||||
LookaheadSet({ 10 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol(11, true) })
|
||||
LookaheadSet({ 11 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 1), 0),
|
||||
LookaheadSet({ Symbol(11, true) })
|
||||
LookaheadSet({ 11 })
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -91,25 +91,25 @@ describe("ParseItemSet::transitions())", [&]() {
|
|||
// Two symbols into the first production for rule_0
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 2),
|
||||
LookaheadSet({ Symbol(21, true) })
|
||||
LookaheadSet({ 21 })
|
||||
},
|
||||
|
||||
// Two symbols into the second production for rule_0
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 1), 2),
|
||||
LookaheadSet({ Symbol(21, true) })
|
||||
LookaheadSet({ 21 })
|
||||
},
|
||||
|
||||
// At the beginning of the first production for rule_1
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol(22, true) })
|
||||
LookaheadSet({ 22 })
|
||||
},
|
||||
|
||||
// At the end of the first production for rule_2
|
||||
{
|
||||
ParseItem(Symbol(2), production(2, 0), 1),
|
||||
LookaheadSet({ Symbol(22, true) })
|
||||
LookaheadSet({ 22 })
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -122,7 +122,7 @@ describe("ParseItemSet::transitions())", [&]() {
|
|||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 3),
|
||||
LookaheadSet({ Symbol(21, true) })
|
||||
LookaheadSet({ 21 })
|
||||
}
|
||||
}),
|
||||
PrecedenceRange(5, 5)
|
||||
|
|
@ -137,11 +137,11 @@ describe("ParseItemSet::transitions())", [&]() {
|
|||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 1), 3),
|
||||
LookaheadSet({ Symbol(21, true) })
|
||||
LookaheadSet({ 21 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 0), 1),
|
||||
LookaheadSet({ Symbol(22, true) })
|
||||
LookaheadSet({ 22 })
|
||||
},
|
||||
}),
|
||||
PrecedenceRange(6, 7)
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ class LexTableBuilder {
|
|||
for (auto &entry : state.terminal_entries) {
|
||||
auto homonyms = conflict_manager.possible_homonyms.find(entry.first);
|
||||
if (homonyms != conflict_manager.possible_homonyms.end())
|
||||
for (int homonym : homonyms->second)
|
||||
for (Symbol::Index homonym : homonyms->second)
|
||||
if (state.terminal_entries.count(homonym)) {
|
||||
entry.second.reusable = false;
|
||||
break;
|
||||
|
|
@ -128,7 +128,7 @@ class LexTableBuilder {
|
|||
|
||||
auto extensions = conflict_manager.possible_extensions.find(entry.first);
|
||||
if (extensions != conflict_manager.possible_extensions.end())
|
||||
for (int extension : extensions->second)
|
||||
for (Symbol::Index extension : extensions->second)
|
||||
if (state.terminal_entries.count(extension)) {
|
||||
entry.second.depends_on_lookahead = true;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
|
@ -49,7 +49,7 @@ class ParseTableBuilder {
|
|||
const LexicalGrammar &lex_grammar)
|
||||
: grammar(grammar),
|
||||
lexical_grammar(lex_grammar),
|
||||
item_set_builder(grammar),
|
||||
item_set_builder(grammar, lex_grammar),
|
||||
allow_any_conflict(false) {}
|
||||
|
||||
pair<ParseTable, CompileError> build() {
|
||||
|
|
@ -64,7 +64,7 @@ class ParseTableBuilder {
|
|||
add_parse_state(ParseItemSet({
|
||||
{
|
||||
ParseItem(rules::START(), start_production, 0),
|
||||
LookaheadSet({ END_OF_INPUT() }),
|
||||
LookaheadSet({ END_OF_INPUT().index }),
|
||||
},
|
||||
}));
|
||||
|
||||
|
|
@ -111,8 +111,8 @@ class ParseTableBuilder {
|
|||
void build_error_parse_state() {
|
||||
ParseState error_state;
|
||||
|
||||
for (const Symbol &symbol : parse_table.mergeable_symbols) {
|
||||
add_out_of_context_parse_state(&error_state, symbol);
|
||||
for (const Symbol::Index index : parse_table.mergeable_symbols) {
|
||||
add_out_of_context_parse_state(&error_state, Symbol(index, true));
|
||||
}
|
||||
|
||||
for (const Symbol &symbol : grammar.extra_tokens) {
|
||||
|
|
@ -167,7 +167,7 @@ class ParseTableBuilder {
|
|||
|
||||
if (symbol.is_token) {
|
||||
ParseAction *new_action = add_terminal_action(
|
||||
state_id, symbol, ParseAction::Shift(0, precedence), item_set);
|
||||
state_id, symbol.index, ParseAction::Shift(0, precedence), item_set);
|
||||
if (new_action) {
|
||||
new_action->state_index = add_parse_state(next_item_set);
|
||||
}
|
||||
|
|
@ -193,7 +193,7 @@ class ParseTableBuilder {
|
|||
status.associativity, *item.production);
|
||||
}
|
||||
|
||||
for (const Symbol &lookahead : *lookahead_symbols.entries) {
|
||||
for (const Symbol::Index lookahead : *lookahead_symbols.entries) {
|
||||
add_terminal_action(state_id, lookahead, action, item_set);
|
||||
}
|
||||
}
|
||||
|
|
@ -253,15 +253,15 @@ class ParseTableBuilder {
|
|||
remove_duplicate_states<ParseTable>(&parse_table);
|
||||
}
|
||||
|
||||
ParseAction *add_terminal_action(ParseStateId state_id, Symbol lookahead,
|
||||
ParseAction *add_terminal_action(ParseStateId state_id, Symbol::Index lookahead,
|
||||
const ParseAction &new_action,
|
||||
const ParseItemSet &item_set) {
|
||||
const ParseState &state = parse_table.states[state_id];
|
||||
const auto &current_entry = state.terminal_entries.find(lookahead.index);
|
||||
const auto &current_entry = state.terminal_entries.find(lookahead);
|
||||
if (current_entry == state.terminal_entries.end())
|
||||
return &parse_table.set_terminal_action(state_id, lookahead.index, new_action);
|
||||
return &parse_table.set_terminal_action(state_id, lookahead, new_action);
|
||||
if (allow_any_conflict)
|
||||
return &parse_table.add_terminal_action(state_id, lookahead.index, new_action);
|
||||
return &parse_table.add_terminal_action(state_id, lookahead, new_action);
|
||||
|
||||
const ParseAction old_action = current_entry->second.actions[0];
|
||||
auto resolution = conflict_manager.resolve(new_action, old_action);
|
||||
|
|
@ -269,7 +269,7 @@ class ParseTableBuilder {
|
|||
switch (resolution.second) {
|
||||
case ConflictTypeNone:
|
||||
if (resolution.first) {
|
||||
return &parse_table.set_terminal_action(state_id, lookahead.index, new_action);
|
||||
return &parse_table.set_terminal_action(state_id, lookahead, new_action);
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
@ -277,7 +277,7 @@ class ParseTableBuilder {
|
|||
if (resolution.first) {
|
||||
if (old_action.type == ParseActionTypeReduce)
|
||||
fragile_productions.insert(old_action.production);
|
||||
return &parse_table.set_terminal_action(state_id, lookahead.index, new_action);
|
||||
return &parse_table.set_terminal_action(state_id, lookahead, new_action);
|
||||
} else {
|
||||
if (new_action.type == ParseActionTypeReduce)
|
||||
fragile_productions.insert(new_action.production);
|
||||
|
|
@ -291,7 +291,7 @@ class ParseTableBuilder {
|
|||
fragile_productions.insert(old_action.production);
|
||||
if (new_action.type == ParseActionTypeReduce)
|
||||
fragile_productions.insert(new_action.production);
|
||||
return &parse_table.add_terminal_action(state_id, lookahead.index, new_action);
|
||||
return &parse_table.add_terminal_action(state_id, lookahead, new_action);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -301,7 +301,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
bool handle_unresolved_conflict(const ParseItemSet &item_set,
|
||||
const Symbol &lookahead) {
|
||||
const Symbol::Index lookahead) {
|
||||
set<Symbol> involved_symbols;
|
||||
set<ParseItem> reduce_items;
|
||||
set<ParseItem> core_shift_items;
|
||||
|
|
@ -319,12 +319,12 @@ class ParseTableBuilder {
|
|||
}
|
||||
} else {
|
||||
if (item.step_index > 0) {
|
||||
set<Symbol> first_set = get_first_set(next_symbol);
|
||||
if (first_set.count(lookahead)) {
|
||||
LookaheadSet first_set = item_set_builder.get_first_set(next_symbol);
|
||||
if (first_set.contains(lookahead)) {
|
||||
involved_symbols.insert(item.lhs());
|
||||
core_shift_items.insert(item);
|
||||
}
|
||||
} else if (next_symbol == lookahead) {
|
||||
} else if (next_symbol.is_token && next_symbol.index == lookahead) {
|
||||
other_shift_items.insert(item);
|
||||
}
|
||||
}
|
||||
|
|
@ -334,7 +334,7 @@ class ParseTableBuilder {
|
|||
if (involved_symbols == conflict_set)
|
||||
return true;
|
||||
|
||||
string description = "Lookahead symbol: " + symbol_name(lookahead) + "\n";
|
||||
string description = "Lookahead symbol: " + symbol_name(Symbol(lookahead, true)) + "\n";
|
||||
|
||||
if (!reduce_items.empty()) {
|
||||
description += "Reduce items:\n";
|
||||
|
|
|
|||
|
|
@ -21,8 +21,8 @@ class LexConflictManager {
|
|||
const AcceptTokenAction &);
|
||||
bool resolve(const AcceptTokenAction &, const AcceptTokenAction &);
|
||||
|
||||
std::map<int, std::set<int>> possible_homonyms;
|
||||
std::map<int, std::set<int>> possible_extensions;
|
||||
std::map<rules::Symbol::Index, std::set<rules::Symbol::Index>> possible_homonyms;
|
||||
std::map<rules::Symbol::Index, std::set<rules::Symbol::Index>> possible_extensions;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -12,8 +12,8 @@ using rules::Symbol;
|
|||
|
||||
LookaheadSet::LookaheadSet() : entries(nullptr) {}
|
||||
|
||||
LookaheadSet::LookaheadSet(const set<Symbol> &symbols)
|
||||
: entries(make_shared<set<Symbol>>(symbols)) {}
|
||||
LookaheadSet::LookaheadSet(const set<Symbol::Index> &symbols)
|
||||
: entries(make_shared<set<Symbol::Index>>(symbols)) {}
|
||||
|
||||
bool LookaheadSet::empty() const {
|
||||
return !entries.get() || entries->empty();
|
||||
|
|
@ -23,7 +23,7 @@ bool LookaheadSet::operator==(const LookaheadSet &other) const {
|
|||
return *entries == *other.entries;
|
||||
}
|
||||
|
||||
bool LookaheadSet::contains(const Symbol &symbol) const {
|
||||
bool LookaheadSet::contains(const Symbol::Index &symbol) const {
|
||||
return entries->find(symbol) != entries->end();
|
||||
}
|
||||
|
||||
|
|
@ -31,15 +31,15 @@ bool LookaheadSet::insert_all(const LookaheadSet &other) {
|
|||
if (!other.entries.get())
|
||||
return false;
|
||||
if (!entries.get())
|
||||
entries = make_shared<set<Symbol>>();
|
||||
entries = make_shared<set<Symbol::Index>>();
|
||||
size_t previous_size = entries->size();
|
||||
entries->insert(other.entries->begin(), other.entries->end());
|
||||
return entries->size() > previous_size;
|
||||
}
|
||||
|
||||
bool LookaheadSet::insert(const Symbol &symbol) {
|
||||
bool LookaheadSet::insert(const Symbol::Index &symbol) {
|
||||
if (!entries.get())
|
||||
entries = make_shared<set<Symbol>>();
|
||||
entries = make_shared<set<Symbol::Index>>();
|
||||
return entries->insert(symbol).second;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,15 +11,15 @@ namespace build_tables {
|
|||
class LookaheadSet {
|
||||
public:
|
||||
LookaheadSet();
|
||||
explicit LookaheadSet(const std::set<rules::Symbol> &);
|
||||
explicit LookaheadSet(const std::set<rules::Symbol::Index> &);
|
||||
|
||||
bool empty() const;
|
||||
bool operator==(const LookaheadSet &) const;
|
||||
bool contains(const rules::Symbol &) const;
|
||||
bool contains(const rules::Symbol::Index &) const;
|
||||
bool insert_all(const LookaheadSet &);
|
||||
bool insert(const rules::Symbol &);
|
||||
bool insert(const rules::Symbol::Index &);
|
||||
|
||||
std::shared_ptr<std::set<rules::Symbol>> entries;
|
||||
std::shared_ptr<std::set<rules::Symbol::Index>> entries;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -102,8 +102,8 @@ size_t ParseItemSet::Hash::operator()(const ParseItemSet &item_set) const {
|
|||
|
||||
const LookaheadSet &lookahead_set = pair.second;
|
||||
result ^= hash<size_t>()(lookahead_set.entries->size());
|
||||
for (auto &symbol : *pair.second.entries)
|
||||
result ^= hash<tree_sitter::rules::Symbol>()(symbol);
|
||||
for (Symbol::Index index : *pair.second.entries)
|
||||
result ^= hash<Symbol::Index>()(index);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include <vector>
|
||||
#include <utility>
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -19,7 +20,8 @@ using std::make_shared;
|
|||
using rules::Symbol;
|
||||
using rules::NONE;
|
||||
|
||||
static map<Symbol, LookaheadSet> build_first_sets(const SyntaxGrammar &grammar) {
|
||||
static map<Symbol, LookaheadSet> build_first_sets(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lexical_grammar) {
|
||||
map<Symbol, LookaheadSet> result;
|
||||
vector<Symbol> symbol_stack;
|
||||
set<Symbol> processed_symbols;
|
||||
|
|
@ -35,7 +37,7 @@ static map<Symbol, LookaheadSet> build_first_sets(const SyntaxGrammar &grammar)
|
|||
Symbol current_symbol = symbol_stack.back();
|
||||
symbol_stack.pop_back();
|
||||
if (current_symbol.is_token) {
|
||||
first_set.insert(current_symbol);
|
||||
first_set.insert(current_symbol.index);
|
||||
} else if (processed_symbols.insert(current_symbol).second) {
|
||||
for (const Production &production : grammar.productions(current_symbol)) {
|
||||
if (!production.empty()) {
|
||||
|
|
@ -48,11 +50,17 @@ static map<Symbol, LookaheadSet> build_first_sets(const SyntaxGrammar &grammar)
|
|||
result.insert({symbol, first_set});
|
||||
}
|
||||
|
||||
for (int i = 0; i < lexical_grammar.variables.size(); i++) {
|
||||
Symbol symbol(i, true);
|
||||
result.insert({symbol, LookaheadSet({ i })});
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar) :
|
||||
grammar{&grammar}, first_sets{build_first_sets(grammar)} {
|
||||
ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lexical_grammar) :
|
||||
grammar{&grammar}, first_sets{build_first_sets(grammar, lexical_grammar)} {
|
||||
}
|
||||
|
||||
void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
|
||||
|
|
@ -88,11 +96,7 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
|
|||
next_lookahead_symbols = lookahead_symbols;
|
||||
} else {
|
||||
Symbol symbol_after_next = item.production->at(next_step).symbol;
|
||||
if (symbol_after_next.is_token) {
|
||||
next_lookahead_symbols.insert(symbol_after_next);
|
||||
} else {
|
||||
next_lookahead_symbols = first_sets.find(symbol_after_next)->second;
|
||||
}
|
||||
next_lookahead_symbols = first_sets.find(symbol_after_next)->second;
|
||||
}
|
||||
|
||||
// Add each of the next symbol's productions to be processed recursively.
|
||||
|
|
@ -105,5 +109,9 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
|
|||
}
|
||||
}
|
||||
|
||||
LookaheadSet ParseItemSetBuilder::get_first_set(rules::Symbol &symbol) const {
|
||||
return first_sets.find(symbol)->second;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
namespace tree_sitter {
|
||||
|
||||
struct SyntaxGrammar;
|
||||
struct LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
|
|
@ -17,8 +18,9 @@ class ParseItemSetBuilder {
|
|||
std::vector<std::tuple<ParseItem, LookaheadSet, bool>> items_to_process;
|
||||
|
||||
public:
|
||||
ParseItemSetBuilder(const SyntaxGrammar &);
|
||||
ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &);
|
||||
void apply_transitive_closure(ParseItemSet *);
|
||||
LookaheadSet get_first_set(rules::Symbol &) const;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -47,8 +47,8 @@ class FirstCharacters : public CharacterAggregator<true, false> {};
|
|||
class LastCharacters : public CharacterAggregator<false, true> {};
|
||||
class AllCharacters : public CharacterAggregator<true, true> {};
|
||||
|
||||
set<Symbol> recovery_tokens(const LexicalGrammar &grammar) {
|
||||
set<Symbol> result;
|
||||
set<Symbol::Index> recovery_tokens(const LexicalGrammar &grammar) {
|
||||
set<Symbol::Index> result;
|
||||
|
||||
AllCharacters all_separator_characters;
|
||||
for (const rule_ptr &separator : grammar.separators)
|
||||
|
|
@ -79,7 +79,7 @@ set<Symbol> recovery_tokens(const LexicalGrammar &grammar) {
|
|||
!all_characters.result.intersects(all_separator_characters.result);
|
||||
|
||||
if ((has_distinct_start && has_distinct_end) || has_no_separators)
|
||||
result.insert(Symbol(i, true));
|
||||
result.insert(i);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ struct LexicalGrammar;
|
|||
|
||||
namespace build_tables {
|
||||
|
||||
std::set<rules::Symbol> recovery_tokens(const LexicalGrammar &);
|
||||
std::set<rules::Symbol::Index> recovery_tokens(const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -167,13 +167,15 @@ ParseStateId ParseTable::add_state() {
|
|||
return states.size() - 1;
|
||||
}
|
||||
|
||||
ParseAction &ParseTable::set_terminal_action(ParseStateId state_id, int index,
|
||||
ParseAction action) {
|
||||
ParseAction &ParseTable::set_terminal_action(ParseStateId state_id,
|
||||
Symbol::Index index,
|
||||
ParseAction action) {
|
||||
states[state_id].terminal_entries[index].actions.clear();
|
||||
return add_terminal_action(state_id, index, action);
|
||||
}
|
||||
|
||||
ParseAction &ParseTable::add_terminal_action(ParseStateId state_id, int index,
|
||||
ParseAction &ParseTable::add_terminal_action(ParseStateId state_id,
|
||||
Symbol::Index index,
|
||||
ParseAction action) {
|
||||
Symbol symbol(index, true);
|
||||
if (action.type == ParseActionTypeShift && action.extra)
|
||||
|
|
@ -186,7 +188,8 @@ ParseAction &ParseTable::add_terminal_action(ParseStateId state_id, int index,
|
|||
return *entry.actions.rbegin();
|
||||
}
|
||||
|
||||
void ParseTable::set_nonterminal_action(ParseStateId state_id, int index,
|
||||
void ParseTable::set_nonterminal_action(ParseStateId state_id,
|
||||
Symbol::Index index,
|
||||
ParseStateId next_state_id) {
|
||||
Symbol symbol(index, false);
|
||||
symbols[symbol].structural = true;
|
||||
|
|
@ -208,12 +211,12 @@ bool ParseTable::merge_state(size_t i, size_t j) {
|
|||
return false;
|
||||
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol symbol(entry.first, true);
|
||||
Symbol::Index index = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
const auto &other_entry = other.terminal_entries.find(symbol.index);
|
||||
const auto &other_entry = other.terminal_entries.find(index);
|
||||
if (other_entry == other.terminal_entries.end()) {
|
||||
if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in())
|
||||
if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index))
|
||||
return false;
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
|
|
@ -224,25 +227,25 @@ bool ParseTable::merge_state(size_t i, size_t j) {
|
|||
}
|
||||
}
|
||||
|
||||
set<Symbol> symbols_to_merge;
|
||||
set<Symbol::Index> symbols_to_merge;
|
||||
|
||||
for (auto &entry : other.terminal_entries) {
|
||||
Symbol symbol(entry.first, true);
|
||||
Symbol::Index index = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
if (!state.terminal_entries.count(symbol.index)) {
|
||||
if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in())
|
||||
if (!state.terminal_entries.count(index)) {
|
||||
if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index))
|
||||
return false;
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(state, entry.second))
|
||||
return false;
|
||||
symbols_to_merge.insert(symbol);
|
||||
symbols_to_merge.insert(index);
|
||||
}
|
||||
}
|
||||
|
||||
for (const Symbol &symbol : symbols_to_merge)
|
||||
state.terminal_entries[symbol.index] = other.terminal_entries.find(symbol.index)->second;
|
||||
for (const Symbol::Index &index : symbols_to_merge)
|
||||
state.terminal_entries[index] = other.terminal_entries.find(index)->second;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -75,8 +75,8 @@ class ParseState {
|
|||
void each_referenced_state(std::function<void(ParseStateId *)>);
|
||||
bool has_shift_action() const;
|
||||
|
||||
std::map<int, ParseTableEntry> terminal_entries;
|
||||
std::map<int, ParseStateId> nonterminal_entries;
|
||||
std::map<rules::Symbol::Index, ParseTableEntry> terminal_entries;
|
||||
std::map<rules::Symbol::Index, ParseStateId> nonterminal_entries;
|
||||
LexStateId lex_state_id;
|
||||
};
|
||||
|
||||
|
|
@ -97,7 +97,7 @@ class ParseTable {
|
|||
std::vector<ParseState> states;
|
||||
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
|
||||
|
||||
std::set<rules::Symbol> mergeable_symbols;
|
||||
std::set<rules::Symbol::Index> mergeable_symbols;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -10,9 +10,9 @@ using std::string;
|
|||
using std::to_string;
|
||||
using std::hash;
|
||||
|
||||
Symbol::Symbol(int index) : index(index), is_token(false) {}
|
||||
Symbol::Symbol(Symbol::Index index) : index(index), is_token(false) {}
|
||||
|
||||
Symbol::Symbol(int index, bool is_token) : index(index), is_token(is_token) {}
|
||||
Symbol::Symbol(Symbol::Index index, bool is_token) : index(index), is_token(is_token) {}
|
||||
|
||||
bool Symbol::operator==(const Symbol &other) const {
|
||||
return (other.index == index) && (other.is_token == is_token);
|
||||
|
|
@ -24,7 +24,7 @@ bool Symbol::operator==(const Rule &rule) const {
|
|||
}
|
||||
|
||||
size_t Symbol::hash_code() const {
|
||||
return hash<int>()(index) ^ hash<bool>()(is_token);
|
||||
return hash<Symbol::Index>()(index) ^ hash<bool>()(is_token);
|
||||
}
|
||||
|
||||
rule_ptr Symbol::copy() const {
|
||||
|
|
@ -44,10 +44,14 @@ bool Symbol::operator<(const Symbol &other) const {
|
|||
return (index < other.index);
|
||||
}
|
||||
|
||||
bool Symbol::is_built_in() const {
|
||||
bool Symbol::is_built_in(Symbol::Index index) {
|
||||
return index < 0;
|
||||
}
|
||||
|
||||
bool Symbol::is_built_in() const {
|
||||
return is_built_in(index);
|
||||
}
|
||||
|
||||
void Symbol::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,8 +9,11 @@ namespace rules {
|
|||
|
||||
class Symbol : public Rule {
|
||||
public:
|
||||
explicit Symbol(int index);
|
||||
Symbol(int index, bool is_token);
|
||||
typedef int Index;
|
||||
|
||||
|
||||
explicit Symbol(Index index);
|
||||
Symbol(Index index, bool is_token);
|
||||
|
||||
bool operator==(const Symbol &other) const;
|
||||
bool operator==(const Rule &other) const;
|
||||
|
|
@ -21,9 +24,10 @@ class Symbol : public Rule {
|
|||
void accept(Visitor *visitor) const;
|
||||
|
||||
bool operator<(const Symbol &other) const;
|
||||
static bool is_built_in(Index);
|
||||
bool is_built_in() const;
|
||||
|
||||
int index;
|
||||
Index index;
|
||||
bool is_token;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -840,9 +840,10 @@ static StackIterateAction parser__repair_consumed_error_callback(
|
|||
SkipPrecedingTokensSession *session = payload;
|
||||
Parser *self = session->parser;
|
||||
TSSymbol lookahead_symbol = session->lookahead_symbol;
|
||||
const TSParseAction *action =
|
||||
ts_language_last_action(self->language, state, lookahead_symbol);
|
||||
if (action && action->type == TSParseActionTypeReduce) {
|
||||
size_t action_count;
|
||||
const TSParseAction *actions =
|
||||
ts_language_actions(self->language, state, lookahead_symbol, &action_count);
|
||||
if (action_count > 0 && actions[0].type == TSParseActionTypeReduce) {
|
||||
return StackIteratePop | StackIterateStop;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue