Merge pull request #46 from tree-sitter/no-reductions-on-nonterminal-lookaheads
Don't include reduce actions for nonterminal lookaheads
This commit is contained in:
commit
11e767bd81
34 changed files with 454 additions and 386 deletions
|
|
@ -14,7 +14,6 @@ typedef unsigned short TSStateId;
|
|||
|
||||
#define ts_builtin_sym_error ((TSSymbol)-1)
|
||||
#define ts_builtin_sym_end 0
|
||||
#define ts_builtin_sym_start 1
|
||||
|
||||
typedef struct {
|
||||
bool visible : 1;
|
||||
|
|
@ -60,6 +59,7 @@ typedef union {
|
|||
|
||||
typedef struct TSLanguage {
|
||||
size_t symbol_count;
|
||||
size_t token_count;
|
||||
const char **symbol_names;
|
||||
const TSSymbolMetadata *symbol_metadata;
|
||||
const unsigned short *parse_table;
|
||||
|
|
@ -103,6 +103,9 @@ typedef struct TSLanguage {
|
|||
* Parse Table Macros
|
||||
*/
|
||||
|
||||
#define STATE(id) id
|
||||
#define ACTIONS(id) id
|
||||
|
||||
#define SHIFT(to_state_value) \
|
||||
{ \
|
||||
{ \
|
||||
|
|
@ -146,6 +149,7 @@ typedef struct TSLanguage {
|
|||
#define EXPORT_LANGUAGE(language_name) \
|
||||
static TSLanguage language = { \
|
||||
.symbol_count = SYMBOL_COUNT, \
|
||||
.token_count = TOKEN_COUNT, \
|
||||
.symbol_metadata = ts_symbol_metadata, \
|
||||
.parse_table = (const unsigned short *)ts_parse_table, \
|
||||
.parse_actions = ts_parse_actions, \
|
||||
|
|
|
|||
|
|
@ -15,12 +15,12 @@
|
|||
'src/compiler/build_tables/build_parse_table.cc',
|
||||
'src/compiler/build_tables/build_tables.cc',
|
||||
'src/compiler/build_tables/recovery_tokens.cc',
|
||||
'src/compiler/build_tables/item_set_closure.cc',
|
||||
'src/compiler/build_tables/lex_item.cc',
|
||||
'src/compiler/build_tables/lex_item_transitions.cc',
|
||||
'src/compiler/build_tables/lex_conflict_manager.cc',
|
||||
'src/compiler/build_tables/lookahead_set.cc',
|
||||
'src/compiler/build_tables/parse_item.cc',
|
||||
'src/compiler/build_tables/parse_item_set_builder.cc',
|
||||
'src/compiler/build_tables/parse_conflict_manager.cc',
|
||||
'src/compiler/build_tables/rule_can_be_blank.cc',
|
||||
'src/compiler/compile.cc',
|
||||
|
|
|
|||
|
|
@ -27,9 +27,7 @@ describe("recovery_tokens(rule)", []() {
|
|||
})),
|
||||
};
|
||||
|
||||
AssertThat(recovery_tokens(grammar), Equals<set<Symbol>>({
|
||||
Symbol(1, true),
|
||||
}));
|
||||
AssertThat(recovery_tokens(grammar), Equals<set<Symbol::Index>>({ 1 }));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include "spec_helper.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
|
|
@ -36,7 +37,7 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
|||
|
||||
it("adds the preferred token as a possible homonym for the discarded one", [&]() {
|
||||
conflict_manager.resolve(AcceptTokenAction(sym2, 1, false), AcceptTokenAction(sym1, 2, false));
|
||||
AssertThat(conflict_manager.possible_homonyms[sym2], Contains(sym1));
|
||||
AssertThat(conflict_manager.possible_homonyms[sym2.index], Contains(sym1.index));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -78,7 +79,7 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
|||
|
||||
it("adds the in-progress tokens as possible extensions of the discarded token", [&]() {
|
||||
conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true));
|
||||
AssertThat(conflict_manager.possible_extensions[sym3], Contains(sym4));
|
||||
AssertThat(conflict_manager.possible_extensions[sym3.index], Contains(sym4.index));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,15 +1,28 @@
|
|||
#include "spec_helper.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
|
||||
using namespace build_tables;
|
||||
using namespace rules;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("item_set_closure", []() {
|
||||
describe("ParseItemSetBuilder", []() {
|
||||
vector<Variable> lexical_variables;
|
||||
for (size_t i = 0; i < 20; i++) {
|
||||
lexical_variables.push_back(Variable{
|
||||
"token_" + to_string(i),
|
||||
VariableTypeNamed,
|
||||
blank(),
|
||||
});
|
||||
}
|
||||
|
||||
LexicalGrammar lexical_grammar{lexical_variables, {}};
|
||||
|
||||
it("adds items at the beginnings of referenced rules", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("rule0", VariableTypeNamed, {
|
||||
|
|
@ -39,29 +52,32 @@ describe("item_set_closure", []() {
|
|||
return grammar.variables[variable_index].productions[production_index];
|
||||
};
|
||||
|
||||
ParseItemSet item_set = item_set_closure(ParseItemSet({
|
||||
ParseItemSet item_set({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, true) }),
|
||||
LookaheadSet({ 10 }),
|
||||
}
|
||||
}), grammar);
|
||||
});
|
||||
|
||||
ParseItemSetBuilder item_set_builder(grammar, lexical_grammar);
|
||||
item_set_builder.apply_transitive_closure(&item_set);
|
||||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, true) })
|
||||
LookaheadSet({ 10 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol(11, true) })
|
||||
LookaheadSet({ 11 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 1), 0),
|
||||
LookaheadSet({ Symbol(11, true) })
|
||||
LookaheadSet({ 11 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(2), production(2, 0), 0),
|
||||
LookaheadSet({ Symbol(11, true) })
|
||||
LookaheadSet({ 11 })
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
|
@ -87,25 +103,28 @@ describe("item_set_closure", []() {
|
|||
return grammar.variables[variable_index].productions[production_index];
|
||||
};
|
||||
|
||||
ParseItemSet item_set = item_set_closure(ParseItemSet({
|
||||
ParseItemSet item_set({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, true) }),
|
||||
LookaheadSet({ 10 }),
|
||||
}
|
||||
}), grammar);
|
||||
});
|
||||
|
||||
ParseItemSetBuilder item_set_builder(grammar, lexical_grammar);
|
||||
item_set_builder.apply_transitive_closure(&item_set);
|
||||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, true) })
|
||||
LookaheadSet({ 10 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol(11, true) })
|
||||
LookaheadSet({ 11 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 1), 0),
|
||||
LookaheadSet({ Symbol(11, true) })
|
||||
LookaheadSet({ 11 })
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
|
@ -91,25 +91,25 @@ describe("ParseItemSet::transitions())", [&]() {
|
|||
// Two symbols into the first production for rule_0
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 2),
|
||||
LookaheadSet({ Symbol(21, true) })
|
||||
LookaheadSet({ 21 })
|
||||
},
|
||||
|
||||
// Two symbols into the second production for rule_0
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 1), 2),
|
||||
LookaheadSet({ Symbol(21, true) })
|
||||
LookaheadSet({ 21 })
|
||||
},
|
||||
|
||||
// At the beginning of the first production for rule_1
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol(22, true) })
|
||||
LookaheadSet({ 22 })
|
||||
},
|
||||
|
||||
// At the end of the first production for rule_2
|
||||
{
|
||||
ParseItem(Symbol(2), production(2, 0), 1),
|
||||
LookaheadSet({ Symbol(22, true) })
|
||||
LookaheadSet({ 22 })
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -122,7 +122,7 @@ describe("ParseItemSet::transitions())", [&]() {
|
|||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 3),
|
||||
LookaheadSet({ Symbol(21, true) })
|
||||
LookaheadSet({ 21 })
|
||||
}
|
||||
}),
|
||||
PrecedenceRange(5, 5)
|
||||
|
|
@ -137,11 +137,11 @@ describe("ParseItemSet::transitions())", [&]() {
|
|||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 1), 3),
|
||||
LookaheadSet({ Symbol(21, true) })
|
||||
LookaheadSet({ 21 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 0), 1),
|
||||
LookaheadSet({ Symbol(22, true) })
|
||||
LookaheadSet({ 22 })
|
||||
},
|
||||
}),
|
||||
PrecedenceRange(6, 7)
|
||||
|
|
|
|||
3
spec/fixtures/error_corpus/c_errors.txt
vendored
3
spec/fixtures/error_corpus/c_errors.txt
vendored
|
|
@ -127,6 +127,5 @@ int b() {
|
|||
(ERROR (identifier) (identifier))
|
||||
(identifier) (number_literal)))
|
||||
(declaration
|
||||
(ERROR (identifier) (identifier))
|
||||
(identifier)
|
||||
(init_declarator (identifier) (number_literal))))))
|
||||
(init_declarator (ERROR (identifier) (identifier)) (identifier) (number_literal))))))
|
||||
|
|
|
|||
|
|
@ -14,10 +14,8 @@ e f;
|
|||
(ERROR (identifier))
|
||||
(identifier)
|
||||
(statement_block
|
||||
(ERROR (identifier))
|
||||
(expression_statement (identifier))))
|
||||
(ERROR (identifier))
|
||||
(expression_statement (identifier)))
|
||||
(expression_statement (ERROR (identifier)) (identifier))))
|
||||
(expression_statement (ERROR (identifier)) (identifier)))
|
||||
|
||||
=======================================================
|
||||
multiple invalid tokens right after the viable prefix
|
||||
|
|
@ -35,8 +33,7 @@ h i j k;
|
|||
(ERROR (identifier) (identifier))
|
||||
(identifier)
|
||||
(statement_block
|
||||
(ERROR (identifier) (identifier) (identifier))
|
||||
(expression_statement (identifier))))
|
||||
(expression_statement (ERROR (identifier) (identifier) (identifier)) (identifier))))
|
||||
(expression_statement
|
||||
(ERROR (identifier) (identifier) (identifier))
|
||||
(identifier)))
|
||||
|
|
|
|||
|
|
@ -75,21 +75,15 @@ ostream &operator<<(ostream &stream, const ParseAction &action) {
|
|||
}
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const ParseTableEntry &entry) {
|
||||
return stream << entry.actions;
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const ParseState &state) {
|
||||
stream << string("#<parse_state ");
|
||||
bool started = false;
|
||||
for (auto entry : state.entries) {
|
||||
if (started)
|
||||
stream << string(", ");
|
||||
stream << entry.first << string(" => {");
|
||||
for (auto &action : entry.second.actions) {
|
||||
stream << string(" ") << action;
|
||||
}
|
||||
stream << string("}");
|
||||
started = true;
|
||||
}
|
||||
stream << string(">");
|
||||
return stream;
|
||||
stream << string("#<parse_state terminal_entries:");
|
||||
stream << state.terminal_entries;
|
||||
stream << " nonterminal_entries: " << state.nonterminal_entries;
|
||||
return stream << string(">");
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const ProductionStep &step) {
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ void assert_consistent(const Tree *tree) {
|
|||
START_TEST
|
||||
|
||||
enum {
|
||||
cat = ts_builtin_sym_start,
|
||||
cat = 1,
|
||||
dog,
|
||||
eel,
|
||||
fox,
|
||||
|
|
|
|||
|
|
@ -114,14 +114,11 @@ class LexTableBuilder {
|
|||
|
||||
void mark_fragile_tokens() {
|
||||
for (ParseState &state : parse_table->states) {
|
||||
for (auto &entry : state.entries) {
|
||||
if (!entry.first.is_token)
|
||||
continue;
|
||||
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
auto homonyms = conflict_manager.possible_homonyms.find(entry.first);
|
||||
if (homonyms != conflict_manager.possible_homonyms.end())
|
||||
for (const Symbol &homonym : homonyms->second)
|
||||
if (state.entries.count(homonym)) {
|
||||
for (Symbol::Index homonym : homonyms->second)
|
||||
if (state.terminal_entries.count(homonym)) {
|
||||
entry.second.reusable = false;
|
||||
break;
|
||||
}
|
||||
|
|
@ -131,8 +128,8 @@ class LexTableBuilder {
|
|||
|
||||
auto extensions = conflict_manager.possible_extensions.find(entry.first);
|
||||
if (extensions != conflict_manager.possible_extensions.end())
|
||||
for (const Symbol &extension : extensions->second)
|
||||
if (state.entries.count(extension)) {
|
||||
for (Symbol::Index extension : extensions->second)
|
||||
if (state.terminal_entries.count(extension)) {
|
||||
entry.second.depends_on_lookahead = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -147,7 +144,7 @@ class LexTableBuilder {
|
|||
}
|
||||
|
||||
auto replacements =
|
||||
remove_duplicate_states<LexTable, AdvanceAction>(&lex_table);
|
||||
remove_duplicate_states<LexTable>(&lex_table);
|
||||
|
||||
for (ParseState &parse_state : parse_table->states) {
|
||||
auto replacement = replacements.find(parse_state.lex_state_id);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
|
@ -9,7 +9,7 @@
|
|||
#include "compiler/build_tables/parse_conflict_manager.h"
|
||||
#include "compiler/build_tables/remove_duplicate_states.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
|
@ -40,6 +40,7 @@ class ParseTableBuilder {
|
|||
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
|
||||
ParseTable parse_table;
|
||||
set<string> conflicts;
|
||||
ParseItemSetBuilder item_set_builder;
|
||||
set<const Production *> fragile_productions;
|
||||
bool allow_any_conflict;
|
||||
|
||||
|
|
@ -48,6 +49,7 @@ class ParseTableBuilder {
|
|||
const LexicalGrammar &lex_grammar)
|
||||
: grammar(grammar),
|
||||
lexical_grammar(lex_grammar),
|
||||
item_set_builder(grammar, lex_grammar),
|
||||
allow_any_conflict(false) {}
|
||||
|
||||
pair<ParseTable, CompileError> build() {
|
||||
|
|
@ -62,7 +64,7 @@ class ParseTableBuilder {
|
|||
add_parse_state(ParseItemSet({
|
||||
{
|
||||
ParseItem(rules::START(), start_production, 0),
|
||||
LookaheadSet({ END_OF_INPUT() }),
|
||||
LookaheadSet({ END_OF_INPUT().index }),
|
||||
},
|
||||
}));
|
||||
|
||||
|
|
@ -88,11 +90,11 @@ class ParseTableBuilder {
|
|||
CompileError process_part_state_queue() {
|
||||
while (!item_sets_to_process.empty()) {
|
||||
auto pair = item_sets_to_process.back();
|
||||
ParseItemSet item_set = item_set_closure(pair.first, grammar);
|
||||
|
||||
ParseItemSet &item_set = pair.first;
|
||||
ParseStateId state_id = pair.second;
|
||||
item_sets_to_process.pop_back();
|
||||
|
||||
item_set_builder.apply_transitive_closure(&item_set);
|
||||
add_reduce_actions(item_set, state_id);
|
||||
add_shift_actions(item_set, state_id);
|
||||
add_shift_extra_actions(state_id);
|
||||
|
|
@ -109,22 +111,21 @@ class ParseTableBuilder {
|
|||
void build_error_parse_state() {
|
||||
ParseState error_state;
|
||||
|
||||
for (const Symbol &symbol : parse_table.mergeable_symbols) {
|
||||
add_out_of_context_parse_state(&error_state, symbol);
|
||||
for (const Symbol::Index index : parse_table.mergeable_symbols) {
|
||||
add_out_of_context_parse_state(&error_state, Symbol(index, true));
|
||||
}
|
||||
|
||||
for (const Symbol &symbol : grammar.extra_tokens) {
|
||||
if (!error_state.entries.count(symbol)) {
|
||||
error_state.entries[symbol].actions.push_back(ParseAction::ShiftExtra());
|
||||
if (!error_state.terminal_entries.count(symbol.index)) {
|
||||
error_state.terminal_entries[symbol.index].actions.push_back(ParseAction::ShiftExtra());
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < grammar.variables.size(); i++) {
|
||||
Symbol symbol(i, false);
|
||||
add_out_of_context_parse_state(&error_state, symbol);
|
||||
add_out_of_context_parse_state(&error_state, Symbol(i, false));
|
||||
}
|
||||
|
||||
error_state.entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0));
|
||||
error_state.terminal_entries[END_OF_INPUT().index].actions.push_back(ParseAction::Recover(0));
|
||||
parse_table.states[0] = error_state;
|
||||
}
|
||||
|
||||
|
|
@ -133,7 +134,11 @@ class ParseTableBuilder {
|
|||
const ParseItemSet &item_set = recovery_states[symbol];
|
||||
if (!item_set.entries.empty()) {
|
||||
ParseStateId state = add_parse_state(item_set);
|
||||
error_state->entries[symbol].actions.push_back(ParseAction::Recover(state));
|
||||
if (symbol.is_token) {
|
||||
error_state->terminal_entries[symbol.index].actions.assign({ ParseAction::Recover(state) });
|
||||
} else {
|
||||
error_state->nonterminal_entries[symbol.index] = state;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -143,7 +148,7 @@ class ParseTableBuilder {
|
|||
ParseStateId state_id = parse_table.add_state();
|
||||
|
||||
parse_state_ids[item_set] = state_id;
|
||||
item_sets_to_process.push_back({ item_set, state_id });
|
||||
item_sets_to_process.push_back({ std::move(item_set), state_id });
|
||||
return state_id;
|
||||
} else {
|
||||
return pair->second;
|
||||
|
|
@ -156,14 +161,19 @@ class ParseTableBuilder {
|
|||
const ParseItemSet &next_item_set = transition.second.first;
|
||||
const PrecedenceRange &precedence = transition.second.second;
|
||||
|
||||
ParseAction *new_action = add_action(
|
||||
state_id, symbol, ParseAction::Shift(0, precedence), item_set);
|
||||
|
||||
if (!allow_any_conflict)
|
||||
if (!allow_any_conflict) {
|
||||
recovery_states[symbol].add(next_item_set);
|
||||
}
|
||||
|
||||
if (new_action)
|
||||
new_action->state_index = add_parse_state(next_item_set);
|
||||
if (symbol.is_token) {
|
||||
ParseAction *new_action = add_terminal_action(
|
||||
state_id, symbol.index, ParseAction::Shift(0, precedence), item_set);
|
||||
if (new_action) {
|
||||
new_action->state_index = add_parse_state(next_item_set);
|
||||
}
|
||||
} else {
|
||||
parse_table.set_nonterminal_action(state_id, symbol.index, add_parse_state(next_item_set));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -183,8 +193,9 @@ class ParseTableBuilder {
|
|||
status.associativity, *item.production);
|
||||
}
|
||||
|
||||
for (const auto &lookahead_sym : *lookahead_symbols.entries)
|
||||
add_action(state_id, lookahead_sym, action, item_set);
|
||||
for (const Symbol::Index lookahead : *lookahead_symbols.entries) {
|
||||
add_terminal_action(state_id, lookahead, action, item_set);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -193,24 +204,25 @@ class ParseTableBuilder {
|
|||
ParseAction action = ParseAction::ShiftExtra();
|
||||
ParseState &state = parse_table.states[state_id];
|
||||
for (const Symbol &extra_symbol : grammar.extra_tokens)
|
||||
if (!state.entries.count(extra_symbol) || state.has_shift_action() ||
|
||||
allow_any_conflict)
|
||||
parse_table.add_action(state_id, extra_symbol, action);
|
||||
if (!state.terminal_entries.count(extra_symbol.index) ||
|
||||
state.has_shift_action() || allow_any_conflict)
|
||||
parse_table.add_terminal_action(state_id, extra_symbol.index, action);
|
||||
}
|
||||
|
||||
void mark_fragile_actions() {
|
||||
for (ParseState &state : parse_table.states) {
|
||||
set<Symbol> symbols_with_multiple_actions;
|
||||
|
||||
for (auto &entry : state.entries) {
|
||||
const Symbol &symbol = entry.first;
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
const Symbol symbol(entry.first, true);
|
||||
auto &actions = entry.second.actions;
|
||||
|
||||
if (actions.size() > 1)
|
||||
if (actions.size() > 1) {
|
||||
symbols_with_multiple_actions.insert(symbol);
|
||||
}
|
||||
|
||||
for (ParseAction &action : actions) {
|
||||
if (action.type == ParseActionTypeReduce && !action.extra) {
|
||||
if (action.type == ParseActionTypeReduce) {
|
||||
if (has_fragile_production(action.production))
|
||||
action.fragile = true;
|
||||
|
||||
|
|
@ -229,21 +241,8 @@ class ParseTableBuilder {
|
|||
break;
|
||||
}
|
||||
}
|
||||
if (!erased)
|
||||
if (!erased) {
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
if (!symbols_with_multiple_actions.empty()) {
|
||||
for (auto &entry : state.entries) {
|
||||
if (!entry.first.is_token) {
|
||||
set<Symbol> first_set = get_first_set(entry.first);
|
||||
for (const Symbol &symbol : symbols_with_multiple_actions) {
|
||||
if (first_set.count(symbol)) {
|
||||
entry.second.reusable = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -251,33 +250,34 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
void remove_duplicate_parse_states() {
|
||||
remove_duplicate_states<ParseTable, ParseAction>(&parse_table);
|
||||
remove_duplicate_states<ParseTable>(&parse_table);
|
||||
}
|
||||
|
||||
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,
|
||||
const ParseAction &new_action,
|
||||
const ParseItemSet &item_set) {
|
||||
ParseAction *add_terminal_action(ParseStateId state_id, Symbol::Index lookahead,
|
||||
const ParseAction &new_action,
|
||||
const ParseItemSet &item_set) {
|
||||
const ParseState &state = parse_table.states[state_id];
|
||||
const auto &current_entry = state.entries.find(lookahead);
|
||||
if (current_entry == state.entries.end())
|
||||
return &parse_table.set_action(state_id, lookahead, new_action);
|
||||
const auto &current_entry = state.terminal_entries.find(lookahead);
|
||||
if (current_entry == state.terminal_entries.end())
|
||||
return &parse_table.set_terminal_action(state_id, lookahead, new_action);
|
||||
if (allow_any_conflict)
|
||||
return &parse_table.add_action(state_id, lookahead, new_action);
|
||||
return &parse_table.add_terminal_action(state_id, lookahead, new_action);
|
||||
|
||||
const ParseAction old_action = current_entry->second.actions[0];
|
||||
auto resolution = conflict_manager.resolve(new_action, old_action);
|
||||
|
||||
switch (resolution.second) {
|
||||
case ConflictTypeNone:
|
||||
if (resolution.first)
|
||||
return &parse_table.set_action(state_id, lookahead, new_action);
|
||||
if (resolution.first) {
|
||||
return &parse_table.set_terminal_action(state_id, lookahead, new_action);
|
||||
}
|
||||
break;
|
||||
|
||||
case ConflictTypeResolved: {
|
||||
if (resolution.first) {
|
||||
if (old_action.type == ParseActionTypeReduce)
|
||||
fragile_productions.insert(old_action.production);
|
||||
return &parse_table.set_action(state_id, lookahead, new_action);
|
||||
return &parse_table.set_terminal_action(state_id, lookahead, new_action);
|
||||
} else {
|
||||
if (new_action.type == ParseActionTypeReduce)
|
||||
fragile_productions.insert(new_action.production);
|
||||
|
|
@ -291,7 +291,7 @@ class ParseTableBuilder {
|
|||
fragile_productions.insert(old_action.production);
|
||||
if (new_action.type == ParseActionTypeReduce)
|
||||
fragile_productions.insert(new_action.production);
|
||||
return &parse_table.add_action(state_id, lookahead, new_action);
|
||||
return &parse_table.add_terminal_action(state_id, lookahead, new_action);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -301,7 +301,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
bool handle_unresolved_conflict(const ParseItemSet &item_set,
|
||||
const Symbol &lookahead) {
|
||||
const Symbol::Index lookahead) {
|
||||
set<Symbol> involved_symbols;
|
||||
set<ParseItem> reduce_items;
|
||||
set<ParseItem> core_shift_items;
|
||||
|
|
@ -319,12 +319,12 @@ class ParseTableBuilder {
|
|||
}
|
||||
} else {
|
||||
if (item.step_index > 0) {
|
||||
set<Symbol> first_set = get_first_set(next_symbol);
|
||||
if (first_set.count(lookahead)) {
|
||||
LookaheadSet first_set = item_set_builder.get_first_set(next_symbol);
|
||||
if (first_set.contains(lookahead)) {
|
||||
involved_symbols.insert(item.lhs());
|
||||
core_shift_items.insert(item);
|
||||
}
|
||||
} else if (next_symbol == lookahead) {
|
||||
} else if (next_symbol.is_token && next_symbol.index == lookahead) {
|
||||
other_shift_items.insert(item);
|
||||
}
|
||||
}
|
||||
|
|
@ -334,7 +334,7 @@ class ParseTableBuilder {
|
|||
if (involved_symbols == conflict_set)
|
||||
return true;
|
||||
|
||||
string description = "Lookahead symbol: " + symbol_name(lookahead) + "\n";
|
||||
string description = "Lookahead symbol: " + symbol_name(Symbol(lookahead, true)) + "\n";
|
||||
|
||||
if (!reduce_items.empty()) {
|
||||
description += "Reduce items:\n";
|
||||
|
|
|
|||
|
|
@ -1,80 +0,0 @@
|
|||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using std::shared_ptr;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
using rules::NONE;
|
||||
|
||||
ParseItemSet item_set_closure(const ParseItemSet &input_item_set,
|
||||
const SyntaxGrammar &grammar) {
|
||||
ParseItemSet result;
|
||||
|
||||
// An item set's closure is defined recursively. Use an explicit stack to
|
||||
// store the recursively-added items.
|
||||
vector<pair<ParseItem, LookaheadSet>> items_to_process(
|
||||
input_item_set.entries.begin(), input_item_set.entries.end());
|
||||
|
||||
while (!items_to_process.empty()) {
|
||||
ParseItem item = items_to_process.back().first;
|
||||
LookaheadSet lookahead_symbols = items_to_process.back().second;
|
||||
items_to_process.pop_back();
|
||||
|
||||
// Add the parse-item and lookahead symbols to the item set.
|
||||
// If they were already present, skip to the next item.
|
||||
if (!result.entries[item].insert_all(lookahead_symbols))
|
||||
continue;
|
||||
|
||||
// If the next symbol in the production is not a non-terminal, skip to the
|
||||
// next item.
|
||||
Symbol next_symbol = item.next_symbol();
|
||||
if (next_symbol == NONE() || next_symbol.is_token ||
|
||||
next_symbol.is_built_in())
|
||||
continue;
|
||||
|
||||
// If the next symbol is the last symbol in the item's production, then the
|
||||
// lookahead symbols for the new items are the same as for the current item.
|
||||
// Otherwise, compute the FOLLOW-SET of the symbol in this production. This
|
||||
// is defined recursively as well, so use another queue to store the
|
||||
// recursively-added follow symbols.
|
||||
LookaheadSet next_lookahead_symbols;
|
||||
size_t next_step = item.step_index + 1;
|
||||
if (next_step == item.production->size()) {
|
||||
next_lookahead_symbols = lookahead_symbols;
|
||||
} else {
|
||||
vector<Symbol> symbols_to_process(
|
||||
{ item.production->at(next_step).symbol });
|
||||
while (!symbols_to_process.empty()) {
|
||||
Symbol symbol = symbols_to_process.back();
|
||||
symbols_to_process.pop_back();
|
||||
|
||||
if (!next_lookahead_symbols.insert(symbol))
|
||||
continue;
|
||||
|
||||
for (const Production &production : grammar.productions(symbol))
|
||||
if (!production.empty())
|
||||
symbols_to_process.push_back(production[0].symbol);
|
||||
}
|
||||
}
|
||||
|
||||
// Add each of the next symbol's productions to be processed recursively.
|
||||
for (const Production &production : grammar.productions(next_symbol))
|
||||
items_to_process.push_back({
|
||||
ParseItem(next_symbol, production, 0), next_lookahead_symbols,
|
||||
});
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
|
||||
#define COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
|
||||
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
ParseItemSet item_set_closure(const ParseItemSet &, const SyntaxGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
|
||||
|
|
@ -14,7 +14,7 @@ bool LexConflictManager::resolve(const LexItemSet &item_set,
|
|||
return true;
|
||||
if (new_action.precedence_range.max >= old_action.precedence) {
|
||||
for (const LexItem &item : item_set.entries)
|
||||
possible_extensions[old_action.symbol].insert(item.lhs);
|
||||
possible_extensions[old_action.symbol.index].insert(item.lhs.index);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
|
@ -44,9 +44,9 @@ bool LexConflictManager::resolve(const AcceptTokenAction &new_action,
|
|||
result = false;
|
||||
|
||||
if (result)
|
||||
possible_homonyms[old_action.symbol].insert(new_action.symbol);
|
||||
possible_homonyms[old_action.symbol.index].insert(new_action.symbol.index);
|
||||
else
|
||||
possible_homonyms[new_action.symbol].insert(old_action.symbol);
|
||||
possible_homonyms[new_action.symbol.index].insert(old_action.symbol.index);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,8 +21,8 @@ class LexConflictManager {
|
|||
const AcceptTokenAction &);
|
||||
bool resolve(const AcceptTokenAction &, const AcceptTokenAction &);
|
||||
|
||||
std::map<rules::Symbol, std::set<rules::Symbol>> possible_homonyms;
|
||||
std::map<rules::Symbol, std::set<rules::Symbol>> possible_extensions;
|
||||
std::map<rules::Symbol::Index, std::set<rules::Symbol::Index>> possible_homonyms;
|
||||
std::map<rules::Symbol::Index, std::set<rules::Symbol::Index>> possible_extensions;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -12,8 +12,8 @@ using rules::Symbol;
|
|||
|
||||
LookaheadSet::LookaheadSet() : entries(nullptr) {}
|
||||
|
||||
LookaheadSet::LookaheadSet(const set<Symbol> &symbols)
|
||||
: entries(make_shared<set<Symbol>>(symbols)) {}
|
||||
LookaheadSet::LookaheadSet(const set<Symbol::Index> &symbols)
|
||||
: entries(make_shared<set<Symbol::Index>>(symbols)) {}
|
||||
|
||||
bool LookaheadSet::empty() const {
|
||||
return !entries.get() || entries->empty();
|
||||
|
|
@ -23,21 +23,23 @@ bool LookaheadSet::operator==(const LookaheadSet &other) const {
|
|||
return *entries == *other.entries;
|
||||
}
|
||||
|
||||
bool LookaheadSet::contains(const Symbol &symbol) const {
|
||||
bool LookaheadSet::contains(const Symbol::Index &symbol) const {
|
||||
return entries->find(symbol) != entries->end();
|
||||
}
|
||||
|
||||
bool LookaheadSet::insert_all(const LookaheadSet &other) {
|
||||
if (!other.entries.get())
|
||||
return false;
|
||||
if (!entries.get())
|
||||
entries = make_shared<set<Symbol>>();
|
||||
entries = make_shared<set<Symbol::Index>>();
|
||||
size_t previous_size = entries->size();
|
||||
entries->insert(other.entries->begin(), other.entries->end());
|
||||
return entries->size() > previous_size;
|
||||
}
|
||||
|
||||
bool LookaheadSet::insert(const Symbol &symbol) {
|
||||
bool LookaheadSet::insert(const Symbol::Index &symbol) {
|
||||
if (!entries.get())
|
||||
entries = make_shared<set<Symbol>>();
|
||||
entries = make_shared<set<Symbol::Index>>();
|
||||
return entries->insert(symbol).second;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,15 +11,15 @@ namespace build_tables {
|
|||
class LookaheadSet {
|
||||
public:
|
||||
LookaheadSet();
|
||||
explicit LookaheadSet(const std::set<rules::Symbol> &);
|
||||
explicit LookaheadSet(const std::set<rules::Symbol::Index> &);
|
||||
|
||||
bool empty() const;
|
||||
bool operator==(const LookaheadSet &) const;
|
||||
bool contains(const rules::Symbol &) const;
|
||||
bool contains(const rules::Symbol::Index &) const;
|
||||
bool insert_all(const LookaheadSet &);
|
||||
bool insert(const rules::Symbol &);
|
||||
bool insert(const rules::Symbol::Index &);
|
||||
|
||||
std::shared_ptr<std::set<rules::Symbol>> entries;
|
||||
std::shared_ptr<std::set<rules::Symbol::Index>> entries;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -102,8 +102,8 @@ size_t ParseItemSet::Hash::operator()(const ParseItemSet &item_set) const {
|
|||
|
||||
const LookaheadSet &lookahead_set = pair.second;
|
||||
result ^= hash<size_t>()(lookahead_set.entries->size());
|
||||
for (auto &symbol : *pair.second.entries)
|
||||
result ^= hash<tree_sitter::rules::Symbol>()(symbol);
|
||||
for (Symbol::Index index : *pair.second.entries)
|
||||
result ^= hash<Symbol::Index>()(index);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
117
src/compiler/build_tables/parse_item_set_builder.cc
Normal file
117
src/compiler/build_tables/parse_item_set_builder.cc
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::map;
|
||||
using std::get;
|
||||
using std::tuple;
|
||||
using std::make_tuple;
|
||||
using std::shared_ptr;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
using rules::NONE;
|
||||
|
||||
// Computes, for every symbol of the grammar, the set of token indices that can
// appear first in a string derived from that symbol.
//
// grammar         - syntax grammar; its variables are the non-terminals.
// lexical_grammar - lexical grammar; its variables are the tokens.
//
// Returns a map keyed by symbol. A token maps to the set containing only its
// own index; a non-terminal maps to the tokens reachable by repeatedly
// following the first symbol of each of its productions.
static map<Symbol, LookaheadSet> build_first_sets(const SyntaxGrammar &grammar,
                                                  const LexicalGrammar &lexical_grammar) {
  map<Symbol, LookaheadSet> result;
  vector<Symbol> symbol_stack;
  set<Symbol> processed_symbols;

  // Symbols are identified by a signed Symbol::Index, so iterate with that
  // type and convert the container size once instead of mixing signed and
  // unsigned operands in the loop condition.
  const Symbol::Index variable_count =
    static_cast<Symbol::Index>(grammar.variables.size());
  for (Symbol::Index i = 0; i < variable_count; i++) {
    Symbol symbol(i);
    LookaheadSet first_set;

    // Depth-first walk over the leading symbols of the productions.
    // `processed_symbols` keeps recursive rules from looping forever.
    processed_symbols.clear();
    symbol_stack.clear();
    symbol_stack.push_back(symbol);
    while (!symbol_stack.empty()) {
      Symbol current_symbol = symbol_stack.back();
      symbol_stack.pop_back();
      if (current_symbol.is_token) {
        first_set.insert(current_symbol.index);
      } else if (processed_symbols.insert(current_symbol).second) {
        for (const Production &production : grammar.productions(current_symbol)) {
          // Only the first step of each production is followed.
          if (!production.empty()) {
            symbol_stack.push_back(production[0].symbol);
          }
        }
      }
    }

    result.insert({symbol, first_set});
  }

  // The FIRST set of a token is the token itself.
  const Symbol::Index token_count =
    static_cast<Symbol::Index>(lexical_grammar.variables.size());
  for (Symbol::Index i = 0; i < token_count; i++) {
    Symbol symbol(i, true);
    result.insert({symbol, LookaheadSet({ i })});
  }

  return result;
}
|
||||
|
||||
// Stores a pointer to the syntax grammar and computes the FIRST sets of all
// symbols up front, so later closure computations only need map lookups.
ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
                                         const LexicalGrammar &lexical_grammar)
    : grammar(&grammar),
      first_sets(build_first_sets(grammar, lexical_grammar)) {}
|
||||
|
||||
// Expands `item_set` in place: for every item whose next symbol is a
// non-terminal, items for all productions of that non-terminal are added,
// together with the lookahead symbols that apply to them.
void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
  // Seed the worklist with the entries that are already in the set. The flag
  // marks them as seeds so they are expanded without being re-inserted.
  items_to_process.clear();
  for (const auto &seed : item_set->entries)
    items_to_process.push_back(make_tuple(seed.first, seed.second, true));

  while (!items_to_process.empty()) {
    // Copy the top of the worklist before popping it; more entries may be
    // pushed further down in this iteration.
    ParseItem item = get<0>(items_to_process.back());
    LookaheadSet lookaheads = get<1>(items_to_process.back());
    const bool is_seed = get<2>(items_to_process.back());
    items_to_process.pop_back();

    // Derived items are merged into the set; if that adds no new lookahead
    // symbol there is nothing further to propagate from this item.
    if (!is_seed) {
      const bool grew = item_set->entries[item].insert_all(lookaheads);
      if (!grew)
        continue;
    }

    // Only items whose next symbol is a regular non-terminal are expanded.
    Symbol expanded = item.next_symbol();
    const bool expandable = !(expanded == NONE()) && !expanded.is_token &&
                            !expanded.is_built_in();
    if (!expandable)
      continue;

    // When the expanded symbol is the last one in the production, the new
    // items inherit this item's lookaheads. Otherwise they get the FIRST set
    // of the symbol that follows it in the production.
    // NOTE(review): assumes every following symbol has an entry in
    // `first_sets` - confirm for built-in symbols.
    const size_t following_step = item.step_index + 1;
    LookaheadSet inherited =
      (following_step == item.production->size())
        ? lookaheads
        : first_sets.find(item.production->at(following_step).symbol)->second;

    // Queue one fresh item (positioned at step 0) per production.
    for (const Production &production : grammar->productions(expanded))
      items_to_process.push_back(
        make_tuple(ParseItem(expanded, production, 0), inherited, false));
  }
}
|
||||
|
||||
// Returns the precomputed FIRST set for `symbol`.
//
// If no set was computed for the symbol, an empty LookaheadSet is returned
// rather than dereferencing the map's end iterator, which would be undefined
// behaviour.
LookaheadSet ParseItemSetBuilder::get_first_set(rules::Symbol &symbol) const {
  auto entry = first_sets.find(symbol);
  if (entry == first_sets.end())
    return LookaheadSet();
  return entry->second;
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
29
src/compiler/build_tables/parse_item_set_builder.h
Normal file
29
src/compiler/build_tables/parse_item_set_builder.h
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_PARSE_ITEM_SET_BUILDER_H_
#define COMPILER_BUILD_TABLES_PARSE_ITEM_SET_BUILDER_H_

#include "compiler/build_tables/parse_item.h"
#include "compiler/rules/symbol.h"
#include <map>
#include <tuple>
#include <vector>

namespace tree_sitter {

struct SyntaxGrammar;
struct LexicalGrammar;

namespace build_tables {

// Computes closures of parse item sets for a grammar, using FIRST sets that
// are built once when the builder is constructed.
class ParseItemSetBuilder {
  // Grammar whose productions are expanded; not owned by the builder.
  const SyntaxGrammar *grammar;

  // FIRST set of each symbol, computed in the constructor.
  std::map<rules::Symbol, LookaheadSet> first_sets;

  // Worklist reused across calls to apply_transitive_closure. Each entry is
  // (item, lookaheads, whether the item came from the original set).
  std::vector<std::tuple<ParseItem, LookaheadSet, bool>> items_to_process;

 public:
  ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &);

  // Adds to the given item set every item reachable by expanding
  // non-terminals that appear as the next symbol of an item.
  void apply_transitive_closure(ParseItemSet *);

  // Returns the FIRST set stored for the given symbol.
  LookaheadSet get_first_set(rules::Symbol &) const;
};

}  // namespace build_tables
}  // namespace tree_sitter

#endif  // COMPILER_BUILD_TABLES_PARSE_ITEM_SET_BUILDER_H_
|
||||
|
|
@ -47,8 +47,8 @@ class FirstCharacters : public CharacterAggregator<true, false> {};
|
|||
class LastCharacters : public CharacterAggregator<false, true> {};
|
||||
class AllCharacters : public CharacterAggregator<true, true> {};
|
||||
|
||||
set<Symbol> recovery_tokens(const LexicalGrammar &grammar) {
|
||||
set<Symbol> result;
|
||||
set<Symbol::Index> recovery_tokens(const LexicalGrammar &grammar) {
|
||||
set<Symbol::Index> result;
|
||||
|
||||
AllCharacters all_separator_characters;
|
||||
for (const rule_ptr &separator : grammar.separators)
|
||||
|
|
@ -79,7 +79,7 @@ set<Symbol> recovery_tokens(const LexicalGrammar &grammar) {
|
|||
!all_characters.result.intersects(all_separator_characters.result);
|
||||
|
||||
if ((has_distinct_start && has_distinct_end) || has_no_separators)
|
||||
result.insert(Symbol(i, true));
|
||||
result.insert(i);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ struct LexicalGrammar;
|
|||
|
||||
namespace build_tables {
|
||||
|
||||
std::set<rules::Symbol> recovery_tokens(const LexicalGrammar &);
|
||||
std::set<rules::Symbol::Index> recovery_tokens(const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
template <typename TableType, typename ActionType>
|
||||
template <typename TableType>
|
||||
std::map<size_t, size_t> remove_duplicate_states(TableType *table) {
|
||||
std::map<size_t, size_t> replacements;
|
||||
|
||||
|
|
@ -46,10 +46,10 @@ std::map<size_t, size_t> remove_duplicate_states(TableType *table) {
|
|||
}
|
||||
|
||||
for (auto &state : table->states)
|
||||
state.each_advance_action([&new_replacements](ActionType *action) {
|
||||
auto new_replacement = new_replacements.find(action->state_index);
|
||||
state.each_referenced_state([&new_replacements](int64_t *state_index) {
|
||||
auto new_replacement = new_replacements.find(*state_index);
|
||||
if (new_replacement != new_replacements.end())
|
||||
action->state_index = new_replacement->second;
|
||||
*state_index = new_replacement->second;
|
||||
});
|
||||
|
||||
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)
|
||||
|
|
|
|||
|
|
@ -115,6 +115,7 @@ class CCodeGenerator {
|
|||
void add_state_and_symbol_counts() {
|
||||
line("#define STATE_COUNT " + to_string(parse_table.states.size()));
|
||||
line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
|
||||
line("#define TOKEN_COUNT " + to_string(lexical_grammar.variables.size() + 1));
|
||||
line();
|
||||
}
|
||||
|
||||
|
|
@ -222,10 +223,15 @@ class CCodeGenerator {
|
|||
for (const auto &state : parse_table.states) {
|
||||
line("[" + to_string(state_id++) + "] = {");
|
||||
indent([&]() {
|
||||
for (const auto &entry : state.entries) {
|
||||
line("[" + symbol_id(entry.first) + "] = ");
|
||||
for (const auto &entry : state.nonterminal_entries) {
|
||||
line("[" + symbol_id(rules::Symbol(entry.first)) + "] = STATE(");
|
||||
add(to_string(entry.second));
|
||||
add("),");
|
||||
}
|
||||
for (const auto &entry : state.terminal_entries) {
|
||||
line("[" + symbol_id(rules::Symbol(entry.first, true)) + "] = ACTIONS(");
|
||||
add(to_string(add_parse_action_list_id(entry.second)));
|
||||
add(",");
|
||||
add("),");
|
||||
}
|
||||
});
|
||||
line("},");
|
||||
|
|
|
|||
|
|
@ -57,9 +57,9 @@ bool LexState::operator==(const LexState &other) const {
|
|||
is_token_start == other.is_token_start;
|
||||
}
|
||||
|
||||
void LexState::each_advance_action(function<void(AdvanceAction *)> fn) {
|
||||
void LexState::each_referenced_state(function<void(LexStateId *)> fn) {
|
||||
for (auto &entry : advance_actions)
|
||||
fn(&entry.second);
|
||||
fn(&entry.second.state_index);
|
||||
}
|
||||
|
||||
LexStateId LexTable::add_state() {
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
typedef int64_t LexStateId;
|
||||
|
||||
typedef enum {
|
||||
LexActionTypeError,
|
||||
LexActionTypeAccept,
|
||||
|
|
@ -24,7 +26,7 @@ struct AdvanceAction {
|
|||
|
||||
bool operator==(const AdvanceAction &other) const;
|
||||
|
||||
size_t state_index;
|
||||
LexStateId state_index;
|
||||
PrecedenceRange precedence_range;
|
||||
bool in_main_token;
|
||||
};
|
||||
|
|
@ -52,15 +54,13 @@ class LexState {
|
|||
LexState();
|
||||
std::set<rules::CharacterSet> expected_inputs() const;
|
||||
bool operator==(const LexState &) const;
|
||||
void each_advance_action(std::function<void(AdvanceAction *)>);
|
||||
void each_referenced_state(std::function<void(LexStateId *)>);
|
||||
|
||||
std::map<rules::CharacterSet, AdvanceAction> advance_actions;
|
||||
AcceptTokenAction accept_action;
|
||||
bool is_token_start;
|
||||
};
|
||||
|
||||
typedef int64_t LexStateId;
|
||||
|
||||
class LexTable {
|
||||
public:
|
||||
LexStateId add_state();
|
||||
|
|
|
|||
|
|
@ -125,29 +125,34 @@ bool ParseTableEntry::operator==(const ParseTableEntry &other) const {
|
|||
ParseState::ParseState() : lex_state_id(-1) {}
|
||||
|
||||
bool ParseState::has_shift_action() const {
|
||||
for (const auto &pair : entries)
|
||||
for (const auto &pair : terminal_entries)
|
||||
if (pair.second.actions.size() > 0 &&
|
||||
pair.second.actions.back().type == ParseActionTypeShift)
|
||||
return true;
|
||||
return false;
|
||||
return (!nonterminal_entries.empty());
|
||||
}
|
||||
|
||||
set<Symbol> ParseState::expected_inputs() const {
|
||||
set<Symbol> result;
|
||||
for (auto &entry : entries)
|
||||
result.insert(entry.first);
|
||||
for (auto &entry : terminal_entries)
|
||||
result.insert(Symbol(entry.first, true));
|
||||
for (auto &entry : nonterminal_entries)
|
||||
result.insert(Symbol(entry.first, false));
|
||||
return result;
|
||||
}
|
||||
|
||||
void ParseState::each_advance_action(function<void(ParseAction *)> fn) {
|
||||
for (auto &entry : entries)
|
||||
// Invokes `fn` with a pointer to every parse state id that this state refers
// to, so that callers can rewrite those ids in place.
//
// Terminal entries contribute the target state of their shift and recover
// actions; non-terminal entries contribute their successor state.
void ParseState::each_referenced_state(function<void(ParseStateId *)> fn) {
  for (auto &entry : terminal_entries)
    for (ParseAction &action : entry.second.actions)
      // Both sides must compare against `action.type`. A bare
      // `|| ParseActionTypeRecover` is a non-zero constant and would make the
      // condition true for every action type.
      if (action.type == ParseActionTypeShift ||
          action.type == ParseActionTypeRecover)
        fn(&action.state_index);

  for (auto &entry : nonterminal_entries)
    fn(&entry.second);
}
|
||||
|
||||
bool ParseState::operator==(const ParseState &other) const {
|
||||
return entries == other.entries;
|
||||
return terminal_entries == other.terminal_entries &&
|
||||
nonterminal_entries == other.nonterminal_entries;
|
||||
}
|
||||
|
||||
set<Symbol> ParseTable::all_symbols() const {
|
||||
|
|
@ -162,35 +167,37 @@ ParseStateId ParseTable::add_state() {
|
|||
return states.size() - 1;
|
||||
}
|
||||
|
||||
ParseAction &ParseTable::set_action(ParseStateId id, Symbol symbol,
|
||||
ParseAction action) {
|
||||
if (action.type == ParseActionTypeShift && action.extra)
|
||||
symbols[symbol].extra = true;
|
||||
else
|
||||
symbols[symbol].structural = true;
|
||||
|
||||
states[id].entries[symbol].actions = { action };
|
||||
return *states[id].entries[symbol].actions.begin();
|
||||
ParseAction &ParseTable::set_terminal_action(ParseStateId state_id,
|
||||
Symbol::Index index,
|
||||
ParseAction action) {
|
||||
states[state_id].terminal_entries[index].actions.clear();
|
||||
return add_terminal_action(state_id, index, action);
|
||||
}
|
||||
|
||||
ParseAction &ParseTable::add_action(ParseStateId id, Symbol symbol,
|
||||
ParseAction action) {
|
||||
ParseAction &ParseTable::add_terminal_action(ParseStateId state_id,
|
||||
Symbol::Index index,
|
||||
ParseAction action) {
|
||||
Symbol symbol(index, true);
|
||||
if (action.type == ParseActionTypeShift && action.extra)
|
||||
symbols[symbol].extra = true;
|
||||
else
|
||||
symbols[symbol].structural = true;
|
||||
|
||||
ParseState &state = states[id];
|
||||
for (ParseAction &existing_action : state.entries[symbol].actions)
|
||||
if (existing_action == action)
|
||||
return existing_action;
|
||||
ParseTableEntry &entry = states[state_id].terminal_entries[index];
|
||||
entry.actions.push_back(action);
|
||||
return *entry.actions.rbegin();
|
||||
}
|
||||
|
||||
state.entries[symbol].actions.push_back(action);
|
||||
return *state.entries[symbol].actions.rbegin();
|
||||
void ParseTable::set_nonterminal_action(ParseStateId state_id,
|
||||
Symbol::Index index,
|
||||
ParseStateId next_state_id) {
|
||||
Symbol symbol(index, false);
|
||||
symbols[symbol].structural = true;
|
||||
states[state_id].nonterminal_entries[index] = next_state_id;
|
||||
}
|
||||
|
||||
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
|
||||
for (const auto &pair : state.entries)
|
||||
for (const auto &pair : state.terminal_entries)
|
||||
if (pair.second == entry)
|
||||
return true;
|
||||
return false;
|
||||
|
|
@ -200,13 +207,16 @@ bool ParseTable::merge_state(size_t i, size_t j) {
|
|||
ParseState &state = states[i];
|
||||
ParseState &other = states[j];
|
||||
|
||||
for (auto &entry : state.entries) {
|
||||
const Symbol &symbol = entry.first;
|
||||
if (state.nonterminal_entries != other.nonterminal_entries)
|
||||
return false;
|
||||
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol::Index index = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
const auto &other_entry = other.entries.find(symbol);
|
||||
if (other_entry == other.entries.end()) {
|
||||
if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in() && symbol.is_token)
|
||||
const auto &other_entry = other.terminal_entries.find(index);
|
||||
if (other_entry == other.terminal_entries.end()) {
|
||||
if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index))
|
||||
return false;
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
|
|
@ -217,25 +227,25 @@ bool ParseTable::merge_state(size_t i, size_t j) {
|
|||
}
|
||||
}
|
||||
|
||||
set<Symbol> symbols_to_merge;
|
||||
set<Symbol::Index> symbols_to_merge;
|
||||
|
||||
for (auto &entry : other.entries) {
|
||||
const Symbol &symbol = entry.first;
|
||||
for (auto &entry : other.terminal_entries) {
|
||||
Symbol::Index index = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
if (!state.entries.count(symbol)) {
|
||||
if (mergeable_symbols.count(symbol) == 0 && !symbol.is_built_in() && symbol.is_token)
|
||||
if (!state.terminal_entries.count(index)) {
|
||||
if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index))
|
||||
return false;
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(state, entry.second))
|
||||
return false;
|
||||
symbols_to_merge.insert(symbol);
|
||||
symbols_to_merge.insert(index);
|
||||
}
|
||||
}
|
||||
|
||||
for (const Symbol &symbol : symbols_to_merge)
|
||||
state.entries[symbol] = other.entries.find(symbol)->second;
|
||||
for (const Symbol::Index &index : symbols_to_merge)
|
||||
state.terminal_entries[index] = other.terminal_entries.find(index)->second;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
typedef uint64_t ParseStateId;
|
||||
typedef int64_t ParseStateId;
|
||||
|
||||
enum ParseActionType {
|
||||
ParseActionTypeError,
|
||||
|
|
@ -72,10 +72,11 @@ class ParseState {
|
|||
std::set<rules::Symbol> expected_inputs() const;
|
||||
bool operator==(const ParseState &) const;
|
||||
bool merge(const ParseState &);
|
||||
void each_advance_action(std::function<void(ParseAction *)>);
|
||||
void each_referenced_state(std::function<void(ParseStateId *)>);
|
||||
bool has_shift_action() const;
|
||||
|
||||
std::map<rules::Symbol, ParseTableEntry> entries;
|
||||
std::map<rules::Symbol::Index, ParseTableEntry> terminal_entries;
|
||||
std::map<rules::Symbol::Index, ParseStateId> nonterminal_entries;
|
||||
LexStateId lex_state_id;
|
||||
};
|
||||
|
||||
|
|
@ -88,16 +89,15 @@ class ParseTable {
|
|||
public:
|
||||
std::set<rules::Symbol> all_symbols() const;
|
||||
ParseStateId add_state();
|
||||
ParseAction &set_action(ParseStateId state_id, rules::Symbol symbol,
|
||||
ParseAction action);
|
||||
ParseAction &add_action(ParseStateId state_id, rules::Symbol symbol,
|
||||
ParseAction action);
|
||||
ParseAction &add_terminal_action(ParseStateId state_id, int, ParseAction);
|
||||
ParseAction &set_terminal_action(ParseStateId state_id, int index, ParseAction);
|
||||
void set_nonterminal_action(ParseStateId state_id, int index, ParseStateId);
|
||||
bool merge_state(size_t i, size_t j);
|
||||
|
||||
std::vector<ParseState> states;
|
||||
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
|
||||
|
||||
std::set<rules::Symbol> mergeable_symbols;
|
||||
std::set<rules::Symbol::Index> mergeable_symbols;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -10,9 +10,9 @@ using std::string;
|
|||
using std::to_string;
|
||||
using std::hash;
|
||||
|
||||
Symbol::Symbol(int index) : index(index), is_token(false) {}
|
||||
Symbol::Symbol(Symbol::Index index) : index(index), is_token(false) {}
|
||||
|
||||
Symbol::Symbol(int index, bool is_token) : index(index), is_token(is_token) {}
|
||||
Symbol::Symbol(Symbol::Index index, bool is_token) : index(index), is_token(is_token) {}
|
||||
|
||||
bool Symbol::operator==(const Symbol &other) const {
|
||||
return (other.index == index) && (other.is_token == is_token);
|
||||
|
|
@ -24,7 +24,7 @@ bool Symbol::operator==(const Rule &rule) const {
|
|||
}
|
||||
|
||||
size_t Symbol::hash_code() const {
|
||||
return hash<int>()(index) ^ hash<bool>()(is_token);
|
||||
return hash<Symbol::Index>()(index) ^ hash<bool>()(is_token);
|
||||
}
|
||||
|
||||
rule_ptr Symbol::copy() const {
|
||||
|
|
@ -37,17 +37,21 @@ string Symbol::to_string() const {
|
|||
}
|
||||
|
||||
bool Symbol::operator<(const Symbol &other) const {
|
||||
if (!is_token && other.is_token)
|
||||
return true;
|
||||
if (is_token && !other.is_token)
|
||||
return true;
|
||||
if (!is_token && other.is_token)
|
||||
return false;
|
||||
return (index < other.index);
|
||||
}
|
||||
|
||||
bool Symbol::is_built_in() const {
|
||||
bool Symbol::is_built_in(Symbol::Index index) {
|
||||
return index < 0;
|
||||
}
|
||||
|
||||
bool Symbol::is_built_in() const {
|
||||
return is_built_in(index);
|
||||
}
|
||||
|
||||
void Symbol::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,8 +9,11 @@ namespace rules {
|
|||
|
||||
class Symbol : public Rule {
|
||||
public:
|
||||
explicit Symbol(int index);
|
||||
Symbol(int index, bool is_token);
|
||||
typedef int Index;
|
||||
|
||||
|
||||
explicit Symbol(Index index);
|
||||
Symbol(Index index, bool is_token);
|
||||
|
||||
bool operator==(const Symbol &other) const;
|
||||
bool operator==(const Rule &other) const;
|
||||
|
|
@ -21,9 +24,10 @@ class Symbol : public Rule {
|
|||
void accept(Visitor *visitor) const;
|
||||
|
||||
bool operator<(const Symbol &other) const;
|
||||
static bool is_built_in(Index);
|
||||
bool is_built_in() const;
|
||||
|
||||
int index;
|
||||
Index index;
|
||||
bool is_token;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ void ts_language_table_entry(const TSLanguage *self, TSStateId state,
|
|||
}
|
||||
action_index = 0;
|
||||
} else {
|
||||
assert(symbol < self->token_count);
|
||||
action_index = self->parse_table[state * self->symbol_count + symbol];
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,10 +15,9 @@ typedef struct {
|
|||
bool depends_on_lookahead;
|
||||
} TableEntry;
|
||||
|
||||
void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol,
|
||||
TableEntry *);
|
||||
void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *);
|
||||
|
||||
bool ts_language_symbol_is_in_progress(const TSLanguage *, TSStateId, TSSymbol);
|
||||
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
|
||||
|
||||
static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
|
||||
TSStateId state,
|
||||
|
|
@ -30,25 +29,26 @@ static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
|
|||
return entry.actions;
|
||||
}
|
||||
|
||||
static inline const TSParseAction *ts_language_last_action(
|
||||
const TSLanguage *self, TSStateId state, TSSymbol symbol) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
if (entry.action_count)
|
||||
return &entry.actions[entry.action_count - 1];
|
||||
else
|
||||
return NULL;
|
||||
static inline TSStateId ts_language_next_state(const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol) {
|
||||
if (symbol == ts_builtin_sym_error) {
|
||||
return 0;
|
||||
} else if (symbol < self->token_count) {
|
||||
size_t count;
|
||||
const TSParseAction *actions = ts_language_actions(self, state, symbol, &count);
|
||||
if (count > 0) {
|
||||
TSParseAction action = actions[count - 1];
|
||||
if (action.type == TSParseActionTypeShift || action.type == TSParseActionTypeRecover) {
|
||||
return action.params.to_state;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
return self->parse_table[state * self->symbol_count + symbol];
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool ts_language_is_reusable(const TSLanguage *self,
|
||||
TSStateId state, TSSymbol symbol) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
return entry.is_reusable;
|
||||
}
|
||||
|
||||
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -87,11 +87,7 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) {
|
|||
if (child->symbol == ts_builtin_sym_error) {
|
||||
state = ERROR_STATE;
|
||||
} else if (!child->extra) {
|
||||
const TSParseAction *action =
|
||||
ts_language_last_action(self->language, state, child->symbol);
|
||||
assert(action && (action->type == TSParseActionTypeShift ||
|
||||
action->type == TSParseActionTypeRecover));
|
||||
state = action->params.to_state;
|
||||
state = ts_language_next_state(self->language, state, child->symbol);
|
||||
}
|
||||
|
||||
ts_stack_push(self->stack, slice.version, child, pending, state);
|
||||
|
|
@ -486,13 +482,8 @@ static Reduction parser__reduce(Parser *self, StackVersion version,
|
|||
parent->parse_state = state;
|
||||
}
|
||||
|
||||
const TSParseAction *action =
|
||||
ts_language_last_action(language, state, symbol);
|
||||
assert(action->type == TSParseActionTypeShift ||
|
||||
action->type == TSParseActionTypeRecover);
|
||||
|
||||
if (action->type == TSParseActionTypeRecover && child_count > 1 &&
|
||||
allow_skipping) {
|
||||
TSStateId next_state = ts_language_next_state(language, state, symbol);
|
||||
if (state == ERROR_STATE && allow_skipping) {
|
||||
StackVersion other_version =
|
||||
ts_stack_duplicate_version(self->stack, slice.version);
|
||||
|
||||
|
|
@ -508,10 +499,10 @@ static Reduction parser__reduce(Parser *self, StackVersion version,
|
|||
ts_stack_remove_version(self->stack, other_version);
|
||||
}
|
||||
|
||||
parser__push(self, slice.version, parent, action->params.to_state);
|
||||
parser__push(self, slice.version, parent, next_state);
|
||||
for (size_t j = parent->child_count; j < slice.trees.size; j++) {
|
||||
Tree *tree = slice.trees.contents[j];
|
||||
parser__push(self, slice.version, tree, action->params.to_state);
|
||||
parser__push(self, slice.version, tree, next_state);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -540,26 +531,24 @@ static inline const TSParseAction *parser__reductions_after_sequence(
|
|||
if (child_count == tree_count_below)
|
||||
break;
|
||||
Tree *tree = trees_below->contents[trees_below->size - 1 - i];
|
||||
const TSParseAction *action =
|
||||
ts_language_last_action(self->language, state, tree->symbol);
|
||||
if (!action || action->type != TSParseActionTypeShift)
|
||||
TSStateId next_state = ts_language_next_state(self->language, state, tree->symbol);
|
||||
if (next_state == ERROR_STATE)
|
||||
return NULL;
|
||||
if (action->extra || tree->extra)
|
||||
continue;
|
||||
child_count++;
|
||||
state = action->params.to_state;
|
||||
if (next_state != state) {
|
||||
child_count++;
|
||||
state = next_state;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < trees_above->size; i++) {
|
||||
Tree *tree = trees_above->contents[i];
|
||||
const TSParseAction *action =
|
||||
ts_language_last_action(self->language, state, tree->symbol);
|
||||
if (!action || action->type != TSParseActionTypeShift)
|
||||
TSStateId next_state = ts_language_next_state(self->language, state, tree->symbol);
|
||||
if (next_state == ERROR_STATE)
|
||||
return NULL;
|
||||
if (action->extra || tree->extra)
|
||||
continue;
|
||||
child_count++;
|
||||
state = action->params.to_state;
|
||||
if (next_state != state) {
|
||||
child_count++;
|
||||
state = next_state;
|
||||
}
|
||||
}
|
||||
|
||||
const TSParseAction *actions =
|
||||
|
|
@ -610,15 +599,13 @@ static StackIterateAction parser__error_repair_callback(
|
|||
continue;
|
||||
}
|
||||
|
||||
const TSParseAction *repair_symbol_action =
|
||||
ts_language_last_action(self->language, state, repair->symbol);
|
||||
if (!repair_symbol_action ||
|
||||
repair_symbol_action->type != TSParseActionTypeShift)
|
||||
TSStateId state_after_repair = ts_language_next_state(self->language, state, repair->symbol);
|
||||
if (state == ERROR_STATE || state_after_repair == ERROR_STATE)
|
||||
continue;
|
||||
|
||||
TSStateId state_after_repair = repair_symbol_action->params.to_state;
|
||||
if (!ts_language_last_action(self->language, state_after_repair,
|
||||
lookahead_symbol))
|
||||
size_t action_count;
|
||||
ts_language_actions(self->language, state_after_repair, lookahead_symbol, &action_count);
|
||||
if (action_count == 0)
|
||||
continue;
|
||||
|
||||
if (count_needed_below_error != last_repair_count) {
|
||||
|
|
@ -795,7 +782,7 @@ static bool parser__do_potential_reductions(
|
|||
size_t previous_version_count = ts_stack_version_count(self->stack);
|
||||
|
||||
array_clear(&self->reduce_actions);
|
||||
for (TSSymbol symbol = 0; symbol < self->language->symbol_count; symbol++) {
|
||||
for (TSSymbol symbol = 0; symbol < self->language->token_count; symbol++) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self->language, state, symbol, &entry);
|
||||
for (size_t i = 0; i < entry.action_count; i++) {
|
||||
|
|
@ -857,9 +844,10 @@ static StackIterateAction parser__repair_consumed_error_callback(
|
|||
SkipPrecedingTokensSession *session = payload;
|
||||
Parser *self = session->parser;
|
||||
TSSymbol lookahead_symbol = session->lookahead_symbol;
|
||||
const TSParseAction *action =
|
||||
ts_language_last_action(self->language, state, lookahead_symbol);
|
||||
if (action && action->type == TSParseActionTypeReduce) {
|
||||
size_t action_count;
|
||||
const TSParseAction *actions =
|
||||
ts_language_actions(self->language, state, lookahead_symbol, &action_count);
|
||||
if (action_count > 0 && actions[0].type == TSParseActionTypeReduce) {
|
||||
return StackIteratePop | StackIterateStop;
|
||||
}
|
||||
}
|
||||
|
|
@ -982,6 +970,17 @@ static void parser__advance(Parser *self, StackVersion version,
|
|||
|
||||
switch (action.type) {
|
||||
case TSParseActionTypeShift: {
|
||||
bool extra = action.extra;
|
||||
TSStateId next_state;
|
||||
|
||||
if (action.extra) {
|
||||
next_state = state;
|
||||
LOG("shift_extra");
|
||||
} else {
|
||||
next_state = action.params.to_state;
|
||||
LOG("shift state:%u", next_state);
|
||||
}
|
||||
|
||||
if (lookahead->child_count > 0) {
|
||||
if (parser__breakdown_lookahead(self, &lookahead, state,
|
||||
reusable_node)) {
|
||||
|
|
@ -992,20 +991,10 @@ static void parser__advance(Parser *self, StackVersion version,
|
|||
}
|
||||
}
|
||||
|
||||
action = *ts_language_last_action(self->language, state,
|
||||
lookahead->symbol);
|
||||
next_state = ts_language_next_state(self->language, state, lookahead->symbol);
|
||||
}
|
||||
|
||||
TSStateId next_state;
|
||||
if (action.extra) {
|
||||
next_state = state;
|
||||
LOG("shift_extra");
|
||||
} else {
|
||||
next_state = action.params.to_state;
|
||||
LOG("shift state:%u", next_state);
|
||||
}
|
||||
|
||||
parser__shift(self, version, next_state, lookahead, action.extra);
|
||||
parser__shift(self, version, next_state, lookahead, extra);
|
||||
|
||||
if (lookahead == reusable_node->tree)
|
||||
parser__pop_reusable_node(reusable_node);
|
||||
|
|
@ -1045,7 +1034,6 @@ static void parser__advance(Parser *self, StackVersion version,
|
|||
|
||||
LOG("accept");
|
||||
parser__accept(self, version, lookahead);
|
||||
|
||||
ts_tree_release(lookahead);
|
||||
return;
|
||||
}
|
||||
|
|
@ -1057,14 +1045,10 @@ static void parser__advance(Parser *self, StackVersion version,
|
|||
lookahead = reusable_node->tree;
|
||||
ts_tree_retain(lookahead);
|
||||
}
|
||||
action =
|
||||
*ts_language_last_action(self->language, state, lookahead->symbol);
|
||||
|
||||
parser__recover(self, version, action.params.to_state, lookahead);
|
||||
|
||||
if (lookahead == reusable_node->tree)
|
||||
parser__pop_reusable_node(reusable_node);
|
||||
|
||||
ts_tree_release(lookahead);
|
||||
return;
|
||||
}
|
||||
|
|
@ -1086,7 +1070,7 @@ static void parser__advance(Parser *self, StackVersion version,
|
|||
return;
|
||||
}
|
||||
|
||||
parser__handle_error(self, version, lookahead->symbol);
|
||||
parser__handle_error(self, version, lookahead->first_leaf.symbol);
|
||||
|
||||
if (ts_stack_is_halted(self->stack, version)) {
|
||||
ts_tree_release(lookahead);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue