Refine logic for deciding when tokens need to be re-lexed
* While generating the lex table, note which tokens can match the same string. A token needs to be re-lexed when it has possible homonyms in the current state.
* Also note which tokens can match substrings of other tokens. A token needs to be re-lexed when there are viable tokens that could match longer strings in the current state and the next token has been edited.
* Remove the logic for marking tokens as fragile on creation.
* Move the reusability/non-reusability of symbols off of individual actions and onto the entire entry for the state & symbol.
This commit is contained in:
parent
45f7cee0c8
commit
38c144b4a3
19 changed files with 337 additions and 257 deletions
|
|
@ -51,7 +51,6 @@ typedef struct TSLexer {
|
|||
int32_t lookahead;
|
||||
TSStateId starting_state;
|
||||
TSSymbol result_symbol;
|
||||
bool result_is_fragile;
|
||||
bool result_follows_error;
|
||||
int32_t first_unexpected_character;
|
||||
|
||||
|
|
@ -79,12 +78,15 @@ typedef struct {
|
|||
TSParseActionType type : 3;
|
||||
bool extra : 1;
|
||||
bool fragile : 1;
|
||||
bool can_hide_split : 1;
|
||||
} TSParseAction;
|
||||
|
||||
typedef union {
|
||||
TSParseAction action;
|
||||
unsigned int count;
|
||||
struct {
|
||||
unsigned short count;
|
||||
bool reusable : 1;
|
||||
bool depends_on_lookahead : 1;
|
||||
};
|
||||
} TSParseActionEntry;
|
||||
|
||||
struct TSLanguage {
|
||||
|
|
@ -125,13 +127,6 @@ struct TSLanguage {
|
|||
GO_TO_STATE(state_value); \
|
||||
}
|
||||
|
||||
#define ACCEPT_FRAGILE_TOKEN(symbol_value) \
|
||||
{ \
|
||||
lexer->result_is_fragile = true; \
|
||||
lexer->result_symbol = symbol_value; \
|
||||
return true; \
|
||||
}
|
||||
|
||||
#define ACCEPT_TOKEN(symbol_value) \
|
||||
{ \
|
||||
lexer->result_symbol = symbol_value; \
|
||||
|
|
@ -151,23 +146,16 @@ struct TSLanguage {
|
|||
* Parse Table Macros
|
||||
*/
|
||||
|
||||
enum {
|
||||
FRAGILE = 1,
|
||||
CAN_HIDE_SPLIT = 2,
|
||||
};
|
||||
|
||||
#define ERROR() \
|
||||
{ \
|
||||
{ .type = TSParseActionTypeError } \
|
||||
}
|
||||
|
||||
#define SHIFT(to_state_value, flags) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeShift, \
|
||||
.can_hide_split = (flags & CAN_HIDE_SPLIT) != 0, \
|
||||
.data = {.to_state = to_state_value } \
|
||||
} \
|
||||
#define SHIFT(to_state_value) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeShift, .data = {.to_state = to_state_value } \
|
||||
} \
|
||||
}
|
||||
|
||||
#define RECOVER(to_state_value) \
|
||||
|
|
@ -191,11 +179,18 @@ enum {
|
|||
} \
|
||||
}
|
||||
|
||||
#define REDUCE(symbol_val, child_count_val, flags) \
|
||||
#define REDUCE(symbol_val, child_count_val) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeReduce, .fragile = (flags & FRAGILE) != 0, \
|
||||
.can_hide_split = (flags & CAN_HIDE_SPLIT) != 0, \
|
||||
.type = TSParseActionTypeReduce, .fragile = false, \
|
||||
.data = {.symbol = symbol_val, .child_count = child_count_val } \
|
||||
} \
|
||||
}
|
||||
|
||||
#define REDUCE_FRAGILE(symbol_val, child_count_val) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeReduce, .fragile = true, \
|
||||
.data = {.symbol = symbol_val, .child_count = child_count_val } \
|
||||
} \
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
#include "spec_helper.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
|
@ -14,14 +16,16 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
|||
Symbol sym1(0, true);
|
||||
Symbol sym2(1, true);
|
||||
Symbol sym3(2, true);
|
||||
Symbol sym4(3, true);
|
||||
LexItemSet item_set({ LexItem(sym4, blank() )});
|
||||
|
||||
it("favors advance actions over empty accept token actions", [&]() {
|
||||
update = conflict_manager.resolve(AdvanceAction(2, {0, 0}, true), AcceptTokenAction());
|
||||
update = conflict_manager.resolve(item_set, AdvanceAction(2, {0, 0}, true), AcceptTokenAction());
|
||||
AssertThat(update, IsTrue());
|
||||
});
|
||||
|
||||
describe("accept-token/accept-token conflicts", [&]() {
|
||||
describe("when one tokens' precedence values differ", [&]() {
|
||||
describe("when the tokens' precedence values differ", [&]() {
|
||||
it("favors the token with higher precedence", [&]() {
|
||||
update = conflict_manager.resolve(AcceptTokenAction(sym2, 1, false), AcceptTokenAction(sym1, 2, false));
|
||||
AssertThat(update, IsFalse());
|
||||
|
|
@ -30,9 +34,9 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
|||
AssertThat(update, IsTrue());
|
||||
});
|
||||
|
||||
it("adds the discarded token to the 'fragile tokens' set", [&]() {
|
||||
update = conflict_manager.resolve(AcceptTokenAction(sym2, 1, false), AcceptTokenAction(sym1, 2, false));
|
||||
AssertThat(conflict_manager.fragile_tokens, Contains(sym2));
|
||||
it("adds the preferred token as a possible homonym for the discarded one", [&]() {
|
||||
conflict_manager.resolve(AcceptTokenAction(sym2, 1, false), AcceptTokenAction(sym1, 2, false));
|
||||
AssertThat(conflict_manager.possible_homonyms[sym2], Contains(sym1));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -60,16 +64,22 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
|||
describe("advance/accept-token conflicts", [&]() {
|
||||
describe("when the token to accept has higher precedence", [&]() {
|
||||
it("prefers the accept-token action", [&]() {
|
||||
update = conflict_manager.resolve(AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true));
|
||||
update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true));
|
||||
AssertThat(update, IsFalse());
|
||||
AssertThat(conflict_manager.possible_extensions, IsEmpty());
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the token to accept does not have a higher precedence", [&]() {
|
||||
it("favors the advance action", [&]() {
|
||||
update = conflict_manager.resolve(AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 2, true));
|
||||
update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 2, true));
|
||||
AssertThat(update, IsTrue());
|
||||
});
|
||||
|
||||
it("adds the in-progress tokens as possible extensions of the discarded token", [&]() {
|
||||
conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true));
|
||||
AssertThat(conflict_manager.possible_extensions[sym3], Contains(sym4));
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -78,11 +78,11 @@ ostream &operator<<(ostream &stream, const ParseAction &action) {
|
|||
ostream &operator<<(ostream &stream, const ParseState &state) {
|
||||
stream << string("#<parse_state ");
|
||||
bool started = false;
|
||||
for (auto pair : state.actions) {
|
||||
for (auto entry : state.entries) {
|
||||
if (started)
|
||||
stream << string(", ");
|
||||
stream << pair.first << string(" => {");
|
||||
for (auto &action : pair.second) {
|
||||
stream << entry.first << string(" => {");
|
||||
for (auto &action : entry.second.actions) {
|
||||
stream << string(" ") << action;
|
||||
}
|
||||
stream << string("}");
|
||||
|
|
|
|||
|
|
@ -92,7 +92,8 @@ class LexTableBuilder {
|
|||
AdvanceAction action(-1, transition.precedence, transition.in_main_token);
|
||||
|
||||
auto current_action = lex_table.state(state_id).accept_action;
|
||||
if (conflict_manager.resolve(action, current_action)) {
|
||||
if (conflict_manager.resolve(transition.destination, action,
|
||||
current_action)) {
|
||||
action.state_index = add_lex_state(transition.destination);
|
||||
lex_table.state(state_id).advance_actions[characters] = action;
|
||||
}
|
||||
|
|
@ -114,10 +115,31 @@ class LexTableBuilder {
|
|||
}
|
||||
|
||||
void mark_fragile_tokens() {
|
||||
for (LexState &state : lex_table.states)
|
||||
if (state.accept_action.is_present())
|
||||
if (conflict_manager.fragile_tokens.count(state.accept_action.symbol))
|
||||
state.accept_action.is_fragile = true;
|
||||
for (ParseState &state : parse_table->states) {
|
||||
for (auto &entry : state.entries) {
|
||||
if (!entry.first.is_token)
|
||||
continue;
|
||||
|
||||
auto homonyms = conflict_manager.possible_homonyms.find(entry.first);
|
||||
if (homonyms != conflict_manager.possible_homonyms.end())
|
||||
for (const Symbol &homonym : homonyms->second)
|
||||
if (state.entries.count(homonym)) {
|
||||
entry.second.reusable = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!entry.second.reusable)
|
||||
continue;
|
||||
|
||||
auto extensions = conflict_manager.possible_extensions.find(entry.first);
|
||||
if (extensions != conflict_manager.possible_extensions.end())
|
||||
for (const Symbol &extension : extensions->second)
|
||||
if (state.entries.count(extension)) {
|
||||
entry.second.depends_on_lookahead = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void remove_duplicate_lex_states() {
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
for (const Symbol &symbol : grammar.extra_tokens) {
|
||||
parse_table.error_state.actions[symbol].push_back(
|
||||
parse_table.error_state.entries[symbol].actions.push_back(
|
||||
ParseAction::ShiftExtra());
|
||||
}
|
||||
|
||||
|
|
@ -119,7 +119,7 @@ class ParseTableBuilder {
|
|||
add_out_of_context_parse_state(symbol);
|
||||
}
|
||||
|
||||
parse_table.error_state.actions[rules::END_OF_INPUT()].push_back(
|
||||
parse_table.error_state.entries[rules::END_OF_INPUT()].actions.push_back(
|
||||
ParseAction::Shift(0, PrecedenceRange()));
|
||||
}
|
||||
|
||||
|
|
@ -127,7 +127,7 @@ class ParseTableBuilder {
|
|||
const ParseItemSet &item_set = recovery_states[symbol];
|
||||
if (!item_set.entries.empty()) {
|
||||
ParseStateId state = add_parse_state(item_set);
|
||||
parse_table.error_state.actions[symbol].push_back(
|
||||
parse_table.error_state.entries[symbol].actions.push_back(
|
||||
ParseAction::Shift(state, PrecedenceRange()));
|
||||
}
|
||||
}
|
||||
|
|
@ -198,15 +198,15 @@ class ParseTableBuilder {
|
|||
const ParseState &state = parse_table.states[state_id];
|
||||
|
||||
for (const Symbol &extra_symbol : grammar.extra_tokens) {
|
||||
const auto &actions_for_symbol = state.actions.find(extra_symbol);
|
||||
if (actions_for_symbol == state.actions.end())
|
||||
const auto &entry_for_symbol = state.entries.find(extra_symbol);
|
||||
if (entry_for_symbol == state.entries.end())
|
||||
continue;
|
||||
|
||||
for (const ParseAction &action : actions_for_symbol->second)
|
||||
for (const ParseAction &action : entry_for_symbol->second.actions)
|
||||
if (action.type == ParseActionTypeShift && !action.extra) {
|
||||
size_t dest_state_id = action.state_index;
|
||||
ParseAction reduce_extra = ParseAction::ReduceExtra(extra_symbol);
|
||||
for (const auto &pair : state.actions)
|
||||
for (const auto &pair : state.entries)
|
||||
add_action(dest_state_id, pair.first, reduce_extra, null_item_set);
|
||||
}
|
||||
}
|
||||
|
|
@ -216,11 +216,14 @@ class ParseTableBuilder {
|
|||
for (ParseState &state : parse_table.states) {
|
||||
set<Symbol> symbols_with_multiple_actions;
|
||||
|
||||
for (auto &entry : state.actions) {
|
||||
if (entry.second.size() > 1)
|
||||
symbols_with_multiple_actions.insert(entry.first);
|
||||
for (auto &entry : state.entries) {
|
||||
const Symbol &symbol = entry.first;
|
||||
auto &actions = entry.second.actions;
|
||||
|
||||
for (ParseAction &action : entry.second) {
|
||||
if (actions.size() > 1)
|
||||
symbols_with_multiple_actions.insert(symbol);
|
||||
|
||||
for (ParseAction &action : actions) {
|
||||
if (action.type == ParseActionTypeReduce && !action.extra) {
|
||||
if (has_fragile_production(action.production))
|
||||
action.fragile = true;
|
||||
|
|
@ -231,11 +234,11 @@ class ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
for (auto i = entry.second.begin(); i != entry.second.end();) {
|
||||
for (auto i = actions.begin(); i != actions.end();) {
|
||||
bool erased = false;
|
||||
for (auto j = entry.second.begin(); j != i; j++) {
|
||||
for (auto j = actions.begin(); j != i; j++) {
|
||||
if (*j == *i) {
|
||||
entry.second.erase(i);
|
||||
actions.erase(i);
|
||||
erased = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -246,12 +249,12 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
if (!symbols_with_multiple_actions.empty()) {
|
||||
for (auto &entry : state.actions) {
|
||||
for (auto &entry : state.entries) {
|
||||
if (!entry.first.is_token) {
|
||||
set<Symbol> first_set = get_first_set(entry.first);
|
||||
for (const Symbol &symbol : symbols_with_multiple_actions) {
|
||||
if (first_set.count(symbol)) {
|
||||
entry.second[0].can_hide_split = true;
|
||||
entry.second.reusable = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -276,14 +279,14 @@ class ParseTableBuilder {
|
|||
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,
|
||||
const ParseAction &new_action,
|
||||
const ParseItemSet &item_set) {
|
||||
const auto &current_actions = parse_table.states[state_id].actions;
|
||||
const auto &current_entry = current_actions.find(lookahead);
|
||||
if (current_entry == current_actions.end())
|
||||
const ParseState &state = parse_table.states[state_id];
|
||||
const auto &current_entry = state.entries.find(lookahead);
|
||||
if (current_entry == state.entries.end())
|
||||
return &parse_table.set_action(state_id, lookahead, new_action);
|
||||
if (allow_any_conflict)
|
||||
return &parse_table.add_action(state_id, lookahead, new_action);
|
||||
|
||||
const ParseAction old_action = current_entry->second[0];
|
||||
const ParseAction old_action = current_entry->second.actions[0];
|
||||
auto resolution = conflict_manager.resolve(new_action, old_action);
|
||||
|
||||
switch (resolution.second) {
|
||||
|
|
|
|||
|
|
@ -2,15 +2,23 @@
|
|||
#include <utility>
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
bool LexConflictManager::resolve(const AdvanceAction &new_action,
|
||||
bool LexConflictManager::resolve(const LexItemSet &item_set,
|
||||
const AdvanceAction &new_action,
|
||||
const AcceptTokenAction &old_action) {
|
||||
if (!old_action.is_present())
|
||||
return true;
|
||||
return new_action.precedence_range.max >= old_action.precedence;
|
||||
if (new_action.precedence_range.max >= old_action.precedence) {
|
||||
for (const LexItem &item : item_set.entries)
|
||||
possible_extensions[old_action.symbol].insert(item.lhs);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool LexConflictManager::resolve(const AcceptTokenAction &new_action,
|
||||
|
|
@ -36,9 +44,9 @@ bool LexConflictManager::resolve(const AcceptTokenAction &new_action,
|
|||
result = false;
|
||||
|
||||
if (result)
|
||||
fragile_tokens.insert(old_action.symbol);
|
||||
possible_homonyms[old_action.symbol].insert(new_action.symbol);
|
||||
else
|
||||
fragile_tokens.insert(new_action.symbol);
|
||||
possible_homonyms[new_action.symbol].insert(old_action.symbol);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
|
||||
#define COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
|
@ -12,12 +13,16 @@ struct AcceptTokenAction;
|
|||
|
||||
namespace build_tables {
|
||||
|
||||
class LexItemSet;
|
||||
|
||||
class LexConflictManager {
|
||||
public:
|
||||
bool resolve(const AdvanceAction &, const AcceptTokenAction &);
|
||||
bool resolve(const LexItemSet &, const AdvanceAction &,
|
||||
const AcceptTokenAction &);
|
||||
bool resolve(const AcceptTokenAction &, const AcceptTokenAction &);
|
||||
|
||||
std::set<rules::Symbol> fragile_tokens;
|
||||
std::map<rules::Symbol, std::set<rules::Symbol>> possible_homonyms;
|
||||
std::map<rules::Symbol, std::set<rules::Symbol>> possible_extensions;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ class CCodeGenerator {
|
|||
const SyntaxGrammar syntax_grammar;
|
||||
const LexicalGrammar lexical_grammar;
|
||||
map<string, string> sanitized_names;
|
||||
vector<pair<size_t, vector<ParseAction>>> parse_actions;
|
||||
vector<pair<size_t, ParseTableEntry>> parse_table_entries;
|
||||
vector<pair<size_t, set<rules::Symbol>>> in_progress_symbols;
|
||||
size_t next_parse_action_list_index;
|
||||
size_t next_in_progress_symbol_list_index;
|
||||
|
|
@ -155,35 +155,28 @@ class CCodeGenerator {
|
|||
for (const auto &entry : parse_table.symbols) {
|
||||
const rules::Symbol &symbol = entry.first;
|
||||
line("[" + symbol_id(symbol) + "] = {");
|
||||
indent([&]() {
|
||||
switch (symbol_type(symbol)) {
|
||||
case VariableTypeNamed:
|
||||
line(".visible = true,");
|
||||
line(".named = true,");
|
||||
break;
|
||||
case VariableTypeAnonymous:
|
||||
line(".visible = true,");
|
||||
line(".named = false,");
|
||||
break;
|
||||
case VariableTypeHidden:
|
||||
case VariableTypeAuxiliary:
|
||||
line(".visible = false,");
|
||||
line(".named = false,");
|
||||
break;
|
||||
}
|
||||
|
||||
switch (symbol_type(symbol)) {
|
||||
case VariableTypeNamed:
|
||||
add(".visible = true, .named = true");
|
||||
break;
|
||||
case VariableTypeAnonymous:
|
||||
add(".visible = true, .named = false");
|
||||
break;
|
||||
case VariableTypeHidden:
|
||||
case VariableTypeAuxiliary:
|
||||
add(".visible = false, .named = false");
|
||||
break;
|
||||
}
|
||||
line(".structural = " + _boolean(entry.second.structural) + ",");
|
||||
line(".extra = " + _boolean(entry.second.extra) + ",");
|
||||
});
|
||||
|
||||
add(", ");
|
||||
|
||||
if (entry.second.structural)
|
||||
add(".structural = true");
|
||||
else
|
||||
add(".structural = false");
|
||||
|
||||
add(", ");
|
||||
|
||||
if (syntax_grammar.extra_tokens.count(symbol))
|
||||
add(".extra = true");
|
||||
else
|
||||
add(".extra = false");
|
||||
|
||||
add("},");
|
||||
line("},");
|
||||
}
|
||||
});
|
||||
line("};");
|
||||
|
|
@ -221,11 +214,10 @@ class CCodeGenerator {
|
|||
void add_recovery_parse_states_list() {
|
||||
line("static TSParseAction ts_recovery_actions[SYMBOL_COUNT] = {");
|
||||
indent([&]() {
|
||||
for (const auto &entry : parse_table.error_state.actions) {
|
||||
const rules::Symbol &symbol = entry.first;
|
||||
if (!entry.second.empty()) {
|
||||
line("[" + symbol_id(symbol) + "] = ");
|
||||
ParseAction action = entry.second[0];
|
||||
for (const auto &entry : parse_table.error_state.entries) {
|
||||
if (!entry.second.actions.empty()) {
|
||||
line("[" + symbol_id(entry.first) + "] = ");
|
||||
ParseAction action = entry.second.actions[0];
|
||||
if (action.extra) {
|
||||
add("RECOVER_EXTRA(),");
|
||||
} else {
|
||||
|
|
@ -239,7 +231,8 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
void add_parse_table() {
|
||||
add_parse_action_list_id({ ParseAction::Error() });
|
||||
add_parse_action_list_id(
|
||||
ParseTableEntry{ { ParseAction::Error() }, true, false });
|
||||
|
||||
size_t state_id = 0;
|
||||
line("#pragma GCC diagnostic push");
|
||||
|
|
@ -251,9 +244,9 @@ class CCodeGenerator {
|
|||
for (const auto &state : parse_table.states) {
|
||||
line("[" + to_string(state_id++) + "] = {");
|
||||
indent([&]() {
|
||||
for (const auto &pair : state.actions) {
|
||||
line("[" + symbol_id(pair.first) + "] = ");
|
||||
add(to_string(add_parse_action_list_id(pair.second)));
|
||||
for (const auto &entry : state.entries) {
|
||||
line("[" + symbol_id(entry.first) + "] = ");
|
||||
add(to_string(add_parse_action_list_id(entry.second)));
|
||||
add(",");
|
||||
}
|
||||
});
|
||||
|
|
@ -338,22 +331,21 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
void add_accept_token_action(const AcceptTokenAction &action) {
|
||||
if (action.is_fragile)
|
||||
line("ACCEPT_FRAGILE_TOKEN(" + symbol_id(action.symbol) + ");");
|
||||
else
|
||||
line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");");
|
||||
line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");");
|
||||
}
|
||||
|
||||
void add_parse_action_list() {
|
||||
line("static TSParseActionEntry ts_parse_actions[] = {");
|
||||
|
||||
indent([&]() {
|
||||
for (const auto &pair : parse_actions) {
|
||||
for (const auto &pair : parse_table_entries) {
|
||||
size_t index = pair.first;
|
||||
line("[" + to_string(index) + "] = {.count = " +
|
||||
to_string(pair.second.size()) + "},");
|
||||
to_string(pair.second.actions.size()) + ", .reusable = " +
|
||||
_boolean(pair.second.reusable) + ", .depends_on_lookahead = " +
|
||||
_boolean(pair.second.depends_on_lookahead) + "},");
|
||||
|
||||
for (const ParseAction &action : pair.second) {
|
||||
for (const ParseAction &action : pair.second.actions) {
|
||||
add(" ");
|
||||
switch (action.type) {
|
||||
case ParseActionTypeError:
|
||||
|
|
@ -366,19 +358,18 @@ class CCodeGenerator {
|
|||
if (action.extra) {
|
||||
add("SHIFT_EXTRA()");
|
||||
} else {
|
||||
add("SHIFT(" + to_string(action.state_index) + ", ");
|
||||
add_action_flags(action);
|
||||
add(")");
|
||||
add("SHIFT(" + to_string(action.state_index) + ")");
|
||||
}
|
||||
break;
|
||||
case ParseActionTypeReduce:
|
||||
if (action.extra) {
|
||||
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
|
||||
} else if (action.fragile) {
|
||||
add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " +
|
||||
to_string(action.consumed_symbol_count) + ")");
|
||||
} else {
|
||||
add("REDUCE(" + symbol_id(action.symbol) + ", " +
|
||||
to_string(action.consumed_symbol_count) + ", ");
|
||||
add_action_flags(action);
|
||||
add(")");
|
||||
to_string(action.consumed_symbol_count) + ")");
|
||||
}
|
||||
break;
|
||||
default: {}
|
||||
|
|
@ -391,16 +382,16 @@ class CCodeGenerator {
|
|||
line("};");
|
||||
}
|
||||
|
||||
size_t add_parse_action_list_id(const vector<ParseAction> &actions) {
|
||||
for (const auto &pair : parse_actions) {
|
||||
if (pair.second == actions) {
|
||||
size_t add_parse_action_list_id(const ParseTableEntry &entry) {
|
||||
for (const auto &pair : parse_table_entries) {
|
||||
if (pair.second == entry) {
|
||||
return pair.first;
|
||||
}
|
||||
}
|
||||
|
||||
size_t result = next_parse_action_list_index;
|
||||
parse_actions.push_back({ next_parse_action_list_index, actions });
|
||||
next_parse_action_list_index += 1 + actions.size();
|
||||
parse_table_entries.push_back({ next_parse_action_list_index, entry });
|
||||
next_parse_action_list_index += 1 + entry.actions.size();
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -417,17 +408,6 @@ class CCodeGenerator {
|
|||
return result;
|
||||
}
|
||||
|
||||
void add_action_flags(const ParseAction &action) {
|
||||
if (action.fragile && action.can_hide_split)
|
||||
add("FRAGILE|CAN_HIDE_SPLIT");
|
||||
else if (action.fragile)
|
||||
add("FRAGILE");
|
||||
else if (action.can_hide_split)
|
||||
add("CAN_HIDE_SPLIT");
|
||||
else
|
||||
add("0");
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
|
||||
string symbol_id(const rules::Symbol &symbol) {
|
||||
|
|
|
|||
|
|
@ -27,14 +27,11 @@ bool AdvanceAction::operator==(const AdvanceAction &other) const {
|
|||
}
|
||||
|
||||
AcceptTokenAction::AcceptTokenAction()
|
||||
: symbol(rules::NONE()), precedence(0), is_string(false), is_fragile(false) {}
|
||||
: symbol(rules::NONE()), precedence(0), is_string(false) {}
|
||||
|
||||
AcceptTokenAction::AcceptTokenAction(Symbol symbol, int precedence,
|
||||
bool is_string)
|
||||
: symbol(symbol),
|
||||
precedence(precedence),
|
||||
is_string(is_string),
|
||||
is_fragile(false) {}
|
||||
: symbol(symbol), precedence(precedence), is_string(is_string) {}
|
||||
|
||||
bool AcceptTokenAction::is_present() const {
|
||||
return symbol != rules::NONE();
|
||||
|
|
@ -42,7 +39,7 @@ bool AcceptTokenAction::is_present() const {
|
|||
|
||||
bool AcceptTokenAction::operator==(const AcceptTokenAction &other) const {
|
||||
return (symbol == other.symbol) && (precedence == other.precedence) &&
|
||||
(is_string == other.is_string) && (is_fragile == other.is_fragile);
|
||||
(is_string == other.is_string);
|
||||
}
|
||||
|
||||
LexState::LexState() : is_token_start(false) {}
|
||||
|
|
|
|||
|
|
@ -39,7 +39,6 @@ struct AcceptTokenAction {
|
|||
rules::Symbol symbol;
|
||||
int precedence;
|
||||
bool is_string;
|
||||
bool is_fragile;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
|
|||
: type(type),
|
||||
extra(false),
|
||||
fragile(false),
|
||||
can_hide_split(false),
|
||||
symbol(symbol),
|
||||
state_index(state_index),
|
||||
consumed_symbol_count(consumed_symbol_count),
|
||||
|
|
@ -32,7 +31,6 @@ ParseAction::ParseAction()
|
|||
: type(ParseActionTypeError),
|
||||
extra(false),
|
||||
fragile(false),
|
||||
can_hide_split(false),
|
||||
symbol(Symbol(-1)),
|
||||
state_index(-1),
|
||||
consumed_symbol_count(0),
|
||||
|
|
@ -81,9 +79,8 @@ ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
|
|||
|
||||
bool ParseAction::operator==(const ParseAction &other) const {
|
||||
return (type == other.type && extra == other.extra &&
|
||||
fragile == other.fragile && can_hide_split == other.can_hide_split &&
|
||||
symbol == other.symbol && state_index == other.state_index &&
|
||||
production == other.production &&
|
||||
fragile == other.fragile && symbol == other.symbol &&
|
||||
state_index == other.state_index && production == other.production &&
|
||||
consumed_symbol_count == other.consumed_symbol_count);
|
||||
}
|
||||
|
||||
|
|
@ -100,10 +97,6 @@ bool ParseAction::operator<(const ParseAction &other) const {
|
|||
return true;
|
||||
if (other.fragile && !fragile)
|
||||
return false;
|
||||
if (can_hide_split && !other.can_hide_split)
|
||||
return true;
|
||||
if (other.can_hide_split && !can_hide_split)
|
||||
return false;
|
||||
if (symbol < other.symbol)
|
||||
return true;
|
||||
if (other.symbol < symbol)
|
||||
|
|
@ -119,24 +112,38 @@ bool ParseAction::operator<(const ParseAction &other) const {
|
|||
return consumed_symbol_count < other.consumed_symbol_count;
|
||||
}
|
||||
|
||||
ParseTableEntry::ParseTableEntry()
|
||||
: reusable(true), depends_on_lookahead(false) {}
|
||||
|
||||
ParseTableEntry::ParseTableEntry(const vector<ParseAction> &actions,
|
||||
bool reusable, bool depends_on_lookahead)
|
||||
: actions(actions),
|
||||
reusable(reusable),
|
||||
depends_on_lookahead(depends_on_lookahead) {}
|
||||
|
||||
bool ParseTableEntry::operator==(const ParseTableEntry &other) const {
|
||||
return actions == other.actions && reusable == other.reusable &&
|
||||
depends_on_lookahead == other.depends_on_lookahead;
|
||||
}
|
||||
|
||||
ParseState::ParseState() : lex_state_id(-1) {}
|
||||
|
||||
set<Symbol> ParseState::expected_inputs() const {
|
||||
set<Symbol> result;
|
||||
for (auto &pair : actions)
|
||||
result.insert(pair.first);
|
||||
for (auto &entry : entries)
|
||||
result.insert(entry.first);
|
||||
return result;
|
||||
}
|
||||
|
||||
void ParseState::each_advance_action(function<void(ParseAction *)> fn) {
|
||||
for (auto &entry : actions)
|
||||
for (ParseAction &action : entry.second)
|
||||
for (auto &entry : entries)
|
||||
for (ParseAction &action : entry.second.actions)
|
||||
if (action.type == ParseActionTypeShift)
|
||||
fn(&action);
|
||||
}
|
||||
|
||||
bool ParseState::operator==(const ParseState &other) const {
|
||||
return actions == other.actions;
|
||||
return entries == other.entries;
|
||||
}
|
||||
|
||||
set<Symbol> ParseTable::all_symbols() const {
|
||||
|
|
@ -154,26 +161,28 @@ ParseStateId ParseTable::add_state() {
|
|||
ParseAction &ParseTable::set_action(ParseStateId id, Symbol symbol,
|
||||
ParseAction action) {
|
||||
if (action.extra)
|
||||
symbols[symbol];
|
||||
symbols[symbol].extra = true;
|
||||
else
|
||||
symbols[symbol].structural = true;
|
||||
states[id].actions[symbol] = vector<ParseAction>({ action });
|
||||
return *states[id].actions[symbol].begin();
|
||||
|
||||
states[id].entries[symbol].actions = { action };
|
||||
return *states[id].entries[symbol].actions.begin();
|
||||
}
|
||||
|
||||
ParseAction &ParseTable::add_action(ParseStateId id, Symbol symbol,
|
||||
ParseAction action) {
|
||||
if (action.extra)
|
||||
symbols[symbol];
|
||||
symbols[symbol].extra = true;
|
||||
else
|
||||
symbols[symbol].structural = true;
|
||||
|
||||
for (ParseAction &existing_action : states[id].actions[symbol])
|
||||
ParseState &state = states[id];
|
||||
for (ParseAction &existing_action : state.entries[symbol].actions)
|
||||
if (existing_action == action)
|
||||
return existing_action;
|
||||
|
||||
states[id].actions[symbol].push_back(action);
|
||||
return *states[id].actions[symbol].rbegin();
|
||||
state.entries[symbol].actions.push_back(action);
|
||||
return *state.entries[symbol].actions.rbegin();
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -15,12 +15,12 @@ namespace tree_sitter {
|
|||
|
||||
typedef uint64_t ParseStateId;
|
||||
|
||||
typedef enum {
|
||||
enum ParseActionType {
|
||||
ParseActionTypeError,
|
||||
ParseActionTypeShift,
|
||||
ParseActionTypeReduce,
|
||||
ParseActionTypeAccept,
|
||||
} ParseActionType;
|
||||
};
|
||||
|
||||
class ParseAction {
|
||||
ParseAction(ParseActionType type, ParseStateId state_index,
|
||||
|
|
@ -43,7 +43,6 @@ class ParseAction {
|
|||
ParseActionType type;
|
||||
bool extra;
|
||||
bool fragile;
|
||||
bool can_hide_split;
|
||||
rules::Symbol symbol;
|
||||
ParseStateId state_index;
|
||||
size_t consumed_symbol_count;
|
||||
|
|
@ -52,30 +51,16 @@ class ParseAction {
|
|||
const Production *production;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
struct ParseTableEntry {
|
||||
std::vector<ParseAction> actions;
|
||||
bool reusable;
|
||||
bool depends_on_lookahead;
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<tree_sitter::ParseAction> {
|
||||
size_t operator()(const tree_sitter::ParseAction &action) const {
|
||||
return (hash<int>()(action.type) ^
|
||||
hash<tree_sitter::rules::Symbol>()(action.symbol) ^
|
||||
hash<size_t>()(action.state_index) ^
|
||||
hash<size_t>()(action.consumed_symbol_count) ^
|
||||
hash<bool>()(action.extra) ^ hash<bool>()(action.fragile) ^
|
||||
hash<bool>()(action.can_hide_split) ^
|
||||
hash<int>()(action.associativity) ^
|
||||
hash<int>()(action.precedence_range.min) ^
|
||||
hash<int>()(action.precedence_range.max) ^
|
||||
hash<const void *>()(&action.production));
|
||||
}
|
||||
ParseTableEntry();
|
||||
ParseTableEntry(const std::vector<ParseAction> &, bool, bool);
|
||||
bool operator==(const ParseTableEntry &other) const;
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
class ParseState {
|
||||
public:
|
||||
ParseState();
|
||||
|
|
@ -83,11 +68,12 @@ class ParseState {
|
|||
bool operator==(const ParseState &) const;
|
||||
void each_advance_action(std::function<void(ParseAction *)>);
|
||||
|
||||
std::map<rules::Symbol, std::vector<ParseAction>> actions;
|
||||
std::map<rules::Symbol, ParseTableEntry> entries;
|
||||
LexStateId lex_state_id;
|
||||
};
|
||||
|
||||
struct ParseTableSymbolMetadata {
|
||||
bool extra;
|
||||
bool structural;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -6,38 +6,30 @@ static const TSParseAction ERROR_SHIFT_EXTRA = {
|
|||
.type = TSParseActionTypeShift, .extra = true,
|
||||
};
|
||||
|
||||
const TSParseAction *ts_language_actions(const TSLanguage *self, TSStateId state,
|
||||
TSSymbol symbol, size_t *count) {
|
||||
void ts_language_table_entry(const TSLanguage *self, TSStateId state,
|
||||
TSSymbol symbol, TableEntry *result) {
|
||||
if (state == ts_parse_state_error) {
|
||||
*count = 1;
|
||||
if (symbol == ts_builtin_sym_error)
|
||||
return &ERROR_SHIFT_EXTRA;
|
||||
else if (self->recovery_actions[symbol].type == TSParseActionTypeError)
|
||||
return &ERROR_SHIFT_EXTRA;
|
||||
result->action_count = 1;
|
||||
result->is_reusable = false;
|
||||
result->depends_on_lookahead = false;
|
||||
if (symbol == ts_builtin_sym_error ||
|
||||
self->recovery_actions[symbol].type == TSParseActionTypeError)
|
||||
result->actions = &ERROR_SHIFT_EXTRA;
|
||||
else
|
||||
return &self->recovery_actions[symbol];
|
||||
result->actions = &self->recovery_actions[symbol];
|
||||
return;
|
||||
}
|
||||
|
||||
size_t action_index = 0;
|
||||
if (symbol != ts_builtin_sym_error)
|
||||
action_index = self->parse_table[state * self->symbol_count + symbol];
|
||||
size_t action_index =
|
||||
(symbol != ts_builtin_sym_error)
|
||||
? self->parse_table[state * self->symbol_count + symbol]
|
||||
: 0;
|
||||
|
||||
*count = self->parse_actions[action_index].count;
|
||||
const TSParseActionEntry *entry = self->parse_actions + action_index + 1;
|
||||
return (const TSParseAction *)entry;
|
||||
}
|
||||
|
||||
TSParseAction ts_language_last_action(const TSLanguage *self, TSStateId state,
|
||||
TSSymbol sym) {
|
||||
size_t count;
|
||||
const TSParseAction *actions = ts_language_actions(self, state, sym, &count);
|
||||
return actions[count - 1];
|
||||
}
|
||||
|
||||
bool ts_language_has_action(const TSLanguage *self, TSStateId state,
|
||||
TSSymbol symbol) {
|
||||
TSParseAction action = ts_language_last_action(self, state, symbol);
|
||||
return action.type != TSParseActionTypeError;
|
||||
const TSParseActionEntry *entry = &self->parse_actions[action_index];
|
||||
result->action_count = entry->count;
|
||||
result->is_reusable = entry->reusable;
|
||||
result->depends_on_lookahead = entry->depends_on_lookahead;
|
||||
result->actions = (const TSParseAction *)(entry + 1);
|
||||
}
|
||||
|
||||
size_t ts_language_symbol_count(const TSLanguage *language) {
|
||||
|
|
|
|||
|
|
@ -8,13 +8,48 @@ extern "C" {
|
|||
#include "tree_sitter/parser.h"
|
||||
#include "runtime/tree.h"
|
||||
|
||||
// Result of one parse-table lookup for a (state, symbol) pair, as filled in
// by ts_language_table_entry().
typedef struct {
|
||||
// First of `action_count` consecutive actions for this entry.
const TSParseAction *actions;
|
||||
// Number of actions reachable through `actions`.
size_t action_count;
|
||||
// Copied from the table entry's `reusable` bit; always false for lookups in
// the error state.
bool is_reusable;
|
||||
// Copied from the table entry's `depends_on_lookahead` bit; always false for
// lookups in the error state.
bool depends_on_lookahead;
|
||||
} TableEntry;
|
||||
|
||||
void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol,
|
||||
TableEntry *);
|
||||
|
||||
bool ts_language_symbol_is_in_progress(const TSLanguage *, TSStateId, TSSymbol);
|
||||
|
||||
const TSParseAction *ts_language_actions(const TSLanguage *, TSStateId,
|
||||
TSSymbol, size_t *);
|
||||
TSParseAction ts_language_last_action(const TSLanguage *, TSStateId, TSSymbol);
|
||||
// Convenience wrapper around ts_language_table_entry(): looks up the entry for
// (state, symbol), writes its action count to *count and returns the entry's
// action pointer. Only the local TableEntry copy goes out of scope here; the
// returned pointer is the one ts_language_table_entry() stored in it.
static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol,
|
||||
size_t *count) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
*count = entry.action_count;
|
||||
return entry.actions;
|
||||
}
|
||||
|
||||
bool ts_language_has_action(const TSLanguage *, TSStateId, TSSymbol);
|
||||
// Returns (by value) the last action listed in the table entry for
// (state, symbol).
// NOTE(review): indexes `action_count - 1`, so this assumes every entry holds
// at least one action -- confirm against the generated tables.
static inline TSParseAction ts_language_last_action(const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
return entry.actions[entry.action_count - 1];
|
||||
}
|
||||
|
||||
// Returns true when the last action for (state, symbol) is anything other
// than TSParseActionTypeError.
static inline bool ts_language_has_action(const TSLanguage *self,
|
||||
TSStateId state, TSSymbol symbol) {
|
||||
TSParseAction action = ts_language_last_action(self, state, symbol);
|
||||
return action.type != TSParseActionTypeError;
|
||||
}
|
||||
|
||||
// Returns the `is_reusable` flag of the table entry for (state, symbol), as
// reported by ts_language_table_entry().
static inline bool ts_language_is_reusable(const TSLanguage *self,
|
||||
TSStateId state, TSSymbol symbol) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
return entry.is_reusable;
|
||||
}
|
||||
|
||||
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
|
||||
|
||||
|
|
|
|||
|
|
@ -137,7 +137,6 @@ void ts_lexer_start(TSLexer *self, TSStateId lex_state) {
|
|||
self->starting_state = lex_state;
|
||||
self->token_start_position = self->current_position;
|
||||
self->result_follows_error = false;
|
||||
self->result_is_fragile = false;
|
||||
self->result_symbol = 0;
|
||||
self->first_unexpected_character = 0;
|
||||
|
||||
|
|
@ -156,13 +155,11 @@ void ts_lexer_finish(TSLexer *self, TSLexerResult *result) {
|
|||
result->size =
|
||||
ts_length_sub(self->error_end_position, self->token_start_position);
|
||||
result->first_unexpected_character = self->first_unexpected_character;
|
||||
result->is_fragile = true;
|
||||
ts_lexer_reset(self, self->error_end_position);
|
||||
} else {
|
||||
result->symbol = self->result_symbol;
|
||||
result->size =
|
||||
ts_length_sub(self->current_position, self->token_start_position);
|
||||
result->is_fragile = self->result_is_fragile;
|
||||
self->token_end_position = self->current_position;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ typedef struct {
|
|||
TSSymbol symbol;
|
||||
TSLength padding;
|
||||
TSLength size;
|
||||
bool is_fragile;
|
||||
int32_t first_unexpected_character;
|
||||
} TSLexerResult;
|
||||
|
||||
|
|
|
|||
|
|
@ -211,28 +211,64 @@ static bool ts_parser__can_reuse(TSParser *self, StackVersion version,
|
|||
if (tree->symbol == ts_builtin_sym_error)
|
||||
return false;
|
||||
|
||||
if (ts_tree_is_fragile(tree) &&
|
||||
tree->parse_state != ts_stack_top_state(self->stack, version))
|
||||
return false;
|
||||
TSStateId state = ts_stack_top_state(self->stack, version);
|
||||
if (tree->parse_state != state) {
|
||||
if (ts_tree_is_fragile(tree)) {
|
||||
LOG_ACTION("cant_reuse_fragile tree:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
TSStateId top_state = ts_stack_top_state(self->stack, version);
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self->language, state, tree->symbol, &entry);
|
||||
|
||||
if (tree->lex_state != TS_TREE_STATE_INDEPENDENT &&
|
||||
tree->lex_state != ts_language_lex_state(self->language, top_state))
|
||||
return false;
|
||||
if (!entry.is_reusable) {
|
||||
LOG_ACTION("cant_reuse tree:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
const TSParseAction action =
|
||||
ts_language_last_action(self->language, top_state, tree->symbol);
|
||||
if (action.type == TSParseActionTypeError || action.can_hide_split)
|
||||
return false;
|
||||
TSParseAction action = entry.actions[entry.action_count - 1];
|
||||
if (action.type == TSParseActionTypeError) {
|
||||
LOG_ACTION("cant_reuse_unexpected tree:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tree->extra && !action.extra)
|
||||
return false;
|
||||
if (tree->extra != action.extra) {
|
||||
LOG_ACTION("cant_reuse_extra tree:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
TSStateId lex_state = ts_language_lex_state(self->language, state);
|
||||
if (tree->first_leaf.lex_state != lex_state) {
|
||||
if (tree->child_count > 0) {
|
||||
TableEntry leaf_entry;
|
||||
ts_language_table_entry(self->language, state, tree->first_leaf.symbol,
|
||||
&leaf_entry);
|
||||
|
||||
if (!leaf_entry.is_reusable) {
|
||||
LOG_ACTION("cant_reuse_first_leaf tree:%s, leaf:%s",
|
||||
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tree->child_count == 1 && leaf_entry.depends_on_lookahead) {
|
||||
LOG_ACTION("cant_reuse_lookahead_dependent tree:%s, leaf:%s", SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol));
|
||||
return false;
|
||||
}
|
||||
} else if (entry.depends_on_lookahead) {
|
||||
LOG_ACTION("cant_reuse_lookahead_dependent tree:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static TSTree *ts_parser__lex(TSParser *self, TSStateId state, bool error_mode) {
|
||||
static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state,
|
||||
bool error_mode) {
|
||||
TSStateId state = error_mode ? 0 : self->language->lex_states[parse_state];
|
||||
LOG("lex state:%d", state);
|
||||
|
||||
TSLength position = self->lexer.current_position;
|
||||
|
||||
ts_lexer_start(&self->lexer, state);
|
||||
|
|
@ -247,6 +283,7 @@ static TSTree *ts_parser__lex(TSParser *self, TSStateId state, bool error_mode)
|
|||
|
||||
TSTree *result;
|
||||
if (lex_result.symbol == ts_builtin_sym_error) {
|
||||
LOG("accept_error_token");
|
||||
result = ts_tree_make_error(lex_result.size, lex_result.padding,
|
||||
lex_result.first_unexpected_character);
|
||||
} else {
|
||||
|
|
@ -254,14 +291,12 @@ static TSTree *ts_parser__lex(TSParser *self, TSStateId state, bool error_mode)
|
|||
result = ts_tree_make_leaf(
|
||||
lex_result.symbol, lex_result.padding, lex_result.size,
|
||||
ts_language_symbol_metadata(self->language, lex_result.symbol));
|
||||
if (!result)
|
||||
return NULL;
|
||||
result->parse_state = parse_state;
|
||||
result->first_leaf.lex_state = state;
|
||||
}
|
||||
|
||||
if (!result)
|
||||
return NULL;
|
||||
|
||||
if (lex_result.is_fragile)
|
||||
result->lex_state = state;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -284,21 +319,20 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
|
|||
if (reusable_node->tree->child_count == 0)
|
||||
ts_parser__breakdown_top_of_stack(self, version);
|
||||
|
||||
LOG("breakdown_changed sym:%s", SYM_NAME(reusable_node->tree->symbol));
|
||||
LOG_ACTION("breakdown_changed sym:%s", SYM_NAME(reusable_node->tree->symbol));
|
||||
ts_parser__breakdown_reusable_node(reusable_node);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!ts_parser__can_reuse(self, version, reusable_node->tree)) {
|
||||
LOG("breakdown_unreusable sym:%s", SYM_NAME(reusable_node->tree->symbol));
|
||||
ts_parser__breakdown_reusable_node(reusable_node);
|
||||
continue;
|
||||
}
|
||||
|
||||
TSTree *result = reusable_node->tree;
|
||||
TSLength size = ts_tree_total_size(result);
|
||||
LOG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), size.chars,
|
||||
result->extra);
|
||||
LOG_ACTION("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol),
|
||||
size.chars, result->extra);
|
||||
ts_parser__pop_reusable_node(reusable_node);
|
||||
ts_tree_retain(result);
|
||||
return result;
|
||||
|
|
@ -307,9 +341,7 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
|
|||
ts_lexer_reset(&self->lexer, position);
|
||||
TSStateId parse_state = ts_stack_top_state(self->stack, version);
|
||||
bool error_mode = parse_state == ts_parse_state_error;
|
||||
TSStateId lex_state = error_mode ? 0 : self->language->lex_states[parse_state];
|
||||
LOG("lex state:%d", lex_state);
|
||||
return ts_parser__lex(self, lex_state, error_mode);
|
||||
return ts_parser__lex(self, parse_state, error_mode);
|
||||
}
|
||||
|
||||
static bool ts_parser__select_tree(TSParser *self, TSTree *left, TSTree *right) {
|
||||
|
|
@ -487,7 +519,7 @@ static Reduction ts_parser__reduce(TSParser *self, StackVersion version,
|
|||
return (Reduction){ ReduceSucceeded, pop.slices.contents[0] };
|
||||
|
||||
error:
|
||||
return (Reduction){ ReduceFailed };
|
||||
return (Reduction){ ReduceFailed, {} };
|
||||
}
|
||||
|
||||
static inline const TSParseAction *ts_parser__reductions_after_sequence(
|
||||
|
|
|
|||
|
|
@ -28,15 +28,13 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size,
|
|||
.padding = padding,
|
||||
.visible = metadata.visible,
|
||||
.named = metadata.named,
|
||||
.lex_state = TS_TREE_STATE_INDEPENDENT,
|
||||
.parse_state = TS_TREE_STATE_INDEPENDENT,
|
||||
.first_leaf =
|
||||
{
|
||||
.symbol = sym, .lex_state = TS_TREE_STATE_INDEPENDENT,
|
||||
},
|
||||
};
|
||||
|
||||
if (sym == ts_builtin_sym_error) {
|
||||
result->fragile_left = true;
|
||||
result->fragile_right = true;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -81,6 +79,8 @@ TSTree *ts_tree_make_error(TSLength size, TSLength padding, char lookahead_char)
|
|||
if (!result)
|
||||
return NULL;
|
||||
|
||||
result->fragile_left = true;
|
||||
result->fragile_right = true;
|
||||
result->lookahead_char = lookahead_char;
|
||||
return result;
|
||||
}
|
||||
|
|
@ -174,7 +174,7 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) {
|
|||
}
|
||||
|
||||
if (child_count > 0) {
|
||||
self->lex_state = children[0]->lex_state;
|
||||
self->first_leaf = children[0]->first_leaf;
|
||||
if (children[0]->fragile_left)
|
||||
self->fragile_left = true;
|
||||
if (children[child_count - 1]->fragile_right)
|
||||
|
|
@ -206,9 +206,16 @@ TSTree *ts_tree_make_error_node(TreeArray *children) {
|
|||
}
|
||||
}
|
||||
|
||||
return ts_tree_make_node(
|
||||
TSTree *result = ts_tree_make_node(
|
||||
ts_builtin_sym_error, children->size, children->contents,
|
||||
(TSSymbolMetadata){.extra = false, .visible = true, .named = true });
|
||||
|
||||
if (!result)
|
||||
return NULL;
|
||||
|
||||
result->fragile_left = true;
|
||||
result->fragile_right = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
void ts_tree_retain(TSTree *self) {
|
||||
|
|
|
|||
|
|
@ -32,10 +32,14 @@ typedef struct TSTree {
|
|||
TSLength size;
|
||||
|
||||
TSSymbol symbol;
|
||||
TSStateId lex_state;
|
||||
TSStateId parse_state;
|
||||
size_t error_size;
|
||||
|
||||
struct {
|
||||
TSSymbol symbol;
|
||||
TSStateId lex_state;
|
||||
} first_leaf;
|
||||
|
||||
unsigned short ref_count;
|
||||
bool visible : 1;
|
||||
bool named : 1;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue