Merge pull request #32 from tree-sitter/parse-table-compression
Parse table compression
This commit is contained in:
commit
4feceabd58
17 changed files with 324 additions and 196 deletions
35
spec/helpers/random_helpers.cc
Normal file
35
spec/helpers/random_helpers.cc
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
#include <string>
|
||||
#include <stdlib.h>
|
||||
|
||||
using std::string;
|
||||
|
||||
// Builds a string of 0 to 11 pseudo-random characters, each drawn from the
// inclusive range [min, max].
//
// Characters come from random(), so the output is reproducible for a given
// srandom() seed. The span is widened by one so that `max` itself can be
// produced, and it is never allowed to drop below 1 so that min == max (or a
// reversed range) cannot cause a modulo by zero; in that case every character
// is `min`.
static std::string random_string(char min, char max) {
  std::string result;
  size_t length = random() % 12;
  const int span = (max > min) ? (max - min + 1) : 1;
  for (size_t i = 0; i < length; i++) {
    char inserted_char = static_cast<char>(min + (random() % span));
    result += inserted_char;
  }
  return result;
}
|
||||
|
||||
// Picks one pseudo-random character from `characters` (using random()) and
// returns it as a one-character string.
//
// Returns an empty string when `characters` is empty, because the modulo
// below would otherwise divide by zero.
static std::string random_char(const std::string &characters) {
  if (characters.empty())
    return std::string();
  size_t index = random() % characters.size();
  return std::string(1, characters[index]);
}
|
||||
|
||||
string random_words(size_t count) {
|
||||
string result;
|
||||
bool just_inserted_word = false;
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
if (random() % 10 < 6) {
|
||||
result += random_char("!(){}[]<>+-=");
|
||||
} else {
|
||||
if (just_inserted_word)
|
||||
result += " ";
|
||||
result += random_string('a', 'z');
|
||||
just_inserted_word = true;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
8
spec/helpers/random_helpers.h
Normal file
8
spec/helpers/random_helpers.h
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
#ifndef HELPERS_RANDOM_HELPERS_H_
|
||||
#define HELPERS_RANDOM_HELPERS_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
std::string random_words(size_t count);
|
||||
|
||||
#endif // HELPERS_RANDOM_HELPERS_H_
|
||||
|
|
@ -7,6 +7,7 @@
|
|||
#include "helpers/point_helpers.h"
|
||||
#include "helpers/encoding_helpers.h"
|
||||
#include "helpers/record_alloc.h"
|
||||
#include "helpers/random_helpers.h"
|
||||
#include <set>
|
||||
|
||||
static void expect_the_correct_tree(TSNode node, TSDocument *document, string tree_string) {
|
||||
|
|
@ -60,37 +61,6 @@ static void expect_a_consistent_tree(TSNode node, TSDocument *document) {
|
|||
AssertThat(has_changes, Equals(some_child_has_changes));
|
||||
}
|
||||
|
||||
static string random_string(char min, char max) {
|
||||
string result;
|
||||
size_t length = random() % 12;
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
char inserted_char = min + (random() % (max - min));
|
||||
result += inserted_char;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static string random_char(string characters) {
|
||||
size_t index = random() % characters.size();
|
||||
return string() + characters[index];
|
||||
}
|
||||
|
||||
static string random_words(size_t count) {
|
||||
string result;
|
||||
bool just_inserted_word = false;
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
if (random() % 10 < 6) {
|
||||
result += random_char("!(){}[]<>+-=");
|
||||
} else {
|
||||
if (just_inserted_word)
|
||||
result += " ";
|
||||
result += random_string('a', 'z');
|
||||
just_inserted_word = true;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("The Corpus", []() {
|
||||
|
|
|
|||
|
|
@ -173,7 +173,7 @@ describe("Document", [&]() {
|
|||
|
||||
AssertThat(debugger->messages, Contains("new_parse"));
|
||||
AssertThat(debugger->messages, Contains("lookahead char:'['"));
|
||||
AssertThat(debugger->messages, Contains("reduce sym:array, child_count:4, fragile:false"));
|
||||
AssertThat(debugger->messages, Contains("reduce sym:array, child_count:4"));
|
||||
AssertThat(debugger->messages, Contains("accept"));
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -224,14 +224,14 @@ describe("Parser", [&]() {
|
|||
"(identifier) "
|
||||
"(math_op (number) (identifier)))))");
|
||||
|
||||
insert_text(strlen("x ^ (100 + abc"), ".d");
|
||||
insert_text(strlen("x * (100 + abc"), ".d");
|
||||
|
||||
assert_root_node(
|
||||
"(program (expression_statement (math_op "
|
||||
"(identifier) "
|
||||
"(math_op (number) (member_access (identifier) (identifier))))))");
|
||||
|
||||
AssertThat(input->strings_read, Equals(vector<string>({ " abc.d);", "" })));
|
||||
AssertThat(input->strings_read, Equals(vector<string>({ " + abc.d)", "" })));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -147,7 +147,7 @@ class LexTableBuilder {
|
|||
}
|
||||
|
||||
auto replacements =
|
||||
remove_duplicate_states<LexState, AdvanceAction>(&lex_table.states);
|
||||
remove_duplicate_states<LexTable, AdvanceAction>(&lex_table);
|
||||
|
||||
for (ParseState &parse_state : parse_table->states) {
|
||||
auto replacement = replacements.find(parse_state.lex_state_id);
|
||||
|
|
|
|||
|
|
@ -70,6 +70,11 @@ class ParseTableBuilder {
|
|||
if (error.type != TSCompileErrorTypeNone)
|
||||
return { parse_table, error };
|
||||
|
||||
for (const ParseState &state : parse_table.states)
|
||||
for (const auto &pair1 : state.entries)
|
||||
for (const auto &pair2 : state.entries)
|
||||
parse_table.symbols[pair1.first].compatible_symbols.insert(pair2.first);
|
||||
|
||||
build_error_parse_state();
|
||||
|
||||
allow_any_conflict = true;
|
||||
|
|
@ -110,13 +115,12 @@ class ParseTableBuilder {
|
|||
void build_error_parse_state() {
|
||||
ParseState error_state;
|
||||
|
||||
for (const Symbol &symbol : recovery_tokens(lexical_grammar)) {
|
||||
for (const Symbol &symbol : recovery_tokens(lexical_grammar))
|
||||
add_out_of_context_parse_state(&error_state, symbol);
|
||||
}
|
||||
|
||||
for (const Symbol &symbol : grammar.extra_tokens) {
|
||||
error_state.entries[symbol].actions.push_back(ParseAction::ShiftExtra());
|
||||
}
|
||||
for (const Symbol &symbol : grammar.extra_tokens)
|
||||
if (!error_state.entries.count(symbol))
|
||||
error_state.entries[symbol].actions.push_back(ParseAction::ShiftExtra());
|
||||
|
||||
for (size_t i = 0; i < grammar.variables.size(); i++) {
|
||||
Symbol symbol(i, false);
|
||||
|
|
@ -134,8 +138,7 @@ class ParseTableBuilder {
|
|||
const ParseItemSet &item_set = recovery_states[symbol];
|
||||
if (!item_set.entries.empty()) {
|
||||
ParseStateId state = add_parse_state(item_set);
|
||||
error_state->entries[symbol].actions.push_back(
|
||||
ParseAction::Recover(state));
|
||||
error_state->entries[symbol].actions.push_back(ParseAction::Recover(state));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -193,8 +196,13 @@ class ParseTableBuilder {
|
|||
|
||||
void add_shift_extra_actions(ParseStateId state_id) {
|
||||
ParseAction action = ParseAction::ShiftExtra();
|
||||
ParseState &state = parse_table.states[state_id];
|
||||
for (const Symbol &extra_symbol : grammar.extra_tokens)
|
||||
add_action(state_id, extra_symbol, action, null_item_set);
|
||||
if (!state.entries.count(extra_symbol) ||
|
||||
(allow_any_conflict &&
|
||||
state.entries[extra_symbol].actions.back().type ==
|
||||
ParseActionTypeReduce))
|
||||
parse_table.add_action(state_id, extra_symbol, action);
|
||||
}
|
||||
|
||||
void add_reduce_extra_actions(ParseStateId state_id) {
|
||||
|
|
@ -268,7 +276,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
void remove_duplicate_parse_states() {
|
||||
remove_duplicate_states<ParseState, ParseAction>(&parse_table.states);
|
||||
remove_duplicate_states<ParseTable, ParseAction>(&parse_table);
|
||||
}
|
||||
|
||||
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,
|
||||
|
|
|
|||
|
|
@ -7,15 +7,15 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
template <typename StateType, typename ActionType>
|
||||
std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states) {
|
||||
template <typename TableType, typename ActionType>
|
||||
std::map<size_t, size_t> remove_duplicate_states(TableType *table) {
|
||||
std::map<size_t, size_t> replacements;
|
||||
|
||||
while (true) {
|
||||
std::map<size_t, size_t> duplicates;
|
||||
for (size_t i = 0, size = states->size(); i < size; i++)
|
||||
for (size_t i = 0, size = table->states.size(); i < size; i++)
|
||||
for (size_t j = 0; j < i; j++)
|
||||
if (states->at(i) == states->at(j)) {
|
||||
if (!duplicates.count(j) && table->merge_state(j, i)) {
|
||||
duplicates.insert({ i, j });
|
||||
break;
|
||||
}
|
||||
|
|
@ -24,7 +24,7 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
|
|||
break;
|
||||
|
||||
std::map<size_t, size_t> new_replacements;
|
||||
for (size_t i = 0, size = states->size(); i < size; i++) {
|
||||
for (size_t i = 0, size = table->states.size(); i < size; i++) {
|
||||
size_t new_state_index = i;
|
||||
auto duplicate = duplicates.find(i);
|
||||
if (duplicate != duplicates.end())
|
||||
|
|
@ -45,7 +45,7 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
|
|||
replacement.second = new_state_index;
|
||||
}
|
||||
|
||||
for (StateType &state : *states)
|
||||
for (auto &state : table->states)
|
||||
state.each_advance_action([&new_replacements](ActionType *action) {
|
||||
auto new_replacement = new_replacements.find(action->state_index);
|
||||
if (new_replacement != new_replacements.end())
|
||||
|
|
@ -53,7 +53,7 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
|
|||
});
|
||||
|
||||
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)
|
||||
states->erase(states->begin() + i->first);
|
||||
table->states.erase(table->states.begin() + i->first);
|
||||
}
|
||||
|
||||
return replacements;
|
||||
|
|
|
|||
|
|
@ -165,6 +165,9 @@ class CCodeGenerator {
|
|||
line(".named = false,");
|
||||
break;
|
||||
case VariableTypeHidden:
|
||||
line(".visible = false,");
|
||||
line(".named = true,");
|
||||
break;
|
||||
case VariableTypeAuxiliary:
|
||||
line(".visible = false,");
|
||||
line(".named = false,");
|
||||
|
|
@ -211,7 +214,7 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
void add_parse_table() {
|
||||
add_parse_action_list_id(ParseTableEntry{ {}, true, false });
|
||||
add_parse_action_list_id(ParseTableEntry{ {}, false, false });
|
||||
|
||||
size_t state_id = 0;
|
||||
line("#pragma GCC diagnostic push");
|
||||
|
|
|
|||
|
|
@ -71,4 +71,8 @@ LexState &LexTable::state(LexStateId id) {
|
|||
return states[id];
|
||||
}
|
||||
|
||||
bool LexTable::merge_state(size_t i, size_t j) {
|
||||
return states[i] == states[j];
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ struct AdvanceAction {
|
|||
AdvanceAction();
|
||||
AdvanceAction(size_t, PrecedenceRange, bool);
|
||||
|
||||
bool operator==(const AdvanceAction &action) const;
|
||||
bool operator==(const AdvanceAction &other) const;
|
||||
|
||||
size_t state_index;
|
||||
PrecedenceRange precedence_range;
|
||||
|
|
@ -66,6 +66,8 @@ class LexTable {
|
|||
LexStateId add_state();
|
||||
LexState &state(LexStateId state_id);
|
||||
std::vector<LexState> states;
|
||||
|
||||
bool merge_state(size_t i, size_t j);
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -190,4 +190,51 @@ ParseAction &ParseTable::add_action(ParseStateId id, Symbol symbol,
|
|||
return *state.entries[symbol].actions.rbegin();
|
||||
}
|
||||
|
||||
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
|
||||
for (const auto &pair : state.entries)
|
||||
if (pair.second == entry)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ParseTable::merge_state(size_t i, size_t j) {
|
||||
ParseState &state = states[i];
|
||||
ParseState &other = states[j];
|
||||
|
||||
for (auto &entry : state.entries) {
|
||||
const Symbol &symbol = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
const auto &other_entry = other.entries.find(symbol);
|
||||
if (other_entry == other.entries.end()) {
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(other, entry.second))
|
||||
return false;
|
||||
} else if (entry.second != other_entry->second) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
set<Symbol> symbols_to_merge;
|
||||
|
||||
for (auto &entry : other.entries) {
|
||||
const Symbol &symbol = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
if (!state.entries.count(symbol)) {
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(state, entry.second))
|
||||
return false;
|
||||
symbols_to_merge.insert(symbol);
|
||||
}
|
||||
}
|
||||
|
||||
for (const Symbol &symbol : symbols_to_merge)
|
||||
state.entries[symbol] = other.entries.find(symbol)->second;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -61,6 +61,10 @@ struct ParseTableEntry {
|
|||
ParseTableEntry();
|
||||
ParseTableEntry(const std::vector<ParseAction> &, bool, bool);
|
||||
bool operator==(const ParseTableEntry &other) const;
|
||||
|
||||
inline bool operator!=(const ParseTableEntry &other) const {
|
||||
return !operator==(other);
|
||||
}
|
||||
};
|
||||
|
||||
class ParseState {
|
||||
|
|
@ -68,6 +72,7 @@ class ParseState {
|
|||
ParseState();
|
||||
std::set<rules::Symbol> expected_inputs() const;
|
||||
bool operator==(const ParseState &) const;
|
||||
bool merge(const ParseState &);
|
||||
void each_advance_action(std::function<void(ParseAction *)>);
|
||||
|
||||
std::map<rules::Symbol, ParseTableEntry> entries;
|
||||
|
|
@ -77,6 +82,7 @@ class ParseState {
|
|||
struct ParseTableSymbolMetadata {
|
||||
bool extra;
|
||||
bool structural;
|
||||
std::set<rules::Symbol> compatible_symbols;
|
||||
};
|
||||
|
||||
class ParseTable {
|
||||
|
|
@ -87,6 +93,7 @@ class ParseTable {
|
|||
ParseAction action);
|
||||
ParseAction &add_action(ParseStateId state_id, rules::Symbol symbol,
|
||||
ParseAction action);
|
||||
bool merge_state(size_t i, size_t j);
|
||||
|
||||
std::vector<ParseState> states;
|
||||
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ static inline size_t ts_node__offset_row(TSNode self) {
|
|||
|
||||
static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
|
||||
const TSTree *tree = ts_node__tree(self);
|
||||
return include_anonymous ? tree->visible : tree->named;
|
||||
return include_anonymous ? tree->visible : tree->visible && tree->named;
|
||||
}
|
||||
|
||||
static inline size_t ts_node__relevant_child_count(TSNode self,
|
||||
|
|
|
|||
|
|
@ -49,11 +49,6 @@
|
|||
|
||||
static const unsigned ERROR_COST_THRESHOLD = 3;
|
||||
|
||||
typedef struct {
|
||||
TSTree *tree;
|
||||
size_t char_index;
|
||||
} ReusableNode;
|
||||
|
||||
typedef struct {
|
||||
TSParser *parser;
|
||||
TSSymbol lookahead_symbol;
|
||||
|
|
@ -112,8 +107,6 @@ static BreakdownResult ts_parser__breakdown_top_of_stack(TSParser *self,
|
|||
TSStateId state = ts_stack_top_state(self->stack, slice.version);
|
||||
TSTree *parent = *array_front(&slice.trees);
|
||||
|
||||
LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol));
|
||||
|
||||
for (size_t j = 0; j < parent->child_count; j++) {
|
||||
TSTree *child = parent->children[j];
|
||||
pending = child->child_count > 0;
|
||||
|
|
@ -136,6 +129,9 @@ static BreakdownResult ts_parser__breakdown_top_of_stack(TSParser *self,
|
|||
CHECK(ts_parser__push(self, slice.version, tree, state));
|
||||
}
|
||||
|
||||
LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol));
|
||||
LOG_STACK();
|
||||
|
||||
ts_tree_release(parent);
|
||||
array_delete(&slice.trees);
|
||||
}
|
||||
|
|
@ -161,8 +157,7 @@ static void ts_parser__pop_reusable_node(ReusableNode *reusable_node) {
|
|||
}
|
||||
|
||||
static bool ts_parser__breakdown_reusable_node(ReusableNode *reusable_node) {
|
||||
if (reusable_node->tree->symbol == ts_builtin_sym_error ||
|
||||
reusable_node->tree->child_count == 0) {
|
||||
if (reusable_node->tree->child_count == 0) {
|
||||
return false;
|
||||
} else {
|
||||
reusable_node->tree = reusable_node->tree->children[0];
|
||||
|
|
@ -170,10 +165,48 @@ static bool ts_parser__breakdown_reusable_node(ReusableNode *reusable_node) {
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Breaks the current reusable node down for as long as it has children and
 * either was built in a parse state other than `state` or is marked fragile
 * on its left or right edge.
 *
 * If at least one breakdown step happened, *lookahead is replaced with the
 * node that was reached (retained on behalf of the caller) and the previous
 * *lookahead is released. Returns true when *lookahead was replaced and false
 * when it was left alone.
 */
static bool ts_parser__breakdown_lookahead(TSParser *self, TSTree **lookahead,
                                           TSStateId state,
                                           ReusableNode *reusable_node) {
  bool result = false;
  while (reusable_node->tree->child_count > 0 &&
         (reusable_node->tree->parse_state != state ||
          reusable_node->tree->fragile_left ||
          reusable_node->tree->fragile_right)) {
    LOG("state_mismatch sym:%s", SYM_NAME(reusable_node->tree->symbol));
    ts_parser__breakdown_reusable_node(reusable_node);
    result = true;
  }

  if (result) {
    LOG("lookahead sym:%s", SYM_NAME(reusable_node->tree->symbol));

    /* Retain the replacement before releasing the old lookahead. The
     * replacement is reached by following first-child links, so it may be
     * kept alive only through the old lookahead (NOTE(review): confirm);
     * releasing first could then drop it before it is retained. */
    TSTree *replacement = reusable_node->tree;
    ts_tree_retain(replacement);
    ts_tree_release(*lookahead);
    *lookahead = replacement;
  }

  return result;
}
|
||||
|
||||
/*
 * Moves the reusable node down its first-child chain to a node without
 * children, then pops that node via ts_parser__pop_reusable_node.
 */
static void ts_parser__pop_reusable_node_leaf(ReusableNode *reusable_node) {
  for (;;) {
    if (reusable_node->tree->child_count == 0)
      break;
    reusable_node->tree = reusable_node->tree->children[0];
  }
  ts_parser__pop_reusable_node(reusable_node);
}
|
||||
|
||||
static bool ts_parser__can_reuse(TSParser *self, TSStateId state, TSTree *tree,
|
||||
TableEntry *table_entry) {
|
||||
if (tree->first_leaf.lex_state == self->language->lex_states[state])
|
||||
return true;
|
||||
if (!table_entry->is_reusable)
|
||||
return false;
|
||||
if (!table_entry->depends_on_lookahead)
|
||||
return true;
|
||||
return tree->child_count > 1 && tree->error_size == 0;
|
||||
}
|
||||
|
||||
static bool ts_parser__condense_stack(TSParser *self) {
|
||||
bool result = false;
|
||||
for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
|
||||
if (ts_stack_is_halted(self->stack, i)) {
|
||||
result = true;
|
||||
ts_stack_remove_version(self->stack, i);
|
||||
i--;
|
||||
continue;
|
||||
|
|
@ -197,73 +230,6 @@ static bool ts_parser__condense_stack(TSParser *self) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static bool ts_parser__can_reuse(TSParser *self, StackVersion version,
|
||||
TSTree *tree) {
|
||||
if (tree->symbol == ts_builtin_sym_error) {
|
||||
LOG("cant_reuse_error tree:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tree->has_changes) {
|
||||
LOG("cant_reuse_changed tree:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
TSStateId state = ts_stack_top_state(self->stack, version);
|
||||
if (tree->parse_state != state) {
|
||||
if (ts_tree_is_fragile(tree)) {
|
||||
LOG("cant_reuse_fragile sym:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self->language, state, tree->symbol, &entry);
|
||||
|
||||
if (!entry.is_reusable) {
|
||||
LOG("cant_reuse_ambiguous sym:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (entry.action_count == 0) {
|
||||
LOG("cant_reuse_unexpected sym:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
TSParseAction action = entry.actions[entry.action_count - 1];
|
||||
if (tree->extra != action.extra) {
|
||||
LOG("cant_reuse_extra sym:%s", SYM_NAME(tree->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
TSStateId lex_state = self->language->lex_states[state];
|
||||
if (tree->first_leaf.lex_state != lex_state) {
|
||||
if (tree->child_count > 0) {
|
||||
TableEntry leaf_entry;
|
||||
ts_language_table_entry(self->language, state, tree->first_leaf.symbol,
|
||||
&leaf_entry);
|
||||
|
||||
if (!leaf_entry.is_reusable) {
|
||||
LOG("cant_reuse_first_leaf sym:%s, leaf_sym:%s",
|
||||
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tree->child_count == 1 && leaf_entry.depends_on_lookahead) {
|
||||
LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s",
|
||||
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol));
|
||||
return false;
|
||||
}
|
||||
} else if (entry.depends_on_lookahead) {
|
||||
LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s",
|
||||
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state,
|
||||
bool error_mode) {
|
||||
TSStateId state = self->language->lex_states[parse_state];
|
||||
|
|
@ -292,28 +258,51 @@ static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state,
|
|||
if (!result)
|
||||
return NULL;
|
||||
result->parse_state = parse_state;
|
||||
result->first_leaf.lex_state = state;
|
||||
}
|
||||
|
||||
result->first_leaf.lex_state = state;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Releases the parser's cached token and leaves the cache slot set to NULL.
 */
static void ts_parser__clear_cached_token(TSParser *self) {
  TSTree *stale_token = self->cached_token;
  self->cached_token = NULL;
  ts_tree_release(stale_token);
}
|
||||
|
||||
static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
|
||||
ReusableNode *reusable_node) {
|
||||
TSLength position = ts_stack_top_position(self->stack, version);
|
||||
|
||||
while (reusable_node->tree) {
|
||||
if (reusable_node->char_index > position.chars) {
|
||||
LOG("before_reusable sym:%s, pos:%lu",
|
||||
SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index);
|
||||
break;
|
||||
}
|
||||
|
||||
if (reusable_node->char_index < position.chars) {
|
||||
LOG("past_reusable sym:%s", SYM_NAME(reusable_node->tree->symbol));
|
||||
LOG("past_reusable sym:%s, pos:%lu",
|
||||
SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index);
|
||||
ts_parser__pop_reusable_node(reusable_node);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!ts_parser__can_reuse(self, version, reusable_node->tree)) {
|
||||
if (reusable_node->tree->has_changes) {
|
||||
LOG("cant_reuse_changed tree:%s, size:%lu",
|
||||
SYM_NAME(reusable_node->tree->symbol),
|
||||
reusable_node->tree->size.chars);
|
||||
if (!ts_parser__breakdown_reusable_node(reusable_node)) {
|
||||
ts_parser__pop_reusable_node(reusable_node);
|
||||
CHECK(ts_parser__breakdown_top_of_stack(self, version));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (reusable_node->tree->symbol == ts_builtin_sym_error) {
|
||||
LOG("cant_reuse_error tree:%s, size:%lu",
|
||||
SYM_NAME(reusable_node->tree->symbol),
|
||||
reusable_node->tree->size.chars);
|
||||
if (!ts_parser__breakdown_reusable_node(reusable_node)) {
|
||||
ts_parser__pop_reusable_node(reusable_node);
|
||||
CHECK(ts_parser__breakdown_top_of_stack(self, version));
|
||||
|
|
@ -322,14 +311,15 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
|
|||
}
|
||||
|
||||
TSTree *result = reusable_node->tree;
|
||||
TSLength size = ts_tree_total_size(result);
|
||||
LOG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), size.chars,
|
||||
result->extra);
|
||||
ts_parser__pop_reusable_node(reusable_node);
|
||||
ts_tree_retain(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
if (self->cached_token && position.chars == self->cached_token_char_index) {
|
||||
ts_tree_retain(self->cached_token);
|
||||
return self->cached_token;
|
||||
}
|
||||
|
||||
ts_lexer_reset(&self->lexer, position);
|
||||
TSStateId parse_state = ts_stack_top_state(self->stack, version);
|
||||
bool error_mode = parse_state == TS_STATE_ERROR;
|
||||
|
|
@ -497,7 +487,7 @@ static Reduction ts_parser__reduce(TSParser *self, StackVersion version,
|
|||
}
|
||||
|
||||
TSStateId state = ts_stack_top_state(self->stack, slice.version);
|
||||
if (fragile || self->is_split || initial_version_count > 1) {
|
||||
if (fragile || self->is_split || pop.slices.size > 1 || initial_version_count > 1) {
|
||||
parent->fragile_left = true;
|
||||
parent->fragile_right = true;
|
||||
parent->parse_state = TS_TREE_STATE_NONE;
|
||||
|
|
@ -682,13 +672,13 @@ static StackIterateAction ts_parser__error_repair_callback(
|
|||
}
|
||||
|
||||
static RepairResult ts_parser__repair_error(TSParser *self, StackSlice slice,
|
||||
TSTree *lookahead,
|
||||
TSSymbol lookahead_symbol,
|
||||
const TSParseAction *actions,
|
||||
size_t action_count) {
|
||||
LOG("repair_error");
|
||||
ErrorRepairSession session = {
|
||||
.parser = self,
|
||||
.lookahead_symbol = lookahead->symbol,
|
||||
.lookahead_symbol = lookahead_symbol,
|
||||
.found_repair = false,
|
||||
.trees_above_error = &slice.trees,
|
||||
.tree_count_above_error = ts_tree_array_essential_count(&slice.trees),
|
||||
|
|
@ -784,7 +774,8 @@ static void ts_parser__start(TSParser *self, TSInput input,
|
|||
|
||||
ts_lexer_set_input(&self->lexer, input);
|
||||
ts_stack_clear(self->stack);
|
||||
|
||||
self->reusable_node = (ReusableNode){ previous_tree, 0 };
|
||||
self->cached_token = NULL;
|
||||
self->finished_tree = NULL;
|
||||
}
|
||||
|
||||
|
|
@ -819,6 +810,7 @@ static bool ts_parser__accept(TSParser *self, StackVersion version) {
|
|||
|
||||
if (ts_parser__select_tree(self, self->finished_tree, root)) {
|
||||
ts_tree_release(self->finished_tree);
|
||||
assert(root->ref_count > 0);
|
||||
self->finished_tree = root;
|
||||
} else {
|
||||
ts_tree_release(root);
|
||||
|
|
@ -834,9 +826,9 @@ error:
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool ts_parser__handle_error(TSParser *self, StackVersion version,
|
||||
TSStateId state, TSTree *lookahead) {
|
||||
static bool ts_parser__handle_error(TSParser *self, StackVersion version) {
|
||||
size_t previous_version_count = ts_stack_version_count(self->stack);
|
||||
TSStateId state = ts_stack_top_state(self->stack, version);
|
||||
|
||||
unsigned error_cost = ts_stack_error_cost(self->stack, version);
|
||||
unsigned error_depth = ts_stack_error_depth(self->stack, version) + 1;
|
||||
|
|
@ -944,22 +936,59 @@ error:
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
|
||||
TSTree *lookahead) {
|
||||
static bool ts_parser__advance(TSParser *self, StackVersion version,
|
||||
ReusableNode *reusable_node) {
|
||||
bool validated_lookahead = false;
|
||||
TSTree *lookahead = ts_parser__get_lookahead(self, version, reusable_node);
|
||||
CHECK(lookahead);
|
||||
|
||||
for (;;) {
|
||||
TSStateId state = ts_stack_top_state(self->stack, version);
|
||||
|
||||
TableEntry table_entry;
|
||||
ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol,
|
||||
&table_entry);
|
||||
|
||||
if (!validated_lookahead) {
|
||||
if (!ts_parser__can_reuse(self, state, lookahead, &table_entry)) {
|
||||
if (lookahead == reusable_node->tree)
|
||||
ts_parser__pop_reusable_node_leaf(reusable_node);
|
||||
else
|
||||
ts_parser__clear_cached_token(self);
|
||||
|
||||
ts_tree_release(lookahead);
|
||||
lookahead = ts_parser__get_lookahead(self, version, reusable_node);
|
||||
CHECK(lookahead);
|
||||
continue;
|
||||
}
|
||||
|
||||
validated_lookahead = true;
|
||||
LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol),
|
||||
lookahead->size.chars);
|
||||
}
|
||||
|
||||
bool reduction_stopped_at_error = false;
|
||||
StackVersion last_reduction_version = STACK_VERSION_NONE;
|
||||
|
||||
size_t action_count;
|
||||
const TSParseAction *actions = ts_language_actions(
|
||||
self->language, state, lookahead->symbol, &action_count);
|
||||
|
||||
for (size_t i = 0; i < action_count; i++) {
|
||||
TSParseAction action = actions[i];
|
||||
for (size_t i = 0; i < table_entry.action_count; i++) {
|
||||
TSParseAction action = table_entry.actions[i];
|
||||
|
||||
switch (action.type) {
|
||||
case TSParseActionTypeShift: {
|
||||
if (lookahead->child_count > 0) {
|
||||
if (ts_parser__breakdown_lookahead(self, &lookahead, state, reusable_node)) {
|
||||
if (!ts_parser__can_reuse(self, state, lookahead, &table_entry)) {
|
||||
ts_parser__pop_reusable_node(reusable_node);
|
||||
ts_tree_release(lookahead);
|
||||
lookahead = ts_parser__get_lookahead(self, version, reusable_node);
|
||||
CHECK(lookahead);
|
||||
}
|
||||
}
|
||||
|
||||
action = *ts_language_last_action(self->language, state,
|
||||
lookahead->symbol);
|
||||
}
|
||||
|
||||
TSStateId next_state;
|
||||
if (action.extra) {
|
||||
next_state = state;
|
||||
|
|
@ -971,6 +1000,11 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
|
|||
|
||||
CHECK(ts_parser__shift(self, version, next_state, lookahead,
|
||||
action.extra));
|
||||
|
||||
if (lookahead == reusable_node->tree)
|
||||
ts_parser__pop_reusable_node(reusable_node);
|
||||
|
||||
ts_tree_release(lookahead);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -981,14 +1015,14 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
|
|||
if (action.extra) {
|
||||
LOG("reduce_extra");
|
||||
} else {
|
||||
LOG("reduce sym:%s, child_count:%u, fragile:%s",
|
||||
SYM_NAME(action.symbol), action.child_count,
|
||||
BOOL_STRING(action.fragile));
|
||||
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.symbol),
|
||||
action.child_count);
|
||||
}
|
||||
|
||||
Reduction reduction =
|
||||
ts_parser__reduce(self, version, action.symbol, action.child_count,
|
||||
action.extra, action.fragile, true);
|
||||
action.extra, (i < table_entry.action_count - 1),
|
||||
true);
|
||||
|
||||
switch (reduction.status) {
|
||||
case ReduceFailed:
|
||||
|
|
@ -998,8 +1032,9 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
|
|||
break;
|
||||
case ReduceStoppedAtError: {
|
||||
reduction_stopped_at_error = true;
|
||||
switch (ts_parser__repair_error(self, reduction.slice, lookahead,
|
||||
actions, action_count)) {
|
||||
switch (ts_parser__repair_error(
|
||||
self, reduction.slice, lookahead->first_leaf.symbol,
|
||||
table_entry.actions, table_entry.action_count)) {
|
||||
case RepairFailed:
|
||||
goto error;
|
||||
case RepairNoneFound:
|
||||
|
|
@ -1020,20 +1055,35 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
|
|||
|
||||
LOG("accept");
|
||||
CHECK(ts_parser__accept(self, version));
|
||||
|
||||
ts_tree_release(lookahead);
|
||||
return true;
|
||||
}
|
||||
|
||||
case TSParseActionTypeRecover: {
|
||||
while (lookahead->child_count > 0) {
|
||||
ts_parser__breakdown_reusable_node(reusable_node);
|
||||
ts_tree_release(lookahead);
|
||||
lookahead = reusable_node->tree;
|
||||
ts_tree_retain(lookahead);
|
||||
}
|
||||
action =
|
||||
*ts_language_last_action(self->language, state, lookahead->symbol);
|
||||
|
||||
CHECK(ts_parser__recover(self, version, action.to_state, lookahead));
|
||||
|
||||
if (lookahead == reusable_node->tree)
|
||||
ts_parser__pop_reusable_node(reusable_node);
|
||||
|
||||
ts_tree_release(lookahead);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_STACK();
|
||||
}
|
||||
|
||||
if (last_reduction_version != STACK_VERSION_NONE) {
|
||||
ts_stack_renumber_version(self->stack, last_reduction_version, version);
|
||||
LOG_STACK();
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -1041,15 +1091,22 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
|
|||
case BreakdownFailed:
|
||||
goto error;
|
||||
case BreakdownPerformed:
|
||||
break;
|
||||
continue;
|
||||
case BreakdownAborted:
|
||||
CHECK(ts_parser__handle_error(self, version, state, lookahead));
|
||||
if (ts_stack_is_halted(self->stack, version))
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
|
||||
CHECK(ts_parser__handle_error(self, version));
|
||||
|
||||
if (ts_stack_is_halted(self->stack, version)) {
|
||||
ts_tree_release(lookahead);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
error:
|
||||
if (lookahead)
|
||||
ts_tree_release(lookahead);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -1087,16 +1144,14 @@ void ts_parser_destroy(TSParser *self) {
|
|||
|
||||
TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *old_tree) {
|
||||
ts_parser__start(self, input, old_tree);
|
||||
StackVersion version = 0;
|
||||
size_t last_position = 0, position = 0;
|
||||
ReusableNode reusable_node, current_reusable_node = { old_tree, 0 };
|
||||
|
||||
for (;;) {
|
||||
TSTree *lookahead = NULL;
|
||||
size_t lookahead_position = 0;
|
||||
StackVersion version = STACK_VERSION_NONE;
|
||||
size_t position = 0, last_position = 0;
|
||||
ReusableNode reusable_node;
|
||||
|
||||
do {
|
||||
for (version = 0; version < ts_stack_version_count(self->stack); version++) {
|
||||
reusable_node = current_reusable_node;
|
||||
reusable_node = self->reusable_node;
|
||||
last_position = position;
|
||||
|
||||
while (!ts_stack_is_halted(self->stack, version)) {
|
||||
|
|
@ -1111,44 +1166,25 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *old_tree) {
|
|||
ts_stack_top_position(self->stack, version).rows + 1,
|
||||
ts_stack_top_position(self->stack, version).columns + 1);
|
||||
|
||||
if (!lookahead || (position != lookahead_position) ||
|
||||
!ts_parser__can_reuse(self, version, lookahead)) {
|
||||
ts_tree_release(lookahead);
|
||||
lookahead = ts_parser__get_lookahead(self, version, &reusable_node);
|
||||
lookahead_position = position;
|
||||
CHECK(lookahead);
|
||||
}
|
||||
|
||||
LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol),
|
||||
ts_tree_total_chars(lookahead));
|
||||
|
||||
if (!ts_parser__consume_lookahead(self, version, lookahead)) {
|
||||
ts_tree_release(lookahead);
|
||||
goto error;
|
||||
}
|
||||
|
||||
CHECK(ts_parser__advance(self, version, &reusable_node));
|
||||
LOG_STACK();
|
||||
}
|
||||
}
|
||||
|
||||
current_reusable_node = reusable_node;
|
||||
self->reusable_node = reusable_node;
|
||||
|
||||
if (ts_parser__condense_stack(self)) {
|
||||
LOG("condense");
|
||||
LOG_STACK();
|
||||
}
|
||||
|
||||
ts_tree_release(lookahead);
|
||||
|
||||
if (version == 0)
|
||||
break;
|
||||
else
|
||||
self->is_split = (version > 1);
|
||||
}
|
||||
self->is_split = (version > 1);
|
||||
} while (version != 0);
|
||||
|
||||
LOG("done");
|
||||
LOG_TREE();
|
||||
ts_stack_clear(self->stack);
|
||||
ts_parser__clear_cached_token(self);
|
||||
ts_tree_assign_parents(self->finished_tree);
|
||||
return self->finished_tree;
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,11 @@ extern "C" {
|
|||
#include "runtime/array.h"
|
||||
#include "runtime/reduce_action.h"
|
||||
|
||||
typedef struct {
|
||||
TSTree *tree;
|
||||
size_t char_index;
|
||||
} ReusableNode;
|
||||
|
||||
typedef struct {
|
||||
TSLexer lexer;
|
||||
Stack *stack;
|
||||
|
|
@ -18,6 +23,9 @@ typedef struct {
|
|||
bool is_split;
|
||||
bool print_debugging_graphs;
|
||||
TSTree scratch_tree;
|
||||
TSTree *cached_token;
|
||||
size_t cached_token_char_index;
|
||||
ReusableNode reusable_node;
|
||||
} TSParser;
|
||||
|
||||
bool ts_parser_init(TSParser *);
|
||||
|
|
|
|||
|
|
@ -436,8 +436,8 @@ void ts_tree__print_dot_graph(const TSTree *self, size_t offset,
|
|||
if (self->extra)
|
||||
fprintf(f, ", fontcolor=gray");
|
||||
|
||||
fprintf(f, ", tooltip=\"%lu - %lu\"]\n", offset,
|
||||
offset + ts_tree_total_chars(self));
|
||||
fprintf(f, ", tooltip=\"range:%lu - %lu\nstate:%d\"]\n", offset,
|
||||
offset + ts_tree_total_chars(self), self->parse_state);
|
||||
for (size_t i = 0; i < self->child_count; i++) {
|
||||
const TSTree *child = self->children[i];
|
||||
ts_tree__print_dot_graph(child, offset, language, f);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue