Merge pull request #32 from tree-sitter/parse-table-compression

Parse table compression
This commit is contained in:
Max Brunsfeld 2016-07-31 21:45:46 -07:00 committed by GitHub
commit 4feceabd58
17 changed files with 324 additions and 196 deletions

View file

@ -0,0 +1,35 @@
#include <string>
#include <stdlib.h>
using std::string;
// Builds a random string of 0 to 11 characters, each drawn from the
// inclusive range [min, max].
//
// The span is `max - min + 1` so that `max` itself can be produced and so
// that `min == max` no longer triggers a modulo by zero. An inverted range
// (max < min) yields an empty string.
static string random_string(char min, char max) {
  string result;
  const long span = static_cast<long>(max) - static_cast<long>(min) + 1;
  if (span <= 0)
    return result;

  const size_t length = random() % 12;
  result.reserve(length);
  for (size_t i = 0; i < length; i++)
    result += static_cast<char>(min + random() % span);
  return result;
}
// Returns a one-character string holding a randomly chosen character of
// `characters`. An empty `characters` yields an empty string instead of
// performing a modulo by zero.
static string random_char(const string &characters) {
  if (characters.empty())
    return string();
  const size_t index = random() % characters.size();
  return string(1, characters[index]);
}
// Concatenates `count` randomly chosen fragments into one string. Each
// fragment is, with probability 6/10, a single character taken from
// "!(){}[]<>+-=", and otherwise a word produced by random_string('a', 'z').
//
// A space is inserted only between two consecutive words. The flag is
// cleared after a punctuation fragment; it used to stay set forever, so
// every word after the first was prefixed with a space even when it
// directly followed punctuation.
string random_words(size_t count) {
  string result;
  bool just_inserted_word = false;
  for (size_t i = 0; i < count; i++) {
    if (random() % 10 < 6) {
      result += random_char("!(){}[]<>+-=");
      just_inserted_word = false;
    } else {
      if (just_inserted_word)
        result += " ";
      result += random_string('a', 'z');
      just_inserted_word = true;
    }
  }
  return result;
}

View file

@ -0,0 +1,8 @@
#ifndef HELPERS_RANDOM_HELPERS_H_
#define HELPERS_RANDOM_HELPERS_H_

#include <stddef.h>  // size_t, used in the declaration below
#include <string>

// Returns a string assembled from `count` randomly generated fragments
// (single punctuation characters and short lowercase words).
std::string random_words(size_t count);

#endif  // HELPERS_RANDOM_HELPERS_H_

View file

@ -7,6 +7,7 @@
#include "helpers/point_helpers.h"
#include "helpers/encoding_helpers.h"
#include "helpers/record_alloc.h"
#include "helpers/random_helpers.h"
#include <set>
static void expect_the_correct_tree(TSNode node, TSDocument *document, string tree_string) {
@ -60,37 +61,6 @@ static void expect_a_consistent_tree(TSNode node, TSDocument *document) {
AssertThat(has_changes, Equals(some_child_has_changes));
}
// Builds a random string of 0 to 11 characters, each drawn from the
// inclusive range [min, max].
//
// The span is `max - min + 1` so that `max` itself can be produced and so
// that `min == max` no longer triggers a modulo by zero. An inverted range
// (max < min) yields an empty string.
static string random_string(char min, char max) {
  string result;
  const long span = static_cast<long>(max) - static_cast<long>(min) + 1;
  if (span <= 0)
    return result;

  const size_t length = random() % 12;
  result.reserve(length);
  for (size_t i = 0; i < length; i++)
    result += static_cast<char>(min + random() % span);
  return result;
}
// Returns a one-character string holding a randomly chosen character of
// `characters`. An empty `characters` yields an empty string instead of
// performing a modulo by zero.
static string random_char(const string &characters) {
  if (characters.empty())
    return string();
  const size_t index = random() % characters.size();
  return string(1, characters[index]);
}
// Concatenates `count` randomly chosen fragments into one string. Each
// fragment is, with probability 6/10, a single character taken from
// "!(){}[]<>+-=", and otherwise a word produced by random_string('a', 'z').
//
// A space is inserted only between two consecutive words. The flag is
// cleared after a punctuation fragment; it used to stay set forever, so
// every word after the first was prefixed with a space even when it
// directly followed punctuation.
static string random_words(size_t count) {
  string result;
  bool just_inserted_word = false;
  for (size_t i = 0; i < count; i++) {
    if (random() % 10 < 6) {
      result += random_char("!(){}[]<>+-=");
      just_inserted_word = false;
    } else {
      if (just_inserted_word)
        result += " ";
      result += random_string('a', 'z');
      just_inserted_word = true;
    }
  }
  return result;
}
START_TEST
describe("The Corpus", []() {

View file

@ -173,7 +173,7 @@ describe("Document", [&]() {
AssertThat(debugger->messages, Contains("new_parse"));
AssertThat(debugger->messages, Contains("lookahead char:'['"));
AssertThat(debugger->messages, Contains("reduce sym:array, child_count:4, fragile:false"));
AssertThat(debugger->messages, Contains("reduce sym:array, child_count:4"));
AssertThat(debugger->messages, Contains("accept"));
});

View file

@ -224,14 +224,14 @@ describe("Parser", [&]() {
"(identifier) "
"(math_op (number) (identifier)))))");
insert_text(strlen("x ^ (100 + abc"), ".d");
insert_text(strlen("x * (100 + abc"), ".d");
assert_root_node(
"(program (expression_statement (math_op "
"(identifier) "
"(math_op (number) (member_access (identifier) (identifier))))))");
AssertThat(input->strings_read, Equals(vector<string>({ " abc.d);", "" })));
AssertThat(input->strings_read, Equals(vector<string>({ " + abc.d)", "" })));
});
});

View file

@ -147,7 +147,7 @@ class LexTableBuilder {
}
auto replacements =
remove_duplicate_states<LexState, AdvanceAction>(&lex_table.states);
remove_duplicate_states<LexTable, AdvanceAction>(&lex_table);
for (ParseState &parse_state : parse_table->states) {
auto replacement = replacements.find(parse_state.lex_state_id);

View file

@ -70,6 +70,11 @@ class ParseTableBuilder {
if (error.type != TSCompileErrorTypeNone)
return { parse_table, error };
for (const ParseState &state : parse_table.states)
for (const auto &pair1 : state.entries)
for (const auto &pair2 : state.entries)
parse_table.symbols[pair1.first].compatible_symbols.insert(pair2.first);
build_error_parse_state();
allow_any_conflict = true;
@ -110,13 +115,12 @@ class ParseTableBuilder {
void build_error_parse_state() {
ParseState error_state;
for (const Symbol &symbol : recovery_tokens(lexical_grammar)) {
for (const Symbol &symbol : recovery_tokens(lexical_grammar))
add_out_of_context_parse_state(&error_state, symbol);
}
for (const Symbol &symbol : grammar.extra_tokens) {
error_state.entries[symbol].actions.push_back(ParseAction::ShiftExtra());
}
for (const Symbol &symbol : grammar.extra_tokens)
if (!error_state.entries.count(symbol))
error_state.entries[symbol].actions.push_back(ParseAction::ShiftExtra());
for (size_t i = 0; i < grammar.variables.size(); i++) {
Symbol symbol(i, false);
@ -134,8 +138,7 @@ class ParseTableBuilder {
const ParseItemSet &item_set = recovery_states[symbol];
if (!item_set.entries.empty()) {
ParseStateId state = add_parse_state(item_set);
error_state->entries[symbol].actions.push_back(
ParseAction::Recover(state));
error_state->entries[symbol].actions.push_back(ParseAction::Recover(state));
}
}
@ -193,8 +196,13 @@ class ParseTableBuilder {
void add_shift_extra_actions(ParseStateId state_id) {
ParseAction action = ParseAction::ShiftExtra();
ParseState &state = parse_table.states[state_id];
for (const Symbol &extra_symbol : grammar.extra_tokens)
add_action(state_id, extra_symbol, action, null_item_set);
if (!state.entries.count(extra_symbol) ||
(allow_any_conflict &&
state.entries[extra_symbol].actions.back().type ==
ParseActionTypeReduce))
parse_table.add_action(state_id, extra_symbol, action);
}
void add_reduce_extra_actions(ParseStateId state_id) {
@ -268,7 +276,7 @@ class ParseTableBuilder {
}
void remove_duplicate_parse_states() {
remove_duplicate_states<ParseState, ParseAction>(&parse_table.states);
remove_duplicate_states<ParseTable, ParseAction>(&parse_table);
}
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,

View file

@ -7,15 +7,15 @@
namespace tree_sitter {
namespace build_tables {
template <typename StateType, typename ActionType>
std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states) {
template <typename TableType, typename ActionType>
std::map<size_t, size_t> remove_duplicate_states(TableType *table) {
std::map<size_t, size_t> replacements;
while (true) {
std::map<size_t, size_t> duplicates;
for (size_t i = 0, size = states->size(); i < size; i++)
for (size_t i = 0, size = table->states.size(); i < size; i++)
for (size_t j = 0; j < i; j++)
if (states->at(i) == states->at(j)) {
if (!duplicates.count(j) && table->merge_state(j, i)) {
duplicates.insert({ i, j });
break;
}
@ -24,7 +24,7 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
break;
std::map<size_t, size_t> new_replacements;
for (size_t i = 0, size = states->size(); i < size; i++) {
for (size_t i = 0, size = table->states.size(); i < size; i++) {
size_t new_state_index = i;
auto duplicate = duplicates.find(i);
if (duplicate != duplicates.end())
@ -45,7 +45,7 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
replacement.second = new_state_index;
}
for (StateType &state : *states)
for (auto &state : table->states)
state.each_advance_action([&new_replacements](ActionType *action) {
auto new_replacement = new_replacements.find(action->state_index);
if (new_replacement != new_replacements.end())
@ -53,7 +53,7 @@ std::map<size_t, size_t> remove_duplicate_states(std::vector<StateType> *states)
});
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)
states->erase(states->begin() + i->first);
table->states.erase(table->states.begin() + i->first);
}
return replacements;

View file

@ -165,6 +165,9 @@ class CCodeGenerator {
line(".named = false,");
break;
case VariableTypeHidden:
line(".visible = false,");
line(".named = true,");
break;
case VariableTypeAuxiliary:
line(".visible = false,");
line(".named = false,");
@ -211,7 +214,7 @@ class CCodeGenerator {
}
void add_parse_table() {
add_parse_action_list_id(ParseTableEntry{ {}, true, false });
add_parse_action_list_id(ParseTableEntry{ {}, false, false });
size_t state_id = 0;
line("#pragma GCC diagnostic push");

View file

@ -71,4 +71,8 @@ LexState &LexTable::state(LexStateId id) {
return states[id];
}
bool LexTable::merge_state(size_t i, size_t j) {
return states[i] == states[j];
}
} // namespace tree_sitter

View file

@ -22,7 +22,7 @@ struct AdvanceAction {
AdvanceAction();
AdvanceAction(size_t, PrecedenceRange, bool);
bool operator==(const AdvanceAction &action) const;
bool operator==(const AdvanceAction &other) const;
size_t state_index;
PrecedenceRange precedence_range;
@ -66,6 +66,8 @@ class LexTable {
LexStateId add_state();
LexState &state(LexStateId state_id);
std::vector<LexState> states;
bool merge_state(size_t i, size_t j);
};
} // namespace tree_sitter

View file

@ -190,4 +190,51 @@ ParseAction &ParseTable::add_action(ParseStateId id, Symbol symbol,
return *state.entries[symbol].actions.rbegin();
}
// Reports whether any symbol in `state` maps to an entry equal to `entry`.
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
  auto cursor = state.entries.begin();
  const auto finish = state.entries.end();
  while (cursor != finish && !(cursor->second == entry))
    ++cursor;
  return cursor != finish;
}
bool ParseTable::merge_state(size_t i, size_t j) {
ParseState &state = states[i];
ParseState &other = states[j];
for (auto &entry : state.entries) {
const Symbol &symbol = entry.first;
const vector<ParseAction> &actions = entry.second.actions;
const auto &other_entry = other.entries.find(symbol);
if (other_entry == other.entries.end()) {
if (actions.back().type != ParseActionTypeReduce)
return false;
if (!has_entry(other, entry.second))
return false;
} else if (entry.second != other_entry->second) {
return false;
}
}
set<Symbol> symbols_to_merge;
for (auto &entry : other.entries) {
const Symbol &symbol = entry.first;
const vector<ParseAction> &actions = entry.second.actions;
if (!state.entries.count(symbol)) {
if (actions.back().type != ParseActionTypeReduce)
return false;
if (!has_entry(state, entry.second))
return false;
symbols_to_merge.insert(symbol);
}
}
for (const Symbol &symbol : symbols_to_merge)
state.entries[symbol] = other.entries.find(symbol)->second;
return true;
}
} // namespace tree_sitter

View file

@ -61,6 +61,10 @@ struct ParseTableEntry {
ParseTableEntry();
ParseTableEntry(const std::vector<ParseAction> &, bool, bool);
bool operator==(const ParseTableEntry &other) const;
inline bool operator!=(const ParseTableEntry &other) const {
return !operator==(other);
}
};
class ParseState {
@ -68,6 +72,7 @@ class ParseState {
ParseState();
std::set<rules::Symbol> expected_inputs() const;
bool operator==(const ParseState &) const;
bool merge(const ParseState &);
void each_advance_action(std::function<void(ParseAction *)>);
std::map<rules::Symbol, ParseTableEntry> entries;
@ -77,6 +82,7 @@ class ParseState {
struct ParseTableSymbolMetadata {
bool extra;
bool structural;
std::set<rules::Symbol> compatible_symbols;
};
class ParseTable {
@ -87,6 +93,7 @@ class ParseTable {
ParseAction action);
ParseAction &add_action(ParseStateId state_id, rules::Symbol symbol,
ParseAction action);
bool merge_state(size_t i, size_t j);
std::vector<ParseState> states;
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;

View file

@ -33,7 +33,7 @@ static inline size_t ts_node__offset_row(TSNode self) {
// Reports whether the tree behind `self` should be exposed to callers.
// With `include_anonymous` set, any visible tree qualifies; otherwise the
// tree must be both visible and named.
//
// This block used to contain two consecutive `return` statements; the
// first (`... : tree->named`) made the second unreachable. Only the
// `visible && named` form is kept, so a tree that is named but not visible
// is not reported as relevant.
static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
  const TSTree *tree = ts_node__tree(self);
  if (include_anonymous)
    return tree->visible;
  return tree->visible && tree->named;
}
static inline size_t ts_node__relevant_child_count(TSNode self,

View file

@ -49,11 +49,6 @@
static const unsigned ERROR_COST_THRESHOLD = 3;
// Cursor over a previously built tree whose nodes may be reused as
// lookahead during a new parse.
typedef struct {
// Node currently being considered for reuse.
TSTree *tree;
// Character offset tracked alongside `tree`; it is compared with the
// stack position's `chars` value when a lookahead is requested.
size_t char_index;
} ReusableNode;
typedef struct {
TSParser *parser;
TSSymbol lookahead_symbol;
@ -112,8 +107,6 @@ static BreakdownResult ts_parser__breakdown_top_of_stack(TSParser *self,
TSStateId state = ts_stack_top_state(self->stack, slice.version);
TSTree *parent = *array_front(&slice.trees);
LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol));
for (size_t j = 0; j < parent->child_count; j++) {
TSTree *child = parent->children[j];
pending = child->child_count > 0;
@ -136,6 +129,9 @@ static BreakdownResult ts_parser__breakdown_top_of_stack(TSParser *self,
CHECK(ts_parser__push(self, slice.version, tree, state));
}
LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol));
LOG_STACK();
ts_tree_release(parent);
array_delete(&slice.trees);
}
@ -161,8 +157,7 @@ static void ts_parser__pop_reusable_node(ReusableNode *reusable_node) {
}
static bool ts_parser__breakdown_reusable_node(ReusableNode *reusable_node) {
if (reusable_node->tree->symbol == ts_builtin_sym_error ||
reusable_node->tree->child_count == 0) {
if (reusable_node->tree->child_count == 0) {
return false;
} else {
reusable_node->tree = reusable_node->tree->children[0];
@ -170,10 +165,48 @@ static bool ts_parser__breakdown_reusable_node(ReusableNode *reusable_node) {
}
}
// Descends through the reusable node while it still has children and
// either was built in a parse state other than `state` or is fragile on
// either side. If at least one descent happened, `*lookahead` is replaced
// by the node reached and true is returned; otherwise `*lookahead` is left
// alone and false is returned.
//
// `self` is kept in the signature for the LOG macro.
static bool ts_parser__breakdown_lookahead(TSParser *self, TSTree **lookahead,
                                           TSStateId state,
                                           ReusableNode *reusable_node) {
  bool did_break_down = false;
  while (reusable_node->tree->child_count > 0 &&
         (reusable_node->tree->parse_state != state ||
          reusable_node->tree->fragile_left ||
          reusable_node->tree->fragile_right)) {
    LOG("state_mismatch sym:%s", SYM_NAME(reusable_node->tree->symbol));
    ts_parser__breakdown_reusable_node(reusable_node);
    did_break_down = true;
  }

  if (did_break_down) {
    LOG("lookahead sym:%s", SYM_NAME(reusable_node->tree->symbol));
    // Retain the replacement before releasing the old lookahead: the
    // replacement may be a descendant of the tree being released, so
    // releasing first could drop it before it is retained.
    TSTree *replacement = reusable_node->tree;
    ts_tree_retain(replacement);
    ts_tree_release(*lookahead);
    *lookahead = replacement;
  }

  return did_break_down;
}
// Moves the reusable node down to its leftmost leaf (following first
// children until a node without children is reached) and then pops it.
static void ts_parser__pop_reusable_node_leaf(ReusableNode *reusable_node) {
  TSTree *leaf = reusable_node->tree;
  while (leaf->child_count > 0)
    leaf = leaf->children[0];
  reusable_node->tree = leaf;
  ts_parser__pop_reusable_node(reusable_node);
}
// Decides whether `tree` may be used as the lookahead in parse state
// `state`, given the table entry looked up for it.
//
// A tree whose first leaf was lexed in the lex state that `state` uses is
// always accepted. Otherwise the entry must be reusable, and if the entry
// depends on lookahead the tree must have more than one child and contain
// no errors.
static bool ts_parser__can_reuse(TSParser *self, TSStateId state, TSTree *tree,
                                 TableEntry *table_entry) {
  TSStateId expected_lex_state = self->language->lex_states[state];
  if (tree->first_leaf.lex_state == expected_lex_state)
    return true;

  bool lookahead_safe = !table_entry->depends_on_lookahead ||
                        (tree->child_count > 1 && tree->error_size == 0);
  return table_entry->is_reusable && lookahead_safe;
}
static bool ts_parser__condense_stack(TSParser *self) {
bool result = false;
for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
if (ts_stack_is_halted(self->stack, i)) {
result = true;
ts_stack_remove_version(self->stack, i);
i--;
continue;
@ -197,73 +230,6 @@ static bool ts_parser__condense_stack(TSParser *self) {
return result;
}
// Decides whether `tree` can be reused as the lookahead for stack version
// `version`. Every rejection logs its reason and returns false; the
// function returns true only when no check fails.
static bool ts_parser__can_reuse(TSParser *self, StackVersion version,
TSTree *tree) {
// Error trees are never reused.
if (tree->symbol == ts_builtin_sym_error) {
LOG("cant_reuse_error tree:%s", SYM_NAME(tree->symbol));
return false;
}
// Trees flagged as changed are never reused.
if (tree->has_changes) {
LOG("cant_reuse_changed tree:%s", SYM_NAME(tree->symbol));
return false;
}
TSStateId state = ts_stack_top_state(self->stack, version);
// The remaining checks apply only when the tree was built in a parse
// state other than the one now on top of the stack.
if (tree->parse_state != state) {
if (ts_tree_is_fragile(tree)) {
LOG("cant_reuse_fragile sym:%s", SYM_NAME(tree->symbol));
return false;
}
// Look up the table entry for the tree's own symbol in the current state.
TableEntry entry;
ts_language_table_entry(self->language, state, tree->symbol, &entry);
if (!entry.is_reusable) {
LOG("cant_reuse_ambiguous sym:%s", SYM_NAME(tree->symbol));
return false;
}
// No action at all means the symbol is not expected in this state.
if (entry.action_count == 0) {
LOG("cant_reuse_unexpected sym:%s", SYM_NAME(tree->symbol));
return false;
}
// The tree's `extra` flag must agree with that of the entry's last action.
TSParseAction action = entry.actions[entry.action_count - 1];
if (tree->extra != action.extra) {
LOG("cant_reuse_extra sym:%s", SYM_NAME(tree->symbol));
return false;
}
// If the first leaf was lexed in a different lex state than the one the
// current parse state uses, apply further restrictions.
TSStateId lex_state = self->language->lex_states[state];
if (tree->first_leaf.lex_state != lex_state) {
if (tree->child_count > 0) {
// Tree with children: consult the table entry of its first leaf.
TableEntry leaf_entry;
ts_language_table_entry(self->language, state, tree->first_leaf.symbol,
&leaf_entry);
if (!leaf_entry.is_reusable) {
LOG("cant_reuse_first_leaf sym:%s, leaf_sym:%s",
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol));
return false;
}
// A single-child tree is rejected when its leaf entry depends on lookahead.
if (tree->child_count == 1 && leaf_entry.depends_on_lookahead) {
LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s",
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol));
return false;
}
} else if (entry.depends_on_lookahead) {
// Tree without children: rejected when its own entry depends on lookahead.
LOG("cant_reuse_lookahead_dependent sym:%s, leaf_sym:%s",
SYM_NAME(tree->symbol), SYM_NAME(tree->first_leaf.symbol));
return false;
}
}
}
return true;
}
static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state,
bool error_mode) {
TSStateId state = self->language->lex_states[parse_state];
@ -292,28 +258,51 @@ static TSTree *ts_parser__lex(TSParser *self, TSStateId parse_state,
if (!result)
return NULL;
result->parse_state = parse_state;
result->first_leaf.lex_state = state;
}
result->first_leaf.lex_state = state;
return result;
}
// Drops the parser's reference to its cached token and clears the slot.
static void ts_parser__clear_cached_token(TSParser *self) {
  TSTree *token = self->cached_token;
  self->cached_token = NULL;
  ts_tree_release(token);
}
static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
ReusableNode *reusable_node) {
TSLength position = ts_stack_top_position(self->stack, version);
while (reusable_node->tree) {
if (reusable_node->char_index > position.chars) {
LOG("before_reusable sym:%s, pos:%lu",
SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index);
break;
}
if (reusable_node->char_index < position.chars) {
LOG("past_reusable sym:%s", SYM_NAME(reusable_node->tree->symbol));
LOG("past_reusable sym:%s, pos:%lu",
SYM_NAME(reusable_node->tree->symbol), reusable_node->char_index);
ts_parser__pop_reusable_node(reusable_node);
continue;
}
if (!ts_parser__can_reuse(self, version, reusable_node->tree)) {
if (reusable_node->tree->has_changes) {
LOG("cant_reuse_changed tree:%s, size:%lu",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.chars);
if (!ts_parser__breakdown_reusable_node(reusable_node)) {
ts_parser__pop_reusable_node(reusable_node);
CHECK(ts_parser__breakdown_top_of_stack(self, version));
}
continue;
}
if (reusable_node->tree->symbol == ts_builtin_sym_error) {
LOG("cant_reuse_error tree:%s, size:%lu",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.chars);
if (!ts_parser__breakdown_reusable_node(reusable_node)) {
ts_parser__pop_reusable_node(reusable_node);
CHECK(ts_parser__breakdown_top_of_stack(self, version));
@ -322,14 +311,15 @@ static TSTree *ts_parser__get_lookahead(TSParser *self, StackVersion version,
}
TSTree *result = reusable_node->tree;
TSLength size = ts_tree_total_size(result);
LOG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), size.chars,
result->extra);
ts_parser__pop_reusable_node(reusable_node);
ts_tree_retain(result);
return result;
}
if (self->cached_token && position.chars == self->cached_token_char_index) {
ts_tree_retain(self->cached_token);
return self->cached_token;
}
ts_lexer_reset(&self->lexer, position);
TSStateId parse_state = ts_stack_top_state(self->stack, version);
bool error_mode = parse_state == TS_STATE_ERROR;
@ -497,7 +487,7 @@ static Reduction ts_parser__reduce(TSParser *self, StackVersion version,
}
TSStateId state = ts_stack_top_state(self->stack, slice.version);
if (fragile || self->is_split || initial_version_count > 1) {
if (fragile || self->is_split || pop.slices.size > 1 || initial_version_count > 1) {
parent->fragile_left = true;
parent->fragile_right = true;
parent->parse_state = TS_TREE_STATE_NONE;
@ -682,13 +672,13 @@ static StackIterateAction ts_parser__error_repair_callback(
}
static RepairResult ts_parser__repair_error(TSParser *self, StackSlice slice,
TSTree *lookahead,
TSSymbol lookahead_symbol,
const TSParseAction *actions,
size_t action_count) {
LOG("repair_error");
ErrorRepairSession session = {
.parser = self,
.lookahead_symbol = lookahead->symbol,
.lookahead_symbol = lookahead_symbol,
.found_repair = false,
.trees_above_error = &slice.trees,
.tree_count_above_error = ts_tree_array_essential_count(&slice.trees),
@ -784,7 +774,8 @@ static void ts_parser__start(TSParser *self, TSInput input,
ts_lexer_set_input(&self->lexer, input);
ts_stack_clear(self->stack);
self->reusable_node = (ReusableNode){ previous_tree, 0 };
self->cached_token = NULL;
self->finished_tree = NULL;
}
@ -819,6 +810,7 @@ static bool ts_parser__accept(TSParser *self, StackVersion version) {
if (ts_parser__select_tree(self, self->finished_tree, root)) {
ts_tree_release(self->finished_tree);
assert(root->ref_count > 0);
self->finished_tree = root;
} else {
ts_tree_release(root);
@ -834,9 +826,9 @@ error:
return false;
}
static bool ts_parser__handle_error(TSParser *self, StackVersion version,
TSStateId state, TSTree *lookahead) {
static bool ts_parser__handle_error(TSParser *self, StackVersion version) {
size_t previous_version_count = ts_stack_version_count(self->stack);
TSStateId state = ts_stack_top_state(self->stack, version);
unsigned error_cost = ts_stack_error_cost(self->stack, version);
unsigned error_depth = ts_stack_error_depth(self->stack, version) + 1;
@ -944,22 +936,59 @@ error:
return false;
}
static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
TSTree *lookahead) {
static bool ts_parser__advance(TSParser *self, StackVersion version,
ReusableNode *reusable_node) {
bool validated_lookahead = false;
TSTree *lookahead = ts_parser__get_lookahead(self, version, reusable_node);
CHECK(lookahead);
for (;;) {
TSStateId state = ts_stack_top_state(self->stack, version);
TableEntry table_entry;
ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol,
&table_entry);
if (!validated_lookahead) {
if (!ts_parser__can_reuse(self, state, lookahead, &table_entry)) {
if (lookahead == reusable_node->tree)
ts_parser__pop_reusable_node_leaf(reusable_node);
else
ts_parser__clear_cached_token(self);
ts_tree_release(lookahead);
lookahead = ts_parser__get_lookahead(self, version, reusable_node);
CHECK(lookahead);
continue;
}
validated_lookahead = true;
LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol),
lookahead->size.chars);
}
bool reduction_stopped_at_error = false;
StackVersion last_reduction_version = STACK_VERSION_NONE;
size_t action_count;
const TSParseAction *actions = ts_language_actions(
self->language, state, lookahead->symbol, &action_count);
for (size_t i = 0; i < action_count; i++) {
TSParseAction action = actions[i];
for (size_t i = 0; i < table_entry.action_count; i++) {
TSParseAction action = table_entry.actions[i];
switch (action.type) {
case TSParseActionTypeShift: {
if (lookahead->child_count > 0) {
if (ts_parser__breakdown_lookahead(self, &lookahead, state, reusable_node)) {
if (!ts_parser__can_reuse(self, state, lookahead, &table_entry)) {
ts_parser__pop_reusable_node(reusable_node);
ts_tree_release(lookahead);
lookahead = ts_parser__get_lookahead(self, version, reusable_node);
CHECK(lookahead);
}
}
action = *ts_language_last_action(self->language, state,
lookahead->symbol);
}
TSStateId next_state;
if (action.extra) {
next_state = state;
@ -971,6 +1000,11 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
CHECK(ts_parser__shift(self, version, next_state, lookahead,
action.extra));
if (lookahead == reusable_node->tree)
ts_parser__pop_reusable_node(reusable_node);
ts_tree_release(lookahead);
return true;
}
@ -981,14 +1015,14 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
if (action.extra) {
LOG("reduce_extra");
} else {
LOG("reduce sym:%s, child_count:%u, fragile:%s",
SYM_NAME(action.symbol), action.child_count,
BOOL_STRING(action.fragile));
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.symbol),
action.child_count);
}
Reduction reduction =
ts_parser__reduce(self, version, action.symbol, action.child_count,
action.extra, action.fragile, true);
action.extra, (i < table_entry.action_count - 1),
true);
switch (reduction.status) {
case ReduceFailed:
@ -998,8 +1032,9 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
break;
case ReduceStoppedAtError: {
reduction_stopped_at_error = true;
switch (ts_parser__repair_error(self, reduction.slice, lookahead,
actions, action_count)) {
switch (ts_parser__repair_error(
self, reduction.slice, lookahead->first_leaf.symbol,
table_entry.actions, table_entry.action_count)) {
case RepairFailed:
goto error;
case RepairNoneFound:
@ -1020,20 +1055,35 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
LOG("accept");
CHECK(ts_parser__accept(self, version));
ts_tree_release(lookahead);
return true;
}
case TSParseActionTypeRecover: {
while (lookahead->child_count > 0) {
ts_parser__breakdown_reusable_node(reusable_node);
ts_tree_release(lookahead);
lookahead = reusable_node->tree;
ts_tree_retain(lookahead);
}
action =
*ts_language_last_action(self->language, state, lookahead->symbol);
CHECK(ts_parser__recover(self, version, action.to_state, lookahead));
if (lookahead == reusable_node->tree)
ts_parser__pop_reusable_node(reusable_node);
ts_tree_release(lookahead);
return true;
}
}
LOG_STACK();
}
if (last_reduction_version != STACK_VERSION_NONE) {
ts_stack_renumber_version(self->stack, last_reduction_version, version);
LOG_STACK();
continue;
}
@ -1041,15 +1091,22 @@ static bool ts_parser__consume_lookahead(TSParser *self, StackVersion version,
case BreakdownFailed:
goto error;
case BreakdownPerformed:
break;
continue;
case BreakdownAborted:
CHECK(ts_parser__handle_error(self, version, state, lookahead));
if (ts_stack_is_halted(self->stack, version))
return true;
break;
}
CHECK(ts_parser__handle_error(self, version));
if (ts_stack_is_halted(self->stack, version)) {
ts_tree_release(lookahead);
return true;
}
}
error:
if (lookahead)
ts_tree_release(lookahead);
return false;
}
@ -1087,16 +1144,14 @@ void ts_parser_destroy(TSParser *self) {
TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *old_tree) {
ts_parser__start(self, input, old_tree);
StackVersion version = 0;
size_t last_position = 0, position = 0;
ReusableNode reusable_node, current_reusable_node = { old_tree, 0 };
for (;;) {
TSTree *lookahead = NULL;
size_t lookahead_position = 0;
StackVersion version = STACK_VERSION_NONE;
size_t position = 0, last_position = 0;
ReusableNode reusable_node;
do {
for (version = 0; version < ts_stack_version_count(self->stack); version++) {
reusable_node = current_reusable_node;
reusable_node = self->reusable_node;
last_position = position;
while (!ts_stack_is_halted(self->stack, version)) {
@ -1111,44 +1166,25 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *old_tree) {
ts_stack_top_position(self->stack, version).rows + 1,
ts_stack_top_position(self->stack, version).columns + 1);
if (!lookahead || (position != lookahead_position) ||
!ts_parser__can_reuse(self, version, lookahead)) {
ts_tree_release(lookahead);
lookahead = ts_parser__get_lookahead(self, version, &reusable_node);
lookahead_position = position;
CHECK(lookahead);
}
LOG("lookahead sym:%s, size:%lu", SYM_NAME(lookahead->symbol),
ts_tree_total_chars(lookahead));
if (!ts_parser__consume_lookahead(self, version, lookahead)) {
ts_tree_release(lookahead);
goto error;
}
CHECK(ts_parser__advance(self, version, &reusable_node));
LOG_STACK();
}
}
current_reusable_node = reusable_node;
self->reusable_node = reusable_node;
if (ts_parser__condense_stack(self)) {
LOG("condense");
LOG_STACK();
}
ts_tree_release(lookahead);
if (version == 0)
break;
else
self->is_split = (version > 1);
}
self->is_split = (version > 1);
} while (version != 0);
LOG("done");
LOG_TREE();
ts_stack_clear(self->stack);
ts_parser__clear_cached_token(self);
ts_tree_assign_parents(self->finished_tree);
return self->finished_tree;

View file

@ -9,6 +9,11 @@ extern "C" {
#include "runtime/array.h"
#include "runtime/reduce_action.h"
// Cursor over a previously built tree whose nodes may be reused as
// lookahead during a new parse.
typedef struct {
// Node currently being considered for reuse.
TSTree *tree;
// Character offset tracked alongside `tree`.
// NOTE(review): presumably the offset at which `tree` starts — confirm
// against the code that advances this cursor.
size_t char_index;
} ReusableNode;
typedef struct {
TSLexer lexer;
Stack *stack;
@ -18,6 +23,9 @@ typedef struct {
bool is_split;
bool print_debugging_graphs;
TSTree scratch_tree;
TSTree *cached_token;
size_t cached_token_char_index;
ReusableNode reusable_node;
} TSParser;
bool ts_parser_init(TSParser *);

View file

@ -436,8 +436,8 @@ void ts_tree__print_dot_graph(const TSTree *self, size_t offset,
if (self->extra)
fprintf(f, ", fontcolor=gray");
fprintf(f, ", tooltip=\"%lu - %lu\"]\n", offset,
offset + ts_tree_total_chars(self));
fprintf(f, ", tooltip=\"range:%lu - %lu\nstate:%d\"]\n", offset,
offset + ts_tree_total_chars(self), self->parse_state);
for (size_t i = 0; i < self->child_count; i++) {
const TSTree *child = self->children[i];
ts_tree__print_dot_graph(child, offset, language, f);