Merge pull request #17 from maxbrunsfeld/optimize-fragile-token-handling

Improve likelihood of node reuse when reparsing
This commit is contained in:
Max Brunsfeld 2015-12-23 09:45:53 -08:00
commit f0c89f36f7
22 changed files with 566 additions and 591 deletions

View file

@ -47,6 +47,7 @@ typedef struct TSLexer {
size_t lookahead_size;
int32_t lookahead;
TSStateId starting_state;
TSInput input;
TSDebugger debugger;

View file

@ -102,20 +102,14 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
(lookahead == ' '))
ADVANCE(14);
if (lookahead == '\n')
ADVANCE(15);
ADVANCE(2);
if (lookahead == '\r')
ADVANCE(16);
ADVANCE(3);
if (lookahead == '\"')
ADVANCE(4);
if ('0' <= lookahead && lookahead <= '9')
ADVANCE(11);
LEX_ERROR();
case 15:
START_TOKEN();
ACCEPT_TOKEN(anon_sym_LF);
case 16:
START_TOKEN();
ACCEPT_TOKEN(anon_sym_CR);
case ts_lex_state_error:
START_TOKEN();
if (lookahead == 0)

File diff suppressed because it is too large Load diff

View file

@ -144,7 +144,7 @@ describe("Languages", [&]() {
std::set<std::pair<size_t, size_t>> deletions;
std::set<std::pair<size_t, string>> insertions;
for (size_t i = 0; i < 50; i++) {
for (size_t i = 0; i < 80; i++) {
size_t edit_position = random() % SpyInput::char_count(entry.input);
size_t deletion_size = random() % (SpyInput::char_count(entry.input) - edit_position);
string inserted_text = random_words(random() % 4 + 1);
@ -159,16 +159,6 @@ describe("Languages", [&]() {
ts_document_edit(doc, input->undo());
ts_document_parse(doc);
});
it_handles_edit_sequence("performing and repairing an insertion of " + description, [&]() {
ts_document_parse(doc);
ts_document_edit(doc, input->replace(edit_position, 0, inserted_text));
ts_document_parse(doc);
ts_document_edit(doc, input->undo());
ts_document_parse(doc);
});
}
if (deletions.insert({edit_position, deletion_size}).second) {
@ -181,16 +171,6 @@ describe("Languages", [&]() {
ts_document_edit(doc, input->undo());
ts_document_parse(doc);
});
it_handles_edit_sequence("performing and repairing a deletion of " + desription, [&]() {
ts_document_parse(doc);
ts_document_edit(doc, input->replace(edit_position, deletion_size, ""));
ts_document_parse(doc);
ts_document_edit(doc, input->undo());
ts_document_parse(doc);
});
}
}
}

View file

@ -128,7 +128,7 @@ describe("Stack", [&]() {
});
it("does not count 'extra' trees toward the count", [&]() {
ts_tree_set_extra(trees[1]);
trees[1]->extra = true;
Vector pop = ts_stack_pop(stack, 0, 2, false);
StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0);

View file

@ -47,8 +47,8 @@ describe("Tree", []() {
describe("make_leaf(sym, size, padding, is_hidden)", [&]() {
it("does not record that it is fragile", [&]() {
AssertThat(ts_tree_is_fragile_left(tree1), IsFalse());
AssertThat(ts_tree_is_fragile_right(tree1), IsFalse());
AssertThat(tree1->fragile_left, IsFalse());
AssertThat(tree1->fragile_right, IsFalse());
});
});
@ -59,8 +59,8 @@ describe("Tree", []() {
ts_length_zero(),
'z');
AssertThat(ts_tree_is_fragile_left(error_tree), IsTrue());
AssertThat(ts_tree_is_fragile_right(error_tree), IsTrue());
AssertThat(error_tree->fragile_left, IsTrue());
AssertThat(error_tree->fragile_right, IsTrue());
});
});
@ -81,8 +81,8 @@ describe("Tree", []() {
TSTree *parent;
before_each([&]() {
ts_tree_set_fragile_left(tree1);
ts_tree_set_extra(tree1);
tree1->fragile_left = true;
tree1->extra = true;
parent = ts_tree_make_node(eel, 2, tree_array({
tree1,
tree2,
@ -94,7 +94,7 @@ describe("Tree", []() {
});
it("records that it is fragile on the left side", [&]() {
AssertThat(ts_tree_is_fragile_left(parent), IsTrue());
AssertThat(parent->fragile_left, IsTrue());
});
});
@ -102,8 +102,8 @@ describe("Tree", []() {
TSTree *parent;
before_each([&]() {
ts_tree_set_fragile_right(tree2);
ts_tree_set_extra(tree2);
tree2->fragile_right = true;
tree2->extra = true;
parent = ts_tree_make_node(eel, 2, tree_array({
tree1,
tree2,
@ -115,7 +115,7 @@ describe("Tree", []() {
});
it("records that it is fragile on the right side", [&]() {
AssertThat(ts_tree_is_fragile_right(parent), IsTrue());
AssertThat(parent->fragile_right, IsTrue());
});
});
@ -123,8 +123,8 @@ describe("Tree", []() {
TSTree *parent;
before_each([&]() {
ts_tree_set_fragile_right(tree1);
ts_tree_set_fragile_left(tree2);
tree1->fragile_right = true;
tree2->fragile_left = true;
parent = ts_tree_make_node(eel, 2, tree_array({
tree1,
tree2,
@ -136,8 +136,8 @@ describe("Tree", []() {
});
it("records that it is not fragile", [&]() {
AssertThat(ts_tree_is_fragile_left(parent), IsFalse());
AssertThat(ts_tree_is_fragile_right(parent), IsFalse());
AssertThat(parent->fragile_left, IsFalse());
AssertThat(parent->fragile_right, IsFalse());
});
});
});
@ -175,15 +175,15 @@ describe("Tree", []() {
assert_consistent(tree);
AssertThat(tree->options.has_changes, IsTrue());
AssertThat(tree->has_changes, IsTrue());
AssertThat(tree->padding, Equals<TSLength>({0, 3, 0, 0}));
AssertThat(tree->size, Equals<TSLength>({13, 13, 0, 13}));
AssertThat(tree->children[0]->options.has_changes, IsTrue());
AssertThat(tree->children[0]->has_changes, IsTrue());
AssertThat(tree->children[0]->padding, Equals<TSLength>({0, 3, 0, 0}));
AssertThat(tree->children[0]->size, Equals<TSLength>({3, 3, 0, 3}));
AssertThat(tree->children[1]->options.has_changes, IsFalse());
AssertThat(tree->children[1]->has_changes, IsFalse());
AssertThat(tree->children[1]->padding, Equals<TSLength>({2, 2, 0, 2}));
AssertThat(tree->children[1]->size, Equals<TSLength>({3, 3, 0, 3}));
});
@ -195,11 +195,11 @@ describe("Tree", []() {
assert_consistent(tree);
AssertThat(tree->options.has_changes, IsTrue());
AssertThat(tree->has_changes, IsTrue());
AssertThat(tree->padding, Equals<TSLength>({0, 5, 0, 0}));
AssertThat(tree->size, Equals<TSLength>({0, 11, 0, 0}));
AssertThat(tree->children[0]->options.has_changes, IsTrue());
AssertThat(tree->children[0]->has_changes, IsTrue());
AssertThat(tree->children[0]->padding, Equals<TSLength>({0, 5, 0, 0}));
AssertThat(tree->children[0]->size, Equals<TSLength>({0, 1, 0, 0}));
});
@ -211,15 +211,15 @@ describe("Tree", []() {
assert_consistent(tree);
AssertThat(tree->options.has_changes, IsTrue());
AssertThat(tree->has_changes, IsTrue());
AssertThat(tree->padding, Equals<TSLength>({0, 4, 0, 0}));
AssertThat(tree->size, Equals<TSLength>({13, 13, 0, 13}));
AssertThat(tree->children[0]->options.has_changes, IsTrue());
AssertThat(tree->children[0]->has_changes, IsTrue());
AssertThat(tree->children[0]->padding, Equals<TSLength>({0, 4, 0, 0}));
AssertThat(tree->children[0]->size, Equals<TSLength>({3, 3, 0, 3}));
AssertThat(tree->children[1]->options.has_changes, IsFalse());
AssertThat(tree->children[1]->has_changes, IsFalse());
});
});
@ -229,15 +229,15 @@ describe("Tree", []() {
assert_consistent(tree);
AssertThat(tree->options.has_changes, IsTrue());
AssertThat(tree->has_changes, IsTrue());
AssertThat(tree->padding, Equals<TSLength>({2, 2, 0, 2}));
AssertThat(tree->size, Equals<TSLength>({0, 16, 0, 0}));
AssertThat(tree->children[0]->options.has_changes, IsTrue());
AssertThat(tree->children[0]->has_changes, IsTrue());
AssertThat(tree->children[0]->padding, Equals<TSLength>({2, 2, 0, 2}));
AssertThat(tree->children[0]->size, Equals<TSLength>({0, 6, 0, 0}));
AssertThat(tree->children[1]->options.has_changes, IsFalse());
AssertThat(tree->children[1]->has_changes, IsFalse());
});
});
@ -247,19 +247,19 @@ describe("Tree", []() {
assert_consistent(tree);
AssertThat(tree->options.has_changes, IsTrue());
AssertThat(tree->has_changes, IsTrue());
AssertThat(tree->padding, Equals<TSLength>({0, 4, 0, 0}));
AssertThat(tree->size, Equals<TSLength>({0, 4, 0, 0}));
AssertThat(tree->children[0]->options.has_changes, IsTrue());
AssertThat(tree->children[0]->has_changes, IsTrue());
AssertThat(tree->children[0]->padding, Equals<TSLength>({0, 4, 0, 0}));
AssertThat(tree->children[0]->size, Equals<TSLength>({0, 0, 0, 0}));
AssertThat(tree->children[1]->options.has_changes, IsTrue());
AssertThat(tree->children[1]->has_changes, IsTrue());
AssertThat(tree->children[1]->padding, Equals<TSLength>({0, 0, 0, 0}));
AssertThat(tree->children[1]->size, Equals<TSLength>({0, 0, 0, 0}));
AssertThat(tree->children[2]->options.has_changes, IsTrue());
AssertThat(tree->children[2]->has_changes, IsTrue());
AssertThat(tree->children[2]->padding, Equals<TSLength>({0, 1, 0, 0}));
AssertThat(tree->children[2]->size, Equals<TSLength>({3, 3, 0, 3}));
});
@ -344,7 +344,7 @@ describe("Tree", []() {
});
it("hides invisible nodes", [&]() {
tree2->options.visible = false;
tree2->visible = false;
char *string1 = ts_tree_string(parent1, names, true);
AssertThat(string(string1), Equals("(dog (cat))"));
@ -353,13 +353,13 @@ describe("Tree", []() {
describe("when the root node is not visible", [&]() {
it("still serializes it", [&]() {
parent1->options.visible = false;
parent1->visible = false;
char *string1 = ts_tree_string(parent1, names, true);
AssertThat(string(string1), Equals("(dog (cat) (cat))"));
free(string1);
tree1->options.visible = false;
tree1->visible = false;
char *string2 = ts_tree_string(tree1, names, true);
AssertThat(string(string2), Equals("(cat)"));

View file

@ -7,6 +7,7 @@
#include <utility>
#include <vector>
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/build_tables/remove_duplicate_states.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/does_match_any_line.h"
#include "compiler/parse_table.h"
@ -40,8 +41,7 @@ class LexTableBuilder {
public:
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar)
: lex_grammar(lex_grammar),
parse_table(parse_table) {
: lex_grammar(lex_grammar), parse_table(parse_table) {
for (const rule_ptr &rule : lex_grammar.separators)
separator_rules.push_back(rules::Repeat::build(rule));
separator_rules.push_back(rules::Blank::build());
@ -59,6 +59,7 @@ class LexTableBuilder {
populate_lex_state(error_item_set, LexTable::ERROR_STATE_ID);
mark_fragile_tokens();
remove_duplicate_lex_states();
return lex_table;
}
@ -161,6 +162,18 @@ class LexTableBuilder {
if (conflict_manager.fragile_tokens.count(state.default_action.symbol))
state.default_action.type = LexActionTypeAcceptFragile;
}
void remove_duplicate_lex_states() {
auto replacements =
remove_duplicate_states<LexState, LexAction, LexActionTypeAdvance>(
&lex_table.states);
for (ParseState &parse_state : parse_table->states) {
auto replacement = replacements.find(parse_state.lex_state_id);
if (replacement != replacements.end())
parse_state.lex_state_id = replacement->second;
}
}
};
LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) {

View file

@ -7,6 +7,7 @@
#include <utility>
#include "compiler/parse_table.h"
#include "compiler/build_tables/parse_conflict_manager.h"
#include "compiler/build_tables/remove_duplicate_states.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/lexical_grammar.h"
@ -42,8 +43,7 @@ class ParseTableBuilder {
public:
ParseTableBuilder(const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar)
: grammar(grammar),
lexical_grammar(lex_grammar) {}
: grammar(grammar), lexical_grammar(lex_grammar) {}
pair<ParseTable, const GrammarError *> build() {
Symbol start_symbol = Symbol(0, grammar.variables.empty());
@ -79,7 +79,7 @@ class ParseTableBuilder {
}
mark_fragile_actions();
remove_duplicate_states();
remove_duplicate_parse_states();
parse_table.symbols.insert({ rules::ERROR(), {} });
@ -192,49 +192,9 @@ class ParseTableBuilder {
}
}
void remove_duplicate_states() {
bool done = false;
while (!done) {
done = true;
map<ParseStateId, ParseStateId> replacements;
for (size_t i = 0, size = parse_table.states.size(); i < size; i++) {
for (size_t j = 0; j < i; j++) {
if (parse_table.states[i].actions == parse_table.states[j].actions) {
replacements.insert({ i, j });
done = false;
break;
}
}
}
for (ParseState &state : parse_table.states) {
for (auto &entry : state.actions) {
for (ParseAction &action : entry.second) {
if (action.type == ParseActionTypeShift) {
ParseStateId state_index = action.state_index;
auto replacement = replacements.find(action.state_index);
if (replacement != replacements.end()) {
state_index = replacement->second;
}
size_t prior_removed = 0;
for (const auto &replacement : replacements) {
if (replacement.first >= state_index)
break;
prior_removed++;
}
state_index -= prior_removed;
action.state_index = state_index;
}
}
}
}
for (auto i = replacements.rbegin(); i != replacements.rend(); ++i)
parse_table.states.erase(parse_table.states.begin() + i->first);
}
// Collapse parse states that compare equal, using shift actions as the
// state-referencing action type. The index mapping returned by the helper
// is not needed here and is discarded.
void remove_duplicate_parse_states() {
  auto *all_states = &parse_table.states;
  remove_duplicate_states<ParseState, ParseAction, ParseActionTypeShift>(
    all_states);
}
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,

View file

@ -22,9 +22,9 @@ pair<bool, ConflictType> ParseConflictManager::resolve(
case ParseActionTypeShift:
if (new_action.extra) {
return {false, ConflictTypeNone};
return { false, ConflictTypeNone };
} else if (old_action.extra) {
return {true, ConflictTypeNone};
return { true, ConflictTypeNone };
} else if (new_action.type == ParseActionTypeReduce) {
int min_precedence = old_action.precedence_range.min;
int max_precedence = old_action.precedence_range.max;

View file

@ -23,7 +23,8 @@ enum ConflictType {
class ParseConflictManager {
public:
std::pair<bool, ConflictType> resolve(const ParseAction &, const ParseAction &) const;
std::pair<bool, ConflictType> resolve(const ParseAction &,
const ParseAction &) const;
};
} // namespace build_tables

View file

@ -0,0 +1,58 @@
#ifndef COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_
#define COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_
#include <map>
#include <vector>
namespace tree_sitter {
namespace build_tables {
// Removes every state that compares equal (operator==) to an earlier state
// in `states`, rewriting the `state_index` of each action whose `type` equals
// `advance_action` so that it refers to the surviving state's new position.
// The process repeats until no duplicates remain, because redirecting actions
// can make states that previously differed become equal.
//
// Template parameters:
//   StateType      - must provide operator== and
//                    each_action(std::function<void(ActionType *)>).
//   ActionType     - must expose `type` and `state_index` members.
//   advance_action - the action type value whose `state_index` refers to
//                    another state in the same vector.
//
// Returns a map from ORIGINAL state index to FINAL state index. It contains
// an entry for every state whose index changed (because it was merged into
// an earlier state or shifted down by a removal), whether or not any action
// refers to that state. Indices that did not change have no entry, so callers
// should look indices up with find() and keep the old value on a miss.
template <typename StateType, typename ActionType, int advance_action>
std::map<size_t, size_t> remove_duplicate_states(
    std::vector<StateType> *states) {
  // final_index[k] is the current position of the state that started at k.
  // Tracking it for every state (not only those targeted by an action) is
  // what lets external references into the table be remapped correctly.
  std::vector<size_t> final_index(states->size());
  for (size_t i = 0; i < final_index.size(); i++)
    final_index[i] = i;

  while (true) {
    const size_t state_count = states->size();

    // Map each duplicate state to the earliest state it is equal to.
    std::map<size_t, size_t> duplicates;
    for (size_t i = 0; i < state_count; i++)
      for (size_t j = 0; j < i; j++)
        if (states->at(i) == states->at(j)) {
          duplicates.insert({ i, j });
          break;
        }

    if (duplicates.empty())
      break;

    // remap[i] is where state i of this pass lives after the removals: a
    // kept state slides down by the number of duplicates before it, and a
    // duplicate takes the new index of the (earlier) state it merges into.
    std::vector<size_t> remap(state_count);
    size_t removed_so_far = 0;
    for (size_t i = 0; i < state_count; i++) {
      auto duplicate = duplicates.find(i);
      if (duplicate != duplicates.end()) {
        remap[i] = remap[duplicate->second];
        removed_so_far++;
      } else {
        remap[i] = i - removed_so_far;
      }
    }

    const size_t removed_count = duplicates.size();
    for (StateType &state : *states)
      state.each_action([&remap, removed_count](ActionType *action) {
        if (action->type == advance_action) {
          size_t index = action->state_index;
          // Indices past the end of the table are shifted by the total
          // number of removals, as the previous implementation did.
          action->state_index =
            index < remap.size() ? remap[index] : index - removed_count;
        }
      });

    // Compose this pass onto the original -> current mapping.
    for (size_t &index : final_index)
      index = remap[index];

    // Erase from the back so earlier indices stay valid while erasing.
    for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)
      states->erase(states->begin() + i->first);
  }

  std::map<size_t, size_t> replacements;
  for (size_t i = 0; i < final_index.size(); i++)
    if (final_index[i] != i)
      replacements.insert({ i, final_index[i] });
  return replacements;
}
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_

View file

@ -3,6 +3,7 @@
namespace tree_sitter {
using std::function;
using std::string;
using std::to_string;
using std::map;
@ -53,6 +54,16 @@ set<CharacterSet> LexState::expected_inputs() const {
return result;
}
// Two lex states are equal when their token-start flag, default action and
// per-character-set actions all match.
bool LexState::operator==(const LexState &other) const {
  if (!(actions == other.actions))
    return false;
  if (!(default_action == other.default_action))
    return false;
  return is_token_start == other.is_token_start;
}
// Invoke `fn` with a mutable pointer to every action stored in `actions`,
// in the map's iteration order. The default action is not visited.
void LexState::each_action(function<void(LexAction *)> fn) {
  for (auto it = actions.begin(); it != actions.end(); ++it)
    fn(&it->second);
}
LexStateId LexTable::add_state() {
states.push_back(LexState());
return states.size() - 1;

View file

@ -56,9 +56,12 @@ namespace tree_sitter {
class LexState {
public:
LexState();
std::set<rules::CharacterSet> expected_inputs() const;
bool operator==(const LexState &) const;
void each_action(std::function<void(LexAction *)>);
std::map<rules::CharacterSet, LexAction> actions;
LexAction default_action;
std::set<rules::CharacterSet> expected_inputs() const;
bool is_token_start;
};

View file

@ -9,6 +9,7 @@ using std::ostream;
using std::to_string;
using std::set;
using std::vector;
using std::function;
using rules::Symbol;
ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
@ -126,6 +127,16 @@ set<Symbol> ParseState::expected_inputs() const {
return result;
}
// Invoke `fn` with a mutable pointer to every action of this state: for each
// lookahead symbol in map order, each action in that symbol's list in order.
void ParseState::each_action(function<void(ParseAction *)> fn) {
  for (auto it = actions.begin(); it != actions.end(); ++it) {
    auto &symbol_actions = it->second;
    for (size_t i = 0; i < symbol_actions.size(); i++)
      fn(&symbol_actions[i]);
  }
}
// Parse states are compared by their action tables only; no other member
// takes part in the comparison.
bool ParseState::operator==(const ParseState &other) const {
  const auto &their_actions = other.actions;
  return actions == their_actions;
}
set<Symbol> ParseTable::all_symbols() const {
set<Symbol> result;
for (auto &pair : symbols)

View file

@ -79,8 +79,11 @@ namespace tree_sitter {
class ParseState {
public:
ParseState();
std::map<rules::Symbol, std::vector<ParseAction>> actions;
std::set<rules::Symbol> expected_inputs() const;
bool operator==(const ParseState &) const;
void each_action(std::function<void(ParseAction *)>);
std::map<rules::Symbol, std::vector<ParseAction>> actions;
LexStateId lex_state_id;
};

View file

@ -67,7 +67,7 @@ void ts_document_parse(TSDocument *self) {
return;
TSTree *reusable_tree = self->valid ? self->tree : NULL;
if (reusable_tree && !reusable_tree->options.has_changes)
if (reusable_tree && !reusable_tree->has_changes)
return;
TSTree *tree = ts_parser_parse(&self->parser, self->input, reusable_tree);
@ -85,7 +85,7 @@ void ts_document_invalidate(TSDocument *self) {
TSNode ts_document_root_node(const TSDocument *self) {
TSNode result = ts_node_make(self->tree, 0, 0, 0);
while (result.data && !ts_tree_is_visible(result.data))
while (result.data && !((TSTree *)result.data)->visible)
result = ts_node_named_child(result, 0);
return result;
}

View file

@ -45,6 +45,7 @@ static void ts_lexer__start(TSLexer *self, TSStateId lex_state) {
LOG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars);
LOG_LOOKAHEAD();
self->starting_state = lex_state;
if (!self->chunk)
ts_lexer__get_chunk(self);
if (!self->lookahead_size)
@ -101,7 +102,9 @@ static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol,
result = ts_tree_make_leaf(symbol, padding, size, metadata);
}
result->options.fragile_left = fragile;
if (fragile)
result->lex_state = self->starting_state;
return result;
}

View file

@ -33,7 +33,7 @@ static inline size_t ts_node__offset_row(TSNode self) {
static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
const TSTree *tree = ts_node__tree(self);
return include_anonymous ? tree->options.visible : tree->options.named;
return include_anonymous ? tree->visible : tree->named;
}
static inline size_t ts_node__relevant_child_count(TSNode self,
@ -110,7 +110,7 @@ static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous)
if (grandchild_count > 0)
return ts_node__child(child, grandchild_count - 1, include_anonymous);
}
} while (!ts_tree_is_visible(ts_node__tree(result)));
} while (!ts_node__tree(result)->visible);
return ts_node__null();
}
@ -133,7 +133,7 @@ static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous)
if (grandchild_count > 0)
return ts_node__child(child, 0, include_anonymous);
}
} while (!ts_tree_is_visible(ts_node__tree(result)));
} while (!ts_node__tree(result)->visible);
return ts_node__null();
}
@ -218,11 +218,11 @@ bool ts_node_eq(TSNode self, TSNode other) {
}
bool ts_node_is_named(TSNode self) {
return ts_node__tree(self)->options.named;
return ts_node__tree(self)->named;
}
bool ts_node_has_changes(TSNode self) {
return ts_node__tree(self)->options.has_changes;
return ts_node__tree(self)->has_changes;
}
TSNode ts_node_parent(TSNode self) {
@ -233,7 +233,7 @@ TSNode ts_node_parent(TSNode self) {
result = ts_node__direct_parent(result, &index);
if (!result.data)
return ts_node__null();
} while (!ts_tree_is_visible(result.data));
} while (!ts_node__tree(result)->visible);
return result;
}

View file

@ -60,7 +60,7 @@ static void ts_parser__breakdown_top_of_stack(TSParser *self, int head) {
TSStateId state = ts_stack_top_state(self->stack, head_index);
for (size_t j = 0; j < parent->child_count; j++) {
last_child = parent->children[j];
if (!last_child->options.extra) {
if (!last_child->extra) {
TSParseAction action =
ts_language_last_action(self->language, state, last_child->symbol);
assert(action.type == TSParseActionTypeShift);
@ -84,19 +84,26 @@ static void ts_parser__breakdown_top_of_stack(TSParser *self, int head) {
} while (last_child->child_count > 0);
}
static void ts_parser__pop_reusable_subtree(LookaheadState *state);
/*
* Replace the parser's reusable_subtree with its first non-fragile descendant.
* If the subtree is an error node or has no children, pop it instead.
*/
static bool ts_parser__breakdown_reusable_subtree(LookaheadState *state) {
static void ts_parser__breakdown_reusable_subtree(LookaheadState *state) {
do {
if (state->reusable_subtree->symbol == ts_builtin_sym_error)
return false;
if (state->reusable_subtree->child_count == 0)
return false;
if (state->reusable_subtree->symbol == ts_builtin_sym_error) {
ts_parser__pop_reusable_subtree(state);
return;
}
if (state->reusable_subtree->child_count == 0) {
ts_parser__pop_reusable_subtree(state);
return;
}
state->reusable_subtree = state->reusable_subtree->children[0];
} while (ts_tree_is_fragile(state->reusable_subtree));
return true;
}
/*
@ -118,13 +125,32 @@ static void ts_parser__pop_reusable_subtree(LookaheadState *state) {
}
static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) {
if (!subtree || subtree->symbol == ts_builtin_sym_error ||
ts_tree_is_fragile(subtree))
if (!subtree)
return false;
if (subtree->symbol == ts_builtin_sym_error)
return false;
if (ts_tree_is_fragile(subtree)) {
if (subtree->parse_state != ts_stack_top_state(self->stack, head))
return false;
}
TSStateId state = ts_stack_top_state(self->stack, head);
if (subtree->lex_state != TS_TREE_STATE_INDEPENDENT) {
TSStateId lex_state = self->language->lex_states[state];
if (subtree->lex_state != lex_state)
return false;
}
const TSParseAction *action =
ts_language_actions(self->language, state, subtree->symbol);
return action->type != TSParseActionTypeError && !action->can_hide_split;
if (action->type == TSParseActionTypeError || action->can_hide_split)
return false;
if (subtree->extra && !action->extra)
return false;
return true;
}
/*
@ -142,40 +168,32 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) {
}
if (state->reusable_subtree_pos < position.chars) {
LOG("past_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol));
LOG("past_reusable sym:%s", SYM_NAME(state->reusable_subtree->symbol));
ts_parser__pop_reusable_subtree(state);
continue;
}
bool can_reuse = true;
if (ts_tree_has_changes(state->reusable_subtree)) {
if (state->is_verifying) {
if (state->reusable_subtree->has_changes) {
if (state->is_verifying && state->reusable_subtree->child_count == 0) {
ts_parser__breakdown_top_of_stack(self, head);
state->is_verifying = false;
}
LOG("breakdown_changed sym:%s", SYM_NAME(state->reusable_subtree->symbol));
can_reuse = false;
} else if (ts_tree_is_extra(state->reusable_subtree)) {
LOG("breakdown_extra sym:%s", SYM_NAME(state->reusable_subtree->symbol));
can_reuse = false;
} else if (!ts_parser__can_reuse(self, head, state->reusable_subtree)) {
LOG("breakdown_non_reusable sym:%s",
SYM_NAME(state->reusable_subtree->symbol));
can_reuse = false;
ts_parser__breakdown_reusable_subtree(state);
continue;
}
if (!can_reuse) {
if (!ts_parser__breakdown_reusable_subtree(state)) {
LOG("dont_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol));
ts_parser__pop_reusable_subtree(state);
}
if (!ts_parser__can_reuse(self, head, state->reusable_subtree)) {
LOG("breakdown_unreusable sym:%s", SYM_NAME(state->reusable_subtree->symbol));
ts_parser__breakdown_reusable_subtree(state);
continue;
}
TSTree *result = state->reusable_subtree;
TSLength size = ts_tree_total_size(result);
LOG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), size.chars,
result->options.extra);
result->extra);
ts_parser__pop_reusable_subtree(state);
return result;
}
@ -217,8 +235,6 @@ static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) {
static bool ts_parser__shift(TSParser *self, int head, TSStateId parse_state,
TSTree *lookahead) {
if (self->language->symbol_metadata[lookahead->symbol].extra)
ts_tree_set_fragile(lookahead);
if (ts_stack_push(self->stack, head, parse_state, lookahead)) {
LOG("merge head:%d", head);
vector_erase(&self->lookahead_states, head);
@ -233,7 +249,7 @@ static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state,
TSSymbolMetadata metadata = self->language->symbol_metadata[lookahead->symbol];
if (metadata.structural && ts_stack_head_count(self->stack) > 1)
lookahead = ts_tree_make_copy(lookahead);
ts_tree_set_extra(lookahead);
lookahead->extra = true;
return ts_parser__shift(self, head, state, lookahead);
}
@ -273,7 +289,7 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol,
*/
if (!parent) {
for (size_t j = pop_result->tree_count - 1; j + 1 > 0; j--) {
if (pop_result->trees[j]->options.extra) {
if (pop_result->trees[j]->extra) {
trailing_extra_count++;
} else
break;
@ -321,8 +337,12 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol,
*/
TSStateId state;
TSStateId top_state = ts_stack_top_state(self->stack, new_head);
if (parent->parse_state != TS_TREE_STATE_ERROR)
parent->parse_state = top_state;
if (extra) {
ts_tree_set_extra(parent);
parent->extra = true;
state = top_state;
} else {
TSParseAction action =
@ -362,16 +382,16 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol,
if (self->is_split || ts_stack_head_count(self->stack) > 1) {
for (size_t i = 0, size = self->reduce_parents.size; i < size; i++) {
TSTree **parent = vector_get(&self->reduce_parents, i);
(*parent)->options.fragile_left = true;
(*parent)->options.fragile_right = true;
(*parent)->fragile_left = true;
(*parent)->fragile_right = true;
(*parent)->parse_state = TS_TREE_STATE_ERROR;
}
}
if (fragile) {
for (size_t i = 0; i < self->reduce_parents.size; i++) {
TSTree **parent = vector_get(&self->reduce_parents, i);
ts_tree_set_fragile_left(*parent);
ts_tree_set_fragile_right(*parent);
(*parent)->fragile_left = (*parent)->fragile_right = true;
}
}
@ -388,9 +408,8 @@ static void ts_parser__reduce_error(TSParser *self, int head,
stack_entry->position =
ts_length_add(stack_entry->position, lookahead->padding);
(*parent)->size = ts_length_add((*parent)->size, lookahead->padding);
(*parent)->fragile_left = (*parent)->fragile_right = true;
lookahead->padding = ts_length_zero();
ts_tree_set_fragile_left(*parent);
ts_tree_set_fragile_right(*parent);
}
}
@ -477,7 +496,7 @@ static TSTree *ts_parser__finish(TSParser *self, int finished_stack_head) {
StackPopResult *pop_result = vector_get(&pop_results, 0);
for (size_t i = 0; i < pop_result->tree_count; i++) {
if (!pop_result->trees[i]->options.extra) {
if (!pop_result->trees[i]->extra) {
TSTree *root = pop_result->trees[i];
size_t leading_extra_count = i;
size_t trailing_extra_count = pop_result->tree_count - 1 - i;
@ -643,10 +662,8 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
if (!ts_parser__can_reuse(self, head, lookahead) ||
position.chars != last_position.chars) {
TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head);
if (ts_parser__can_reuse(self, head, reused_lookahead)) {
lookahead = reused_lookahead;
} else {
lookahead = ts_parser__get_next_lookahead(self, head);
if (!lookahead) {
ts_lexer_reset(&self->lexer, position);
TSStateId parse_state = ts_stack_top_state(self->stack, head);
TSStateId lex_state = self->language->lex_states[parse_state];

View file

@ -283,7 +283,7 @@ Vector ts_stack_pop(Stack *self, int head_index, int child_count,
/*
* Children that are 'extra' do not count towards the total child count.
*/
if (ts_tree_is_extra(node->entry.tree) && !count_extra)
if (node->entry.tree->extra && !count_extra)
path->goal_tree_count++;
/*

View file

@ -1,4 +1,5 @@
#include <assert.h>
#include <limits.h>
#include <string.h>
#include <stdbool.h>
#include <stdio.h>
@ -6,6 +7,9 @@
#include "runtime/tree.h"
#include "runtime/length.h"
TSStateId TS_TREE_STATE_INDEPENDENT = USHRT_MAX;
TSStateId TS_TREE_STATE_ERROR = USHRT_MAX - 1;
TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size,
TSSymbolMetadata metadata) {
TSTree *result = malloc(sizeof(TSTree));
@ -18,15 +22,15 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size,
.named_child_count = 0,
.children = NULL,
.padding = padding,
.options =
{
.visible = metadata.visible, .named = metadata.named,
},
.visible = metadata.visible,
.named = metadata.named,
.lex_state = TS_TREE_STATE_INDEPENDENT,
.parse_state = TS_TREE_STATE_INDEPENDENT,
};
if (sym == ts_builtin_sym_error) {
result->options.fragile_left = true;
result->options.fragile_right = true;
result->fragile_left = true;
result->fragile_right = true;
}
return result;
@ -77,9 +81,9 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) {
self->size = ts_length_add(self->size, ts_tree_total_size(child));
}
if (child->options.visible) {
if (child->visible) {
self->visible_child_count++;
if (child->options.named)
if (child->named)
self->named_child_count++;
} else {
self->visible_child_count += child->visible_child_count;
@ -87,15 +91,17 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) {
}
if (child->symbol == ts_builtin_sym_error) {
self->options.fragile_left = self->options.fragile_right = true;
self->fragile_left = self->fragile_right = true;
self->parse_state = TS_TREE_STATE_ERROR;
}
}
if (child_count > 0) {
if (children[0]->options.fragile_left)
self->options.fragile_left = true;
if (children[child_count - 1]->options.fragile_right)
self->options.fragile_right = true;
self->lex_state = children[0]->lex_state;
if (children[0]->fragile_left)
self->fragile_left = true;
if (children[child_count - 1]->fragile_right)
self->fragile_right = true;
}
}
@ -153,9 +159,9 @@ bool ts_tree_eq(const TSTree *self, const TSTree *other) {
if (self->symbol != other->symbol)
return false;
if (self->options.visible != other->options.visible)
if (self->visible != other->visible)
return false;
if (self->options.named != other->options.named)
if (self->named != other->named)
return false;
if (self->symbol == ts_builtin_sym_error)
return self->lookahead_char == other->lookahead_char;
@ -214,8 +220,8 @@ static size_t ts_tree__write_to_string(const TSTree *self,
char *cursor = string;
char **writer = (limit > 0) ? &cursor : &string;
bool visible = is_root || (self->options.visible &&
(include_anonymous || self->options.named));
bool visible = is_root || (self->visible &&
(include_anonymous || self->named));
if (visible && !is_root)
cursor += snprintf(*writer, limit, " ");
@ -262,7 +268,7 @@ void ts_tree_edit(TSTree *self, TSInputEdit edit) {
size_t old_end = edit.position + edit.chars_removed;
assert(old_end <= ts_tree_total_chars(self));
self->options.has_changes = true;
self->has_changes = true;
if (start < self->padding.chars) {
ts_length_set_unknown(&self->padding);

View file

@ -9,12 +9,16 @@ extern "C" {
#include "tree_sitter/parser.h"
#include "runtime/length.h"
extern TSStateId TS_TREE_STATE_INDEPENDENT;
extern TSStateId TS_TREE_STATE_ERROR;
struct TSTree {
struct {
struct TSTree *parent;
size_t index;
TSLength offset;
} context;
size_t child_count;
size_t visible_child_count;
size_t named_child_count;
@ -22,20 +26,20 @@ struct TSTree {
struct TSTree **children;
char lookahead_char;
};
TSLength padding;
TSLength size;
TSSymbol symbol;
struct {
bool visible : 1;
bool named : 1;
bool extra : 1;
bool fragile_left : 1;
bool fragile_right : 1;
bool has_changes : 1;
} options;
unsigned short int ref_count;
TSStateId lex_state;
TSStateId parse_state;
unsigned short ref_count;
bool visible : 1;
bool named : 1;
bool extra : 1;
bool fragile_left : 1;
bool fragile_right : 1;
bool has_changes : 1;
};
TSTree *ts_tree_make_leaf(TSSymbol, TSLength, TSLength, TSSymbolMetadata);
@ -63,61 +67,8 @@ static inline TSLength ts_tree_total_size(const TSTree *self) {
return ts_length_add(self->padding, self->size);
}
static inline TSPoint ts_tree_extent(const TSTree *tree) {
TSPoint result;
result.row = tree->size.rows;
result.column = tree->size.columns;
return result;
}
static inline bool ts_tree_is_extra(const TSTree *tree) {
return tree->options.extra;
}
static inline bool ts_tree_is_visible(const TSTree *tree) {
return tree->options.visible;
}
static inline void ts_tree_set_extra(TSTree *tree) {
tree->options.extra = true;
}
static inline void ts_tree_set_fragile_left(TSTree *tree) {
tree->options.fragile_left = true;
}
static inline void ts_tree_set_fragile_right(TSTree *tree) {
tree->options.fragile_right = true;
}
static inline void ts_tree_set_fragile(TSTree *tree) {
ts_tree_set_fragile_left(tree);
ts_tree_set_fragile_right(tree);
}
static inline bool ts_tree_is_fragile_left(const TSTree *tree) {
return tree->options.fragile_left;
}
static inline bool ts_tree_is_fragile_right(const TSTree *tree) {
return tree->options.fragile_right;
}
static inline bool ts_tree_is_terminal(const TSTree *tree) {
return tree->child_count == 0;
}
static inline bool ts_tree_has_changes(const TSTree *tree) {
return tree->options.has_changes;
}
static inline bool ts_tree_is_empty(const TSTree *tree) {
return ts_tree_total_size(tree).chars == 0;
}
static inline bool ts_tree_is_fragile(const TSTree *tree) {
return ts_tree_is_empty(tree) || tree->options.fragile_left ||
tree->options.fragile_right;
return tree->fragile_left || tree->fragile_right || ts_tree_total_chars(tree) == 0;
}
#ifdef __cplusplus