Merge pull request #14 from maxbrunsfeld/more-incremental-fixes

Fix more errors in incremental subtree reuse
This commit is contained in:
Max Brunsfeld 2015-12-10 21:30:48 -08:00
commit 8747e2d3b9
30 changed files with 41066 additions and 31483 deletions

View file

@ -34,7 +34,7 @@ typedef struct TSLexer {
void (*start_token_fn)(struct TSLexer *);
bool (*advance_fn)(struct TSLexer *, TSStateId);
TSTree *(*accept_fn)(struct TSLexer *, TSSymbol, TSSymbolMetadata,
const char *);
const char *, bool fragile);
const char *chunk;
size_t chunk_start;
@ -107,9 +107,13 @@ struct TSLanguage {
GO_TO_STATE(state_index); \
}
#define ACCEPT_FRAGILE_TOKEN(symbol) \
return lexer->accept_fn(lexer, symbol, ts_symbol_metadata[symbol], \
ts_symbol_names[symbol], true);
#define ACCEPT_TOKEN(symbol) \
return lexer->accept_fn(lexer, symbol, ts_symbol_metadata[symbol], \
ts_symbol_names[symbol]);
ts_symbol_names[symbol], false);
#define LEX_ERROR() \
if (error_mode) { \

View file

@ -27,7 +27,7 @@ function run_tests {
local cmd="out/Debug/${target}"
shift
while getopts "df:ghpv" option; do
while getopts "df:s:ghpv" option; do
case ${option} in
h)
usage
@ -48,6 +48,9 @@ function run_tests {
v)
args+=("--reporter=spec")
;;
s)
export TREE_SITTER_SEED=${OPTARG}
;;
esac
done

View file

@ -14,23 +14,23 @@ describe("item_set_closure", []() {
SyntaxGrammar grammar{{
SyntaxVariable("rule0", VariableTypeNamed, {
Production({
{Symbol(1), 0, AssociativityNone, 100},
{Symbol(11, true), 0, AssociativityNone, 101},
{Symbol(1), 0, AssociativityNone},
{Symbol(11, true), 0, AssociativityNone},
}),
}),
SyntaxVariable("rule1", VariableTypeNamed, {
Production({
{Symbol(12, true), 0, AssociativityNone, 102},
{Symbol(13, true), 0, AssociativityNone, 103},
{Symbol(12, true), 0, AssociativityNone},
{Symbol(13, true), 0, AssociativityNone},
}),
Production({
{Symbol(2), 0, AssociativityNone, 104},
{Symbol(2), 0, AssociativityNone},
})
}),
SyntaxVariable("rule2", VariableTypeNamed, {
Production({
{Symbol(14, true), 0, AssociativityNone, 105},
{Symbol(15, true), 0, AssociativityNone, 106},
{Symbol(14, true), 0, AssociativityNone},
{Symbol(15, true), 0, AssociativityNone},
})
}),
}, {}, {}};
@ -70,14 +70,14 @@ describe("item_set_closure", []() {
SyntaxGrammar grammar{{
SyntaxVariable("rule0", VariableTypeNamed, {
Production({
{Symbol(1), 0, AssociativityNone, 100},
{Symbol(11, true), 0, AssociativityNone, 101},
{Symbol(1), 0, AssociativityNone},
{Symbol(11, true), 0, AssociativityNone},
}),
}),
SyntaxVariable("rule1", VariableTypeNamed, {
Production({
{Symbol(12, true), 0, AssociativityNone, 102},
{Symbol(13, true), 0, AssociativityNone, 103},
{Symbol(12, true), 0, AssociativityNone},
{Symbol(13, true), 0, AssociativityNone},
}),
Production({})
}),

View file

@ -38,6 +38,11 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
update = conflict_manager.resolve(LexAction::Accept(sym1, 2, false), LexAction::Accept(sym2, 1, false));
AssertThat(update, IsTrue());
});
it("adds the discarded token to the 'fragile tokens' set", [&]() {
update = conflict_manager.resolve(LexAction::Accept(sym2, 1, false), LexAction::Accept(sym1, 2, false));
AssertThat(conflict_manager.fragile_tokens, Contains(sym2));
});
});
describe("when one token is string-based and the other is regexp-based", [&]() {

View file

@ -12,15 +12,15 @@ describe("ParseItem::completion_status()", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable("rule_0", VariableTypeNamed, {
Production({
{Symbol(11, true), 0, AssociativityNone, 101},
{Symbol(12, true), 0, AssociativityNone, 102},
{Symbol(13), 0, AssociativityNone, 103},
{Symbol(14, true), 4, AssociativityLeft, 104},
{Symbol(11, true), 0, AssociativityNone},
{Symbol(12, true), 0, AssociativityNone},
{Symbol(13), 0, AssociativityNone},
{Symbol(14, true), 4, AssociativityLeft},
}),
Production({
{Symbol(15, true), 0, AssociativityNone, 101},
{Symbol(16, true), 0, AssociativityNone, 102},
{Symbol(17, true), 5, AssociativityRight, 104},
{Symbol(15, true), 0, AssociativityNone},
{Symbol(16, true), 0, AssociativityNone},
{Symbol(17, true), 5, AssociativityRight},
}),
Production({}),
}),
@ -57,26 +57,26 @@ describe("ParseItemSet::transitions())", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable("rule_0", VariableTypeNamed, {
Production({
{Symbol(11, true), 0, AssociativityNone, 101},
{Symbol(12, true), 0, AssociativityNone, 102},
{Symbol(13), 5, AssociativityNone, 103},
{Symbol(14, true), 0, AssociativityNone, 104},
{Symbol(11, true), 0, AssociativityNone},
{Symbol(12, true), 0, AssociativityNone},
{Symbol(13), 5, AssociativityNone},
{Symbol(14, true), 0, AssociativityNone},
}),
Production({
{Symbol(11, true), 0, AssociativityNone, 105},
{Symbol(12, true), 0, AssociativityNone, 106},
{Symbol(15), 6, AssociativityNone, 107},
{Symbol(11, true), 0, AssociativityNone},
{Symbol(12, true), 0, AssociativityNone},
{Symbol(15), 6, AssociativityNone},
})
}),
SyntaxVariable("rule_1", VariableTypeNamed, {
Production({
{Symbol(15), 7, AssociativityNone, 109},
{Symbol(16, true), 0, AssociativityNone, 110},
{Symbol(15), 7, AssociativityNone},
{Symbol(16, true), 0, AssociativityNone},
})
}),
SyntaxVariable("rule_2", VariableTypeNamed, {
Production({
{Symbol(18, true), 0, AssociativityNone, 111},
{Symbol(18, true), 0, AssociativityNone},
})
})
}, {}, {}};

View file

@ -87,14 +87,6 @@ describe("flatten_grammar", []() {
});
};
auto get_rule_id_sequences = [&](vector<Production> productions) {
return collect(productions, [](Production p) {
return collect(p, [](ProductionStep e) {
return e.rule_id;
});
});
};
it("preserves the names and types of the grammar's variables", [&]() {
AssertThat(grammar.variables[0].name, Equals("variable0"));
AssertThat(grammar.variables[1].name, Equals("variable1"));
@ -159,21 +151,6 @@ describe("flatten_grammar", []() {
{ none, AssociativityLeft, AssociativityLeft, none, none }
})));
});
it("associates each unique remaining subsequence of symbols and precedences with a rule_id", [&]() {
// Variable 0: only the last symbol is the same for both productions.
auto variable0_step_ids = get_rule_id_sequences(grammar.variables[0].productions);
AssertThat(variable0_step_ids[0][0], !Equals(variable0_step_ids[1][0]));
AssertThat(variable0_step_ids[0][1], !Equals(variable0_step_ids[1][1]));
AssertThat(variable0_step_ids[0][2], Equals(variable0_step_ids[1][2]));
// Variable 1: the last *two* symbols are the same for both productions.
auto variable1_step_ids = get_rule_id_sequences(grammar.variables[1].productions);
AssertThat(variable1_step_ids[0][0], !Equals(variable1_step_ids[1][0]));
AssertThat(variable1_step_ids[0][1], !Equals(variable1_step_ids[1][1]));
AssertThat(variable1_step_ids[0][4], Equals(variable1_step_ids[1][3]));
AssertThat(variable1_step_ids[0][5], Equals(variable1_step_ids[1][4]));
});
});
END_TEST

View file

@ -479,12 +479,12 @@ static const TSParseAction *ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[sym_comment] = ACTIONS(SHIFT_EXTRA()),
},
[17] = {
[anon_sym_PLUS] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
[anon_sym_DASH] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
[anon_sym_STAR] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
[anon_sym_SLASH] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
[anon_sym_CARET] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
[anon_sym_RPAREN] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
[anon_sym_PLUS] = ACTIONS(REDUCE(sym_exponent, 3)),
[anon_sym_DASH] = ACTIONS(REDUCE(sym_exponent, 3)),
[anon_sym_STAR] = ACTIONS(REDUCE(sym_exponent, 3)),
[anon_sym_SLASH] = ACTIONS(REDUCE(sym_exponent, 3)),
[anon_sym_CARET] = ACTIONS(REDUCE(sym_exponent, 3)),
[anon_sym_RPAREN] = ACTIONS(REDUCE(sym_exponent, 3)),
[sym_comment] = ACTIONS(SHIFT_EXTRA()),
},
[18] = {
@ -598,12 +598,12 @@ static const TSParseAction *ts_parse_actions[STATE_COUNT][SYMBOL_COUNT] = {
[sym_comment] = ACTIONS(SHIFT_EXTRA()),
},
[28] = {
[ts_builtin_sym_end] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
[anon_sym_PLUS] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
[anon_sym_DASH] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
[anon_sym_STAR] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
[anon_sym_SLASH] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
[anon_sym_CARET] = ACTIONS(REDUCE_FRAGILE(sym_exponent, 3)),
[ts_builtin_sym_end] = ACTIONS(REDUCE(sym_exponent, 3)),
[anon_sym_PLUS] = ACTIONS(REDUCE(sym_exponent, 3)),
[anon_sym_DASH] = ACTIONS(REDUCE(sym_exponent, 3)),
[anon_sym_STAR] = ACTIONS(REDUCE(sym_exponent, 3)),
[anon_sym_SLASH] = ACTIONS(REDUCE(sym_exponent, 3)),
[anon_sym_CARET] = ACTIONS(REDUCE(sym_exponent, 3)),
[sym_comment] = ACTIONS(SHIFT_EXTRA()),
},
[29] = {

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,7 @@
#include "runtime/runtime_spec_helper.h"
#include <functional>
#include <set>
#include <utility>
#include "runtime/length.h"
#include "runtime/helpers/read_test_entries.h"
#include "runtime/helpers/spy_input.h"
@ -13,44 +15,6 @@ extern "C" const TSLanguage *ts_language_golang();
extern "C" const TSLanguage *ts_language_c();
extern "C" const TSLanguage *ts_language_cpp();
void expect_the_correct_tree(TSNode node, TSDocument *doc, string tree_string) {
const char *node_string = ts_node_string(node, doc);
AssertThat(node_string, Equals(tree_string));
free((void *)node_string);
}
void expect_a_consistent_tree(TSNode node, TSDocument *doc) {
size_t child_count = ts_node_child_count(node);
size_t start = ts_node_start_char(node);
size_t end = ts_node_end_char(node);
TSPoint start_point = ts_node_start_point(node);
TSPoint end_point = ts_node_end_point(node);
bool has_changes = ts_node_has_changes(node);
bool some_child_has_changes = false;
for (size_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
size_t child_start = ts_node_start_char(child);
size_t child_end = ts_node_end_char(child);
TSPoint child_start_point = ts_node_start_point(child);
TSPoint child_end_point = ts_node_end_point(child);
AssertThat(child_start, IsGreaterThan(start) || Equals(start));
AssertThat(child_end, IsLessThan(end) || Equals(end));
AssertThat(child_start_point, IsGreaterThan(start_point) || Equals(start_point));
AssertThat(child_end_point, IsLessThan(end_point) || Equals(end_point));
if (ts_node_has_changes(child))
some_child_has_changes = true;
}
if (child_count > 0)
AssertThat(has_changes, Equals(some_child_has_changes));
}
START_TEST
map<string, const TSLanguage *> languages({
{"json", ts_language_json()},
{"arithmetic", ts_language_arithmetic()},
@ -60,22 +24,102 @@ map<string, const TSLanguage *> languages({
{"cpp", ts_language_cpp()},
});
// Asserts that the string produced by ts_node_string for `node` equals
// `tree_string`.
//
// The C string returned by ts_node_string is copied and freed *before* the
// assertion runs. If AssertThat leaves this function early on a mismatch
// (NOTE(review): it presumably throws - confirm against the assertion
// library), the buffer would otherwise never be released.
void expect_the_correct_tree(TSNode node, TSDocument *doc, string tree_string) {
  const char *node_string = ts_node_string(node, doc);

  // Guard against a null result so that constructing the string is well
  // defined; a null rendering is then reported as a plain mismatch.
  string actual_tree_string(node_string ? node_string : "");
  free((void *)node_string);

  AssertThat(actual_tree_string, Equals(tree_string));
}
// Recursively checks the structural invariants of the tree rooted at `node`:
//
//   * a node does not start after it ends (character offsets and points);
//   * every child lies within the bounds of its parent;
//   * each child starts at or after the end of the previous sibling;
//   * a node with children reports changes exactly when at least one of its
//     children reports changes.
void expect_a_consistent_tree(TSNode node, TSDocument *doc) {
  size_t child_count = ts_node_child_count(node);
  size_t start_char = ts_node_start_char(node);
  size_t end_char = ts_node_end_char(node);
  TSPoint start_point = ts_node_start_point(node);
  TSPoint end_point = ts_node_end_point(node);
  bool has_changes = ts_node_has_changes(node);
  bool some_child_has_changes = false;

  AssertThat(start_char, !IsGreaterThan(end_char));
  AssertThat(start_point, !IsGreaterThan(end_point));

  size_t last_child_end_char = 0;
  TSPoint last_child_end_point = {0, 0};

  for (size_t i = 0; i < child_count; i++) {
    TSNode child = ts_node_child(node, i);
    size_t child_start_char = ts_node_start_char(child);
    size_t child_end_char = ts_node_end_char(child);
    TSPoint child_start_point = ts_node_start_point(child);
    TSPoint child_end_point = ts_node_end_point(child);

    // Siblings must not overlap. The first child has no predecessor, so the
    // comparison is skipped for it.
    if (i > 0) {
      AssertThat(child_start_char, !IsLessThan(last_child_end_char));
      AssertThat(child_start_point, !IsLessThan(last_child_end_point));
    }

    // Record this child's end unconditionally. Updating it only when i > 0
    // would leave the first child's end unrecorded, making the check for the
    // second child compare against the initial zero values.
    last_child_end_char = child_end_char;
    last_child_end_point = child_end_point;

    // Children must be contained in their parent.
    AssertThat(child_start_char, !IsLessThan(start_char));
    AssertThat(child_end_char, !IsGreaterThan(end_char));
    AssertThat(child_start_point, !IsLessThan(start_point));
    AssertThat(child_end_point, !IsGreaterThan(end_point));

    expect_a_consistent_tree(child, doc);

    if (ts_node_has_changes(child))
      some_child_has_changes = true;
  }

  if (child_count > 0)
    AssertThat(has_changes, Equals(some_child_has_changes));
}
// Returns a string of 0 to 11 pseudo-random characters, each drawn from the
// inclusive range [min, max] using random().
//
// The range is inclusive so that `max` itself can be produced, and so that a
// single-character range (min == max) does not take a modulus by zero. If the
// bounds are passed in the wrong order they are swapped.
std::string random_string(char min, char max) {
  if (max < min) {
    char swapped = min;
    min = max;
    max = swapped;
  }

  // Number of distinct characters in the range; always >= 1.
  const long span = static_cast<long>(max) - static_cast<long>(min) + 1;

  std::string result;
  size_t length = random() % 12;
  for (size_t i = 0; i < length; i++) {
    char inserted_char = static_cast<char>(min + (random() % span));
    result += inserted_char;
  }
  return result;
}
// Returns a one-character string holding a pseudo-randomly chosen character
// of `characters` (chosen with random()).
//
// An empty `characters` yields an empty string instead of taking a modulus by
// zero; no random number is consumed in that case.
std::string random_char(std::string characters) {
  if (characters.empty())
    return std::string();

  size_t index = random() % characters.size();
  return std::string(1, characters[index]);
}
string random_words(size_t count) {
string result;
bool just_inserted_word = false;
for (size_t i = 0; i < count; i++) {
if (random() % 10 < 6) {
result += random_char("!(){}[]<>+-=");
} else {
if (just_inserted_word)
result += " ";
result += random_string('a', 'z');
just_inserted_word = true;
}
}
return result;
}
START_TEST
describe("Languages", [&]() {
TSDocument *doc;
before_each([&]() {
doc = ts_document_make();
});
after_each([&]() {
ts_document_free(doc);
});
for (const auto &pair : languages) {
describe(("The " + pair.first + " parser").c_str(), [&]() {
TSDocument *doc;
before_each([&]() {
doc = ts_document_make();
ts_document_set_language(doc, pair.second);
// ts_document_set_debugger(doc, log_debugger_make(false));
// ts_document_set_debugger(doc, log_debugger_make(true));
});
after_each([&]() {
ts_document_free(doc);
});
for (auto &entry : test_entries_for_language(pair.first)) {
@ -97,49 +141,57 @@ describe("Languages", [&]() {
ts_document_parse(doc);
});
srand(2);
std::set<std::pair<size_t, size_t>> deletions;
std::set<std::pair<size_t, string>> insertions;
for (int i = 0; i < 5; i++) {
size_t edit_position = rand() % entry.input.size();
size_t deletion_amount = rand() % (entry.input.size() - edit_position);
string pos_string = to_string(edit_position);
for (size_t i = 0; i < 50; i++) {
size_t edit_position = random() % entry.input.size();
size_t deletion_size = random() % (entry.input.size() - edit_position);
string inserted_text = random_words(random() % 4 + 1);
it_handles_edit_sequence("repairing an inserted error at " + pos_string, [&]() {
ts_document_edit(doc, input->replace(edit_position, 0, "%^&*"));
ts_document_parse(doc);
if (insertions.insert({edit_position, inserted_text}).second) {
string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
ts_document_edit(doc, input->undo());
ts_document_parse(doc);
});
it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
ts_document_edit(doc, input->replace(edit_position, 0, inserted_text));
ts_document_parse(doc);
it_handles_edit_sequence("creating and repairing an inserted error at " + pos_string, [&]() {
ts_document_parse(doc);
ts_document_edit(doc, input->undo());
ts_document_parse(doc);
});
ts_document_edit(doc, input->replace(edit_position, 0, "%^&*"));
it_handles_edit_sequence("performing and repairing an insertion of " + description, [&]() {
ts_document_parse(doc);
ts_document_parse(doc);
ts_document_edit(doc, input->replace(edit_position, 0, inserted_text));
ts_document_parse(doc);
ts_document_edit(doc, input->undo());
ts_document_parse(doc);
});
ts_document_edit(doc, input->undo());
ts_document_parse(doc);
});
}
it_handles_edit_sequence("repairing an errant deletion at " + pos_string, [&]() {
ts_document_parse(doc);
if (deletions.insert({edit_position, deletion_size}).second) {
string desription = to_string(edit_position) + "-" + to_string(edit_position + deletion_size);
ts_document_edit(doc, input->replace(edit_position, deletion_amount, ""));
ts_document_parse(doc);
it_handles_edit_sequence("repairing a deletion of " + desription, [&]() {
ts_document_edit(doc, input->replace(edit_position, deletion_size, ""));
ts_document_parse(doc);
ts_document_edit(doc, input->undo());
ts_document_parse(doc);
});
ts_document_edit(doc, input->undo());
ts_document_parse(doc);
});
it_handles_edit_sequence("creating and repairing an errant deletion at " + pos_string, [&]() {
ts_document_edit(doc, input->replace(edit_position, deletion_amount, ""));
ts_document_parse(doc);
it_handles_edit_sequence("performing and repairing a deletion of " + desription, [&]() {
ts_document_parse(doc);
ts_document_edit(doc, input->undo());
ts_document_parse(doc);
});
ts_document_edit(doc, input->replace(edit_position, deletion_size, ""));
ts_document_parse(doc);
ts_document_edit(doc, input->undo());
ts_document_parse(doc);
});
}
}
}
});

View file

@ -1,5 +1,16 @@
#include "runtime/runtime_spec_helper.h"
// Entry point of the spec runner.
//
// Seeds the random() generator before running the specs. The seed is taken
// from the TREE_SITTER_SEED environment variable when it holds a valid decimal
// integer, and from the current time otherwise. The chosen seed is always
// printed so that a run can be reproduced.
int main(int argc, char *argv[]) {
  int seed = static_cast<int>(time(nullptr));

  const char *seed_env = getenv("TREE_SITTER_SEED");
  if (seed_env) {
    // strtol (unlike atoi) reports where parsing stopped, so a malformed
    // value can be rejected instead of silently becoming seed 0.
    char *parse_end = nullptr;
    long parsed_seed = strtol(seed_env, &parse_end, 10);
    if (parse_end != seed_env && *parse_end == '\0') {
      seed = static_cast<int>(parsed_seed);
    } else {
      fprintf(stderr, "Ignoring invalid TREE_SITTER_SEED value: %s\n", seed_env);
    }
  }

  printf("Random seed: %d\n", seed);
  srandom(seed);
  return bandit::run(argc, argv);
}

View file

@ -32,7 +32,7 @@ using rules::Symbol;
class LexTableBuilder {
const LexicalGrammar lex_grammar;
const LexConflictManager conflict_manager;
LexConflictManager conflict_manager;
ParseTable *parse_table;
unordered_map<const LexItemSet, LexStateId, LexItemSet::Hash> lex_state_ids;
LexTable lex_table;
@ -59,6 +59,8 @@ class LexTableBuilder {
build_lex_item_set(parse_table->all_symbols(), true);
populate_lex_state(error_item_set, LexTable::ERROR_STATE_ID);
mark_fragile_tokens();
return lex_table;
}
@ -153,6 +155,17 @@ class LexTableBuilder {
if (item.is_token_start())
lex_table.state(state_id).is_token_start = true;
}
void mark_fragile_tokens() {
for (LexState &state : lex_table.states)
if (state.default_action.type == LexActionTypeAccept)
if (has_fragile_token(state.default_action.symbol))
state.default_action.type = LexActionTypeAcceptFragile;
}
// Tells whether `symbol` is in the conflict manager's set of fragile tokens.
bool has_fragile_token(const Symbol &symbol) {
  const auto &fragile = conflict_manager.fragile_tokens;
  return fragile.count(symbol) > 0;
}
};
LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) {

View file

@ -37,6 +37,7 @@ class ParseTableBuilder {
ParseTable parse_table;
std::set<string> conflicts;
ParseItemSet null_item_set;
std::set<const Production *> fragile_productions;
public:
ParseTableBuilder(const SyntaxGrammar &grammar,
@ -48,7 +49,7 @@ class ParseTableBuilder {
pair<ParseTable, const GrammarError *> build() {
Symbol start_symbol = Symbol(0, grammar.variables.empty());
Production start_production({
ProductionStep(start_symbol, 0, rules::AssociativityNone, -2),
ProductionStep(start_symbol, 0, rules::AssociativityNone),
});
add_parse_state(ParseItemSet({
@ -78,6 +79,9 @@ class ParseTableBuilder {
add_reduce_extra_actions(state);
}
mark_fragile_reductions();
remove_duplicate_states();
parse_table.symbols.insert({ rules::ERROR(), {} });
return { parse_table, nullptr };
@ -153,6 +157,65 @@ class ParseTableBuilder {
}
}
void mark_fragile_reductions() {
for (ParseState &state : parse_table.states) {
for (auto &entry : state.actions) {
for (ParseAction &action : entry.second) {
if (action.type == ParseActionTypeReduce) {
if (has_fragile_production(action.production))
action.type = ParseActionTypeReduceFragile;
action.production = NULL;
}
}
}
}
}
void remove_duplicate_states() {
bool done = false;
while (!done) {
done = true;
map<ParseStateId, ParseStateId> replacements;
for (size_t i = 0, size = parse_table.states.size(); i < size; i++) {
for (size_t j = 0; j < i; j++) {
if (parse_table.states[i].actions == parse_table.states[j].actions) {
replacements.insert({ i, j });
done = false;
break;
}
}
}
for (ParseState &state : parse_table.states) {
for (auto &entry : state.actions) {
for (ParseAction &action : entry.second) {
if (action.type == ParseActionTypeShift) {
ParseStateId state_index = action.state_index;
auto replacement = replacements.find(action.state_index);
if (replacement != replacements.end()) {
state_index = replacement->second;
}
size_t prior_removed = 0;
for (const auto &replacement : replacements) {
if (replacement.first >= state_index)
break;
prior_removed++;
}
state_index -= prior_removed;
action.state_index = state_index;
}
}
}
}
for (auto replacement = replacements.rbegin(); replacement != replacements.rend(); ++replacement) {
parse_table.states.erase(parse_table.states.begin() + replacement->first);
}
}
}
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,
const ParseAction &new_action,
const ParseItemSet &item_set) {
@ -174,18 +237,23 @@ class ParseTableBuilder {
case ConflictTypeResolved: {
if (resolution.first) {
if (old_action.type == ParseActionTypeReduce)
parse_table.fragile_productions.insert(old_action.production);
fragile_productions.insert(old_action.production);
return &parse_table.set_action(state_id, lookahead, new_action);
} else {
if (new_action.type == ParseActionTypeReduce)
parse_table.fragile_productions.insert(new_action.production);
fragile_productions.insert(new_action.production);
break;
}
}
case ConflictTypeUnresolved: {
if (handle_unresolved_conflict(item_set, lookahead))
if (handle_unresolved_conflict(item_set, lookahead)) {
if (old_action.type == ParseActionTypeReduce)
fragile_productions.insert(old_action.production);
if (new_action.type == ParseActionTypeReduce)
fragile_productions.insert(new_action.production);
return &parse_table.add_action(state_id, lookahead, new_action);
}
break;
}
}
@ -315,6 +383,11 @@ class ParseTableBuilder {
return grammar.variables[symbol.index].name;
}
}
// Tells whether `production` was recorded in `fragile_productions`.
//
// Uses the set's own lookup (logarithmic) rather than a linear std::find
// over its elements.
bool has_fragile_production(const Production *production) {
  return fragile_productions.find(production) != fragile_productions.end();
}
};
pair<ParseTable, const GrammarError *> build_parse_table(

View file

@ -0,0 +1,16 @@
#ifndef COMPILER_BUILD_TABLES_CONFLICT_TYPE_H_
#define COMPILER_BUILD_TABLES_CONFLICT_TYPE_H_
namespace tree_sitter {
namespace build_tables {
// Classifies the outcome of comparing two competing table actions.
// NOTE(review): the per-value descriptions below are inferred from the
// enumerator names and from how the parse table builder switches on them;
// confirm against the conflict managers that produce these values.
enum ConflictType {
// presumably: the two actions do not conflict
ConflictTypeNone,
// presumably: the actions conflict, but one of them was chosen as the winner
ConflictTypeResolved,
// presumably: the actions conflict and neither could be preferred
ConflictTypeUnresolved
};
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_CONFLICT_TYPE_H_

View file

@ -10,7 +10,7 @@ LexConflictManager::LexConflictManager(const LexicalGrammar &grammar)
: grammar(grammar) {}
bool LexConflictManager::resolve(const LexAction &new_action,
const LexAction &old_action) const {
const LexAction &old_action) {
if (new_action.type < old_action.type)
return !resolve(old_action, new_action);
@ -24,16 +24,27 @@ bool LexConflictManager::resolve(const LexAction &new_action,
switch (new_action.type) {
case LexActionTypeAccept: {
int new_precedence = new_action.precedence_range.min;
bool result;
if (new_precedence > old_precedence)
return true;
result = true;
else if (new_precedence < old_precedence)
return false;
result = false;
else if (new_action.is_string && !old_action.is_string)
return true;
result = true;
else if (old_action.is_string && !new_action.is_string)
return false;
result = false;
else if (new_action.symbol.index < old_action.symbol.index)
result = true;
else
return new_action.symbol.index < old_action.symbol.index;
result = false;
if (result)
fragile_tokens.insert(old_action.symbol);
else
fragile_tokens.insert(new_action.symbol);
return result;
}
case LexActionTypeAdvance:

View file

@ -1,8 +1,10 @@
#ifndef COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
#define COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
@ -15,7 +17,9 @@ class LexConflictManager {
public:
explicit LexConflictManager(const LexicalGrammar &);
bool resolve(const LexAction &, const LexAction &) const;
bool resolve(const LexAction &, const LexAction &);
std::set<rules::Symbol> fragile_tokens;
};
} // namespace build_tables

View file

@ -22,7 +22,7 @@ ParseItem::ParseItem(const Symbol &lhs, const Production &production,
bool ParseItem::operator==(const ParseItem &other) const {
return ((variable_index == other.variable_index) &&
(step_index == other.step_index) &&
(remaining_rule_id() == other.remaining_rule_id()));
(production == other.production));
}
bool ParseItem::operator<(const ParseItem &other) const {
@ -34,7 +34,7 @@ bool ParseItem::operator<(const ParseItem &other) const {
return true;
if (step_index > other.step_index)
return false;
return remaining_rule_id() < other.remaining_rule_id();
return production < other.production;
}
Symbol ParseItem::lhs() const {
@ -72,21 +72,10 @@ rules::Associativity ParseItem::associativity() const {
return production->at(step_index).associativity;
}
pair<int, int> ParseItem::remaining_rule_id() const {
if (production->empty())
return { -2, -1 };
else if (completion_status().is_done)
return { production->back().associativity, production->back().precedence };
else
return { -1, production->at(step_index).rule_id };
}
size_t ParseItem::Hash::operator()(const ParseItem &item) const {
size_t result = hash<int>()(item.variable_index);
result ^= hash<unsigned int>()(item.step_index);
result ^= hash<size_t>()(item.production->size());
pair<int, int> id = item.remaining_rule_id();
result ^= hash<int>()(id.first) ^ hash<int>()(id.second);
result ^= hash<void *>()((void *)item.production);
return result;
}

View file

@ -29,7 +29,6 @@ class ParseItem {
bool operator==(const ParseItem &other) const;
bool operator<(const ParseItem &other) const;
rules::Symbol lhs() const;
std::pair<int, int> remaining_rule_id() const;
int precedence() const;
rules::Associativity associativity() const;
CompletionStatus completion_status() const;

View file

@ -302,6 +302,9 @@ class CCodeGenerator {
case LexActionTypeAccept:
line("ACCEPT_TOKEN(" + symbol_id(action.symbol) + ");");
break;
case LexActionTypeAcceptFragile:
line("ACCEPT_FRAGILE_TOKEN(" + symbol_id(action.symbol) + ");");
break;
case LexActionTypeError:
line("LEX_ERROR();");
break;
@ -324,13 +327,13 @@ class CCodeGenerator {
case ParseActionTypeShiftExtra:
add("SHIFT_EXTRA()");
break;
case ParseActionTypeReduceFragile:
add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
break;
case ParseActionTypeReduce:
if (reduce_action_is_fragile(action))
add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
else
add("REDUCE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
add("REDUCE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
break;
case ParseActionTypeReduceExtra:
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
@ -392,11 +395,6 @@ class CCodeGenerator {
}
}
bool reduce_action_is_fragile(const ParseAction &action) const {
return parse_table.fragile_productions.find(action.production) !=
parse_table.fragile_productions.end();
}
// C-code generation functions
void _switch(string condition, function<void()> body) {

View file

@ -14,6 +14,7 @@ namespace tree_sitter {
typedef enum {
LexActionTypeError,
LexActionTypeAccept,
LexActionTypeAcceptFragile,
LexActionTypeAdvance
} LexActionType;

View file

@ -69,14 +69,13 @@ ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
}
bool ParseAction::operator==(const ParseAction &other) const {
bool types_eq = type == other.type;
bool symbols_eq = symbol == other.symbol;
bool state_indices_eq = state_index == other.state_index;
bool consumed_symbol_counts_eq =
consumed_symbol_count == other.consumed_symbol_count;
bool precedences_eq = precedence_range == other.precedence_range;
return types_eq && symbols_eq && state_indices_eq &&
consumed_symbol_counts_eq && precedences_eq;
return (
type == other.type &&
symbol == other.symbol &&
state_index == other.state_index &&
production == other.production &&
consumed_symbol_count == other.consumed_symbol_count
);
}
bool ParseAction::operator<(const ParseAction &other) const {
@ -92,6 +91,10 @@ bool ParseAction::operator<(const ParseAction &other) const {
return true;
if (other.state_index < state_index)
return false;
if (production < other.production)
return true;
if (other.production < production)
return false;
return consumed_symbol_count < other.consumed_symbol_count;
}

View file

@ -18,6 +18,7 @@ typedef uint64_t ParseStateId;
typedef enum {
ParseActionTypeError,
ParseActionTypeReduceExtra,
ParseActionTypeReduceFragile,
ParseActionTypeShiftExtra,
ParseActionTypeShift,
@ -97,7 +98,6 @@ class ParseTable {
std::vector<ParseState> states;
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
std::set<const Production *> fragile_productions;
};
} // namespace tree_sitter

View file

@ -72,39 +72,6 @@ class FlattenRule : public rules::RuleFn<void> {
}
};
struct ProductionSlice {
vector<ProductionStep>::const_iterator start;
vector<ProductionStep>::const_iterator end;
bool operator==(const ProductionSlice &other) const {
if (end - start != other.end - other.start)
return false;
for (auto iter1 = start, iter2 = other.start; iter1 != end; ++iter1, ++iter2)
if (!(iter1->symbol == iter2->symbol &&
iter1->precedence == iter2->precedence &&
iter1->associativity == iter2->associativity))
return false;
return true;
}
};
void assign_rule_ids(Production *production,
vector<ProductionSlice> *unique_slices) {
auto end = production->end();
for (auto iter = production->begin(); iter != end; ++iter) {
ProductionSlice slice{ iter, end };
auto existing_id =
find(unique_slices->cbegin(), unique_slices->cend(), slice);
if (existing_id == unique_slices->end()) {
unique_slices->push_back(slice);
iter->rule_id = unique_slices->size();
} else {
iter->rule_id = existing_id - unique_slices->cbegin() + 1;
}
}
}
SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
SyntaxGrammar result;
result.expected_conflicts = grammar.expected_conflicts;
@ -112,17 +79,15 @@ SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
for (const Variable &variable : grammar.variables) {
vector<Production> productions;
for (const rule_ptr &rule_component : extract_choices(variable.rule))
productions.push_back(FlattenRule().flatten(rule_component));
for (const rule_ptr &rule_component : extract_choices(variable.rule)) {
Production production = FlattenRule().flatten(rule_component);
if (std::find(productions.begin(), productions.end(), production) == productions.end())
productions.push_back(production);
}
result.variables.push_back(
SyntaxVariable(variable.name, variable.type, productions));
}
vector<ProductionSlice> unique_slices;
for (SyntaxVariable &variable : result.variables)
for (Production &production : variable.productions)
assign_rule_ids(&production, &unique_slices);
return result;
}

View file

@ -23,19 +23,11 @@ ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
rules::Associativity associativity)
: symbol(symbol),
precedence(precedence),
associativity(associativity),
rule_id(0) {}
ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
rules::Associativity associativity, int rule_id)
: symbol(symbol),
precedence(precedence),
associativity(associativity),
rule_id(rule_id) {}
associativity(associativity) {}
bool ProductionStep::operator==(const ProductionStep &other) const {
return symbol == other.symbol && precedence == other.precedence &&
rule_id == other.rule_id && associativity == other.associativity;
associativity == other.associativity;
}
const vector<Production> &SyntaxGrammar::productions(

View file

@ -13,13 +13,11 @@ namespace tree_sitter {
struct ProductionStep {
ProductionStep(const rules::Symbol &, int, rules::Associativity);
ProductionStep(const rules::Symbol &, int, rules::Associativity, int);
bool operator==(const ProductionStep &) const;
rules::Symbol symbol;
int precedence;
rules::Associativity associativity;
int rule_id;
};
typedef std::vector<ProductionStep> Production;

View file

@ -85,20 +85,24 @@ static bool ts_lexer__advance(TSLexer *self, TSStateId state) {
static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol,
TSSymbolMetadata metadata,
const char *symbol_name) {
const char *symbol_name, bool fragile) {
TSLength size =
ts_length_sub(self->current_position, self->token_start_position);
TSLength padding =
ts_length_sub(self->token_start_position, self->token_end_position);
self->token_end_position = self->current_position;
TSTree *result;
if (symbol == ts_builtin_sym_error) {
LOG("error_char");
return ts_tree_make_error(size, padding, self->lookahead);
result = ts_tree_make_error(size, padding, self->lookahead);
} else {
LOG("accept_token sym:%s", symbol_name);
return ts_tree_make_leaf(symbol, padding, size, metadata);
result = ts_tree_make_leaf(symbol, padding, size, metadata);
}
result->options.fragile_left = fragile;
return result;
}
/*

View file

@ -71,7 +71,10 @@ static void ts_parser__breakdown_top_of_stack(TSParser *self, int head) {
merged = ts_stack_push(self->stack, pop_result->head_index, state, pop_result->trees[j]);
}
assert(i == 0 || merged);
if (i == 0)
assert(!merged);
else
assert(merged);
}
free(removed_trees);
@ -140,14 +143,31 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) {
continue;
}
if (ts_tree_has_changes(state->reusable_subtree) ||
ts_tree_is_fragile(state->reusable_subtree) ||
ts_tree_is_extra(state->reusable_subtree) ||
(state->reusable_subtree->child_count > 0 &&
!ts_parser__can_reuse(self, head, state->reusable_subtree))) {
LOG("breakdown sym:%s", SYM_NAME(state->reusable_subtree->symbol));
if (!ts_parser__breakdown_reusable_subtree(state))
bool can_reuse = true;
if (ts_tree_has_changes(state->reusable_subtree)) {
if (state->is_verifying) {
ts_parser__breakdown_top_of_stack(self, head);
state->is_verifying = false;
}
LOG("breakdown_changed sym:%s", SYM_NAME(state->reusable_subtree->symbol));
can_reuse = false;
} else if (ts_tree_is_fragile(state->reusable_subtree)) {
LOG("breakdown_fragile sym:%s", SYM_NAME(state->reusable_subtree->symbol));
can_reuse = false;
} else if (ts_tree_is_extra(state->reusable_subtree)) {
LOG("breakdown_extra sym:%s", SYM_NAME(state->reusable_subtree->symbol));
can_reuse = false;
} else if (state->reusable_subtree->child_count > 0 &&
!ts_parser__can_reuse(self, head, state->reusable_subtree)) {
LOG("breakdown_unexpected sym:%s", SYM_NAME(state->reusable_subtree->symbol));
can_reuse = false;
}
if (!can_reuse) {
if (!ts_parser__breakdown_reusable_subtree(state)) {
LOG("dont_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol));
ts_parser__pop_reusable_subtree(state);
}
continue;
}
@ -276,7 +296,6 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol,
if (i > 0) {
if (symbol == ts_builtin_sym_error) {
ts_stack_remove_head(self->stack, new_head);
free(pop_result->trees);
continue;
}
@ -331,7 +350,7 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol,
}
}
if (ts_stack_head_count(self->stack) > 1) {
if (self->is_split || ts_stack_head_count(self->stack) > 1) {
for (size_t i = 0, size = self->reduce_parents.size; i < size; i++) {
TSTree **parent = vector_get(&self->reduce_parents, i);
(*parent)->options.fragile_left = true;
@ -614,6 +633,7 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) {
TSTree *lookahead = NULL;
TSLength position = ts_length_zero(), last_position;
self->is_split = ts_stack_head_count(self->stack) > 1;
for (int head = 0; head < ts_stack_head_count(self->stack);) {
StackEntry *entry = ts_stack_head(self->stack, head);
last_position = position;

View file

@ -15,6 +15,7 @@ typedef struct {
Vector lookahead_states;
Vector reduce_parents;
int finished_stack_head;
bool is_split;
} TSParser;
TSParser ts_parser_make();