Merge pull request #200 from tree-sitter/missing-tokens-in-included-ranges

Fix bug where missing token was inserted outside of any included range
This commit is contained in:
Max Brunsfeld 2018-09-11 17:45:58 -07:00 committed by GitHub
commit dadd100fc3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 63 additions and 4 deletions

View file

@ -256,6 +256,10 @@ void ts_lexer_advance_to_end(Lexer *self) {
}
}
// Non-static entry point for marking a token end: forwards to the internal
// ts_lexer__mark_end routine, passing the address of this lexer's `data` member.
// NOTE(review): presumably this records the lexer's current position as
// `token_end_position` (the parser reads that field right after calling this
// function) -- confirm against ts_lexer__mark_end.
void ts_lexer_mark_end(Lexer *self) {
ts_lexer__mark_end(&self->data);
}
static const TSRange DEFAULT_RANGES[] = {
{
.start_point = {

View file

@ -36,6 +36,7 @@ void ts_lexer_set_input(Lexer *, TSInput);
// Lexer entry points declared for use by other translation units. Every
// prototype names its parameters so the declarations read consistently.
// NOTE(review): `position` is taken from the parser's call
// ts_lexer_reset(&self->lexer, position); confirm it matches the definition.
void ts_lexer_reset(Lexer *self, Length position);
void ts_lexer_start(Lexer *self);
void ts_lexer_advance_to_end(Lexer *self);
void ts_lexer_mark_end(Lexer *self);
void ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);

View file

@ -853,6 +853,7 @@ static void ts_parser__handle_error(TSParser *self, StackVersion version,
uint32_t previous_version_count = ts_stack_version_count(self->stack);
ts_parser__do_all_potential_reductions(self, version, 0);
uint32_t version_count = ts_stack_version_count(self->stack);
Length position = ts_stack_position(self->stack, version);
// Push a discontinuity onto the stack. Merge all of the stack versions that
// were created in the previous step.
@ -873,9 +874,16 @@ static void ts_parser__handle_error(TSParser *self, StackVersion version,
state_after_missing_symbol,
lookahead_symbol
)) {
// If the parser is currently outside of every included range, the lexer will
// snap forward to the beginning of the next included range. Reset the lexer to
// the current position, mark the token end there, and use the distance it moved
// as the missing token's padding, so that the token is positioned within the
// next included range.
ts_lexer_reset(&self->lexer, position);
ts_lexer_mark_end(&self->lexer);
Length padding = length_sub(self->lexer.token_end_position, position);
StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
const Subtree *missing_tree = ts_subtree_new_missing_leaf(
&self->tree_pool, missing_symbol, self->language
&self->tree_pool, missing_symbol, padding, self->language
);
ts_stack_push(
self->stack, version_with_missing_tree,

View file

@ -395,9 +395,9 @@ Subtree *ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children,
return ts_subtree_new_node(pool, ts_builtin_sym_error, children, 0, language);
}
Subtree *ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol,
Subtree *ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding,
const TSLanguage *language) {
Subtree *result = ts_subtree_new_leaf(pool, symbol, length_zero(), length_zero(), language);
Subtree *result = ts_subtree_new_leaf(pool, symbol, padding, length_zero(), language);
result->is_missing = true;
result->error_cost = ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
return result;

View file

@ -94,7 +94,7 @@ Subtree *ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned,
Subtree *ts_subtree_new_copy(SubtreePool *, const Subtree *);
Subtree *ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, const TSLanguage *);
Subtree *ts_subtree_new_error(SubtreePool *, Length, Length, int32_t, const TSLanguage *);
Subtree *ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, const TSLanguage *);
Subtree *ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, const TSLanguage *);
Subtree *ts_subtree_make_mut(SubtreePool *, const Subtree *);
void ts_subtree_retain(const Subtree *tree);
void ts_subtree_release(SubtreePool *, const Subtree *tree);

View file

@ -934,6 +934,52 @@ describe("Parser", [&]() {
assert_root_node("(program (ERROR (identifier)))");
});
// Regression test: when the source text before an included range is excluded,
// a missing token must be reported inside the included ranges rather than at
// the very start of the document.
it("does not allow missing tokens to be inserted outside of included ranges", [&]() {
// Grammar: `program` is the fixed sequence A b c A b c, where `A` wraps the
// literal token `a`, and `a`, `b`, `c` are single-character string tokens.
string test_grammar = R"JSON({
"name": "test_leading_missing_token",
"rules": {
"program": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "A"},
{"type": "SYMBOL", "name": "b"},
{"type": "SYMBOL", "name": "c"},
{"type": "SYMBOL", "name": "A"},
{"type": "SYMBOL", "name": "b"},
{"type": "SYMBOL", "name": "c"}
]
},
"A": {"type": "SYMBOL", "name": "a"},
"a": {"type": "STRING", "value": "a"},
"b": {"type": "STRING", "value": "b"},
"c": {"type": "STRING", "value": "c"}
}
})JSON";
// Compile the grammar and load it under the same name that the JSON declares.
const TSLanguage *language = load_test_language(
"test_leading_missing_token",
ts_compile_grammar(test_grammar.c_str(), nullptr)
);
ts_parser_set_language(parser, language);
// There's a missing `a` token at the beginning of the code. It must be inserted
// at the beginning of the first included range, not at {0, 0}.
string source_code = "__bc__bc__";
// The two ranges cover the two `bc` substrings; the underscores are excluded.
// NOTE(review): initializer order is presumably
// {start_point, end_point, start_byte, end_byte} -- confirm against TSRange.
TSRange included_ranges[2] = {
{{0, 2}, {0, 4}, 2, 4},
{{0, 6}, {0, 8}, 6, 8},
};
ts_parser_set_included_ranges(parser, included_ranges, 2);
tree = ts_parser_parse_string(parser, nullptr, source_code.c_str(), source_code.size());
root = ts_tree_root_node(tree);
// Neither included range contains an `a`, so each `A` is expected to hold a
// MISSING node.
assert_root_node("(program (A (MISSING)) (b) (c) (A (MISSING)) (b) (c))");
// The root must start at column 2 (the start of the first included range).
// Child index 3 is the second `A` in the expected tree (assuming zero-based
// child indexing) and is expected to start at column 4.
AssertThat(ts_node_start_point(root), Equals<TSPoint>({0, 2}));
AssertThat(ts_node_start_point(ts_node_child(root, 3)), Equals<TSPoint>({0, 4}));
});
it("allows external scanners to detect the boundaries of included ranges", [&]() {
string source_code = "a <%= b() %> c <% d() %>";