Merge pull request #200 from tree-sitter/missing-tokens-in-included-ranges
Fix bug where missing token was inserted outside of any included range
This commit is contained in:
commit
dadd100fc3
6 changed files with 63 additions and 4 deletions
|
|
@ -256,6 +256,10 @@ void ts_lexer_advance_to_end(Lexer *self) {
|
|||
}
|
||||
}
|
||||
|
||||
// Public entry point that forwards to ts_lexer__mark_end, passing the
// address of `self->data`.
// NOTE(review): presumably this records the lexer's current position as the
// end of the token (cf. `token_end_position`) — confirm against
// ts_lexer__mark_end.
void ts_lexer_mark_end(Lexer *self) {
|
||||
ts_lexer__mark_end(&self->data);
|
||||
}
|
||||
|
||||
static const TSRange DEFAULT_RANGES[] = {
|
||||
{
|
||||
.start_point = {
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ void ts_lexer_set_input(Lexer *, TSInput);
|
|||
void ts_lexer_reset(Lexer *, Length);
|
||||
void ts_lexer_start(Lexer *);
|
||||
void ts_lexer_advance_to_end(Lexer *);
|
||||
void ts_lexer_mark_end(Lexer *);
|
||||
void ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);
|
||||
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);
|
||||
|
||||
|
|
|
|||
|
|
@ -853,6 +853,7 @@ static void ts_parser__handle_error(TSParser *self, StackVersion version,
|
|||
uint32_t previous_version_count = ts_stack_version_count(self->stack);
|
||||
ts_parser__do_all_potential_reductions(self, version, 0);
|
||||
uint32_t version_count = ts_stack_version_count(self->stack);
|
||||
Length position = ts_stack_position(self->stack, version);
|
||||
|
||||
// Push a discontinuity onto the stack. Merge all of the stack versions that
|
||||
// were created in the previous step.
|
||||
|
|
@ -873,9 +874,16 @@ static void ts_parser__handle_error(TSParser *self, StackVersion version,
|
|||
state_after_missing_symbol,
|
||||
lookahead_symbol
|
||||
)) {
|
||||
// In case the parser is currently outside of any included range, the lexer will
|
||||
// snap to the beginning of the next included range. The missing token's padding
|
||||
// must be assigned to position it within the next included range.
|
||||
ts_lexer_reset(&self->lexer, position);
|
||||
ts_lexer_mark_end(&self->lexer);
|
||||
Length padding = length_sub(self->lexer.token_end_position, position);
|
||||
|
||||
StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
|
||||
const Subtree *missing_tree = ts_subtree_new_missing_leaf(
|
||||
&self->tree_pool, missing_symbol, self->language
|
||||
&self->tree_pool, missing_symbol, padding, self->language
|
||||
);
|
||||
ts_stack_push(
|
||||
self->stack, version_with_missing_tree,
|
||||
|
|
|
|||
|
|
@ -395,9 +395,9 @@ Subtree *ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children,
|
|||
return ts_subtree_new_node(pool, ts_builtin_sym_error, children, 0, language);
|
||||
}
|
||||
|
||||
Subtree *ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol,
|
||||
Subtree *ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding,
|
||||
const TSLanguage *language) {
|
||||
Subtree *result = ts_subtree_new_leaf(pool, symbol, length_zero(), length_zero(), language);
|
||||
Subtree *result = ts_subtree_new_leaf(pool, symbol, padding, length_zero(), language);
|
||||
result->is_missing = true;
|
||||
result->error_cost = ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ Subtree *ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned,
|
|||
Subtree *ts_subtree_new_copy(SubtreePool *, const Subtree *);
|
||||
Subtree *ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, const TSLanguage *);
|
||||
Subtree *ts_subtree_new_error(SubtreePool *, Length, Length, int32_t, const TSLanguage *);
|
||||
Subtree *ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, const TSLanguage *);
|
||||
Subtree *ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, const TSLanguage *);
|
||||
Subtree *ts_subtree_make_mut(SubtreePool *, const Subtree *);
|
||||
void ts_subtree_retain(const Subtree *tree);
|
||||
void ts_subtree_release(SubtreePool *, const Subtree *tree);
|
||||
|
|
|
|||
|
|
@ -934,6 +934,52 @@ describe("Parser", [&]() {
|
|||
assert_root_node("(program (ERROR (identifier)))");
|
||||
});
|
||||
|
||||
// Regression test for missing-token placement when included ranges are set.
//
// The grammar accepts exactly `a b c a b c`, with each `a` wrapped in an `A`
// node. The source "__bc__bc__" is parsed with two included ranges spanning
// columns 2-4 and 6-8 of row 0, so neither range contains an `a`. The test
// expects both `a` tokens to be reported as MISSING, the root node to start
// at {0, 2} rather than {0, 0}, and the root's child at index 3 (the second
// `A`) to start at {0, 4}.
it("does not allow missing tokens to be inserted outside of included ranges", [&]() {
|
||||
string test_grammar = R"JSON({
|
||||
"name": "test_leading_missing_token",
|
||||
"rules": {
|
||||
"program": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "A"},
|
||||
{"type": "SYMBOL", "name": "b"},
|
||||
{"type": "SYMBOL", "name": "c"},
|
||||
{"type": "SYMBOL", "name": "A"},
|
||||
{"type": "SYMBOL", "name": "b"},
|
||||
{"type": "SYMBOL", "name": "c"}
|
||||
]
|
||||
},
|
||||
"A": {"type": "SYMBOL", "name": "a"},
|
||||
"a": {"type": "STRING", "value": "a"},
|
||||
"b": {"type": "STRING", "value": "b"},
|
||||
"c": {"type": "STRING", "value": "c"}
|
||||
}
|
||||
})JSON";
|
||||
|
||||
const TSLanguage *language = load_test_language(
|
||||
"test_leading_missing_token",
|
||||
ts_compile_grammar(test_grammar.c_str(), nullptr)
|
||||
);
|
||||
|
||||
ts_parser_set_language(parser, language);
|
||||
|
||||
// There's a missing `a` token at the beginning of the code. It must be inserted
|
||||
// at the beginning of the first included range, not at {0, 0}.
|
||||
string source_code = "__bc__bc__";
|
||||
TSRange included_ranges[2] = {
|
||||
{{0, 2}, {0, 4}, 2, 4},
|
||||
{{0, 6}, {0, 8}, 6, 8},
|
||||
};
|
||||
ts_parser_set_included_ranges(parser, included_ranges, 2);
|
||||
tree = ts_parser_parse_string(parser, nullptr, source_code.c_str(), source_code.size());
|
||||
root = ts_tree_root_node(tree);
|
||||
|
||||
|
||||
assert_root_node("(program (A (MISSING)) (b) (c) (A (MISSING)) (b) (c))");
|
||||
AssertThat(ts_node_start_point(root), Equals<TSPoint>({0, 2}));
|
||||
AssertThat(ts_node_start_point(ts_node_child(root, 3)), Equals<TSPoint>({0, 4}));
|
||||
});
|
||||
|
||||
it("allows external scanners to detect the boundaries of included ranges", [&]() {
|
||||
string source_code = "a <%= b() %> c <% d() %>";
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue