From 508499bab148b06da3ba3dffea1191d0eb72f2ed Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Sep 2018 17:25:28 -0700 Subject: [PATCH] Fix bug where missing token was inserted outside of any included range --- src/runtime/lexer.c | 4 ++++ src/runtime/lexer.h | 1 + src/runtime/parser.c | 10 +++++++- src/runtime/subtree.c | 4 ++-- src/runtime/subtree.h | 2 +- test/runtime/parser_test.cc | 46 +++++++++++++++++++++++++++++++++++++ 6 files changed, 63 insertions(+), 4 deletions(-) diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index f4ddaf25..10cc7f14 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -256,6 +256,10 @@ void ts_lexer_advance_to_end(Lexer *self) { } } +void ts_lexer_mark_end(Lexer *self) { + ts_lexer__mark_end(&self->data); +} + static const TSRange DEFAULT_RANGES[] = { { .start_point = { diff --git a/src/runtime/lexer.h b/src/runtime/lexer.h index 68ded2b0..c926d9e8 100644 --- a/src/runtime/lexer.h +++ b/src/runtime/lexer.h @@ -36,6 +36,7 @@ void ts_lexer_set_input(Lexer *, TSInput); void ts_lexer_reset(Lexer *, Length); void ts_lexer_start(Lexer *); void ts_lexer_advance_to_end(Lexer *); +void ts_lexer_mark_end(Lexer *); void ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count); TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count); diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 546db10f..46993781 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -853,6 +853,7 @@ static void ts_parser__handle_error(TSParser *self, StackVersion version, uint32_t previous_version_count = ts_stack_version_count(self->stack); ts_parser__do_all_potential_reductions(self, version, 0); uint32_t version_count = ts_stack_version_count(self->stack); + Length position = ts_stack_position(self->stack, version); // Push a discontinuity onto the stack. Merge all of the stack versions that // were created in the previous step. @@ -873,9 +874,16 @@ static void ts_parser__handle_error(TSParser *self, StackVersion version, state_after_missing_symbol, lookahead_symbol )) { + // In case the parser is currently outside of any included range, the lexer will + // snap to the beginning of the next included range. The missing token's padding + // must be assigned to position it within the next included range. + ts_lexer_reset(&self->lexer, position); + ts_lexer_mark_end(&self->lexer); + Length padding = length_sub(self->lexer.token_end_position, position); + StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v); const Subtree *missing_tree = ts_subtree_new_missing_leaf( - &self->tree_pool, missing_symbol, self->language + &self->tree_pool, missing_symbol, padding, self->language ); ts_stack_push( self->stack, version_with_missing_tree, diff --git a/src/runtime/subtree.c b/src/runtime/subtree.c index 1a052a72..ff9b37ef 100644 --- a/src/runtime/subtree.c +++ b/src/runtime/subtree.c @@ -395,9 +395,9 @@ Subtree *ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children, return ts_subtree_new_node(pool, ts_builtin_sym_error, children, 0, language); } -Subtree *ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, +Subtree *ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding, const TSLanguage *language) { - Subtree *result = ts_subtree_new_leaf(pool, symbol, length_zero(), length_zero(), language); + Subtree *result = ts_subtree_new_leaf(pool, symbol, padding, length_zero(), language); result->is_missing = true; result->error_cost = ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; return result; diff --git a/src/runtime/subtree.h b/src/runtime/subtree.h index 31c942e0..bd8dd0c5 100644 --- a/src/runtime/subtree.h +++ b/src/runtime/subtree.h @@ -94,7 +94,7 @@ Subtree *ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned, Subtree *ts_subtree_new_copy(SubtreePool *, const Subtree *); Subtree *ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, const TSLanguage *); Subtree *ts_subtree_new_error(SubtreePool *, Length, Length, int32_t, const TSLanguage *); -Subtree *ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, const TSLanguage *); +Subtree *ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, const TSLanguage *); Subtree *ts_subtree_make_mut(SubtreePool *, const Subtree *); void ts_subtree_retain(const Subtree *tree); void ts_subtree_release(SubtreePool *, const Subtree *tree); diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index e30b82c3..4932f155 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -934,6 +934,52 @@ describe("Parser", [&]() { assert_root_node("(program (ERROR (identifier)))"); }); + it("does not allow missing tokens to be inserted outside of included ranges", [&]() { + string test_grammar = R"JSON({ + "name": "test_leading_missing_token", + "rules": { + "program": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "A"}, + {"type": "SYMBOL", "name": "b"}, + {"type": "SYMBOL", "name": "c"}, + {"type": "SYMBOL", "name": "A"}, + {"type": "SYMBOL", "name": "b"}, + {"type": "SYMBOL", "name": "c"} + ] + }, + "A": {"type": "SYMBOL", "name": "a"}, + "a": {"type": "STRING", "value": "a"}, + "b": {"type": "STRING", "value": "b"}, + "c": {"type": "STRING", "value": "c"} + } + })JSON"; + + const TSLanguage *language = load_test_language( + "test_leading_missing_token", + ts_compile_grammar(test_grammar.c_str(), nullptr) + ); + + ts_parser_set_language(parser, language); + + // There's a missing `a` token at the beginning of the code. It must be inserted + // at the beginning of the first included range, not at {0, 0}. + string source_code = "__bc__bc__"; + TSRange included_ranges[2] = { + {{0, 2}, {0, 4}, 2, 4}, + {{0, 6}, {0, 8}, 6, 8}, + }; + ts_parser_set_included_ranges(parser, included_ranges, 2); + tree = ts_parser_parse_string(parser, nullptr, source_code.c_str(), source_code.size()); + root = ts_tree_root_node(tree); + + + assert_root_node("(program (A (MISSING)) (b) (c) (A (MISSING)) (b) (c))"); + AssertThat(ts_node_start_point(root), Equals({0, 2})); + AssertThat(ts_node_start_point(ts_node_child(root, 3)), Equals({0, 4})); + }); + it("allows external scanners to detect the boundaries of included ranges", [&]() { string source_code = "a <%= b() %> c <% d() %>";