From e29d3714f7ee821bb717ad4222bf5280ec7a67a9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 11 Mar 2021 11:25:10 -0800 Subject: [PATCH 1/2] Fix behavior of Lexer.get_column when at EOF --- lib/src/lexer.c | 115 +++++++-------- .../uses_current_column/corpus.txt | 76 ++++++++++ .../uses_current_column/grammar.json | 69 +++++++++ .../uses_current_column/scanner.c | 133 ++++++++++++++++++ 4 files changed, 337 insertions(+), 56 deletions(-) create mode 100644 test/fixtures/test_grammars/uses_current_column/corpus.txt create mode 100644 test/fixtures/test_grammars/uses_current_column/grammar.json create mode 100644 test/fixtures/test_grammars/uses_current_column/scanner.c diff --git a/lib/src/lexer.c b/lib/src/lexer.c index 08e90a8c..f349d76f 100644 --- a/lib/src/lexer.c +++ b/lib/src/lexer.c @@ -102,6 +102,56 @@ static void ts_lexer__get_lookahead(Lexer *self) { } } +static void ts_lexer_goto(Lexer *self, Length position) { + self->current_position = position; + bool found_included_range = false; + + // Move to the first valid position at or after the given position. + for (unsigned i = 0; i < self->included_range_count; i++) { + TSRange *included_range = &self->included_ranges[i]; + if (included_range->end_byte > position.bytes) { + if (included_range->start_byte > position.bytes) { + self->current_position = (Length) { + .bytes = included_range->start_byte, + .extent = included_range->start_point, + }; + } + + self->current_included_range_index = i; + found_included_range = true; + break; + } + } + + if (found_included_range) { + // If the current position is outside of the current chunk of text, + // then clear out the current chunk of text. 
+ if (self->chunk && ( + position.bytes < self->chunk_start || + position.bytes >= self->chunk_start + self->chunk_size + )) { + ts_lexer__clear_chunk(self); + } + + self->lookahead_size = 0; + self->data.lookahead = '\0'; + } + + // If the given position is beyond any of included ranges, move to the EOF + // state - past the end of the included ranges. + else { + self->current_included_range_index = self->included_range_count; + TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1]; + self->current_position = (Length) { + .bytes = last_included_range->end_byte, + .extent = last_included_range->end_point, + }; + ts_lexer__clear_chunk(self); + self->lookahead_size = 1; + self->data.lookahead = '\0'; + } +} + // Advance to the next character in the source code, retrieving a new // chunk of source code if needed. static void ts_lexer__advance(TSLexer *_self, bool skip) { @@ -185,12 +235,15 @@ static uint32_t ts_lexer__get_column(TSLexer *_self) { Lexer *self = (Lexer *)_self; uint32_t goal_byte = self->current_position.bytes; - self->current_position.bytes -= self->current_position.extent.column; - self->current_position.extent.column = 0; - - if (self->current_position.bytes < self->chunk_start) { - ts_lexer__get_chunk(self); - } + ts_lexer_goto(self, (Length) { + .bytes = self->current_position.bytes - self->current_position.extent.column, + .extent = { + .row = self->current_position.extent.row, + .column = 0, + } + }); + if (!self->chunk_size) ts_lexer__get_chunk(self); + if (!self->lookahead_size) ts_lexer__get_lookahead(self); uint32_t result = 0; while (self->current_position.bytes < goal_byte) { @@ -247,56 +300,6 @@ void ts_lexer_delete(Lexer *self) { ts_free(self->included_ranges); } -static void ts_lexer_goto(Lexer *self, Length position) { - self->current_position = position; - bool found_included_range = false; - - // Move to the first valid position at or after the given position. 
- for (unsigned i = 0; i < self->included_range_count; i++) { - TSRange *included_range = &self->included_ranges[i]; - if (included_range->end_byte > position.bytes) { - if (included_range->start_byte > position.bytes) { - self->current_position = (Length) { - .bytes = included_range->start_byte, - .extent = included_range->start_point, - }; - } - - self->current_included_range_index = i; - found_included_range = true; - break; - } - } - - if (found_included_range) { - // If the current position is outside of the current chunk of text, - // then clear out the current chunk of text. - if (self->chunk && ( - position.bytes < self->chunk_start || - position.bytes >= self->chunk_start + self->chunk_size - )) { - ts_lexer__clear_chunk(self); - } - - self->lookahead_size = 0; - self->data.lookahead = '\0'; - } - - // If the given position is beyond any of included ranges, move to the EOF - // state - past the end of the included ranges. - else { - self->current_included_range_index = self->included_range_count; - TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1]; - self->current_position = (Length) { - .bytes = last_included_range->end_byte, - .extent = last_included_range->end_point, - }; - ts_lexer__clear_chunk(self); - self->lookahead_size = 1; - self->data.lookahead = '\0'; - } -} - void ts_lexer_set_input(Lexer *self, TSInput input) { self->input = input; ts_lexer__clear_chunk(self); diff --git a/test/fixtures/test_grammars/uses_current_column/corpus.txt b/test/fixtures/test_grammars/uses_current_column/corpus.txt new file mode 100644 index 00000000..9638e25e --- /dev/null +++ b/test/fixtures/test_grammars/uses_current_column/corpus.txt @@ -0,0 +1,76 @@ +=============== +Simple blocks +=============== + +do a + e +f + +--- + +(block + (do_expression (block + (identifier) + (identifier))) + (identifier)) + +===================== +Nested blocks +===================== + +a = do b + c + do e + f + g + h +i + +--- + +(block + 
(binary_expression + (identifier) + (do_expression (block + (identifier) + (binary_expression + (identifier) + (do_expression (block + (identifier) + (identifier) + (identifier)))) + (identifier)))) + (identifier)) + +=============================== +Blocks with leading newlines +=============================== + +do + + + a = b + do + c + d + e + f + +--- + +(block + (do_expression (block + (binary_expression (identifier) (identifier)) + (do_expression (block + (identifier) + (identifier))) + (identifier) + (identifier)))) + +===================== +Unterminated blocks +===================== + +do +--- + +(ERROR) diff --git a/test/fixtures/test_grammars/uses_current_column/grammar.json b/test/fixtures/test_grammars/uses_current_column/grammar.json new file mode 100644 index 00000000..90c740b6 --- /dev/null +++ b/test/fixtures/test_grammars/uses_current_column/grammar.json @@ -0,0 +1,69 @@ +{ + "name": "uses_current_column", + + "externals": [ + {"type": "SYMBOL", "name": "_indent"}, + {"type": "SYMBOL", "name": "_dedent"}, + {"type": "SYMBOL", "name": "_newline"} + ], + + "extras": [ + {"type": "PATTERN", "value": "\\s"} + ], + + "rules": { + "block": { + "type": "REPEAT1", + "content": {"type": "SYMBOL", "name": "_statement"} + }, + + "_statement": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "_expression"}, + {"type": "SYMBOL", "name": "_newline"} + ] + }, + + "_expression": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "do_expression"}, + {"type": "SYMBOL", "name": "binary_expression"}, + {"type": "SYMBOL", "name": "identifier"} + ] + }, + + "do_expression": { + "type": "SEQ", + "members": [ + {"type": "STRING", "value": "do"}, + {"type": "SYMBOL", "name": "_indent"}, + {"type": "SYMBOL", "name": "block"}, + {"type": "SYMBOL", "name": "_dedent"} + ] + }, + + "binary_expression": { + "type": "PREC_LEFT", + "value": 1, + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "_expression"}, + { + "type": 
"CHOICE", + "members": [ + {"type": "STRING", "value": "="}, + {"type": "STRING", "value": "+"}, + {"type": "STRING", "value": "-"} + ] + }, + {"type": "SYMBOL", "name": "_expression"} + ] + } + }, + + "identifier": {"type": "PATTERN", "value": "\\w+"} + } +} diff --git a/test/fixtures/test_grammars/uses_current_column/scanner.c b/test/fixtures/test_grammars/uses_current_column/scanner.c new file mode 100644 index 00000000..efd27f9f --- /dev/null +++ b/test/fixtures/test_grammars/uses_current_column/scanner.c @@ -0,0 +1,133 @@ +#include <stdlib.h> +#include <wctype.h> +#include <tree_sitter/parser.h> + +enum TokenType { + INDENT, + DEDENT, + NEWLINE, +}; + +typedef struct { + uint8_t queued_dedent_count; + uint8_t indent_count; + int8_t indents[32]; +} Scanner; + +void *tree_sitter_uses_current_column_external_scanner_create() { + Scanner *self = malloc(sizeof(Scanner)); + self->queued_dedent_count = 0; + self->indent_count = 1; + self->indents[0] = 0; + return (void *)self; +} + +void tree_sitter_uses_current_column_external_scanner_destroy(void *payload) { + free(payload); +} + +unsigned tree_sitter_uses_current_column_external_scanner_serialize( + void *payload, + char *buffer +) { + Scanner *self = (Scanner *)payload; + buffer[0] = self->queued_dedent_count; + for (unsigned i = 0; i < self->indent_count; i++) { + buffer[i + 1] = self->indents[i]; + } + return self->indent_count + 1; +} + +void tree_sitter_uses_current_column_external_scanner_deserialize( + void *payload, + const char *buffer, + unsigned length +) { + Scanner *self = (Scanner *)payload; + if (length > 0) { + self->queued_dedent_count = buffer[0]; + self->indent_count = length - 1; + for (unsigned i = 0; i < self->indent_count; i++) { + self->indents[i] = buffer[i + 1]; + } + } else { + self->queued_dedent_count = 0; + self->indent_count = 1; + self->indents[0] = 0; + } +} + +bool tree_sitter_uses_current_column_external_scanner_scan( + void *payload, + TSLexer *lexer, + const bool *valid_symbols +) { + Scanner *self = (Scanner *)payload; +
lexer->mark_end(lexer); + + // If dedents were found in a previous run, and are valid now, + // then return a dedent. + if (self->queued_dedent_count > 0 && valid_symbols[DEDENT]) { + lexer->result_symbol = DEDENT; + self->queued_dedent_count--; + return true; + } + + // If an indent is valid, then add an entry to the indent stack + // for the current column, and return an indent. + if (valid_symbols[INDENT]) { + while (iswspace(lexer->lookahead)) { + lexer->advance(lexer, false); + } + uint32_t column = lexer->get_column(lexer); + if (column > self->indents[self->indent_count - 1]) { + self->indents[self->indent_count++] = column - 2; + lexer->result_symbol = INDENT; + return true; + } else { + return false; + } + } + + // If at the end of a statement, then get the current indent + // level and pop some number of entries off of the indent stack. + if (valid_symbols[NEWLINE] || valid_symbols[DEDENT]) { + while (lexer->lookahead == ' ') { + lexer->advance(lexer, false); + } + + if (lexer->lookahead == '\n') { + lexer->advance(lexer, false); + + uint32_t next_column = 0; + for (;;) { + if (lexer->lookahead == ' ') { + next_column++; + lexer->advance(lexer, false); + } else if (lexer->lookahead == '\n') { + next_column = 0; + lexer->advance(lexer, false); + } else { + break; + } + } + + unsigned dedent_count = 0; + while (next_column < self->indents[self->indent_count - 1]) { + dedent_count++; + self->indent_count--; + } + + if (dedent_count > 0 && valid_symbols[DEDENT]) { + lexer->result_symbol = DEDENT; + return true; + } else if (valid_symbols[NEWLINE]) { + self->queued_dedent_count += dedent_count; + lexer->result_symbol = NEWLINE; + return true; + } + } + } + + return false; +} From a40045a419e5b0a7818c4dbc0a2ff49c8dbca822 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 11 Mar 2021 14:46:13 -0800 Subject: [PATCH 2/2] When editing, properly invalidate trees that depend on get_column --- cli/src/tests/helpers/fixtures.rs | 6 ++ cli/src/tests/parser_test.rs 
| 79 ++++++++++++++++++- lib/src/lexer.c | 22 +----- lib/src/lexer.h | 11 +-- lib/src/parser.c | 3 + lib/src/subtree.c | 41 ++++++++-- lib/src/subtree.h | 7 +- .../uses_current_column/scanner.c | 2 +- 8 files changed, 136 insertions(+), 35 deletions(-) diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index fc459777..d098bd28 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -74,3 +74,9 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> .load_language_from_sources(name, &HEADER_DIR, &parser_c_path, &scanner_path) .unwrap() } + +pub fn get_test_grammar(name: &str) -> (String, Option<PathBuf>) { + let dir = fixtures_dir().join("test_grammars").join(name); + let grammar = fs::read_to_string(&dir.join("grammar.json")).unwrap(); + (grammar, Some(dir)) +} diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index b02f04b2..d623126f 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1,5 +1,5 @@ use super::helpers::edits::ReadRecorder; -use super::helpers::fixtures::{get_language, get_test_language}; +use super::helpers::fixtures::{get_language, get_test_grammar, get_test_language}; use crate::generate::generate_parser_for_grammar; use crate::parse::{perform_edit, Edit}; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -406,6 +406,83 @@ fn test_parsing_empty_file_with_reused_tree() { parser.parse("\n ", tree.as_ref()); } +#[test] +fn test_parsing_after_editing_tree_that_depends_on_column_values() { + let (grammar, path) = get_test_grammar("uses_current_column"); + let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar).unwrap(); + + let mut parser = Parser::new(); + parser + .set_language(get_test_language( + &grammar_name, + &parser_code, + path.as_ref().map(AsRef::as_ref), + )) + .unwrap(); + + let mut code = b" +a = b +c = do d + e + f + g +h + i + " + .to_vec(); + let mut tree = parser.parse(&code,
None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + concat!( + "(block ", + "(binary_expression (identifier) (identifier)) ", + "(binary_expression (identifier) (do_expression (block (identifier) (binary_expression (identifier) (identifier)) (identifier)))) ", + "(binary_expression (identifier) (identifier)))", + ) + ); + + perform_edit( + &mut tree, + &mut code, + &Edit { + position: 8, + deleted_length: 0, + inserted_text: b"1234".to_vec(), + }, + ); + + assert_eq!( + code, + b" +a = b +c1234 = do d + e + f + g +h + i + " + ); + + let mut recorder = ReadRecorder::new(&code); + let tree = parser + .parse_with(&mut |i, _| recorder.read(i), Some(&tree)) + .unwrap(); + + assert_eq!( + tree.root_node().to_sexp(), + concat!( + "(block ", + "(binary_expression (identifier) (identifier)) ", + "(binary_expression (identifier) (do_expression (block (identifier)))) ", + "(binary_expression (identifier) (identifier)) ", + "(identifier) ", + "(binary_expression (identifier) (identifier)))", + ) + ); + + assert_eq!( + recorder.strings_read(), + vec!["\nc1234 = do d\n e + f\n g\n"] + ); +} + // Thread safety #[test] diff --git a/lib/src/lexer.c b/lib/src/lexer.c index f349d76f..5d1965ad 100644 --- a/lib/src/lexer.c +++ b/lib/src/lexer.c @@ -233,25 +233,8 @@ static void ts_lexer__mark_end(TSLexer *_self) { static uint32_t ts_lexer__get_column(TSLexer *_self) { Lexer *self = (Lexer *)_self; - uint32_t goal_byte = self->current_position.bytes; - - ts_lexer_goto(self, (Length) { - .bytes = self->current_position.bytes - self->current_position.extent.column, - .extent = { - .row = self->current_position.extent.row, - .column = 0, - } - }); - if (!self->chunk_size) ts_lexer__get_chunk(self); - if (!self->lookahead_size) ts_lexer__get_lookahead(self); - - uint32_t result = 0; - while (self->current_position.bytes < goal_byte) { - ts_lexer__advance(&self->data, false); - result++; - } - - return result; + self->did_get_column = true; + return 
self->current_position.extent.column; } // Is the lexer at a boundary between two disjoint included ranges of @@ -318,6 +301,7 @@ void ts_lexer_start(Lexer *self) { self->token_start_position = self->current_position; self->token_end_position = LENGTH_UNDEFINED; self->data.result_symbol = 0; + self->did_get_column = false; if (!ts_lexer__eof(&self->data)) { if (!self->chunk_size) ts_lexer__get_chunk(self); if (!self->lookahead_size) ts_lexer__get_lookahead(self); diff --git a/lib/src/lexer.h b/lib/src/lexer.h index 5e392945..c1a5bfdb 100644 --- a/lib/src/lexer.h +++ b/lib/src/lexer.h @@ -17,16 +17,17 @@ typedef struct { Length token_end_position; TSRange *included_ranges; - size_t included_range_count; - size_t current_included_range_index; - const char *chunk; + TSInput input; + TSLogger logger; + + uint32_t included_range_count; + uint32_t current_included_range_index; uint32_t chunk_start; uint32_t chunk_size; uint32_t lookahead_size; + bool did_get_column; - TSInput input; - TSLogger logger; char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; } Lexer; diff --git a/lib/src/parser.c b/lib/src/parser.c index 35069f63..0f0b4ac4 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -403,6 +403,7 @@ static Subtree ts_parser__lex( bool found_external_token = false; bool error_mode = parse_state == ERROR_STATE; bool skipped_error = false; + bool called_get_column = false; int32_t first_error_character = 0; Length error_start_position = length_zero(); Length error_end_position = length_zero(); @@ -445,6 +446,7 @@ static Subtree ts_parser__lex( (!error_mode && ts_stack_has_advanced_since_error(self->stack, version)) )) { found_external_token = true; + called_get_column = self->lexer.did_get_column; break; } @@ -546,6 +548,7 @@ static Subtree ts_parser__lex( lookahead_bytes, parse_state, found_external_token, + called_get_column, is_keyword, self->language ); diff --git a/lib/src/subtree.c b/lib/src/subtree.c index e90dc9d7..e5f253ea 100644 --- a/lib/src/subtree.c 
+++ b/lib/src/subtree.c @@ -166,7 +166,8 @@ static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t l Subtree ts_subtree_new_leaf( SubtreePool *pool, TSSymbol symbol, Length padding, Length size, - uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens, + uint32_t lookahead_bytes, TSStateId parse_state, + bool has_external_tokens, bool depends_on_column, bool is_keyword, const TSLanguage *language ) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); @@ -213,6 +214,7 @@ Subtree ts_subtree_new_leaf( .fragile_right = false, .has_changes = false, .has_external_tokens = has_external_tokens, + .depends_on_column = depends_on_column, .is_missing = false, .is_keyword = is_keyword, {{.first_leaf = {.symbol = 0, .parse_state = 0}}} @@ -245,7 +247,7 @@ Subtree ts_subtree_new_error( ) { Subtree result = ts_subtree_new_leaf( pool, ts_builtin_sym_error, padding, size, bytes_scanned, - parse_state, false, false, language + parse_state, false, false, false, language ); SubtreeHeapData *data = (SubtreeHeapData *)result.ptr; data->fragile_left = true; @@ -378,6 +380,7 @@ void ts_subtree_summarize_children( self.ptr->repeat_depth = 0; self.ptr->node_count = 1; self.ptr->has_external_tokens = false; + self.ptr->depends_on_column = false; self.ptr->dynamic_precedence = 0; uint32_t structural_index = 0; @@ -388,6 +391,13 @@ void ts_subtree_summarize_children( for (uint32_t i = 0; i < self.ptr->child_count; i++) { Subtree child = children[i]; + if ( + self.ptr->size.extent.row == 0 && + ts_subtree_depends_on_column(child) + ) { + self.ptr->depends_on_column = true; + } + if (i == 0) { self.ptr->padding = ts_subtree_padding(child); self.ptr->size = ts_subtree_size(child); @@ -545,7 +555,7 @@ Subtree ts_subtree_new_missing_leaf( ) { Subtree result = ts_subtree_new_leaf( pool, symbol, padding, length_zero(), 0, - 0, false, false, language + 0, false, false, false, language ); if (result.data.is_inline) { 
result.data.is_missing = true; @@ -670,6 +680,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool Edit edit = entry.edit; bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes; bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; + bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree); Length size = ts_subtree_size(*entry.tree); Length padding = ts_subtree_padding(*entry.tree); @@ -733,6 +744,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool data->fragile_right = false; data->has_changes = false; data->has_external_tokens = false; + data->depends_on_column = false; data->is_missing = result.data.is_missing; data->is_keyword = result.data.is_keyword; result.ptr = data; @@ -755,9 +767,18 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool // If this child ends before the edit, it is not affected. if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue; - // If this child starts after the edit, then we're done processing children. - if (child_left.bytes > edit.old_end.bytes || - (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break; + // Keep editing child nodes until a node is reached that starts after the edit. + // Also, if this node's validity depends on its column position, then continue + // invalidating child nodes until reaching a line break. + if (( + (child_left.bytes > edit.old_end.bytes) || + (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0) + ) && ( + !invalidate_first_row || + child_left.extent.row > entry.tree->ptr->padding.extent.row + )) { + break; + } // Transform edit into the child's coordinate space.
Edit child_edit = { @@ -775,8 +796,10 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool // Interpret all inserted text as applying to the *first* child that touches the edit. // Subsequent children are only never have any text inserted into them; they are only // shrunk to compensate for the edit. - if (child_right.bytes > edit.start.bytes || - (child_right.bytes == edit.start.bytes && is_pure_insertion)) { + if ( + child_right.bytes > edit.start.bytes || + (child_right.bytes == edit.start.bytes && is_pure_insertion) + ) { edit.new_end = edit.start; } @@ -981,12 +1004,14 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, "state: %d\n" "error-cost: %u\n" "has-changes: %u\n" + "depends-on-column: %u\n" "repeat-depth: %u\n" "lookahead-bytes: %u", start_offset, end_offset, ts_subtree_parse_state(*self), ts_subtree_error_cost(*self), ts_subtree_has_changes(*self), + ts_subtree_depends_on_column(*self), ts_subtree_repeat_depth(*self), ts_subtree_lookahead_bytes(*self) ); diff --git a/lib/src/subtree.h b/lib/src/subtree.h index b020deb6..d227db10 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -78,6 +78,7 @@ typedef struct { bool fragile_right : 1; bool has_changes : 1; bool has_external_tokens : 1; + bool depends_on_column: 1; bool is_missing : 1; bool is_keyword : 1; @@ -138,7 +139,7 @@ void ts_subtree_pool_delete(SubtreePool *); Subtree ts_subtree_new_leaf( SubtreePool *, TSSymbol, Length, Length, uint32_t, - TSStateId, bool, bool, const TSLanguage * + TSStateId, bool, bool, bool, const TSLanguage * ); Subtree ts_subtree_new_error( SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage * @@ -284,6 +285,10 @@ static inline bool ts_subtree_has_external_tokens(Subtree self) { return self.data.is_inline ? false : self.ptr->has_external_tokens; } +static inline bool ts_subtree_depends_on_column(Subtree self) { + return self.data.is_inline ? 
false : self.ptr->depends_on_column; +} + static inline bool ts_subtree_is_fragile(Subtree self) { return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right); } diff --git a/test/fixtures/test_grammars/uses_current_column/scanner.c b/test/fixtures/test_grammars/uses_current_column/scanner.c index efd27f9f..62b16392 100644 --- a/test/fixtures/test_grammars/uses_current_column/scanner.c +++ b/test/fixtures/test_grammars/uses_current_column/scanner.c @@ -92,7 +92,7 @@ bool tree_sitter_uses_current_column_external_scanner_scan( // If at the end of a statement, then get the current indent // level and pop some number of entries off of the indent stack. if (valid_symbols[NEWLINE] || valid_symbols[DEDENT]) { - while (lexer->lookahead == ' ') { + while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') { lexer->advance(lexer, false); }