From cd1abd93510a87afe930eddef6862551e10d93c0 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Tue, 8 Oct 2024 17:45:25 -0400 Subject: [PATCH] fix(lib): correct unexpected side effect in `get_column` when the lexer is at EOF (cherry picked from commit 538a19797614e5934d9319f59a0d567b4f246ee2) --- cli/src/tests/parser_test.rs | 14 ++++++++ lib/src/lexer.c | 12 +++---- .../test_grammars/get_col_eof/corpus.txt | 0 .../test_grammars/get_col_eof/grammar.js | 11 ++++++ .../test_grammars/get_col_eof/scanner.c | 34 +++++++++++++++++++ 5 files changed, 65 insertions(+), 6 deletions(-) create mode 100644 test/fixtures/test_grammars/get_col_eof/corpus.txt create mode 100644 test/fixtures/test_grammars/get_col_eof/grammar.js create mode 100644 test/fixtures/test_grammars/get_col_eof/scanner.c diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index e1319395..6c783aee 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1507,6 +1507,20 @@ fn test_parsing_with_scanner_logging() { assert!(found); } +#[test] +fn test_parsing_get_column_at_eof() { + let dir = fixtures_dir().join("test_grammars").join("get_col_eof"); + let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap(); + let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap(); + + let mut parser = Parser::new(); + parser + .set_language(&get_test_language(&grammar_name, &parser_code, Some(&dir))) + .unwrap(); + + parser.parse("a", None).unwrap(); +} + const fn simple_range(start: usize, end: usize) -> Range { Range { start_byte: start, diff --git a/lib/src/lexer.c b/lib/src/lexer.c index e795618d..0d60589a 100644 --- a/lib/src/lexer.c +++ b/lib/src/lexer.c @@ -252,12 +252,12 @@ static uint32_t ts_lexer__get_column(TSLexer *_self) { uint32_t goal_byte = self->current_position.bytes; self->did_get_column = true; - self->current_position.bytes -= self->current_position.extent.column; - self->current_position.extent.column = 0; - - if (self->current_position.bytes < self->chunk_start) { - ts_lexer__get_chunk(self); - } + Length start_of_col = { + self->current_position.bytes - self->current_position.extent.column, + {self->current_position.extent.row, 0}, + }; + ts_lexer_goto(self, start_of_col); + ts_lexer__get_chunk(self); uint32_t result = 0; if (!ts_lexer__eof(_self)) { diff --git a/test/fixtures/test_grammars/get_col_eof/corpus.txt b/test/fixtures/test_grammars/get_col_eof/corpus.txt new file mode 100644 index 00000000..e69de29b diff --git a/test/fixtures/test_grammars/get_col_eof/grammar.js b/test/fixtures/test_grammars/get_col_eof/grammar.js new file mode 100644 index 00000000..3b70db2f --- /dev/null +++ b/test/fixtures/test_grammars/get_col_eof/grammar.js @@ -0,0 +1,11 @@ +module.exports = grammar({ + name: "get_col_eof", + + externals: $ => [ + $.char + ], + + rules: { + source_file: $ => repeat($.char), + } +}); diff --git a/test/fixtures/test_grammars/get_col_eof/scanner.c b/test/fixtures/test_grammars/get_col_eof/scanner.c new file mode 100644 index 00000000..1d262cf9 --- /dev/null +++ b/test/fixtures/test_grammars/get_col_eof/scanner.c @@ -0,0 +1,34 @@ +#include "tree_sitter/parser.h" + +enum TokenType { CHAR }; + +void *tree_sitter_get_col_eof_external_scanner_create(void) { return NULL; } + +void tree_sitter_get_col_eof_external_scanner_destroy(void *scanner) {} + +unsigned tree_sitter_get_col_eof_external_scanner_serialize(void *scanner, + char *buffer) { + return 0; +} + +void tree_sitter_get_col_eof_external_scanner_deserialize(void *scanner, + const char *buffer, + unsigned length) {} + +bool tree_sitter_get_col_eof_external_scanner_scan(void *scanner, + TSLexer *lexer, + const bool *valid_symbols) { + if (lexer->eof(lexer)) { + return false; + } + + if (valid_symbols[CHAR]) { + lexer->advance(lexer, false); + lexer->get_column(lexer); + lexer->result_symbol = CHAR; + lexer->mark_end(lexer); + return true; + } + + return false; +}