fix(lib): correct unexpected side effect in get_column when the lexer is at EOF

(cherry picked from commit 538a197976)
This commit is contained in:
Amaan Qureshi 2024-10-08 17:45:25 -04:00 committed by Christian Clason
parent 46bdc14e20
commit cd1abd9351
5 changed files with 65 additions and 6 deletions

View file

@ -1507,6 +1507,20 @@ fn test_parsing_with_scanner_logging() {
assert!(found);
}
/// Builds the `get_col_eof` fixture grammar (whose external scanner calls
/// `get_column` right after advancing) and checks that parsing a
/// one-character input produces a tree.
#[test]
fn test_parsing_get_column_at_eof() {
    let grammar_dir = fixtures_dir().join("test_grammars").join("get_col_eof");
    let grammar_path = grammar_dir.join("grammar.js");

    let grammar_json = load_grammar_file(&grammar_path, None).unwrap();
    let (name, code) = generate_parser_for_grammar(&grammar_json).unwrap();

    let mut parser = Parser::new();
    let language = get_test_language(&name, &code, Some(&grammar_dir));
    parser.set_language(&language).unwrap();

    // The assertion is simply that a tree comes back for this input.
    parser.parse("a", None).unwrap();
}
const fn simple_range(start: usize, end: usize) -> Range {
Range {
start_byte: start,

View file

@ -252,12 +252,12 @@ static uint32_t ts_lexer__get_column(TSLexer *_self) {
uint32_t goal_byte = self->current_position.bytes;
self->did_get_column = true;
self->current_position.bytes -= self->current_position.extent.column;
self->current_position.extent.column = 0;
if (self->current_position.bytes < self->chunk_start) {
ts_lexer__get_chunk(self);
}
Length start_of_col = {
self->current_position.bytes - self->current_position.extent.column,
{self->current_position.extent.row, 0},
};
ts_lexer_goto(self, start_of_col);
ts_lexer__get_chunk(self);
uint32_t result = 0;
if (!ts_lexer__eof(_self)) {

View file

View file

@ -0,0 +1,11 @@
module.exports = grammar({
name: "get_col_eof",
externals: $ => [
$.char
],
rules: {
source_file: $ => repeat($.char),
}
});

View file

@ -0,0 +1,34 @@
#include "tree_sitter/parser.h"
// External token kinds; the values are used to index `valid_symbols` and to
// set `result_symbol` in the scan function.
enum TokenType {
  CHAR,
};
// Creates the scanner payload. No state is allocated, so this always
// returns NULL.
void *tree_sitter_get_col_eof_external_scanner_create(void) {
  return NULL;
}
// Destroys the scanner payload. Intentionally a no-op: the payload is never
// touched or freed here.
void tree_sitter_get_col_eof_external_scanner_destroy(void *scanner) {
  (void)scanner;
}
// Serializes scanner state into `buffer`. Nothing is written and 0 is
// returned unconditionally.
unsigned tree_sitter_get_col_eof_external_scanner_serialize(void *scanner,
                                                            char *buffer) {
  (void)scanner;
  (void)buffer;
  return 0u;
}
// Restores scanner state from `buffer`. Intentionally a no-op: all three
// arguments are ignored.
void tree_sitter_get_col_eof_external_scanner_deserialize(void *scanner,
                                                          const char *buffer,
                                                          unsigned length) {
  (void)scanner;
  (void)buffer;
  (void)length;
}
// Scans a single CHAR token.
//
// Returns false when the lexer is already at EOF or when CHAR is not a valid
// symbol. Otherwise consumes one character, queries the column, marks the
// token end, and returns true with `result_symbol` set to CHAR.
bool tree_sitter_get_col_eof_external_scanner_scan(void *scanner,
                                                   TSLexer *lexer,
                                                   const bool *valid_symbols) {
  // The EOF check runs first; `valid_symbols` is only consulted when there
  // is still input left.
  if (lexer->eof(lexer) || !valid_symbols[CHAR]) {
    return false;
  }

  lexer->advance(lexer, false);

  // The column is queried immediately after advancing and before the token
  // end is marked; the returned value is discarded. Keep this call order:
  // it is the sequence this fixture exists to exercise.
  lexer->get_column(lexer);

  lexer->result_symbol = CHAR;
  lexer->mark_end(lexer);
  return true;
}