From 0cb2ef1082c1ddd05634560ccee643930b11640e Mon Sep 17 00:00:00 2001
From: Max Brunsfeld
Date: Fri, 6 Dec 2019 15:26:57 -0800
Subject: [PATCH] Fix code paths that still conflated null characters with EOF

---
 cli/src/tests/parser_test.rs               | 11 +++++++++++
 lib/src/parser.c                           |  2 +-
 lib/src/subtree.c                          |  4 ++--
 test/fixtures/error_corpus/json_errors.txt |  2 +-
 4 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs
index 694cbd1e..8a549d48 100644
--- a/cli/src/tests/parser_test.rs
+++ b/cli/src/tests/parser_test.rs
@@ -266,6 +266,17 @@ fn test_parsing_invalid_chars_at_eof() {
     assert_eq!(tree.root_node().to_sexp(), "(ERROR (UNEXPECTED INVALID))");
 }
 
+#[test]
+fn test_parsing_unexpected_null_characters_within_source() {
+    let mut parser = Parser::new();
+    parser.set_language(get_language("javascript")).unwrap();
+    let tree = parser.parse(b"var \0 something;", None).unwrap();
+    assert_eq!(
+        tree.root_node().to_sexp(),
+        "(program (variable_declaration (ERROR (UNEXPECTED '\\0')) (variable_declarator name: (identifier))))"
+    );
+}
+
 #[test]
 fn test_parsing_ends_when_input_callback_returns_empty() {
     let mut parser = Parser::new();
diff --git a/lib/src/parser.c b/lib/src/parser.c
index 23bae017..f381afcc 100644
--- a/lib/src/parser.c
+++ b/lib/src/parser.c
@@ -439,7 +439,7 @@ static Subtree ts_parser__lex(
     }
 
     if (self->lexer.current_position.bytes == error_end_position.bytes) {
-      if (self->lexer.data.lookahead == 0) {
+      if (self->lexer.data.eof(&self->lexer.data)) {
         self->lexer.data.result_symbol = ts_builtin_sym_error;
         break;
       }
diff --git a/lib/src/subtree.c b/lib/src/subtree.c
index c6c11223..30144fa1 100644
--- a/lib/src/subtree.c
+++ b/lib/src/subtree.c
@@ -766,10 +766,10 @@ Subtree ts_subtree_last_external_token(Subtree tree) {
 }
 
 static size_t ts_subtree__write_char_to_string(char *s, size_t n, int32_t c) {
-  if (c == 0)
-    return snprintf(s, n, "EOF");
   if (c == -1)
     return snprintf(s, n, "INVALID");
+  else if (c == '\0')
+    return snprintf(s, n, "'\\0'");
   else if (c == '\n')
     return snprintf(s, n, "'\\n'");
   else if (c == '\t')
diff --git a/test/fixtures/error_corpus/json_errors.txt b/test/fixtures/error_corpus/json_errors.txt
index b26aaf1d..53ce94e4 100644
--- a/test/fixtures/error_corpus/json_errors.txt
+++ b/test/fixtures/error_corpus/json_errors.txt
@@ -65,4 +65,4 @@ incomplete tokens at EOF
 nul
 ---
 
-(ERROR (UNEXPECTED EOF))
+(ERROR (UNEXPECTED '\0'))