From fa6c1471ef58f1a14c5656855119e8adb39335b6 Mon Sep 17 00:00:00 2001 From: Riley Bruins Date: Fri, 15 Nov 2024 20:49:06 -0800 Subject: [PATCH] fix(lib): correct escape detection for invalid anonymous nodes The current quotation escape checker fails when an anonymous node consists of just an escaped backslash: it thinks the trailing backslash escapes the closing quote, when really that backslash is itself escaped (see the added test case for an example of this). This commit ensures the node identification logic keeps track of the number of consecutive backslashes seen, so it can accurately determine whether the quotation mark is escaped. --- cli/src/tests/query_test.rs | 30 ++++++++++++++++++++++++++++++ lib/binding_rust/lib.rs | 35 ++++++++++++++++------------------- 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 2a492572..e1433c97 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -201,6 +201,36 @@ fn test_query_errors_on_invalid_symbols() { allocations::record(|| { let language = get_language("javascript"); + assert_eq!( + Query::new(&language, "\">>>>\"").unwrap_err(), + QueryError { + row: 0, + offset: 1, + column: 1, + kind: QueryErrorKind::NodeType, + message: ">>>>".to_string() + } + ); + assert_eq!( + Query::new(&language, "\"te\\\"st\"").unwrap_err(), + QueryError { + row: 0, + offset: 1, + column: 1, + kind: QueryErrorKind::NodeType, + message: "te\\\"st".to_string() + } + ); + assert_eq!( + Query::new(&language, "\"\\\\\" @cap").unwrap_err(), + QueryError { + row: 0, + offset: 1, + column: 1, + kind: QueryErrorKind::NodeType, + message: "\\\\".to_string() + } + ); assert_eq!( Query::new(&language, "(clas)").unwrap_err(), QueryError { diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 5c04c0a8..6950c63b 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -2353,27 +2353,24 @@ impl Query { ffi::TSQueryErrorNodeType | 
ffi::TSQueryErrorField | ffi::TSQueryErrorCapture => { let suffix = source.split_at(offset).1; let in_quotes = source.as_bytes()[offset - 1] == b'"'; - let mut end_offset = suffix.len(); - if let Some(pos) = suffix - .char_indices() - .take_while(|(_, c)| *c != '\n') - .find_map(|(i, c)| match c { - '"' if in_quotes - && i > 0 - && suffix.chars().nth(i - 1) != Some('\\') => - { - Some(i) + let mut backslashes = 0; + let end_offset = suffix + .find(|c| { + if in_quotes { + if c == '"' && backslashes % 2 == 0 { + true + } else if c == '\\' { + backslashes += 1; + false + } else { + backslashes = 0; + false + } + } else { + !char::is_alphanumeric(c) && c != '_' && c != '-' } - c if !in_quotes - && (c.is_whitespace() || c == '(' || c == ')' || c == ':') => - { - Some(i) - } - _ => None, }) - { - end_offset = pos; - } + .unwrap_or(suffix.len()); message = suffix.split_at(end_offset).0.to_string(); kind = match error_type { ffi::TSQueryErrorNodeType => QueryErrorKind::NodeType,