tree-sitter/cli/src/query_testing.rs

use std::fs;

use anyhow::{anyhow, Result};
use bstr::{BStr, ByteSlice};
use lazy_static::lazy_static;
use regex::Regex;
use tree_sitter::{Language, Parser, Point};

lazy_static! {
    // Matches one or more word characters, underscores, hyphens or dots. It is run on
    // the text that follows an assertion arrow to pull out the expected capture name;
    // only the first match is used.
    static ref CAPTURE_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap();
}

/// A `(row, column)` position inside a source file.
///
/// The comparison traits are derived, so two points are ordered by `row` first
/// and by `column` second.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Utf8Point {
    /// Index of the line.
    pub row: usize,
    /// Index within the line.
    pub column: usize,
}

impl std::fmt::Display for Utf8Point {
    /// Renders the point as `(row, column)`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_fmt(format_args!("({}, {})", self.row, self.column))
    }
}

impl Utf8Point {
    /// Creates a point from a row and a column; usable in `const` contexts.
    pub const fn new(row: usize, column: usize) -> Self {
        Self { row, column }
    }
}

pub fn to_utf8_point(point: Point, source: &[u8]) -> Utf8Point {
    if point.column == 0 {
        return Utf8Point::new(point.row, 0);
    }

    let bstr = BStr::new(source);
    let line = bstr.lines_with_terminator().nth(point.row).unwrap();
    let mut utf8_column = 0;

    for (_, grapheme_end, _) in line.grapheme_indices() {
        utf8_column += 1;
        if grapheme_end >= point.column {
            break;
        }
    }

    Utf8Point {
        row: point.row,
        column: utf8_column,
    }
}

/// A named capture together with the span it covers.
#[derive(Debug, Eq, PartialEq)]
pub struct CaptureInfo {
    /// Name of the capture; compared against `Assertion::expected_capture_name`.
    pub name: String,
    /// First position of the span (treated as inclusive by `assert_expected_captures`).
    pub start: Utf8Point,
    /// End of the span (treated as exclusive by `assert_expected_captures`).
    pub end: Utf8Point,
}

/// An expectation about which capture applies at a given position.
#[derive(Debug, PartialEq, Eq)]
pub struct Assertion {
    /// Position the assertion refers to.
    pub position: Utf8Point,
    /// `true` when the capture name in the comment was preceded by `!`.
    pub negative: bool,
    /// Capture name written in the assertion.
    pub expected_capture_name: String,
}

impl Assertion {
    /// Builds an assertion that targets the point `(row, col)`.
    ///
    /// `negative` and `expected_capture_name` are stored unchanged.
    #[must_use]
    pub const fn new(
        row: usize,
        col: usize,
        negative: bool,
        expected_capture_name: String,
    ) -> Self {
        let position = Utf8Point::new(row, col);
        Self {
            position,
            negative,
            expected_capture_name,
        }
    }
}

/// Parse the given source code, finding all of the comments that contain
/// highlighting assertions. Return a vector of (position, expected highlight name)
/// pairs.
pub fn parse_position_comments(
    parser: &mut Parser,
    language: &Language,
    source: &[u8],
) -> Result<Vec<Assertion>> {
    let mut result = Vec::new();
    let mut assertion_ranges = Vec::new();

    // Parse the code.
    parser.set_included_ranges(&[]).unwrap();
    parser.set_language(language).unwrap();
    let tree = parser.parse(source, None).unwrap();

    // Walk the tree, finding comment nodes that contain assertions.
    let mut ascending = false;
    let mut cursor = tree.root_node().walk();
    loop {
        if ascending {
            let node = cursor.node();

            // Find every comment node.
            if node.kind().to_lowercase().contains("comment") {
                if let Ok(text) = node.utf8_text(source) {
                    let mut position = node.start_position();
                    if position.row > 0 {
                        // Find the arrow character ("^" or "<-") in the comment. A left arrow
                        // refers to the column where the comment node starts. An up arrow refers
                        // to its own column.
                        let mut has_left_caret = false;
                        let mut has_arrow = false;
                        let mut negative = false;
                        let mut arrow_end = 0;
                        for (i, c) in text.char_indices() {
                            arrow_end = i + 1;
                            if c == '-' && has_left_caret {
                                has_arrow = true;
                                break;
                            }
                            if c == '^' {
                                has_arrow = true;
                                position.column += i;
                                break;
                            }
                            has_left_caret = c == '<';
                        }

                        // find any ! after arrows but before capture name
                        if has_arrow {
                            for (i, c) in text[arrow_end..].char_indices() {
                                if c == '!' {
                                    negative = true;
                                    arrow_end += i + 1;
                                    break;
                                } else if !c.is_whitespace() {
                                    break;
                                }
                            }
                        }

                        // If the comment node contains an arrow and a highlight name, record the
                        // highlight name and the position.
                        if let (true, Some(mat)) =
                            (has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..]))
                        {
                            assertion_ranges.push((node.start_position(), node.end_position()));
                            result.push(Assertion {
                                position: to_utf8_point(position, source),
                                negative,
                                expected_capture_name: mat.as_str().to_string(),
                            });
                        }
                    }
                }
            }

            // Continue walking the tree.
            if cursor.goto_next_sibling() {
                ascending = false;
            } else if !cursor.goto_parent() {
                break;
            }
        } else if !cursor.goto_first_child() {
            ascending = true;
        }
    }

    // Adjust the row number in each assertion's position to refer to the line of
    // code *above* the assertion. There can be multiple lines of assertion comments,
    // so the positions may have to be decremented by more than one row.
    let mut i = 0;
    for assertion in &mut result {
        loop {
            let on_assertion_line = assertion_ranges[i..]
                .iter()
                .any(|(start, _)| start.row == assertion.position.row);
            if on_assertion_line {
                assertion.position.row -= 1;
            } else {
                while i < assertion_ranges.len()
                    && assertion_ranges[i].0.row < assertion.position.row
                {
                    i += 1;
                }
                break;
            }
        }
    }

    // The assertions can end up out of order due to the line adjustments.
    result.sort_unstable_by_key(|a| a.position);

    Ok(result)
}

/// Check the captures in `infos` against the assertion comments in the file at `path`.
///
/// A capture *covers* an assertion when it starts on the asserted row and the
/// asserted position lies in `start..end`.
///
/// - For a regular assertion, the first covering capture must carry the expected
///   name.
/// - For a negative assertion (`!name`), no covering capture may carry that name.
///
/// Returns the number of assertions found in the file.
///
/// # Errors
///
/// Returns an error if the file cannot be read, if its assertions cannot be parsed,
/// or as soon as one assertion does not hold.
pub fn assert_expected_captures(
    infos: &[CaptureInfo],
    path: &str,
    parser: &mut Parser,
    language: &Language,
) -> Result<usize> {
    let contents = fs::read_to_string(path)?;
    let pairs = parse_position_comments(parser, language, contents.as_bytes())?;
    for assertion in &pairs {
        let mut covering = infos.iter().filter(|info| {
            assertion.position.row == info.start.row
                && assertion.position >= info.start
                && assertion.position < info.end
        });

        if assertion.negative {
            // A negative assertion only fails when the forbidden capture is present.
            let forbidden = covering.find(|info| info.name == assertion.expected_capture_name);
            if let Some(found) = forbidden {
                return Err(anyhow!(
                    "Assertion failed: at {}, found {}, expected it to be absent",
                    found.start,
                    found.name
                ));
            }
            continue;
        }

        match covering.next() {
            Some(found) => {
                // NOTE(review): a capture literally called `name` is accepted whatever
                // the expected name is; the reason is not visible here — confirm.
                if assertion.expected_capture_name != found.name && found.name != "name" {
                    return Err(anyhow!(
                        "Assertion failed: at {}, found {}, expected {}",
                        found.start,
                        found.name,
                        assertion.expected_capture_name
                    ));
                }
            }
            None => {
                return Err(anyhow!(
                    "Assertion failed: could not match {} at {}",
                    assertion.expected_capture_name,
                    assertion.position
                ));
            }
        }
    }
    Ok(pairs.len())
}
style: format imports 2024-04-09 13:35:08 -04:00			`use std::fs;`

cli: Use anyhow and thiserror for errors This patch updates the CLI to use anyhow and thiserror for error management. The main feature that our custom `Error` type was providing was a _list_ of messages, which would allow us to annotate "lower-level" errors with more contextual information. This is exactly what's provided by anyhow's `Context` trait. (This is setup work for a future PR that will pull the `config` and `loader` modules out into separate crates; by using `anyhow` we wouldn't have to deal with a circular dependency with the new crates.) 2021-06-09 12:32:22 -04:00			`use anyhow::{anyhow, Result};`
fix: properly handle utf8 code points for highlight and tag assertions 2024-09-08 14:26:34 -04:00			`use bstr::{BStr, ByteSlice};`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`use lazy_static::lazy_static;`
			`use regex::Regex;`
			`use tree_sitter::{Language, Parser, Point};`

			`lazy_static! {`
better name for capture regex 2020-11-10 16:23:39 -05:00			`static ref CAPTURE_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap();`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`}`

fix: properly handle utf8 code points for highlight and tag assertions 2024-09-08 14:26:34 -04:00			`#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]`
			`pub struct Utf8Point {`
			`pub row: usize,`
			`pub column: usize,`
			`}`

			`impl std::fmt::Display for Utf8Point {`
			`fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {`
			`write!(f, "({}, {})", self.row, self.column)`
			`}`
			`}`

			`impl Utf8Point {`
			`pub const fn new(row: usize, column: usize) -> Self {`
			`Self { row, column }`
			`}`
			`}`

			`pub fn to_utf8_point(point: Point, source: &[u8]) -> Utf8Point {`
			`if point.column == 0 {`
			`return Utf8Point::new(point.row, 0);`
			`}`

			`let bstr = BStr::new(source);`
			`let line = bstr.lines_with_terminator().nth(point.row).unwrap();`
			`let mut utf8_column = 0;`

			`for (_, grapheme_end, _) in line.grapheme_indices() {`
			`utf8_column += 1;`
			`if grapheme_end >= point.column {`
			`break;`
			`}`
			`}`

			`Utf8Point {`
			`row: point.row,`
			`column: utf8_column,`
			`}`
			`}`

move shared code to query_testing 2020-10-26 14:27:33 -04:00			`#[derive(Debug, Eq, PartialEq)]`
			`pub struct CaptureInfo {`
			`pub name: String,`
fix: properly handle utf8 code points for highlight and tag assertions 2024-09-08 14:26:34 -04:00			`pub start: Utf8Point,`
			`pub end: Utf8Point,`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`}`

propitiate the tests 2020-10-26 14:35:18 -04:00			`#[derive(Debug, PartialEq, Eq)]`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`pub struct Assertion {`
fix: properly handle utf8 code points for highlight and tag assertions 2024-09-08 14:26:34 -04:00			`pub position: Utf8Point,`
feat: add negative assertions, remove duplicate code 2023-07-24 23:44:10 -04:00			`pub negative: bool,`
rename Assertion.expected to expected_capture_name 2020-11-10 16:20:51 -05:00			`pub expected_capture_name: String,`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`}`

feat: add negative assertions, remove duplicate code 2023-07-24 23:44:10 -04:00			`impl Assertion {`
chore(cli): apply clippy fixes 2024-02-04 01:30:33 -05:00			`#[must_use]`
chore: clippy lints 2024-09-07 20:13:58 -04:00			`pub const fn new(`
			`row: usize,`
			`col: usize,`
			`negative: bool,`
			`expected_capture_name: String,`
			`) -> Self {`
feat: add negative assertions, remove duplicate code 2023-07-24 23:44:10 -04:00			`Self {`
fix: properly handle utf8 code points for highlight and tag assertions 2024-09-08 14:26:34 -04:00			`position: Utf8Point::new(row, col),`
feat: add negative assertions, remove duplicate code 2023-07-24 23:44:10 -04:00			`negative,`
			`expected_capture_name,`
			`}`
			`}`
			`}`

move shared code to query_testing 2020-10-26 14:27:33 -04:00			`/// Parse the given source code, finding all of the comments that contain`
			`/// highlighting assertions. Return a vector of (position, expected highlight name)`
			`/// pairs.`
propitiate the tests 2020-10-26 14:35:18 -04:00			`pub fn parse_position_comments(`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`parser: &mut Parser,`
chore(cli): apply clippy fixes 2024-02-04 01:30:33 -05:00			`language: &Language,`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`source: &[u8],`
			`) -> Result<Vec<Assertion>> {`
			`let mut result = Vec::new();`
			`let mut assertion_ranges = Vec::new();`

			`// Parse the code.`
			`parser.set_included_ranges(&[]).unwrap();`
chore(cli): apply clippy fixes 2024-02-04 01:30:33 -05:00			`parser.set_language(language).unwrap();`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`let tree = parser.parse(source, None).unwrap();`

			`// Walk the tree, finding comment nodes that contain assertions.`
			`let mut ascending = false;`
			`let mut cursor = tree.root_node().walk();`
			`loop {`
			`if ascending {`
			`let node = cursor.node();`

			`// Find every comment node.`
fix(assertions): case shouldn't matter for comment node detection 2024-02-28 07:47:14 -05:00			`if node.kind().to_lowercase().contains("comment") {`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`if let Ok(text) = node.utf8_text(source) {`
			`let mut position = node.start_position();`
Fix possible infinite loop when running syntax highlighting tests Fixes #1347 2021-08-29 15:03:53 -07:00			`if position.row > 0 {`
fix: correct comment quote 2024-09-17 09:39:49 +08:00			`// Find the arrow character ("^" or "<-") in the comment. A left arrow`
Fix possible infinite loop when running syntax highlighting tests Fixes #1347 2021-08-29 15:03:53 -07:00			`// refers to the column where the comment node starts. An up arrow refers`
			`// to its own column.`
			`let mut has_left_caret = false;`
			`let mut has_arrow = false;`
feat: add negative assertions, remove duplicate code 2023-07-24 23:44:10 -04:00			`let mut negative = false;`
Fix possible infinite loop when running syntax highlighting tests Fixes #1347 2021-08-29 15:03:53 -07:00			`let mut arrow_end = 0;`
			`for (i, c) in text.char_indices() {`
			`arrow_end = i + 1;`
			`if c == '-' && has_left_caret {`
			`has_arrow = true;`
			`break;`
			`}`
			`if c == '^' {`
			`has_arrow = true;`
			`position.column += i;`
			`break;`
			`}`
			`has_left_caret = c == '<';`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`}`

feat: add negative assertions, remove duplicate code 2023-07-24 23:44:10 -04:00			`// find any ! after arrows but before capture name`
			`if has_arrow {`
			`for (i, c) in text[arrow_end..].char_indices() {`
			`if c == '!' {`
			`negative = true;`
			`arrow_end += i + 1;`
			`break;`
			`} else if !c.is_whitespace() {`
			`break;`
			`}`
			`}`
			`}`

Fix possible infinite loop when running syntax highlighting tests Fixes #1347 2021-08-29 15:03:53 -07:00			`// If the comment node contains an arrow and a highlight name, record the`
			`// highlight name and the position.`
			`if let (true, Some(mat)) =`
			`(has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..]))`
			`{`
			`assertion_ranges.push((node.start_position(), node.end_position()));`
			`result.push(Assertion {`
fix: properly handle utf8 code points for highlight and tag assertions 2024-09-08 14:26:34 -04:00			`position: to_utf8_point(position, source),`
feat: add negative assertions, remove duplicate code 2023-07-24 23:44:10 -04:00			`negative,`
Fix possible infinite loop when running syntax highlighting tests Fixes #1347 2021-08-29 15:03:53 -07:00			`expected_capture_name: mat.as_str().to_string(),`
			`});`
			`}`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`}`
			`}`
			`}`

			`// Continue walking the tree.`
			`if cursor.goto_next_sibling() {`
			`ascending = false;`
			`} else if !cursor.goto_parent() {`
			`break;`
			`}`
			`} else if !cursor.goto_first_child() {`
			`ascending = true;`
			`}`
			`}`

			`// Adjust the row number in each assertion's position to refer to the line of`
			`// code above the assertion. There can be multiple lines of assertion comments,`
			`// so the positions may have to be decremented by more than one row.`
			`let mut i = 0;`
chore(cli): apply clippy fixes 2024-02-04 01:30:33 -05:00			`for assertion in &mut result {`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`loop {`
			`let on_assertion_line = assertion_ranges[i..]`
			`.iter()`
			`.any(\|(start, _)\| start.row == assertion.position.row);`
			`if on_assertion_line {`
			`assertion.position.row -= 1;`
			`} else {`
			`while i < assertion_ranges.len()`
			`&& assertion_ranges[i].0.row < assertion.position.row`
			`{`
			`i += 1;`
			`}`
			`break;`
			`}`
			`}`
			`}`

			`// The assertions can end up out of order due to the line adjustments.`
			`result.sort_unstable_by_key(\|a\| a.position);`

			`Ok(result)`
			`}`

			`pub fn assert_expected_captures(`
chore(cli): apply clippy fixes 2024-02-04 01:30:33 -05:00			`infos: &[CaptureInfo],`
feat(test): test all queries Fallback to default testing for all queries present in the parser's queries directory. For a given query <QUERY>.scm, the test files are searched in test/<QUERY>/* Also mimic the output of other test-running subcommands when testing queries. Co-authored-by: Thomas Vigouroux <thomas.vigouroux@protonmail.com> 2024-09-24 11:43:13 -04:00			`path: &str,`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`parser: &mut Parser,`
chore(cli): apply clippy fixes 2024-02-04 01:30:33 -05:00			`language: &Language,`
feat(test): test all queries Fallback to default testing for all queries present in the parser's queries directory. For a given query <QUERY>.scm, the test files are searched in test/<QUERY>/* Also mimic the output of other test-running subcommands when testing queries. Co-authored-by: Thomas Vigouroux <thomas.vigouroux@protonmail.com> 2024-09-24 11:43:13 -04:00			`) -> Result<usize> {`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`let contents = fs::read_to_string(path)?;`
propitiate the tests 2020-10-26 14:35:18 -04:00			`let pairs = parse_position_comments(parser, language, contents.as_bytes())?;`
feat(test): test all queries Fallback to default testing for all queries present in the parser's queries directory. For a given query <QUERY>.scm, the test files are searched in test/<QUERY>/* Also mimic the output of other test-running subcommands when testing queries. Co-authored-by: Thomas Vigouroux <thomas.vigouroux@protonmail.com> 2024-09-24 11:43:13 -04:00			`for assertion in &pairs {`
			`if let Some(found) = &infos.iter().find(\|p\| {`
			`assertion.position.row == p.start.row`
			`&& assertion.position >= p.start`
			`&& assertion.position < p.end`
inline this lambda 2020-11-23 12:05:32 -05:00			`}) {`
feat(test): test all queries Fallback to default testing for all queries present in the parser's queries directory. For a given query <QUERY>.scm, the test files are searched in test/<QUERY>/* Also mimic the output of other test-running subcommands when testing queries. Co-authored-by: Thomas Vigouroux <thomas.vigouroux@protonmail.com> 2024-09-24 11:43:13 -04:00			`if assertion.expected_capture_name != found.name && found.name != "name" {`
			`return Err(anyhow!(`
Allow overlap in specs. 2020-11-23 11:58:07 -05:00			`"Assertion failed: at {}, found {}, expected {}",`
feat(test): test all queries Fallback to default testing for all queries present in the parser's queries directory. For a given query <QUERY>.scm, the test files are searched in test/<QUERY>/* Also mimic the output of other test-running subcommands when testing queries. Co-authored-by: Thomas Vigouroux <thomas.vigouroux@protonmail.com> 2024-09-24 11:43:13 -04:00			`found.start,`
			`assertion.expected_capture_name,`
			`found.name`
			`));`
Allow overlap in specs. 2020-11-23 11:58:07 -05:00			`}`
feat(test): test all queries Fallback to default testing for all queries present in the parser's queries directory. For a given query <QUERY>.scm, the test files are searched in test/<QUERY>/* Also mimic the output of other test-running subcommands when testing queries. Co-authored-by: Thomas Vigouroux <thomas.vigouroux@protonmail.com> 2024-09-24 11:43:13 -04:00			`} else {`
			`return Err(anyhow!(`
			`"Assertion failed: could not match {} at {}",`
			`assertion.expected_capture_name,`
			`assertion.position`
			`));`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`}`
			`}`
feat(test): test all queries Fallback to default testing for all queries present in the parser's queries directory. For a given query <QUERY>.scm, the test files are searched in test/<QUERY>/* Also mimic the output of other test-running subcommands when testing queries. Co-authored-by: Thomas Vigouroux <thomas.vigouroux@protonmail.com> 2024-09-24 11:43:13 -04:00			`Ok(pairs.len())`
move shared code to query_testing 2020-10-26 14:27:33 -04:00			`}`