From 1704c604bf663801876572fe08b746e787cd7fdb Mon Sep 17 00:00:00 2001 From: Will Lillis Date: Sun, 20 Jul 2025 20:18:33 -0400 Subject: [PATCH] feat(test): allow cst as expected output of test case --- crates/cli/src/parse.rs | 139 ++++++++-------- crates/cli/src/test.rs | 166 +++++++++++++++---- docs/src/creating-parsers/5-writing-tests.md | 6 +- 3 files changed, 210 insertions(+), 101 deletions(-) diff --git a/crates/cli/src/parse.rs b/crates/cli/src/parse.rs index d6966f72..2badad79 100644 --- a/crates/cli/src/parse.rs +++ b/crates/cli/src/parse.rs @@ -1,6 +1,6 @@ use std::{ fmt, fs, - io::{self, StdoutLock, Write}, + io::{self, Write}, path::{Path, PathBuf}, sync::atomic::{AtomicUsize, Ordering}, time::{Duration, Instant}, @@ -501,53 +501,7 @@ pub fn parse_file_at_path( } if opts.output == ParseOutput::Cst { - let lossy_source_code = String::from_utf8_lossy(&source_code); - let total_width = lossy_source_code - .lines() - .enumerate() - .map(|(row, col)| { - (row as f64).log10() as usize + (col.len() as f64).log10() as usize + 1 - }) - .max() - .unwrap_or(1); - let mut indent_level = 1; - let mut did_visit_children = false; - let mut in_error = false; - loop { - if did_visit_children { - if cursor.goto_next_sibling() { - did_visit_children = false; - } else if cursor.goto_parent() { - did_visit_children = true; - indent_level -= 1; - if !cursor.node().has_error() { - in_error = false; - } - } else { - break; - } - } else { - cst_render_node( - opts, - &mut cursor, - &source_code, - &mut stdout, - total_width, - indent_level, - in_error, - )?; - if cursor.goto_first_child() { - did_visit_children = false; - indent_level += 1; - if cursor.node().has_error() { - in_error = true; - } - } else { - did_visit_children = true; - } - } - } - cursor.reset(tree.root_node()); + render_cst(&source_code, &tree, &mut cursor, opts, &mut stdout)?; println!(); } @@ -781,6 +735,61 @@ const fn escape_invisible(c: char) -> Option<&'static str> { }) } +pub fn render_cst<'a, 'b: 'a>( 
+ source_code: &[u8], + tree: &'b Tree, + cursor: &mut TreeCursor<'a>, + opts: &ParseFileOptions, + out: &mut impl Write, +) -> Result<()> { + let lossy_source_code = String::from_utf8_lossy(source_code); + let total_width = lossy_source_code + .lines() + .enumerate() + .map(|(row, col)| (row as f64).log10() as usize + (col.len() as f64).log10() as usize + 1) + .max() + .unwrap_or(1); + let mut indent_level = 1; + let mut did_visit_children = false; + let mut in_error = false; + loop { + if did_visit_children { + if cursor.goto_next_sibling() { + did_visit_children = false; + } else if cursor.goto_parent() { + did_visit_children = true; + indent_level -= 1; + if !cursor.node().has_error() { + in_error = false; + } + } else { + break; + } + } else { + cst_render_node( + opts, + cursor, + source_code, + out, + total_width, + indent_level, + in_error, + )?; + if cursor.goto_first_child() { + did_visit_children = false; + indent_level += 1; + if cursor.node().has_error() { + in_error = true; + } + } else { + did_visit_children = true; + } + } + } + cursor.reset(tree.root_node()); + Ok(()) +} + fn render_node_text(source: &str) -> String { source .chars() @@ -796,7 +805,7 @@ fn render_node_text(source: &str) -> String { fn write_node_text( opts: &ParseFileOptions, - stdout: &mut StdoutLock<'static>, + out: &mut impl Write, cursor: &TreeCursor, is_named: bool, source: &str, @@ -812,7 +821,7 @@ fn write_node_text( if !is_named { write!( - stdout, + out, "{}{}{}", paint(quote_color, &String::from(quote)), paint(color, &render_node_text(source)), @@ -838,7 +847,7 @@ fn write_node_text( let formatted_line = render_line_feed(line, opts); if !opts.no_ranges { write!( - stdout, + out, "{}{}{}{}{}{}", if multiline { "\n" } else { "" }, if multiline { @@ -857,7 +866,7 @@ fn write_node_text( )?; } else { write!( - stdout, + out, "\n{}{}{}{}", " ".repeat(indent_level + 1), paint(quote_color, &String::from(quote)), @@ -920,7 +929,7 @@ fn cst_render_node( opts: &ParseFileOptions, 
cursor: &mut TreeCursor, source_code: &[u8], - stdout: &mut StdoutLock<'static>, + out: &mut impl Write, total_width: usize, indent_level: usize, in_error: bool, @@ -929,13 +938,13 @@ fn cst_render_node( let is_named = node.is_named(); if !opts.no_ranges { write!( - stdout, + out, "{}", render_node_range(opts, cursor, is_named, false, total_width, node.range()) )?; } write!( - stdout, + out, "{}{}", " ".repeat(indent_level), if in_error && !node.has_error() { @@ -947,14 +956,14 @@ fn cst_render_node( if is_named { if let Some(field_name) = cursor.field_name() { write!( - stdout, + out, "{}", paint(opts.parse_theme.field, &format!("{field_name}: ")) )?; } if node.has_error() || node.is_error() { - write!(stdout, "{}", paint(opts.parse_theme.error, "•"))?; + write!(out, "{}", paint(opts.parse_theme.error, "•"))?; } let kind_color = if node.is_error() { @@ -964,13 +973,13 @@ fn cst_render_node( } else { opts.parse_theme.node_kind }; - write!(stdout, "{} ", paint(kind_color, node.kind()))?; + write!(out, "{} ", paint(kind_color, node.kind()))?; if node.child_count() == 0 { // Node text from a pattern or external scanner write_node_text( opts, - stdout, + out, cursor, is_named, &String::from_utf8_lossy(&source_code[node.start_byte()..node.end_byte()]), @@ -979,17 +988,13 @@ fn cst_render_node( )?; } } else if node.is_missing() { - write!(stdout, "{}: ", paint(opts.parse_theme.missing, "MISSING"))?; - write!( - stdout, - "\"{}\"", - paint(opts.parse_theme.missing, node.kind()) - )?; + write!(out, "{}: ", paint(opts.parse_theme.missing, "MISSING"))?; + write!(out, "\"{}\"", paint(opts.parse_theme.missing, node.kind()))?; } else { // Terminal literals, like "fn" write_node_text( opts, - stdout, + out, cursor, is_named, node.kind(), @@ -997,7 +1002,7 @@ fn cst_render_node( (total_width, indent_level), )?; } - writeln!(stdout)?; + writeln!(out)?; Ok(()) } diff --git a/crates/cli/src/test.rs b/crates/cli/src/test.rs index e128bc80..a0de1072 100644 --- a/crates/cli/src/test.rs 
+++ b/crates/cli/src/test.rs @@ -23,7 +23,9 @@ use tree_sitter::{format_sexp, Language, LogType, Parser, Query, Tree}; use walkdir::WalkDir; use super::util; -use crate::parse::Stats; +use crate::parse::{ + render_cst, ParseDebugType, ParseFileOptions, ParseOutput, ParseStats, ParseTheme, Stats, +}; static HEADER_REGEX: LazyLock = LazyLock::new(|| { ByteRegexBuilder::new( @@ -82,6 +84,7 @@ pub struct TestAttributes { pub platform: bool, pub fail_fast: bool, pub error: bool, + pub cst: bool, pub languages: Vec>, } @@ -102,6 +105,7 @@ impl Default for TestAttributes { platform: true, fail_fast: false, error: false, + cst: false, languages: vec!["".into()], } } @@ -246,22 +250,27 @@ pub fn run_tests_at_path(parser: &mut Parser, opts: &mut TestOptions) -> Result< if opts.color { print_diff_key(); } - for (i, (name, actual, expected)) in failures.iter().enumerate() { + for (i, (name, actual, expected, is_cst)) in failures.iter().enumerate() { if expected == "NO ERROR" { println!("\n {}. {name}:\n", i + 1); println!(" Expected an ERROR node, but got:"); - println!( - " {}", - paint( - opts.color.then_some(AnsiColor::Red), - &format_sexp(actual, 2) - ) - ); + let actual = if *is_cst { + actual + } else { + &format_sexp(actual, 2) + }; + println!(" {}", paint(opts.color.then_some(AnsiColor::Red), actual)); } else { println!("\n {}. {name}:", i + 1); - let actual = format_sexp(actual, 2); - let expected = format_sexp(expected, 2); - print_diff(&actual, &expected, opts.color); + if *is_cst { + print_diff(actual, expected, opts.color); + } else { + print_diff( + &format_sexp(actual, 2), + &format_sexp(expected, 2), + opts.color, + ); + } } } } @@ -348,6 +357,8 @@ pub fn paint(color: Option>, text: &str) -> String { format!("{style}{text}{style:#}") } +// TODO: Move the ridiculous tuple arguments into structs + /// This will return false if we want to "fail fast". It will bail and not parse any more tests. 
#[allow(clippy::too_many_arguments)] fn run_tests( @@ -355,7 +366,9 @@ fn run_tests( test_entry: TestEntry, opts: &mut TestOptions, mut indent_level: u32, - failures: &mut Vec<(String, String, String)>, + // (name, actual, expected, is_cst) + failures: &mut Vec<(String, String, String, bool)>, + // presumably (name, input, output, attributes_str, header_delim_len, divider_delim_len) — TODO confirm corrected_entries: &mut Vec<(String, String, String, String, usize, usize)>, has_parse_errors: &mut bool, ) -> Result { @@ -431,7 +444,11 @@ fn run_tests( opts.stats.successful_parses += 1; if opts.update { let input = String::from_utf8(input.clone()).unwrap(); - let output = format_sexp(&output, 0); + let output = if attributes.cst { + output.clone() + } else { + format_sexp(&output, 0) + }; corrected_entries.push(( name.clone(), input, @@ -445,7 +462,11 @@ fn run_tests( if opts.update { let input = String::from_utf8(input.clone()).unwrap(); // Keep the original `expected` output if the actual output has no error - let output = format_sexp(&output, 0); + let output = if attributes.cst { + output.clone() + } else { + format_sexp(&output, 0) + }; corrected_entries.push(( name.clone(), input, @@ -461,10 +482,16 @@ fn run_tests( opts.test_num, paint(opts.color.then_some(AnsiColor::Red), &name), )?; + let actual = if attributes.cst { + render_test_cst(&input, &tree)? + } else { + tree.root_node().to_sexp() + }; failures.push(( name.clone(), - tree.root_node().to_sexp(), + actual, "NO ERROR".to_string(), + attributes.cst, )); } @@ -472,8 +499,12 @@ fn run_tests( return Ok(false); } } else { - let mut actual = tree.root_node().to_sexp(); - if !(opts.show_fields || has_fields) { + let mut actual = if attributes.cst { + render_test_cst(&input, &tree)? 
+ } else { + tree.root_node().to_sexp() + }; + if !(attributes.cst || opts.show_fields || has_fields) { actual = strip_sexp_fields(&actual); } @@ -487,7 +518,11 @@ fn run_tests( opts.stats.successful_parses += 1; if opts.update { let input = String::from_utf8(input.clone()).unwrap(); - let output = format_sexp(&output, 0); + let output = if attributes.cst { + actual + } else { + format_sexp(&output, 0) + }; corrected_entries.push(( name.clone(), input, @@ -500,8 +535,11 @@ fn run_tests( } else { if opts.update { let input = String::from_utf8(input.clone()).unwrap(); - let expected_output = format_sexp(&output, 0); - let actual_output = format_sexp(&actual, 0); + let (expected_output, actual_output) = if attributes.cst { + (output.clone(), actual.clone()) + } else { + (format_sexp(&output, 0), format_sexp(&actual, 0)) + }; // Only bail early before updating if the actual is not the output, // sometimes users want to test cases that @@ -544,7 +582,7 @@ fn run_tests( paint(opts.color.then_some(AnsiColor::Red), &name), )?; } - failures.push((name.clone(), actual, output.clone())); + failures.push((name.clone(), actual, output.clone(), attributes.cst)); if attributes.fail_fast { return Ok(false); @@ -657,6 +695,28 @@ fn run_tests( Ok(true) } +/// Convenience wrapper to render a CST for a test entry. 
+fn render_test_cst(input: &[u8], tree: &Tree) -> Result { + let mut rendered_cst: Vec = Vec::new(); + let mut cursor = tree.walk(); + let opts = ParseFileOptions { + edits: &[], + output: ParseOutput::Cst, + stats: &mut ParseStats::default(), + print_time: false, + timeout: 0, + debug: ParseDebugType::Quiet, + debug_graph: false, + cancellation_flag: None, + encoding: None, + open_log: false, + no_ranges: false, + parse_theme: &ParseTheme::empty(), + }; + render_cst(input, tree, &mut cursor, &opts, &mut rendered_cst)?; + Ok(String::from_utf8_lossy(&rendered_cst).trim().to_string()) +} + // Parse time is interpreted in ns before converting to ms to avoid truncation issues // Parse rates often have several outliers, leading to a large standard deviation. Taking // the log of these rates serves to "flatten" out the distribution, yielding a more @@ -776,8 +836,8 @@ fn parse_test_content(name: String, content: &str, file_path: Option) - .name("suffix2") .map(|m| String::from_utf8_lossy(m.as_bytes())); - let (mut skip, mut platform, mut fail_fast, mut error, mut languages) = - (false, None, false, false, vec![]); + let (mut skip, mut platform, mut fail_fast, mut error, mut cst, mut languages) = + (false, None, false, false, false, vec![]); let test_name_and_markers = c .name("test_name_and_markers") @@ -818,6 +878,7 @@ fn parse_test_content(name: String, content: &str, file_path: Option) - languages.push(lang.into()); } } + ":cst" => (seen_marker, cst) = (true, true), _ if !seen_marker => { test_name.push_str(line); } @@ -858,6 +919,7 @@ fn parse_test_content(name: String, content: &str, file_path: Option) - platform: platform.unwrap_or(true), fail_fast, error, + cst, languages, }, )) @@ -910,16 +972,22 @@ fn parse_test_content(name: String, content: &str, file_path: Option) - input.pop(); } - // Remove all comments - let output = COMMENT_REGEX.replace_all(output, "").to_string(); + let (output, has_fields) = if prev_attributes.cst { + (output.trim().to_string(), false) 
+ } else { + // Remove all comments + let output = COMMENT_REGEX.replace_all(output, "").to_string(); - // Normalize the whitespace in the expected output. - let output = WHITESPACE_REGEX.replace_all(output.trim(), " "); - let output = output.replace(" )", ")"); + // Normalize the whitespace in the expected output. + let output = WHITESPACE_REGEX.replace_all(output.trim(), " "); + let output = output.replace(" )", ")"); - // Identify if the expected output has fields indicated. If not, then - // fields will not be checked. - let has_fields = SEXP_FIELD_REGEX.is_match(&output); + // Identify if the expected output has fields indicated. If not, then + // fields will not be checked. + let has_fields = SEXP_FIELD_REGEX.is_match(&output); + + (output, has_fields) + }; let file_name = if let Some(ref path) = file_path { path.file_name().map(|n| n.to_string_lossy().to_string()) @@ -1493,6 +1561,7 @@ a platform: true, fail_fast: false, error: false, + cst: false, languages: vec!["".into()] }, file_name: None, @@ -1522,6 +1591,16 @@ Test with bad platform marker a --- (b) + +==================== +Test with cst marker +:cst +==================== +1 +--- +0:0 - 1:0 source_file +0:0 - 0:1 expression +0:0 - 0:1 number_literal `1` ", std::env::consts::OS, if std::env::consts::OS == "linux" { @@ -1552,6 +1631,7 @@ a platform: true, fail_fast: true, error: false, + cst: false, languages: vec!["".into()] }, file_name: None, @@ -1573,9 +1653,31 @@ a platform: false, fail_fast: false, error: false, + cst: false, languages: vec!["foo".into()] }, file_name: None, + }, + TestEntry::Example { + name: "Test with cst marker".to_string(), + input: b"1".to_vec(), + output: "0:0 - 1:0 source_file +0:0 - 0:1 expression +0:0 - 0:1 number_literal `1`" + .to_string(), + header_delim_len: 20, + divider_delim_len: 3, + has_fields: false, + attributes_str: ":cst".to_string(), + attributes: TestAttributes { + skip: false, + platform: true, + fail_fast: false, + error: false, + cst: true, + languages: 
vec!["".into()] + }, + file_name: None, } ] } diff --git a/docs/src/creating-parsers/5-writing-tests.md b/docs/src/creating-parsers/5-writing-tests.md index b1011968..7ed483b1 100644 --- a/docs/src/creating-parsers/5-writing-tests.md +++ b/docs/src/creating-parsers/5-writing-tests.md @@ -99,8 +99,8 @@ you can repeat the attribute on a new line. The following attributes are available: -* `:skip` — This attribute will skip the test when running `tree-sitter test`. - This is useful when you want to temporarily disable running a test without deleting it. +* `:cst` — This attribute specifies that the expected output should be in the form of a CST instead of the normal S-expression. This +CST matches the format given by `tree-sitter parse --cst`. * `:error` — This attribute will assert that the parse tree contains an error. It's useful to just validate that a certain input is invalid without displaying the whole parse tree, as such you should omit the parse tree below the `---` line. * `:fail-fast` — This attribute will stop the testing additional tests if the test marked with this attribute fails. @@ -109,6 +109,8 @@ multi-parser repos, such as XML and DTD, or Typescript and TSX. The default pars the `grammars` field in the `tree-sitter.json` config file, so having a way to pick a second or even third parser is useful. * `:platform(PLATFORM)` — This attribute specifies the platform on which the test should run. It is useful to test platform-specific behavior (e.g. Windows newlines are different from Unix). This attribute must match up with Rust's [`std::env::consts::OS`][constants]. +* `:skip` — This attribute will skip the test when running `tree-sitter test`. +This is useful when you want to temporarily disable running a test without deleting it. Examples using attributes: