feat(test): allow cst as expected output of test case

This commit is contained in:
Will Lillis 2025-07-20 20:18:33 -04:00
parent 21b38004da
commit 1704c604bf
3 changed files with 210 additions and 101 deletions

View file

@ -1,6 +1,6 @@
use std::{
fmt, fs,
io::{self, StdoutLock, Write},
io::{self, Write},
path::{Path, PathBuf},
sync::atomic::{AtomicUsize, Ordering},
time::{Duration, Instant},
@ -501,53 +501,7 @@ pub fn parse_file_at_path(
}
if opts.output == ParseOutput::Cst {
let lossy_source_code = String::from_utf8_lossy(&source_code);
let total_width = lossy_source_code
.lines()
.enumerate()
.map(|(row, col)| {
(row as f64).log10() as usize + (col.len() as f64).log10() as usize + 1
})
.max()
.unwrap_or(1);
let mut indent_level = 1;
let mut did_visit_children = false;
let mut in_error = false;
loop {
if did_visit_children {
if cursor.goto_next_sibling() {
did_visit_children = false;
} else if cursor.goto_parent() {
did_visit_children = true;
indent_level -= 1;
if !cursor.node().has_error() {
in_error = false;
}
} else {
break;
}
} else {
cst_render_node(
opts,
&mut cursor,
&source_code,
&mut stdout,
total_width,
indent_level,
in_error,
)?;
if cursor.goto_first_child() {
did_visit_children = false;
indent_level += 1;
if cursor.node().has_error() {
in_error = true;
}
} else {
did_visit_children = true;
}
}
}
cursor.reset(tree.root_node());
render_cst(&source_code, &tree, &mut cursor, opts, &mut stdout)?;
println!();
}
@ -781,6 +735,61 @@ const fn escape_invisible(c: char) -> Option<&'static str> {
})
}
/// Writes a CST rendering of the nodes reachable from `cursor` into `out`.
///
/// Nodes are visited depth-first, parents before children, and each one is emitted through
/// `cst_render_node`. That helper receives the current indentation depth (starting at 1) and a
/// flag telling it whether the walk has descended through a node reporting `has_error()`.
///
/// Once the walk completes, `cursor` is reset to the root node of `tree`. If emitting a node
/// fails, the error is propagated immediately and the cursor is not reset.
pub fn render_cst<'a, 'b: 'a>(
    source_code: &[u8],
    tree: &'b Tree,
    cursor: &mut TreeCursor<'a>,
    opts: &ParseFileOptions,
    out: &mut impl Write,
) -> Result<()> {
    // Truncated base-10 logarithm; the float-to-int cast saturates, so `0` maps to `0`.
    let magnitude = |n: usize| (n as f64).log10() as usize;

    // Width figure handed to `cst_render_node`: the largest combined magnitude of a line's
    // index and its length, plus one. Falls back to 1 when the source has no lines.
    let text = String::from_utf8_lossy(source_code);
    let total_width = text
        .lines()
        .enumerate()
        .map(|(row, line)| magnitude(row) + magnitude(line.len()) + 1)
        .max()
        .unwrap_or(1);

    let mut depth = 1;
    let mut inside_error = false;
    // `true` while the walk is backing out of a subtree that has already been rendered.
    let mut ascending = false;

    loop {
        if !ascending {
            cst_render_node(
                opts,
                cursor,
                source_code,
                out,
                total_width,
                depth,
                inside_error,
            )?;
            if cursor.goto_first_child() {
                depth += 1;
                // Entering a child that contains an error switches the flag on.
                inside_error |= cursor.node().has_error();
            } else {
                ascending = true;
            }
            continue;
        }

        if cursor.goto_next_sibling() {
            ascending = false;
            continue;
        }

        if !cursor.goto_parent() {
            // No sibling and no parent left: the walk is done.
            break;
        }
        depth -= 1;
        // Returning to a parent without errors switches the flag off again.
        inside_error &= cursor.node().has_error();
    }

    cursor.reset(tree.root_node());
    Ok(())
}
fn render_node_text(source: &str) -> String {
source
.chars()
@ -796,7 +805,7 @@ fn render_node_text(source: &str) -> String {
fn write_node_text(
opts: &ParseFileOptions,
stdout: &mut StdoutLock<'static>,
out: &mut impl Write,
cursor: &TreeCursor,
is_named: bool,
source: &str,
@ -812,7 +821,7 @@ fn write_node_text(
if !is_named {
write!(
stdout,
out,
"{}{}{}",
paint(quote_color, &String::from(quote)),
paint(color, &render_node_text(source)),
@ -838,7 +847,7 @@ fn write_node_text(
let formatted_line = render_line_feed(line, opts);
if !opts.no_ranges {
write!(
stdout,
out,
"{}{}{}{}{}{}",
if multiline { "\n" } else { "" },
if multiline {
@ -857,7 +866,7 @@ fn write_node_text(
)?;
} else {
write!(
stdout,
out,
"\n{}{}{}{}",
" ".repeat(indent_level + 1),
paint(quote_color, &String::from(quote)),
@ -920,7 +929,7 @@ fn cst_render_node(
opts: &ParseFileOptions,
cursor: &mut TreeCursor,
source_code: &[u8],
stdout: &mut StdoutLock<'static>,
out: &mut impl Write,
total_width: usize,
indent_level: usize,
in_error: bool,
@ -929,13 +938,13 @@ fn cst_render_node(
let is_named = node.is_named();
if !opts.no_ranges {
write!(
stdout,
out,
"{}",
render_node_range(opts, cursor, is_named, false, total_width, node.range())
)?;
}
write!(
stdout,
out,
"{}{}",
" ".repeat(indent_level),
if in_error && !node.has_error() {
@ -947,14 +956,14 @@ fn cst_render_node(
if is_named {
if let Some(field_name) = cursor.field_name() {
write!(
stdout,
out,
"{}",
paint(opts.parse_theme.field, &format!("{field_name}: "))
)?;
}
if node.has_error() || node.is_error() {
write!(stdout, "{}", paint(opts.parse_theme.error, ""))?;
write!(out, "{}", paint(opts.parse_theme.error, ""))?;
}
let kind_color = if node.is_error() {
@ -964,13 +973,13 @@ fn cst_render_node(
} else {
opts.parse_theme.node_kind
};
write!(stdout, "{} ", paint(kind_color, node.kind()))?;
write!(out, "{} ", paint(kind_color, node.kind()))?;
if node.child_count() == 0 {
// Node text from a pattern or external scanner
write_node_text(
opts,
stdout,
out,
cursor,
is_named,
&String::from_utf8_lossy(&source_code[node.start_byte()..node.end_byte()]),
@ -979,17 +988,13 @@ fn cst_render_node(
)?;
}
} else if node.is_missing() {
write!(stdout, "{}: ", paint(opts.parse_theme.missing, "MISSING"))?;
write!(
stdout,
"\"{}\"",
paint(opts.parse_theme.missing, node.kind())
)?;
write!(out, "{}: ", paint(opts.parse_theme.missing, "MISSING"))?;
write!(out, "\"{}\"", paint(opts.parse_theme.missing, node.kind()))?;
} else {
// Terminal literals, like "fn"
write_node_text(
opts,
stdout,
out,
cursor,
is_named,
node.kind(),
@ -997,7 +1002,7 @@ fn cst_render_node(
(total_width, indent_level),
)?;
}
writeln!(stdout)?;
writeln!(out)?;
Ok(())
}

View file

@ -23,7 +23,9 @@ use tree_sitter::{format_sexp, Language, LogType, Parser, Query, Tree};
use walkdir::WalkDir;
use super::util;
use crate::parse::Stats;
use crate::parse::{
render_cst, ParseDebugType, ParseFileOptions, ParseOutput, ParseStats, ParseTheme, Stats,
};
static HEADER_REGEX: LazyLock<ByteRegex> = LazyLock::new(|| {
ByteRegexBuilder::new(
@ -82,6 +84,7 @@ pub struct TestAttributes {
pub platform: bool,
pub fail_fast: bool,
pub error: bool,
pub cst: bool,
pub languages: Vec<Box<str>>,
}
@ -102,6 +105,7 @@ impl Default for TestAttributes {
platform: true,
fail_fast: false,
error: false,
cst: false,
languages: vec!["".into()],
}
}
@ -246,22 +250,27 @@ pub fn run_tests_at_path(parser: &mut Parser, opts: &mut TestOptions) -> Result<
if opts.color {
print_diff_key();
}
for (i, (name, actual, expected)) in failures.iter().enumerate() {
for (i, (name, actual, expected, is_cst)) in failures.iter().enumerate() {
if expected == "NO ERROR" {
println!("\n {}. {name}:\n", i + 1);
println!(" Expected an ERROR node, but got:");
println!(
" {}",
paint(
opts.color.then_some(AnsiColor::Red),
&format_sexp(actual, 2)
)
);
let actual = if *is_cst {
actual
} else {
&format_sexp(actual, 2)
};
println!(" {}", paint(opts.color.then_some(AnsiColor::Red), actual));
} else {
println!("\n {}. {name}:", i + 1);
let actual = format_sexp(actual, 2);
let expected = format_sexp(expected, 2);
print_diff(&actual, &expected, opts.color);
if *is_cst {
print_diff(actual, expected, opts.color);
} else {
print_diff(
&format_sexp(actual, 2),
&format_sexp(expected, 2),
opts.color,
);
}
}
}
}
@ -348,6 +357,8 @@ pub fn paint(color: Option<impl Into<Color>>, text: &str) -> String {
format!("{style}{text}{style:#}")
}
// TODO: Move the ridiculous tuple arguments into structs
/// This will return false if we want to "fail fast". It will bail and not parse any more tests.
#[allow(clippy::too_many_arguments)]
fn run_tests(
@ -355,7 +366,9 @@ fn run_tests(
test_entry: TestEntry,
opts: &mut TestOptions,
mut indent_level: u32,
failures: &mut Vec<(String, String, String)>,
// (name, actual, expected, is_cst)
failures: &mut Vec<(String, String, String, bool)>,
// ????
corrected_entries: &mut Vec<(String, String, String, String, usize, usize)>,
has_parse_errors: &mut bool,
) -> Result<bool> {
@ -431,7 +444,11 @@ fn run_tests(
opts.stats.successful_parses += 1;
if opts.update {
let input = String::from_utf8(input.clone()).unwrap();
let output = format_sexp(&output, 0);
let output = if attributes.cst {
output.clone()
} else {
format_sexp(&output, 0)
};
corrected_entries.push((
name.clone(),
input,
@ -445,7 +462,11 @@ fn run_tests(
if opts.update {
let input = String::from_utf8(input.clone()).unwrap();
// Keep the original `expected` output if the actual output has no error
let output = format_sexp(&output, 0);
let output = if attributes.cst {
output.clone()
} else {
format_sexp(&output, 0)
};
corrected_entries.push((
name.clone(),
input,
@ -461,10 +482,16 @@ fn run_tests(
opts.test_num,
paint(opts.color.then_some(AnsiColor::Red), &name),
)?;
let actual = if attributes.cst {
render_test_cst(&input, &tree)?
} else {
tree.root_node().to_sexp()
};
failures.push((
name.clone(),
tree.root_node().to_sexp(),
actual,
"NO ERROR".to_string(),
attributes.cst,
));
}
@ -472,8 +499,12 @@ fn run_tests(
return Ok(false);
}
} else {
let mut actual = tree.root_node().to_sexp();
if !(opts.show_fields || has_fields) {
let mut actual = if attributes.cst {
render_test_cst(&input, &tree)?
} else {
tree.root_node().to_sexp()
};
if !(attributes.cst || opts.show_fields || has_fields) {
actual = strip_sexp_fields(&actual);
}
@ -487,7 +518,11 @@ fn run_tests(
opts.stats.successful_parses += 1;
if opts.update {
let input = String::from_utf8(input.clone()).unwrap();
let output = format_sexp(&output, 0);
let output = if attributes.cst {
actual
} else {
format_sexp(&output, 0)
};
corrected_entries.push((
name.clone(),
input,
@ -500,8 +535,11 @@ fn run_tests(
} else {
if opts.update {
let input = String::from_utf8(input.clone()).unwrap();
let expected_output = format_sexp(&output, 0);
let actual_output = format_sexp(&actual, 0);
let (expected_output, actual_output) = if attributes.cst {
(output.clone(), actual.clone())
} else {
(format_sexp(&output, 0), format_sexp(&actual, 0))
};
// Only bail early before updating if the actual is not the output,
// sometimes users want to test cases that
@ -544,7 +582,7 @@ fn run_tests(
paint(opts.color.then_some(AnsiColor::Red), &name),
)?;
}
failures.push((name.clone(), actual, output.clone()));
failures.push((name.clone(), actual, output.clone(), attributes.cst));
if attributes.fail_fast {
return Ok(false);
@ -657,6 +695,28 @@ fn run_tests(
Ok(true)
}
/// Renders the CST of `tree` (parsed from `input`) into a `String` for use by test entries.
///
/// The rendering goes through `render_cst` with a fixed set of options: CST output, ranges
/// enabled (`no_ranges: false`), quiet debugging, and the theme returned by
/// `ParseTheme::empty()`. The rendered bytes are decoded lossily as UTF-8 and surrounding
/// whitespace is trimmed before returning.
fn render_test_cst(input: &[u8], tree: &Tree) -> Result<String> {
    let mut stats = ParseStats::default();
    let theme = ParseTheme::empty();
    let opts = ParseFileOptions {
        edits: &[],
        output: ParseOutput::Cst,
        stats: &mut stats,
        print_time: false,
        timeout: 0,
        debug: ParseDebugType::Quiet,
        debug_graph: false,
        cancellation_flag: None,
        encoding: None,
        open_log: false,
        no_ranges: false,
        parse_theme: &theme,
    };

    // Render into an in-memory buffer instead of stdout.
    let mut buffer: Vec<u8> = Vec::new();
    let mut cursor = tree.walk();
    render_cst(input, tree, &mut cursor, &opts, &mut buffer)?;

    let rendered = String::from_utf8_lossy(&buffer);
    Ok(rendered.trim().to_string())
}
// Parse time is interpreted in ns before converting to ms to avoid truncation issues
// Parse rates often have several outliers, leading to a large standard deviation. Taking
// the log of these rates serves to "flatten" out the distribution, yielding a more
@ -776,8 +836,8 @@ fn parse_test_content(name: String, content: &str, file_path: Option<PathBuf>) -
.name("suffix2")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
let (mut skip, mut platform, mut fail_fast, mut error, mut languages) =
(false, None, false, false, vec![]);
let (mut skip, mut platform, mut fail_fast, mut error, mut cst, mut languages) =
(false, None, false, false, false, vec![]);
let test_name_and_markers = c
.name("test_name_and_markers")
@ -818,6 +878,7 @@ fn parse_test_content(name: String, content: &str, file_path: Option<PathBuf>) -
languages.push(lang.into());
}
}
":cst" => (seen_marker, cst) = (true, true),
_ if !seen_marker => {
test_name.push_str(line);
}
@ -858,6 +919,7 @@ fn parse_test_content(name: String, content: &str, file_path: Option<PathBuf>) -
platform: platform.unwrap_or(true),
fail_fast,
error,
cst,
languages,
},
))
@ -910,16 +972,22 @@ fn parse_test_content(name: String, content: &str, file_path: Option<PathBuf>) -
input.pop();
}
// Remove all comments
let output = COMMENT_REGEX.replace_all(output, "").to_string();
let (output, has_fields) = if prev_attributes.cst {
(output.trim().to_string(), false)
} else {
// Remove all comments
let output = COMMENT_REGEX.replace_all(output, "").to_string();
// Normalize the whitespace in the expected output.
let output = WHITESPACE_REGEX.replace_all(output.trim(), " ");
let output = output.replace(" )", ")");
// Normalize the whitespace in the expected output.
let output = WHITESPACE_REGEX.replace_all(output.trim(), " ");
let output = output.replace(" )", ")");
// Identify if the expected output has fields indicated. If not, then
// fields will not be checked.
let has_fields = SEXP_FIELD_REGEX.is_match(&output);
// Identify if the expected output has fields indicated. If not, then
// fields will not be checked.
let has_fields = SEXP_FIELD_REGEX.is_match(&output);
(output, has_fields)
};
let file_name = if let Some(ref path) = file_path {
path.file_name().map(|n| n.to_string_lossy().to_string())
@ -1493,6 +1561,7 @@ a
platform: true,
fail_fast: false,
error: false,
cst: false,
languages: vec!["".into()]
},
file_name: None,
@ -1522,6 +1591,16 @@ Test with bad platform marker
a
---
(b)
====================
Test with cst marker
:cst
====================
1
---
0:0 - 1:0 source_file
0:0 - 0:1 expression
0:0 - 0:1 number_literal `1`
",
std::env::consts::OS,
if std::env::consts::OS == "linux" {
@ -1552,6 +1631,7 @@ a
platform: true,
fail_fast: true,
error: false,
cst: false,
languages: vec!["".into()]
},
file_name: None,
@ -1573,9 +1653,31 @@ a
platform: false,
fail_fast: false,
error: false,
cst: false,
languages: vec!["foo".into()]
},
file_name: None,
},
TestEntry::Example {
name: "Test with cst marker".to_string(),
input: b"1".to_vec(),
output: "0:0 - 1:0 source_file
0:0 - 0:1 expression
0:0 - 0:1 number_literal `1`"
.to_string(),
header_delim_len: 20,
divider_delim_len: 3,
has_fields: false,
attributes_str: ":cst".to_string(),
attributes: TestAttributes {
skip: false,
platform: true,
fail_fast: false,
error: false,
cst: true,
languages: vec!["".into()]
},
file_name: None,
}
]
}

View file

@ -99,8 +99,8 @@ you can repeat the attribute on a new line.
The following attributes are available:
* `:skip` — This attribute will skip the test when running `tree-sitter test`.
This is useful when you want to temporarily disable running a test without deleting it.
* `:cst` — This attribute specifies that the expected output should be in the form of a CST instead of the normal S-expression. This
CST matches the format given by `parse --cst`.
* `:error` — This attribute will assert that the parse tree contains an error. It's useful to just validate that a certain
input is invalid without displaying the whole parse tree, as such you should omit the parse tree below the `---` line.
* `:fail-fast` — This attribute will stop running any additional tests if the test marked with this attribute fails.
@ -109,6 +109,8 @@ multi-parser repos, such as XML and DTD, or Typescript and TSX. The default pars
the `grammars` field in the `tree-sitter.json` config file, so having a way to pick a second or even third parser is useful.
* `:platform(PLATFORM)` — This attribute specifies the platform on which the test should run. It is useful to test platform-specific
behavior (e.g. Windows newlines are different from Unix). This attribute must match up with Rust's [`std::env::consts::OS`][constants].
* `:skip` — This attribute will skip the test when running `tree-sitter test`.
This is useful when you want to temporarily disable running a test without deleting it.
Examples using attributes: