Reorganize rust crates into a flat crates directory, simplify some CI steps (#4496)

* Move all rust crates (except lib) into crates dir, w/o nesting

* Remove stale path from .gitattributes

* Rename lib.rs files for easier navigation

* Rename mod.rs file for easier navigation

* Fix emscripten-version path

* Fix fixtures dir paths

* Use the default rustfmt settings

* Don't use nightly on CI
This commit is contained in:
Max Brunsfeld 2025-06-06 14:25:37 -07:00 committed by GitHub
parent a6e530b33d
commit 0fdf569571
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
163 changed files with 69 additions and 89 deletions

View file

@ -0,0 +1,278 @@
use std::{
future::Future,
pin::{pin, Pin},
ptr,
task::{self, Context, Poll, RawWaker, RawWakerVTable, Waker},
};
use tree_sitter::Parser;
use super::helpers::fixtures::get_language;
#[test]
fn test_node_in_fut() {
let (ret, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(&language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let fut_val_fn = || async {
yield_now().await;
root.child(0).unwrap().kind()
};
yield_now().await;
let fut_ref_fn = || async {
yield_now().await;
root_ref.child(0).unwrap().kind()
};
let f1 = fut_val_fn().await;
let f2 = fut_ref_fn().await;
assert_eq!(f1, f2);
let fut_val = async {
yield_now().await;
root.child(0).unwrap().kind()
};
let fut_ref = async {
yield_now().await;
root_ref.child(0).unwrap().kind()
};
let f1 = fut_val.await;
let f2 = fut_ref.await;
assert_eq!(f1, f2);
f1
})
.join();
assert_eq!(ret, "comment");
assert_eq!(pended, 5);
}
#[test]
fn test_node_and_cursor_ref_in_fut() {
let ((), pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("c");
parser.set_language(&language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = async {
yield_now().await;
let _ = root.to_sexp();
};
yield_now().await;
let fut_ref = async {
yield_now().await;
let _ = root_ref.to_sexp();
cursor_ref.goto_first_child();
};
fut_val.await;
fut_ref.await;
cursor_ref.goto_first_child();
})
.join();
assert_eq!(pended, 3);
}
#[test]
fn test_node_and_cursor_ref_in_fut_with_fut_fabrics() {
let ((), pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("javascript");
parser.set_language(&language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = || async {
yield_now().await;
let _ = root.to_sexp();
};
yield_now().await;
let fut_ref = || async move {
yield_now().await;
let _ = root_ref.to_sexp();
cursor_ref.goto_first_child();
};
fut_val().await;
fut_val().await;
fut_ref().await;
})
.join();
assert_eq!(pended, 4);
}
#[test]
fn test_node_and_cursor_ref_in_fut_with_inner_spawns() {
let (ret, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(&language).unwrap();
let tree = parser.parse("#", None).unwrap();
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = || {
let tree = tree.clone();
async move {
let root = tree.root_node();
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
yield_now().await;
let _ = root.to_sexp();
cursor_ref.goto_first_child();
}
};
yield_now().await;
let fut_ref = || {
let tree = tree.clone();
async move {
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
yield_now().await;
let _ = root_ref.to_sexp();
cursor_ref.goto_first_child();
}
};
let ((), p1) = tokio_like_spawn(fut_val()).await.unwrap();
let ((), p2) = tokio_like_spawn(fut_ref()).await.unwrap();
cursor_ref.goto_first_child();
fut_val().await;
fut_val().await;
fut_ref().await;
cursor_ref.goto_first_child();
p1 + p2
})
.join();
assert_eq!(pended, 4);
assert_eq!(ret, 2);
}
fn tokio_like_spawn<T>(future: T) -> JoinHandle<(T::Output, usize)>
where
T: Future + Send + 'static,
T::Output: Send + 'static,
{
// No runtime, just noop waker
let waker = noop_waker();
let mut cx = task::Context::from_waker(&waker);
let mut pending = 0;
let mut future = pin!(future);
let ret = loop {
match future.as_mut().poll(&mut cx) {
Poll::Pending => pending += 1,
Poll::Ready(r) => {
break r;
}
}
};
JoinHandle::new((ret, pending))
}
async fn yield_now() {
struct SimpleYieldNow {
yielded: bool,
}
impl Future for SimpleYieldNow {
type Output = ();
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
cx.waker().wake_by_ref();
if self.yielded {
return Poll::Ready(());
}
self.yielded = true;
Poll::Pending
}
}
SimpleYieldNow { yielded: false }.await;
}
pub const fn noop_waker() -> Waker {
const VTABLE: RawWakerVTable = RawWakerVTable::new(
// Cloning just returns a new no-op raw waker
|_| RAW,
// `wake` does nothing
|_| {},
// `wake_by_ref` does nothing
|_| {},
// Dropping does nothing as we don't allocate anything
|_| {},
);
const RAW: RawWaker = RawWaker::new(ptr::null(), &VTABLE);
unsafe { Waker::from_raw(RAW) }
}
struct JoinHandle<T> {
data: Option<T>,
}
impl<T> JoinHandle<T> {
#[must_use]
const fn new(data: T) -> Self {
Self { data: Some(data) }
}
fn join(&mut self) -> T {
self.data.take().unwrap()
}
}
impl<T: Unpin> Future for JoinHandle<T> {
type Output = std::result::Result<T, ()>;
fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Self::Output> {
let data = self.get_mut().data.take().unwrap();
Poll::Ready(Ok(data))
}
}

View file

@ -0,0 +1,439 @@
use std::{collections::HashMap, env, fs};
use tree_sitter::Parser;
use tree_sitter_proc_macro::test_with_seed;
use crate::{
fuzz::{
corpus_test::{
check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
},
edits::{get_random_edit, invert_edit},
flatten_tests, new_seed,
random::Rand,
EDIT_COUNT, EXAMPLE_EXCLUDE, EXAMPLE_INCLUDE, ITERATION_COUNT, LANGUAGE_FILTER,
LOG_GRAPH_ENABLED, START_SEED,
},
parse::perform_edit,
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields},
tests::{
allocations,
helpers::fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
},
};
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_bash_language(seed: usize) {
test_language_corpus(
"bash",
seed,
Some(&[
// Fragile tests where edit customization changes
// lead to significant parse tree structure changes.
"bash - corpus - commands - Nested Heredocs",
"bash - corpus - commands - Quoted Heredocs",
"bash - corpus - commands - Heredocs with weird characters",
]),
None,
);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_c_language(seed: usize) {
test_language_corpus("c", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_cpp_language(seed: usize) {
test_language_corpus("cpp", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_embedded_template_language(seed: usize) {
test_language_corpus("embedded-template", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_go_language(seed: usize) {
test_language_corpus("go", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_html_language(seed: usize) {
test_language_corpus("html", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_java_language(seed: usize) {
test_language_corpus(
"java",
seed,
Some(&["java - corpus - expressions - switch with unnamed pattern variable"]),
None,
);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_javascript_language(seed: usize) {
test_language_corpus("javascript", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_json_language(seed: usize) {
test_language_corpus("json", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_php_language(seed: usize) {
test_language_corpus("php", seed, None, Some("php"));
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_python_language(seed: usize) {
test_language_corpus("python", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_ruby_language(seed: usize) {
test_language_corpus("ruby", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_rust_language(seed: usize) {
test_language_corpus("rust", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_typescript_language(seed: usize) {
test_language_corpus("typescript", seed, None, Some("typescript"));
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_tsx_language(seed: usize) {
test_language_corpus("typescript", seed, None, Some("tsx"));
}
pub fn test_language_corpus(
language_name: &str,
start_seed: usize,
skipped: Option<&[&str]>,
language_dir: Option<&str>,
) {
if let Some(filter) = LANGUAGE_FILTER.as_ref() {
if language_name != filter {
return;
}
}
let language_dir = language_dir.unwrap_or_default();
let grammars_dir = fixtures_dir().join("grammars");
let error_corpus_dir = fixtures_dir().join("error_corpus");
let template_corpus_dir = fixtures_dir().join("template_corpus");
let corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");
println!("Testing {language_name} corpus @ {}", corpus_dir.display());
let error_corpus_file = error_corpus_dir.join(format!("{language_name}_errors.txt"));
let template_corpus_file = template_corpus_dir.join(format!("{language_name}_templates.txt"));
let main_tests = parse_tests(&corpus_dir).unwrap();
let error_tests = parse_tests(&error_corpus_file).unwrap_or_default();
let template_tests = parse_tests(&template_corpus_file).unwrap_or_default();
let mut tests = flatten_tests(
main_tests,
EXAMPLE_INCLUDE.as_ref(),
EXAMPLE_EXCLUDE.as_ref(),
);
tests.extend(flatten_tests(
error_tests,
EXAMPLE_INCLUDE.as_ref(),
EXAMPLE_EXCLUDE.as_ref(),
));
tests.extend(
flatten_tests(
template_tests,
EXAMPLE_INCLUDE.as_ref(),
EXAMPLE_EXCLUDE.as_ref(),
)
.into_iter()
.map(|mut t| {
t.template_delimiters = Some(("<%", "%>"));
t
}),
);
tests.retain(|t| t.languages[0].is_empty() || t.languages.contains(&Box::from(language_dir)));
let mut skipped = skipped.map(|x| x.iter().map(|x| (*x, 0)).collect::<HashMap<&str, usize>>());
let language_path = if language_dir.is_empty() {
language_name.to_string()
} else {
format!("{language_name}/{language_dir}")
};
let language = get_language(&language_path);
let mut failure_count = 0;
let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
if log_seed {
println!(" start seed: {start_seed}");
}
println!();
for (test_index, test) in tests.iter().enumerate() {
let test_name = format!("{language_name} - {}", test.name);
if let Some(skipped) = skipped.as_mut() {
if let Some(counter) = skipped.get_mut(test_name.as_str()) {
println!(" {test_index}. {test_name} - SKIPPED");
*counter += 1;
continue;
}
}
println!(" {test_index}. {test_name}");
let passed = allocations::record(|| {
let mut log_session = None;
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(&language).unwrap();
set_included_ranges(&mut parser, &test.input, test.template_delimiters);
let tree = parser.parse(&test.input, None).unwrap();
let mut actual_output = tree.root_node().to_sexp();
if !test.has_fields {
actual_output = strip_sexp_fields(&actual_output);
}
if actual_output != test.output {
println!("Incorrect initial parse for {test_name}");
print_diff_key();
print_diff(&actual_output, &test.output, true);
println!();
return false;
}
true
})
.unwrap();
if !passed {
failure_count += 1;
continue;
}
let mut parser = Parser::new();
parser.set_language(&language).unwrap();
let tree = parser.parse(&test.input, None).unwrap();
drop(parser);
for trial in 0..*ITERATION_COUNT {
let seed = start_seed + trial;
let passed = allocations::record(|| {
let mut rand = Rand::new(seed);
let mut log_session = None;
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(&language).unwrap();
let mut tree = tree.clone();
let mut input = test.input.clone();
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
}
// Perform a random series of edits and reparse.
let mut undo_stack = Vec::new();
for _ in 0..=rand.unsigned(*EDIT_COUNT) {
let edit = get_random_edit(&mut rand, &input);
undo_stack.push(invert_edit(&input, &edit));
perform_edit(&mut tree, &mut input, &edit).unwrap();
}
if log_seed {
println!(" {test_index}.{trial:<2} seed: {seed}");
}
if dump_edits {
fs::write(
SCRATCH_BASE_DIR
.join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
&input,
)
.unwrap();
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
}
set_included_ranges(&mut parser, &input, test.template_delimiters);
let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
// Check that the new tree is consistent.
check_consistent_sizes(&tree2, &input);
if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
return false;
}
// Undo all of the edits and re-parse again.
while let Some(edit) = undo_stack.pop() {
perform_edit(&mut tree2, &mut input, &edit).unwrap();
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
}
set_included_ranges(&mut parser, &test.input, test.template_delimiters);
let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
// Verify that the final tree matches the expectation from the corpus.
let mut actual_output = tree3.root_node().to_sexp();
if !test.has_fields {
actual_output = strip_sexp_fields(&actual_output);
}
if actual_output != test.output {
println!("Incorrect parse for {test_name} - seed {seed}");
print_diff_key();
print_diff(&actual_output, &test.output, true);
println!();
return false;
}
// Check that the edited tree is consistent.
check_consistent_sizes(&tree3, &input);
if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
return false;
}
true
}).unwrap();
if !passed {
failure_count += 1;
break;
}
}
}
assert!(
failure_count == 0,
"{failure_count} {language_name} corpus tests failed"
);
if let Some(skipped) = skipped.as_mut() {
skipped.retain(|_, v| *v == 0);
if !skipped.is_empty() {
println!("Non matchable skip definitions:");
for k in skipped.keys() {
println!(" {k}");
}
panic!("Non matchable skip definitions needs to be removed");
}
}
}
#[test]
fn test_feature_corpus_files() {
let test_grammars_dir = fixtures_dir().join("test_grammars");
let mut failure_count = 0;
for entry in fs::read_dir(test_grammars_dir).unwrap() {
let entry = entry.unwrap();
if !entry.metadata().unwrap().is_dir() {
continue;
}
let language_name = entry.file_name();
let language_name = language_name.to_str().unwrap();
if let Some(filter) = LANGUAGE_FILTER.as_ref() {
if language_name != filter {
continue;
}
}
let test_path = entry.path();
let mut grammar_path = test_path.join("grammar.js");
if !grammar_path.exists() {
grammar_path = test_path.join("grammar.json");
}
let error_message_path = test_path.join("expected_error.txt");
let grammar_json = tree_sitter_generate::load_grammar_file(&grammar_path, None).unwrap();
let generate_result =
tree_sitter_generate::generate_parser_for_grammar(&grammar_json, Some((0, 0, 0)));
if error_message_path.exists() {
if EXAMPLE_INCLUDE.is_some() || EXAMPLE_EXCLUDE.is_some() {
continue;
}
eprintln!("test language: {language_name:?}");
let expected_message = fs::read_to_string(&error_message_path)
.unwrap()
.replace("\r\n", "\n");
if let Err(e) = generate_result {
let actual_message = e.to_string().replace("\r\n", "\n");
if expected_message != actual_message {
eprintln!(
"Unexpected error message.\n\nExpected:\n\n{expected_message}\nActual:\n\n{actual_message}\n",
);
failure_count += 1;
}
} else {
eprintln!("Expected error message but got none for test grammar '{language_name}'",);
failure_count += 1;
}
} else {
if let Err(e) = &generate_result {
eprintln!("Unexpected error for test grammar '{language_name}':\n{e}",);
failure_count += 1;
continue;
}
let corpus_path = test_path.join("corpus.txt");
let c_code = generate_result.unwrap().1;
let language = get_test_language(language_name, &c_code, Some(&test_path));
let test = parse_tests(&corpus_path).unwrap();
let tests = flatten_tests(test, EXAMPLE_INCLUDE.as_ref(), EXAMPLE_EXCLUDE.as_ref());
if !tests.is_empty() {
eprintln!("test language: {language_name:?}");
}
for test in tests {
eprintln!(" example: {:?}", test.name);
let passed = allocations::record(|| {
let mut log_session = None;
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(&language).unwrap();
let tree = parser.parse(&test.input, None).unwrap();
let mut actual_output = tree.root_node().to_sexp();
if !test.has_fields {
actual_output = strip_sexp_fields(&actual_output);
}
if actual_output == test.output {
true
} else {
print_diff_key();
print_diff(&actual_output, &test.output, true);
println!();
false
}
})
.unwrap();
if !passed {
failure_count += 1;
}
}
}
}
assert!(failure_count == 0, "{failure_count} corpus tests failed");
}

View file

@ -0,0 +1,254 @@
use std::{fs, path::Path};
use tree_sitter_loader::Loader;
use crate::tests::helpers::fixtures::scratch_dir;
#[test]
fn detect_language_by_first_line_regex() {
let strace_dir = tree_sitter_dir(
r#"{
"grammars": [
{
"name": "strace",
"path": ".",
"scope": "source.strace",
"file-types": [
"strace"
],
"first-line-regex": "[0-9:.]* *execve"
}
],
"metadata": {
"version": "0.0.1"
}
}
"#,
"strace",
);
let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
let config = loader
.find_language_configurations_at_path(strace_dir.path(), false)
.unwrap();
// this is just to validate that we can read the tree-sitter.json correctly
assert_eq!(config[0].scope.as_ref().unwrap(), "source.strace");
let file_name = strace_dir.path().join("strace.log");
fs::write(&file_name, "execve\nworld").unwrap();
assert_eq!(
get_lang_scope(&loader, &file_name),
Some("source.strace".into())
);
let file_name = strace_dir.path().join("strace.log");
fs::write(&file_name, "447845 execve\nworld").unwrap();
assert_eq!(
get_lang_scope(&loader, &file_name),
Some("source.strace".into())
);
let file_name = strace_dir.path().join("strace.log");
fs::write(&file_name, "hello\nexecve").unwrap();
assert!(get_lang_scope(&loader, &file_name).is_none());
let file_name = strace_dir.path().join("strace.log");
fs::write(&file_name, "").unwrap();
assert!(get_lang_scope(&loader, &file_name).is_none());
let dummy_dir = tree_sitter_dir(
r#"{
"grammars": [
{
"name": "dummy",
"scope": "source.dummy",
"path": ".",
"file-types": [
"dummy"
]
}
],
"metadata": {
"version": "0.0.1"
}
}
"#,
"dummy",
);
// file-type takes precedence over first-line-regex
loader
.find_language_configurations_at_path(dummy_dir.path(), false)
.unwrap();
let file_name = dummy_dir.path().join("strace.dummy");
fs::write(&file_name, "execve").unwrap();
assert_eq!(
get_lang_scope(&loader, &file_name),
Some("source.dummy".into())
);
}
#[test]
fn detect_langauge_by_double_barrel_file_extension() {
let blade_dir = tree_sitter_dir(
r#"{
"grammars": [
{
"name": "blade",
"path": ".",
"scope": "source.blade",
"file-types": [
"blade.php"
]
}
],
"metadata": {
"version": "0.0.1"
}
}
"#,
"blade",
);
let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
let config = loader
.find_language_configurations_at_path(blade_dir.path(), false)
.unwrap();
// this is just to validate that we can read the tree-sitter.json correctly
assert_eq!(config[0].scope.as_ref().unwrap(), "source.blade");
let file_name = blade_dir.path().join("foo.blade.php");
fs::write(&file_name, "").unwrap();
assert_eq!(
get_lang_scope(&loader, &file_name),
Some("source.blade".into())
);
}
#[test]
fn detect_language_without_filename() {
let gitignore_dir = tree_sitter_dir(
r#"{
"grammars": [
{
"name": "gitignore",
"path": ".",
"scope": "source.gitignore",
"file-types": [
".gitignore"
]
}
],
"metadata": {
"version": "0.0.1"
}
}
"#,
"gitignore",
);
let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
let config = loader
.find_language_configurations_at_path(gitignore_dir.path(), false)
.unwrap();
// this is just to validate that we can read the tree-sitter.json correctly
assert_eq!(config[0].scope.as_ref().unwrap(), "source.gitignore");
let file_name = gitignore_dir.path().join(".gitignore");
fs::write(&file_name, "").unwrap();
assert_eq!(
get_lang_scope(&loader, &file_name),
Some("source.gitignore".into())
);
}
#[test]
fn detect_language_without_file_extension() {
let ssh_config_dir = tree_sitter_dir(
r#"{
"grammars": [
{
"name": "ssh_config",
"path": ".",
"scope": "source.ssh_config",
"file-types": [
"ssh_config"
]
}
],
"metadata": {
"version": "0.0.1"
}
}
"#,
"ssh_config",
);
let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
let config = loader
.find_language_configurations_at_path(ssh_config_dir.path(), false)
.unwrap();
// this is just to validate that we can read the tree-sitter.json correctly
assert_eq!(config[0].scope.as_ref().unwrap(), "source.ssh_config");
let file_name = ssh_config_dir.path().join("ssh_config");
fs::write(&file_name, "").unwrap();
assert_eq!(
get_lang_scope(&loader, &file_name),
Some("source.ssh_config".into())
);
}
fn tree_sitter_dir(tree_sitter_json: &str, name: &str) -> tempfile::TempDir {
let temp_dir = tempfile::tempdir().unwrap();
fs::write(temp_dir.path().join("tree-sitter.json"), tree_sitter_json).unwrap();
fs::create_dir_all(temp_dir.path().join("src/tree_sitter")).unwrap();
fs::write(
temp_dir.path().join("src/grammar.json"),
format!(r#"{{"name":"{name}"}}"#),
)
.unwrap();
fs::write(
temp_dir.path().join("src/parser.c"),
format!(
r#"
#include "tree_sitter/parser.h"
#ifdef _WIN32
#define TS_PUBLIC __declspec(dllexport)
#else
#define TS_PUBLIC __attribute__((visibility("default")))
#endif
TS_PUBLIC const TSLanguage *tree_sitter_{name}() {{}}
"#
),
)
.unwrap();
fs::write(
temp_dir.path().join("src/tree_sitter/parser.h"),
include_str!("../../../../lib/src/parser.h"),
)
.unwrap();
temp_dir
}
// If we manage to get the language scope, it means we correctly detected the file-type
fn get_lang_scope(loader: &Loader, file_name: &Path) -> Option<String> {
loader
.language_configuration_for_file_name(file_name)
.ok()
.and_then(|config| {
if let Some((_, config)) = config {
config.scope.clone()
} else if let Ok(Some((_, config))) =
loader.language_configuration_for_first_line_regex(file_name)
{
config.scope.clone()
} else {
None
}
})
}

View file

@ -0,0 +1,4 @@
pub mod allocations;
pub mod edits;
pub(super) mod fixtures;
pub(super) mod query_helpers;

View file

@ -0,0 +1,121 @@
use std::{
collections::HashMap,
os::raw::c_void,
sync::{
atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
Mutex,
},
};
#[ctor::ctor]
unsafe fn initialize_allocation_recording() {
tree_sitter::set_allocator(
Some(ts_record_malloc),
Some(ts_record_calloc),
Some(ts_record_realloc),
Some(ts_record_free),
);
}
#[derive(Debug, PartialEq, Eq, Hash)]
struct Allocation(*const c_void);
unsafe impl Send for Allocation {}
unsafe impl Sync for Allocation {}
#[derive(Default)]
struct AllocationRecorder {
enabled: AtomicBool,
allocation_count: AtomicUsize,
outstanding_allocations: Mutex<HashMap<Allocation, usize>>,
}
thread_local! {
static RECORDER: AllocationRecorder = AllocationRecorder::default();
}
extern "C" {
fn malloc(size: usize) -> *mut c_void;
fn calloc(count: usize, size: usize) -> *mut c_void;
fn realloc(ptr: *mut c_void, size: usize) -> *mut c_void;
fn free(ptr: *mut c_void);
}
pub fn record<T>(f: impl FnOnce() -> T) -> T {
RECORDER.with(|recorder| {
recorder.enabled.store(true, SeqCst);
recorder.allocation_count.store(0, SeqCst);
recorder.outstanding_allocations.lock().unwrap().clear();
});
let value = f();
let outstanding_allocation_indices = RECORDER.with(|recorder| {
recorder.enabled.store(false, SeqCst);
recorder.allocation_count.store(0, SeqCst);
recorder
.outstanding_allocations
.lock()
.unwrap()
.drain()
.map(|e| e.1)
.collect::<Vec<_>>()
});
assert!(
outstanding_allocation_indices.is_empty(),
"Leaked allocation indices: {outstanding_allocation_indices:?}"
);
value
}
fn record_alloc(ptr: *mut c_void) {
RECORDER.with(|recorder| {
if recorder.enabled.load(SeqCst) {
let count = recorder.allocation_count.fetch_add(1, SeqCst);
recorder
.outstanding_allocations
.lock()
.unwrap()
.insert(Allocation(ptr), count);
}
});
}
fn record_dealloc(ptr: *mut c_void) {
RECORDER.with(|recorder| {
if recorder.enabled.load(SeqCst) {
recorder
.outstanding_allocations
.lock()
.unwrap()
.remove(&Allocation(ptr));
}
});
}
unsafe extern "C" fn ts_record_malloc(size: usize) -> *mut c_void {
let result = malloc(size);
record_alloc(result);
result
}
unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void {
let result = calloc(count, size);
record_alloc(result);
result
}
unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
let result = realloc(ptr, size);
if ptr.is_null() {
record_alloc(result);
} else if !core::ptr::eq(ptr, result) {
record_dealloc(ptr);
record_alloc(result);
}
result
}
unsafe extern "C" fn ts_record_free(ptr: *mut c_void) {
record_dealloc(ptr);
free(ptr);
}

View file

@ -0,0 +1,65 @@
pub static ROOT_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.parent()
.unwrap()
.to_owned()
});
pub static FIXTURES_DIR: LazyLock<PathBuf> =
LazyLock::new(|| ROOT_DIR.join("test").join("fixtures"));
pub static HEADER_DIR: LazyLock<PathBuf> = LazyLock::new(|| ROOT_DIR.join("lib").join("include"));
pub static GRAMMARS_DIR: LazyLock<PathBuf> =
LazyLock::new(|| ROOT_DIR.join("test").join("fixtures").join("grammars"));
pub static SCRATCH_BASE_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
let result = ROOT_DIR.join("target").join("scratch");
fs::create_dir_all(&result).unwrap();
result
});
#[cfg(feature = "wasm")]
pub static WASM_DIR: LazyLock<PathBuf> = LazyLock::new(|| ROOT_DIR.join("target").join("release"));
pub static SCRATCH_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
// https://doc.rust-lang.org/reference/conditional-compilation.html
let vendor = if cfg!(target_vendor = "apple") {
"apple"
} else if cfg!(target_vendor = "fortanix") {
"fortanix"
} else if cfg!(target_vendor = "pc") {
"pc"
} else {
"unknown"
};
let env = if cfg!(target_env = "gnu") {
"gnu"
} else if cfg!(target_env = "msvc") {
"msvc"
} else if cfg!(target_env = "musl") {
"musl"
} else if cfg!(target_env = "sgx") {
"sgx"
} else {
"unknown"
};
let endian = if cfg!(target_endian = "little") {
"little"
} else if cfg!(target_endian = "big") {
"big"
} else {
"unknown"
};
let machine = format!(
"{}-{}-{vendor}-{env}-{endian}",
std::env::consts::ARCH,
std::env::consts::OS
);
let result = SCRATCH_BASE_DIR.join(machine);
fs::create_dir_all(&result).unwrap();
result
});

View file

@ -0,0 +1,49 @@
use std::{ops::Range, str};
#[derive(Debug)]
pub struct ReadRecorder<'a> {
content: &'a [u8],
indices_read: Vec<usize>,
}
impl<'a> ReadRecorder<'a> {
#[must_use]
pub const fn new(content: &'a [u8]) -> Self {
Self {
content,
indices_read: Vec::new(),
}
}
pub fn read(&mut self, offset: usize) -> &'a [u8] {
if offset < self.content.len() {
if let Err(i) = self.indices_read.binary_search(&offset) {
self.indices_read.insert(i, offset);
}
&self.content[offset..(offset + 1)]
} else {
&[]
}
}
pub fn strings_read(&self) -> Vec<&'a str> {
let mut result = Vec::new();
let mut last_range = Option::<Range<usize>>::None;
for index in &self.indices_read {
if let Some(ref mut range) = &mut last_range {
if range.end == *index {
range.end += 1;
} else {
result.push(str::from_utf8(&self.content[range.clone()]).unwrap());
last_range = None;
}
} else {
last_range = Some(*index..(*index + 1));
}
}
if let Some(range) = last_range {
result.push(str::from_utf8(&self.content[range]).unwrap());
}
result
}
}

View file

@ -0,0 +1,140 @@
use std::{
env, fs,
path::{Path, PathBuf},
sync::LazyLock,
};
use anyhow::Context;
use tree_sitter::Language;
use tree_sitter_generate::{load_grammar_file, ALLOC_HEADER, ARRAY_HEADER};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_loader::{CompileConfig, Loader};
use tree_sitter_tags::TagsConfiguration;
use crate::tests::generate_parser;
include!("./dirs.rs");
static TEST_LOADER: LazyLock<Loader> = LazyLock::new(|| {
let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() {
loader.debug_build(true);
}
loader
});
pub fn test_loader() -> &'static Loader {
&TEST_LOADER
}
pub fn fixtures_dir() -> &'static Path {
&FIXTURES_DIR
}
pub fn scratch_dir() -> &'static Path {
&SCRATCH_DIR
}
pub fn get_language(name: &str) -> Language {
let src_dir = GRAMMARS_DIR.join(name).join("src");
let mut config = CompileConfig::new(&src_dir, None, None);
config.header_paths.push(&HEADER_DIR);
TEST_LOADER.load_language_at_path(config).unwrap()
}
pub fn get_test_fixture_language(name: &str) -> Language {
let grammar_dir_path = fixtures_dir().join("test_grammars").join(name);
let grammar_json = load_grammar_file(&grammar_dir_path.join("grammar.js"), None).unwrap();
let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap();
get_test_language(&parser_name, &parser_code, Some(&grammar_dir_path))
}
pub fn get_language_queries_path(language_name: &str) -> PathBuf {
GRAMMARS_DIR.join(language_name).join("queries")
}
pub fn get_highlight_config(
language_name: &str,
injection_query_filename: Option<&str>,
highlight_names: &[String],
) -> HighlightConfiguration {
let language = get_language(language_name);
let queries_path = get_language_queries_path(language_name);
let highlights_query = fs::read_to_string(queries_path.join("highlights.scm")).unwrap();
let injections_query =
injection_query_filename.map_or_else(String::new, |injection_query_filename| {
fs::read_to_string(queries_path.join(injection_query_filename)).unwrap()
});
let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or_default();
let mut result = HighlightConfiguration::new(
language,
language_name,
&highlights_query,
&injections_query,
&locals_query,
)
.unwrap();
result.configure(highlight_names);
result
}
pub fn get_tags_config(language_name: &str) -> TagsConfiguration {
let language = get_language(language_name);
let queries_path = get_language_queries_path(language_name);
let tags_query = fs::read_to_string(queries_path.join("tags.scm")).unwrap();
let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or_default();
TagsConfiguration::new(language, &tags_query, &locals_query).unwrap()
}
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
let src_dir = scratch_dir().join("src").join(name);
fs::create_dir_all(&src_dir).unwrap();
let parser_path = src_dir.join("parser.c");
if !fs::read_to_string(&parser_path).is_ok_and(|content| content == parser_code) {
fs::write(&parser_path, parser_code).unwrap();
}
let scanner_path = if let Some(path) = path {
let scanner_path = path.join("scanner.c");
if scanner_path.exists() {
let scanner_code = fs::read_to_string(&scanner_path).unwrap();
let scanner_copy_path = src_dir.join("scanner.c");
if !fs::read_to_string(&scanner_copy_path).is_ok_and(|content| content == scanner_code)
{
fs::write(&scanner_copy_path, scanner_code).unwrap();
}
Some(scanner_copy_path)
} else {
None
}
} else {
None
};
let header_path = src_dir.join("tree_sitter");
fs::create_dir_all(&header_path).unwrap();
for (file, content) in [
("alloc.h", ALLOC_HEADER),
("array.h", ARRAY_HEADER),
("parser.h", tree_sitter::PARSER_HEADER),
] {
let file = header_path.join(file);
fs::write(&file, content)
.with_context(|| format!("Failed to write {:?}", file.file_name().unwrap()))
.unwrap();
}
let paths_to_check = if let Some(scanner_path) = &scanner_path {
vec![parser_path, scanner_path.clone()]
} else {
vec![parser_path]
};
let mut config = CompileConfig::new(&src_dir, Some(&paths_to_check), None);
config.header_paths = vec![&HEADER_DIR];
config.name = name.to_string();
TEST_LOADER.load_language_at_path_with_name(config).unwrap()
}

View file

@ -0,0 +1,363 @@
use std::{cmp::Ordering, fmt::Write, ops::Range};
use rand::prelude::Rng;
use streaming_iterator::{IntoStreamingIterator, StreamingIterator};
use tree_sitter::{
Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, Tree, TreeCursor,
};
#[derive(Debug)]
pub struct Pattern {
kind: Option<&'static str>,
named: bool,
field: Option<&'static str>,
capture: Option<String>,
children: Vec<Pattern>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Match<'a, 'tree> {
pub captures: Vec<(&'a str, Node<'tree>)>,
pub last_node: Option<Node<'tree>>,
}
const CAPTURE_NAMES: &[&str] = &[
"one", "two", "three", "four", "five", "six", "seven", "eight",
];
impl Pattern {
pub fn random_pattern_in_tree(tree: &Tree, rng: &mut impl Rng) -> (Self, Range<Point>) {
let mut cursor = tree.walk();
// Descend to the node at a random byte offset and depth.
let mut max_depth = 0;
let byte_offset = rng.gen_range(0..cursor.node().end_byte());
while cursor.goto_first_child_for_byte(byte_offset).is_some() {
max_depth += 1;
}
let depth = rng.gen_range(0..=max_depth);
for _ in 0..depth {
cursor.goto_parent();
}
// Build a pattern that matches that node.
// Sometimes include subsequent siblings of the node.
let pattern_start = cursor.node().start_position();
let mut roots = vec![Self::random_pattern_for_node(&mut cursor, rng)];
while roots.len() < 5 && cursor.goto_next_sibling() {
if rng.gen_bool(0.2) {
roots.push(Self::random_pattern_for_node(&mut cursor, rng));
}
}
let pattern_end = cursor.node().end_position();
let mut pattern = Self {
kind: None,
named: true,
field: None,
capture: None,
children: roots,
};
if pattern.children.len() == 1 ||
// In a parenthesized list of sibling patterns, the first
// sibling can't be an anonymous `_` wildcard.
(pattern.children[0].kind == Some("_") && !pattern.children[0].named)
{
pattern = pattern.children.pop().unwrap();
}
// In a parenthesized list of sibling patterns, the first
// sibling can't have a field name.
else {
pattern.children[0].field = None;
}
(pattern, pattern_start..pattern_end)
}
fn random_pattern_for_node(cursor: &mut TreeCursor, rng: &mut impl Rng) -> Self {
let node = cursor.node();
// Sometimes specify the node's type, sometimes use a wildcard.
let (kind, named) = if rng.gen_bool(0.9) {
(Some(node.kind()), node.is_named())
} else {
(Some("_"), node.is_named() && rng.gen_bool(0.8))
};
// Sometimes specify the node's field.
let field = if rng.gen_bool(0.75) {
cursor.field_name()
} else {
None
};
// Sometimes capture the node.
let capture = if rng.gen_bool(0.7) {
Some(CAPTURE_NAMES[rng.gen_range(0..CAPTURE_NAMES.len())].to_string())
} else {
None
};
// Walk the children and include child patterns for some of them.
let mut children = Vec::new();
if named && cursor.goto_first_child() {
let max_children = rng.gen_range(0..4);
while cursor.goto_next_sibling() {
if rng.gen_bool(0.6) {
let child_ast = Self::random_pattern_for_node(cursor, rng);
children.push(child_ast);
if children.len() >= max_children {
break;
}
}
}
cursor.goto_parent();
}
Self {
kind,
named,
field,
capture,
children,
}
}
fn write_to_string(&self, string: &mut String, indent: usize) {
if let Some(field) = self.field {
write!(string, "{field}: ").unwrap();
}
if self.named {
string.push('(');
let mut has_contents = false;
if let Some(kind) = &self.kind {
write!(string, "{kind}").unwrap();
has_contents = true;
}
for child in &self.children {
let indent = indent + 2;
if has_contents {
string.push('\n');
string.push_str(&" ".repeat(indent));
}
child.write_to_string(string, indent);
has_contents = true;
}
string.push(')');
} else if self.kind == Some("_") {
string.push('_');
} else {
write!(string, "\"{}\"", self.kind.unwrap().replace('\"', "\\\"")).unwrap();
}
if let Some(capture) = &self.capture {
write!(string, " @{capture}").unwrap();
}
}
pub fn matches_in_tree<'tree>(&self, tree: &'tree Tree) -> Vec<Match<'_, 'tree>> {
let mut matches = Vec::new();
// Compute the matches naively: walk the tree and
// retry the entire pattern for each node.
let mut cursor = tree.walk();
let mut ascending = false;
loop {
if ascending {
if cursor.goto_next_sibling() {
ascending = false;
} else if !cursor.goto_parent() {
break;
}
} else {
let matches_here = self.match_node(&mut cursor);
matches.extend_from_slice(&matches_here);
if !cursor.goto_first_child() {
ascending = true;
}
}
}
matches.sort_unstable();
matches.iter_mut().for_each(|m| m.last_node = None);
matches.dedup();
matches
}
pub fn match_node<'tree>(&self, cursor: &mut TreeCursor<'tree>) -> Vec<Match<'_, 'tree>> {
let node = cursor.node();
// If a kind is specified, check that it matches the node.
if let Some(kind) = self.kind {
if kind == "_" {
if self.named && !node.is_named() {
return Vec::new();
}
} else if kind != node.kind() || self.named != node.is_named() {
return Vec::new();
}
}
// If a field is specified, check that it matches the node.
if let Some(field) = self.field {
if cursor.field_name() != Some(field) {
return Vec::new();
}
}
// Create a match for the current node.
let mat = Match {
captures: self
.capture
.as_ref()
.map_or_else(Vec::new, |name| vec![(name.as_str(), node)]),
last_node: Some(node),
};
// If there are no child patterns to match, then return this single match.
if self.children.is_empty() {
return vec![mat];
}
// Find every matching combination of child patterns and child nodes.
let mut finished_matches = Vec::<Match>::new();
if cursor.goto_first_child() {
let mut match_states = vec![(0, mat)];
loop {
let mut new_match_states = Vec::new();
for (pattern_index, mat) in &match_states {
let child_pattern = &self.children[*pattern_index];
let child_matches = child_pattern.match_node(cursor);
for child_match in child_matches {
let mut combined_match = mat.clone();
combined_match.last_node = child_match.last_node;
combined_match
.captures
.extend_from_slice(&child_match.captures);
if pattern_index + 1 < self.children.len() {
new_match_states.push((*pattern_index + 1, combined_match));
} else {
let mut existing = false;
for existing_match in &mut finished_matches {
if existing_match.captures == combined_match.captures {
if child_pattern.capture.is_some() {
existing_match.last_node = combined_match.last_node;
}
existing = true;
}
}
if !existing {
finished_matches.push(combined_match);
}
}
}
}
match_states.extend_from_slice(&new_match_states);
if !cursor.goto_next_sibling() {
break;
}
}
cursor.goto_parent();
}
finished_matches
}
}
impl std::fmt::Display for Pattern {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut result = String::new();
self.write_to_string(&mut result, 0);
write!(f, "{result}")
}
}
impl PartialOrd for Match<'_, '_> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for Match<'_, '_> {
// Tree-sitter returns matches in the order that they terminate
// during a depth-first walk of the tree. If multiple matches
// terminate on the same node, those matches are produced in the
// order that their captures were discovered.
fn cmp(&self, other: &Self) -> Ordering {
if let Some((last_node_a, last_node_b)) = self.last_node.zip(other.last_node) {
let cmp = compare_depth_first(last_node_a, last_node_b);
if cmp.is_ne() {
return cmp;
}
}
for (a, b) in self.captures.iter().zip(other.captures.iter()) {
let cmp = compare_depth_first(a.1, b.1);
if !cmp.is_eq() {
return cmp;
}
}
self.captures.len().cmp(&other.captures.len())
}
}
fn compare_depth_first(a: Node, b: Node) -> Ordering {
let a = a.byte_range();
let b = b.byte_range();
a.start.cmp(&b.start).then_with(|| b.end.cmp(&a.end))
}
pub fn assert_query_matches(
language: &Language,
query: &Query,
source: &str,
expected: &[(usize, Vec<(&str, &str)>)],
) {
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(query, tree.root_node(), source.as_bytes());
pretty_assertions::assert_eq!(expected, collect_matches(matches, query, source));
pretty_assertions::assert_eq!(false, cursor.did_exceed_match_limit());
}
pub fn collect_matches<'a>(
mut matches: impl StreamingIterator<Item = QueryMatch<'a, 'a>>,
query: &'a Query,
source: &'a str,
) -> Vec<(usize, Vec<(&'a str, &'a str)>)> {
let mut result = Vec::new();
while let Some(m) = matches.next() {
result.push((
m.pattern_index,
format_captures(m.captures.iter().into_streaming_iter_ref(), query, source),
));
}
result
}
pub fn collect_captures<'a>(
captures: impl StreamingIterator<Item = (QueryMatch<'a, 'a>, usize)>,
query: &'a Query,
source: &'a str,
) -> Vec<(&'a str, &'a str)> {
format_captures(captures.map(|(m, i)| m.captures[*i]), query, source)
}
fn format_captures<'a>(
mut captures: impl StreamingIterator<Item = QueryCapture<'a>>,
query: &'a Query,
source: &'a str,
) -> Vec<(&'a str, &'a str)> {
let mut result = Vec::new();
while let Some(capture) = captures.next() {
result.push((
query.capture_names()[capture.index as usize],
capture.node.utf8_text(source.as_bytes()).unwrap(),
));
}
result
}

View file

@ -0,0 +1,786 @@
use std::{
ffi::CString,
fs,
os::raw::c_char,
ptr, slice, str,
sync::{
atomic::{AtomicUsize, Ordering},
LazyLock,
},
};
use tree_sitter_highlight::{
c, Error, Highlight, HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer,
};
use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path};
static JS_HIGHLIGHT: LazyLock<HighlightConfiguration> =
LazyLock::new(|| get_highlight_config("javascript", Some("injections.scm"), &HIGHLIGHT_NAMES));
static JSDOC_HIGHLIGHT: LazyLock<HighlightConfiguration> =
LazyLock::new(|| get_highlight_config("jsdoc", None, &HIGHLIGHT_NAMES));
static HTML_HIGHLIGHT: LazyLock<HighlightConfiguration> =
LazyLock::new(|| get_highlight_config("html", Some("injections.scm"), &HIGHLIGHT_NAMES));
static EJS_HIGHLIGHT: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
get_highlight_config(
"embedded-template",
Some("injections-ejs.scm"),
&HIGHLIGHT_NAMES,
)
});
static RUST_HIGHLIGHT: LazyLock<HighlightConfiguration> =
LazyLock::new(|| get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES));
static HIGHLIGHT_NAMES: LazyLock<Vec<String>> = LazyLock::new(|| {
[
"attribute",
"boolean",
"carriage-return",
"comment",
"constant",
"constant.builtin",
"constructor",
"embedded",
"function",
"function.builtin",
"keyword",
"module",
"number",
"operator",
"property",
"property.builtin",
"punctuation",
"punctuation.bracket",
"punctuation.delimiter",
"punctuation.special",
"string",
"string.special",
"tag",
"type",
"type.builtin",
"variable",
"variable.builtin",
"variable.parameter",
]
.iter()
.copied()
.map(String::from)
.collect()
});
static HTML_ATTRS: LazyLock<Vec<String>> = LazyLock::new(|| {
HIGHLIGHT_NAMES
.iter()
.map(|s| format!("class={s}"))
.collect()
});
#[test]
fn test_highlighting_javascript() {
let source = "const a = function(b) { return b + c; }";
assert_eq!(
&to_token_vector(source, &JS_HIGHLIGHT).unwrap(),
&[vec![
("const", vec!["keyword"]),
(" ", vec![]),
("a", vec!["function"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("function", vec!["keyword"]),
("(", vec!["punctuation.bracket"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"]),
(" ", vec![]),
("return", vec!["keyword"]),
(" ", vec![]),
("b", vec!["variable"]),
(" ", vec![]),
("+", vec!["operator"]),
(" ", vec![]),
("c", vec!["variable"]),
(";", vec!["punctuation.delimiter"]),
(" ", vec![]),
("}", vec!["punctuation.bracket"]),
]]
);
}
#[test]
fn test_highlighting_injected_html_in_javascript() {
let source = ["const s = html `<div>${a < b}</div>`;"].join("\n");
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
&[vec![
("const", vec!["keyword"]),
(" ", vec![]),
("s", vec!["variable"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("html", vec!["function"]),
(" ", vec![]),
("`", vec!["string"]),
("<", vec!["string", "punctuation.bracket"]),
("div", vec!["string", "tag"]),
(">", vec!["string", "punctuation.bracket"]),
("${", vec!["string", "embedded", "punctuation.special"]),
("a", vec!["string", "embedded", "variable"]),
(" ", vec!["string", "embedded"]),
("<", vec!["string", "embedded", "operator"]),
(" ", vec!["string", "embedded"]),
("b", vec!["string", "embedded", "variable"]),
("}", vec!["string", "embedded", "punctuation.special"]),
("</", vec!["string", "punctuation.bracket"]),
("div", vec!["string", "tag"]),
(">", vec!["string", "punctuation.bracket"]),
("`", vec!["string"]),
(";", vec!["punctuation.delimiter"]),
]]
);
}
#[test]
fn test_highlighting_injected_javascript_in_html_mini() {
let source = "<script>const x = new Thing();</script>";
assert_eq!(
&to_token_vector(source, &HTML_HIGHLIGHT).unwrap(),
&[vec![
("<", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
("const", vec!["keyword"]),
(" ", vec![]),
("x", vec!["variable"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("new", vec!["keyword"]),
(" ", vec![]),
("Thing", vec!["constructor"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
("</", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],]
);
}
#[test]
fn test_highlighting_injected_javascript_in_html() {
let source = [
"<body>",
" <script>",
" const x = new Thing();",
" </script>",
"</body>",
]
.join("\n");
assert_eq!(
&to_token_vector(&source, &HTML_HIGHLIGHT).unwrap(),
&[
vec![
("<", vec!["punctuation.bracket"]),
("body", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],
vec![
(" ", vec![]),
("<", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],
vec![
(" ", vec![]),
("const", vec!["keyword"]),
(" ", vec![]),
("x", vec!["variable"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("new", vec!["keyword"]),
(" ", vec![]),
("Thing", vec!["constructor"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
],
vec![
(" ", vec![]),
("</", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],
vec![
("</", vec!["punctuation.bracket"]),
("body", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],
]
);
}
#[test]
fn test_highlighting_multiline_nodes_to_html() {
let source = [
"const SOMETHING = `",
" one ${",
" two()",
" } three",
"`",
"",
]
.join("\n");
assert_eq!(
&to_html(&source, &JS_HIGHLIGHT).unwrap(),
&[
"<span class=keyword>const</span> <span class=constant>SOMETHING</span> <span class=operator>=</span> <span class=string>`</span>\n".to_string(),
"<span class=string> one <span class=embedded><span class=punctuation.special>${</span></span></span>\n".to_string(),
"<span class=string><span class=embedded> <span class=function>two</span><span class=punctuation.bracket>(</span><span class=punctuation.bracket>)</span></span></span>\n".to_string(),
"<span class=string><span class=embedded> <span class=punctuation.special>}</span></span> three</span>\n".to_string(),
"<span class=string>`</span>\n".to_string(),
]
);
}
#[test]
fn test_highlighting_with_local_variable_tracking() {
let source = [
"module.exports = function a(b) {",
" const module = c;",
" console.log(module, b);",
"}",
]
.join("\n");
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
&[
vec![
("module", vec!["variable.builtin"]),
(".", vec!["punctuation.delimiter"]),
("exports", vec!["function"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("function", vec!["keyword"]),
(" ", vec![]),
("a", vec!["function"]),
("(", vec!["punctuation.bracket"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"])
],
vec![
(" ", vec![]),
("const", vec!["keyword"]),
(" ", vec![]),
("module", vec!["variable"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("c", vec!["variable"]),
(";", vec!["punctuation.delimiter"])
],
vec![
(" ", vec![]),
("console", vec!["variable.builtin"]),
(".", vec!["punctuation.delimiter"]),
("log", vec!["function"]),
("(", vec!["punctuation.bracket"]),
// Not a builtin, because `module` was defined as a variable above.
("module", vec!["variable"]),
(",", vec!["punctuation.delimiter"]),
(" ", vec![]),
// A parameter, because `b` was defined as a parameter above.
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
],
vec![("}", vec!["punctuation.bracket"])]
],
);
}
#[test]
fn test_highlighting_empty_lines() {
let source = [
"class A {",
"",
" b(c) {",
"",
" d(e)",
"",
" }",
"",
"}",
]
.join("\n");
assert_eq!(
&to_html(&source, &JS_HIGHLIGHT).unwrap(),
&[
"<span class=keyword>class</span> <span class=constructor>A</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=function>d</span><span class=punctuation.bracket>(</span><span class=variable>e</span><span class=punctuation.bracket>)</span>\n".to_string(),
"\n".to_string(),
" <span class=punctuation.bracket>}</span>\n".to_string(),
"\n".to_string(),
"<span class=punctuation.bracket>}</span>\n".to_string(),
]
);
}
#[test]
fn test_highlighting_carriage_returns() {
let source = "a = \"a\rb\"\r\nb\r";
assert_eq!(
&to_html(source, &JS_HIGHLIGHT).unwrap(),
&[
"<span class=variable>a</span> <span class=operator>=</span> <span class=string>&quot;a<span class=carriage-return></span><span class=variable>b</span>&quot;</span>\n",
"<span class=variable>b</span><span class=carriage-return></span>\n",
],
);
}
#[test]
fn test_highlighting_ejs_with_html_and_javascript() {
let source = ["<div><% foo() %></div><script> bar() </script>"].join("\n");
assert_eq!(
&to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(),
&[[
("<", vec!["punctuation.bracket"]),
("div", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
("<%", vec!["keyword"]),
(" ", vec![]),
("foo", vec!["function"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("%>", vec!["keyword"]),
("</", vec!["punctuation.bracket"]),
("div", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
("<", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
(" ", vec![]),
("bar", vec!["function"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("</", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
]],
);
}
#[test]
fn test_highlighting_javascript_with_jsdoc() {
// Regression test: the middle comment has no highlights. This should not prevent
// later injections from highlighting properly.
let source = ["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n");
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
&[[
("a", vec!["variable"]),
(" ", vec![]),
("/* ", vec!["comment"]),
("@see", vec!["comment", "keyword"]),
(" a */", vec!["comment"]),
(" ", vec![]),
("b", vec!["variable"]),
(";", vec!["punctuation.delimiter"]),
(" ", vec![]),
("/* nothing */", vec!["comment"]),
(" ", vec![]),
("c", vec!["variable"]),
(";", vec!["punctuation.delimiter"]),
(" ", vec![]),
("/* ", vec!["comment"]),
("@see", vec!["comment", "keyword"]),
(" b */", vec!["comment"])
]],
);
}
#[test]
fn test_highlighting_with_content_children_included() {
let source = ["assert!(", " a.b.c() < D::e::<F>()", ");"].join("\n");
assert_eq!(
&to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(),
&[
vec![
("assert", vec!["function"]),
("!", vec!["function"]),
("(", vec!["punctuation.bracket"]),
],
vec![
(" a", vec![]),
(".", vec!["punctuation.delimiter"]),
("b", vec!["property"]),
(".", vec!["punctuation.delimiter"]),
("c", vec!["function"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
(" < ", vec![]),
("D", vec!["type"]),
("::", vec!["punctuation.delimiter"]),
("e", vec!["function"]),
("::", vec!["punctuation.delimiter"]),
("<", vec!["punctuation.bracket"]),
("F", vec!["type"]),
(">", vec!["punctuation.bracket"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
],
vec![
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
]
],
);
}
#[test]
fn test_highlighting_cancellation() {
// An HTML document with a large injected JavaScript document:
let mut source = "<script>\n".to_string();
for _ in 0..500 {
source += "function a() { console.log('hi'); }\n";
}
source += "</script>\n";
// Cancel the highlighting before parsing the injected document.
let cancellation_flag = AtomicUsize::new(0);
let injection_callback = |name: &str| {
cancellation_flag.store(1, Ordering::SeqCst);
test_language_for_injection_string(name)
};
// The initial `highlight` call, which eagerly parses the outer document, should not fail.
let mut highlighter = Highlighter::new();
let events = highlighter
.highlight(
&HTML_HIGHLIGHT,
source.as_bytes(),
Some(&cancellation_flag),
injection_callback,
)
.unwrap();
// Iterating the scopes should not panic. It should return an error once the
// cancellation is detected.
for event in events {
if let Err(e) = event {
assert_eq!(e, Error::Cancelled);
return;
}
}
panic!("Expected an error while iterating highlighter");
}
#[test]
fn test_highlighting_via_c_api() {
let highlights = [
"class=tag\0",
"class=function\0",
"class=string\0",
"class=keyword\0",
];
let highlight_names = highlights
.iter()
.map(|h| h["class=".len()..].as_ptr().cast::<c_char>())
.collect::<Vec<_>>();
let highlight_attrs = highlights
.iter()
.map(|h| h.as_bytes().as_ptr().cast::<c_char>())
.collect::<Vec<_>>();
let highlighter = unsafe {
c::ts_highlighter_new(
std::ptr::addr_of!(highlight_names[0]),
std::ptr::addr_of!(highlight_attrs[0]),
highlights.len() as u32,
)
};
let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");
let js_scope = c_string("source.js");
let js_injection_regex = c_string("^javascript");
let language = get_language("javascript");
let lang_name = c_string("javascript");
let queries = get_language_queries_path("javascript");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap();
unsafe {
c::ts_highlighter_add_language(
highlighter,
lang_name.as_ptr(),
js_scope.as_ptr(),
js_injection_regex.as_ptr(),
language,
highlights_query.as_ptr().cast::<c_char>(),
injections_query.as_ptr().cast::<c_char>(),
locals_query.as_ptr().cast::<c_char>(),
highlights_query.len() as u32,
injections_query.len() as u32,
locals_query.len() as u32,
);
}
let html_scope = c_string("text.html.basic");
let html_injection_regex = c_string("^html");
let language = get_language("html");
let lang_name = c_string("html");
let queries = get_language_queries_path("html");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
unsafe {
c::ts_highlighter_add_language(
highlighter,
lang_name.as_ptr(),
html_scope.as_ptr(),
html_injection_regex.as_ptr(),
language,
highlights_query.as_ptr().cast::<c_char>(),
injections_query.as_ptr().cast::<c_char>(),
ptr::null(),
highlights_query.len() as u32,
injections_query.len() as u32,
0,
);
}
let buffer = c::ts_highlight_buffer_new();
unsafe {
c::ts_highlighter_highlight(
highlighter,
html_scope.as_ptr(),
source_code.as_ptr(),
source_code.as_bytes().len() as u32,
buffer,
ptr::null_mut(),
);
}
let output_bytes = unsafe { c::ts_highlight_buffer_content(buffer) };
let output_line_offsets = unsafe { c::ts_highlight_buffer_line_offsets(buffer) };
let output_len = unsafe { c::ts_highlight_buffer_len(buffer) };
let output_line_count = unsafe { c::ts_highlight_buffer_line_count(buffer) };
let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) };
let output_line_offsets =
unsafe { slice::from_raw_parts(output_line_offsets, output_line_count as usize) };
let mut lines = Vec::new();
for i in 0..(output_line_count as usize) {
let line_start = output_line_offsets[i] as usize;
let line_end = output_line_offsets
.get(i + 1)
.map_or(output_bytes.len(), |x| *x as usize);
lines.push(str::from_utf8(&output_bytes[line_start..line_end]).unwrap());
}
assert_eq!(
lines,
vec![
"&lt;<span class=tag>script</span>&gt;\n",
"<span class=keyword>const</span> a = <span class=function>b</span>(<span class=string>&#39;c&#39;</span>);\n",
"c.<span class=function>d</span>();\n",
"&lt;/<span class=tag>script</span>&gt;\n",
]
);
unsafe {
c::ts_highlighter_delete(highlighter);
c::ts_highlight_buffer_delete(buffer);
}
}
#[test]
fn test_highlighting_with_all_captures_applied() {
let source = "fn main(a: u32, b: u32) -> { let c = a + b; }";
let language = get_language("rust");
let highlights_query = indoc::indoc! {"
[
\"fn\"
\"let\"
] @keyword
(identifier) @variable
(function_item name: (identifier) @function)
(parameter pattern: (identifier) @variable.parameter)
(primitive_type) @type.builtin
\"=\" @operator
[ \"->\" \":\" \";\" ] @punctuation.delimiter
[ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket
"};
let mut rust_highlight_reverse =
HighlightConfiguration::new(language, "rust", highlights_query, "", "").unwrap();
rust_highlight_reverse.configure(&HIGHLIGHT_NAMES);
assert_eq!(
&to_token_vector(source, &rust_highlight_reverse).unwrap(),
&[[
("fn", vec!["keyword"]),
(" ", vec![]),
("main", vec!["function"]),
("(", vec!["punctuation.bracket"]),
("a", vec!["variable.parameter"]),
(":", vec!["punctuation.delimiter"]),
(" ", vec![]),
("u32", vec!["type.builtin"]),
(", ", vec![]),
("b", vec!["variable.parameter"]),
(":", vec!["punctuation.delimiter"]),
(" ", vec![]),
("u32", vec!["type.builtin"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("->", vec!["punctuation.delimiter"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"]),
(" ", vec![]),
("let", vec!["keyword"]),
(" ", vec![]),
("c", vec!["variable"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("a", vec!["variable"]),
(" + ", vec![]),
("b", vec!["variable"]),
(";", vec!["punctuation.delimiter"]),
(" ", vec![]),
("}", vec!["punctuation.bracket"])
]],
);
}
#[test]
fn test_decode_utf8_lossy() {
use tree_sitter::LossyUtf8;
let parts = LossyUtf8::new(b"hi").collect::<Vec<_>>();
assert_eq!(parts, vec!["hi"]);
let parts = LossyUtf8::new(b"hi\xc0\xc1bye").collect::<Vec<_>>();
assert_eq!(parts, vec!["hi", "\u{fffd}", "\u{fffd}", "bye"]);
let parts = LossyUtf8::new(b"\xc0\xc1bye").collect::<Vec<_>>();
assert_eq!(parts, vec!["\u{fffd}", "\u{fffd}", "bye"]);
let parts = LossyUtf8::new(b"hello\xc0\xc1").collect::<Vec<_>>();
assert_eq!(parts, vec!["hello", "\u{fffd}", "\u{fffd}"]);
}
fn c_string(s: &str) -> CString {
CString::new(s.as_bytes().to_vec()).unwrap()
}
fn test_language_for_injection_string<'a>(string: &str) -> Option<&'a HighlightConfiguration> {
match string {
"javascript" => Some(&JS_HIGHLIGHT),
"html" => Some(&HTML_HIGHLIGHT),
"rust" => Some(&RUST_HIGHLIGHT),
"jsdoc" => Some(&JSDOC_HIGHLIGHT),
_ => None,
}
}
fn to_html<'a>(
src: &'a str,
language_config: &'a HighlightConfiguration,
) -> Result<Vec<String>, Error> {
let src = src.as_bytes();
let mut renderer = HtmlRenderer::new();
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(
language_config,
src,
None,
&test_language_for_injection_string,
)?;
renderer.set_carriage_return_highlight(
HIGHLIGHT_NAMES
.iter()
.position(|s| s == "carriage-return")
.map(Highlight),
);
renderer
.render(events, src, &|highlight, output| {
output.extend(HTML_ATTRS[highlight.0].as_bytes());
})
.unwrap();
Ok(renderer
.lines()
.map(std::string::ToString::to_string)
.collect())
}
#[allow(clippy::type_complexity)]
fn to_token_vector<'a>(
src: &'a str,
language_config: &'a HighlightConfiguration,
) -> Result<Vec<Vec<(&'a str, Vec<&'static str>)>>, Error> {
let src = src.as_bytes();
let mut highlighter = Highlighter::new();
let mut lines = Vec::new();
let mut highlights = Vec::new();
let mut line = Vec::new();
let events = highlighter.highlight(
language_config,
src,
None,
&test_language_for_injection_string,
)?;
for event in events {
match event? {
HighlightEvent::HighlightStart(s) => highlights.push(HIGHLIGHT_NAMES[s.0].as_str()),
HighlightEvent::HighlightEnd => {
highlights.pop();
}
HighlightEvent::Source { start, end } => {
let s = str::from_utf8(&src[start..end]).unwrap();
for (i, l) in s.split('\n').enumerate() {
let l = l.trim_end_matches('\r');
if i > 0 {
lines.push(std::mem::take(&mut line));
}
if !l.is_empty() {
line.push((l, highlights.clone()));
}
}
}
}
}
if !line.is_empty() {
lines.push(line);
}
Ok(lines)
}

View file

@ -0,0 +1,199 @@
use tree_sitter::{self, Parser};
use super::helpers::fixtures::get_language;
#[test]
fn test_lookahead_iterator() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(&language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
assert_eq!(
next_state,
language.next_state(cursor.node().parse_state(), cursor.node().grammar_id())
);
assert!((next_state as usize) < language.parse_state_count());
assert!(cursor.goto_next_sibling()); // type_identifier
assert_eq!(next_state, cursor.node().parse_state());
assert_eq!(cursor.node().grammar_name(), "identifier");
assert_ne!(cursor.node().grammar_id(), cursor.node().kind_id());
let expected_symbols = ["//", "/*", "identifier", "line_comment", "block_comment"];
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
assert_eq!(*lookahead.language(), language);
assert!(lookahead.iter_names().eq(expected_symbols));
lookahead.reset_state(next_state);
assert!(lookahead.iter_names().eq(expected_symbols));
lookahead.reset(&language, next_state);
assert!(lookahead
.map(|s| language.node_kind_for_id(s).unwrap())
.eq(expected_symbols));
}
#[test]
fn test_lookahead_iterator_modifiable_only_by_mut() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(&language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
let _ = lookahead.next();
let mut names = lookahead.iter_names();
let _ = names.next();
}
#[test]
fn test_symbol_metadata_checks() {
let language = get_language("rust");
for i in 0..language.node_kind_count() {
let sym = i as u16;
let name = language.node_kind_for_id(sym).unwrap();
match name {
"_type"
| "_expression"
| "_pattern"
| "_literal"
| "_literal_pattern"
| "_declaration_statement" => assert!(language.node_kind_is_supertype(sym)),
"_raw_string_literal_start"
| "_raw_string_literal_end"
| "_line_doc_comment"
| "_error_sentinel" => assert!(!language.node_kind_is_supertype(sym)),
"enum_item" | "struct_item" | "type_item" => {
assert!(language.node_kind_is_named(sym));
}
"=>" | "[" | "]" | "(" | ")" | "{" | "}" => {
assert!(language.node_kind_is_visible(sym));
}
_ => {}
}
}
}
#[test]
fn test_supertypes() {
let language = get_language("rust");
let supertypes = language.supertypes();
if language.abi_version() < 15 {
return;
}
assert_eq!(supertypes.len(), 5);
assert_eq!(
supertypes
.iter()
.filter_map(|&s| language.node_kind_for_id(s))
.map(|s| s.to_string())
.collect::<Vec<String>>(),
vec![
"_expression",
"_literal",
"_literal_pattern",
"_pattern",
"_type"
]
);
for &supertype in supertypes {
let mut subtypes = language
.subtypes_for_supertype(supertype)
.iter()
.filter_map(|symbol| language.node_kind_for_id(*symbol))
.collect::<Vec<&str>>();
subtypes.sort_unstable();
subtypes.dedup();
match language.node_kind_for_id(supertype) {
Some("_literal") => {
assert_eq!(
subtypes,
&[
"boolean_literal",
"char_literal",
"float_literal",
"integer_literal",
"raw_string_literal",
"string_literal"
]
);
}
Some("_pattern") => {
assert_eq!(
subtypes,
&[
"_",
"_literal_pattern",
"captured_pattern",
"const_block",
"generic_pattern",
"identifier",
"macro_invocation",
"mut_pattern",
"or_pattern",
"range_pattern",
"ref_pattern",
"reference_pattern",
"remaining_field_pattern",
"scoped_identifier",
"slice_pattern",
"struct_pattern",
"tuple_pattern",
"tuple_struct_pattern",
]
);
}
Some("_type") => {
assert_eq!(
subtypes,
&[
"abstract_type",
"array_type",
"bounded_type",
"dynamic_type",
"function_type",
"generic_type",
"macro_invocation",
"metavariable",
"never_type",
"pointer_type",
"primitive_type",
"reference_type",
"removed_trait_bound",
"scoped_type_identifier",
"tuple_type",
"type_identifier",
"unit_type"
]
);
}
_ => {}
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,104 @@
// For some reasons `Command::spawn` doesn't work in CI env for many exotic arches.
#![cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
use std::{
env::VarError,
process::{Command, Stdio},
};
use tree_sitter::Parser;
use tree_sitter_generate::load_grammar_file;
use super::generate_parser;
use crate::tests::helpers::fixtures::{fixtures_dir, get_test_language};
// The `sanitizing` cfg is required to don't run tests under specific sunitizer
// because they don't work well with subprocesses _(it's an assumption)_.
//
// Below are two alternative examples of how to disable tests for some arches
// if a way with excluding the whole mod from compilation wouldn't work well.
//
// XXX: Also may be it makes sense to keep such tests as ignored by default
// to omit surprises and enable them on CI by passing an extra option explicitly:
//
// > cargo test -- --include-ignored
//
// #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
// #[cfg_attr(not(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing))), ignore)]
//
#[test]
fn test_grammar_that_should_hang_and_not_segfault() {
let parent_sleep_millis = 1000;
let test_name = "test_grammar_that_should_hang_and_not_segfault";
let test_var = "CARGO_HANG_TEST";
eprintln!(" {test_name}");
let tests_exec_path = std::env::args()
.next()
.expect("Failed to get tests executable path");
match std::env::var(test_var) {
Ok(v) if v == test_name => {
eprintln!(" child process id {}", std::process::id());
hang_test();
}
Err(VarError::NotPresent) => {
eprintln!(" parent process id {}", std::process::id());
let mut command = Command::new(tests_exec_path);
command.arg(test_name).env(test_var, test_name);
if std::env::args().any(|x| x == "--nocapture") {
command.arg("--nocapture");
} else {
command.stdout(Stdio::null()).stderr(Stdio::null());
}
match command.spawn() {
Ok(mut child) => {
std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis));
match child.try_wait() {
Ok(Some(status)) if status.success() => {
panic!("Child didn't hang and exited successfully")
}
Ok(Some(status)) => panic!(
"Child didn't hang and exited with status code: {:?}",
status.code()
),
_ => (),
}
if let Err(e) = child.kill() {
eprintln!(
"Failed to kill hang test's process id: {}, error: {e}",
child.id()
);
}
}
Err(e) => panic!("{e}"),
}
}
Err(e) => panic!("Env var error: {e}"),
_ => unreachable!(),
}
}
fn hang_test() {
let test_grammar_dir = fixtures_dir()
.join("test_grammars")
.join("get_col_should_hang_not_crash");
let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap();
let (parser_name, parser_code) = generate_parser(grammar_json.as_str()).unwrap();
let language = get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path()));
let mut parser = Parser::new();
parser.set_language(&language).unwrap();
let code_that_should_hang = "\nHello";
parser.parse(code_that_should_hang, None).unwrap();
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,15 @@
use tree_sitter::Parser;
use super::helpers::{allocations, fixtures::get_language};
#[test]
fn test_pathological_example_1() {
let language = "cpp";
let source = r#"*ss<s"ss<sqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<qssqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<sqss<sqss<s._<s<sq>(qqX<sqss<s.ss<sqsssq<(qss<sq&=ss<s<sqss<s._<s<sq<(qqX<sqss<s.ss<sqs"#;
allocations::record(|| {
let mut parser = Parser::new();
parser.set_language(&get_language(language)).unwrap();
parser.parse(source, None).unwrap();
});
}

View file

@ -0,0 +1,18 @@
[package]
name = "tree-sitter-tests-proc-macro"
version = "0.0.0"
edition.workspace = true
rust-version.workspace = true
publish = false
[lints]
workspace = true
[lib]
proc-macro = true
[dependencies]
proc-macro2 = "1.0.93"
quote = "1.0.38"
rand = "0.8.5"
syn = { version = "2.0.96", features = ["full"] }

View file

@ -0,0 +1,135 @@
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::quote;
use syn::{
parse::{Parse, ParseStream},
parse_macro_input, Error, Expr, Ident, ItemFn, LitInt, Token,
};
#[proc_macro_attribute]
pub fn retry(args: TokenStream, input: TokenStream) -> TokenStream {
let count = parse_macro_input!(args as LitInt);
let input = parse_macro_input!(input as ItemFn);
let attrs = &input.attrs;
let name = &input.sig.ident;
TokenStream::from(quote! {
#(#attrs),*
fn #name() {
#input
for i in 0..=#count {
let result = std::panic::catch_unwind(|| {
#name();
});
if result.is_ok() {
return;
}
if i == #count {
std::panic::resume_unwind(result.unwrap_err());
}
}
}
})
}
#[proc_macro_attribute]
pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream {
struct Args {
retry: LitInt,
seed: Expr,
seed_fn: Option<Ident>,
}
impl Parse for Args {
fn parse(input: ParseStream) -> syn::Result<Self> {
let mut retry = None;
let mut seed = None;
let mut seed_fn = None;
while !input.is_empty() {
let name = input.parse::<Ident>()?;
match name.to_string().as_str() {
"retry" => {
input.parse::<Token![=]>()?;
retry.replace(input.parse()?);
}
"seed" => {
input.parse::<Token![=]>()?;
seed.replace(input.parse()?);
}
"seed_fn" => {
input.parse::<Token![=]>()?;
seed_fn.replace(input.parse()?);
}
x => {
return Err(Error::new(
name.span(),
format!("Unsupported parameter `{x}`"),
))
}
}
if !input.is_empty() {
input.parse::<Token![,]>()?;
}
}
if retry.is_none() {
retry.replace(LitInt::new("0", Span::mixed_site()));
}
Ok(Self {
retry: retry.expect("`retry` parameter is required"),
seed: seed.expect("`seed` parameter is required"),
seed_fn,
})
}
}
let Args {
retry,
seed,
seed_fn,
} = parse_macro_input!(args as Args);
let seed_fn = seed_fn.iter();
let func = parse_macro_input!(input as ItemFn);
let attrs = &func.attrs;
let name = &func.sig.ident;
TokenStream::from(quote! {
#[test]
#(#attrs),*
fn #name() {
#func
let mut seed = #seed;
for i in 0..=#retry {
let result = std::panic::catch_unwind(|| {
#name(seed);
});
if result.is_ok() {
return;
}
if i == #retry {
std::panic::resume_unwind(result.unwrap_err());
}
#(
seed = #seed_fn();
)*
if i < #retry {
println!("\nRetry {}/{} with a new seed {}", i + 1, #retry, seed);
}
}
}
})
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,448 @@
use std::{
ffi::{CStr, CString},
fs, ptr, slice, str,
};
use tree_sitter::Point;
use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext};
use super::helpers::{
allocations,
fixtures::{get_language, get_language_queries_path},
};
const PYTHON_TAG_QUERY: &str = r#"
(
(function_definition
name: (identifier) @name
body: (block . (expression_statement (string) @doc))) @definition.function
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(function_definition
name: (identifier) @name) @definition.function
(
(class_definition
name: (identifier) @name
body: (block
. (expression_statement (string) @doc))) @definition.class
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(class_definition
name: (identifier) @name) @definition.class
(call
function: (identifier) @name) @reference.call
(call
function: (attribute
attribute: (identifier) @name)) @reference.call
"#;
const JS_TAG_QUERY: &str = r#"
(
(comment)* @doc .
(class_declaration
name: (identifier) @name) @definition.class
(#select-adjacent! @doc @definition.class)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(method_definition
name: (property_identifier) @name) @definition.method
(#select-adjacent! @doc @definition.method)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(function_declaration
name: (identifier) @name) @definition.function
(#select-adjacent! @doc @definition.function)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(call_expression
function: (identifier) @name) @reference.call
"#;
const RUBY_TAG_QUERY: &str = r"
(method
name: (_) @name) @definition.method
(call
method: (identifier) @name) @reference.call
(setter (identifier) @ignore)
((identifier) @name @reference.call
(#is-not? local))
";
#[test]
fn test_tags_python() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = br#"
class Customer:
"""
Data about a customer
"""
def age(self):
'''
Get the customer's age
'''
compute_age(self.id)
}
"#;
let tags = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", "class"),
("age", "function"),
("compute_age", "call"),
]
);
assert_eq!(substr(source, &tags[0].line_range), "class Customer:");
assert_eq!(substr(source, &tags[1].line_range), "def age(self):");
assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer");
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
}
#[test]
fn test_tags_javascript() {
let language = get_language("javascript");
let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap();
let source = br"
// hi
// Data about a customer.
// bla bla bla
class Customer {
/*
* Get the customer's age
*/
getAge() {
}
}
// ok
class Agent {
}
";
let mut tag_context = TagsContext::new();
let tags = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (
substr(source, &t.name_range),
t.span.clone(),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", Point::new(5, 10)..Point::new(5, 18), "class",),
("getAge", Point::new(9, 8)..Point::new(9, 14), "method",),
("Agent", Point::new(15, 10)..Point::new(15, 15), "class",)
]
);
assert_eq!(
tags[0].docs.as_ref().unwrap(),
"Data about a customer.\nbla bla bla"
);
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
assert_eq!(tags[2].docs, None);
}
#[test]
fn test_tags_columns_measured_in_utf16_code_units() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes();
let tag = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.next()
.unwrap()
.unwrap();
assert_eq!(substr(source, &tag.name_range), "hello_α");
assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32));
assert_eq!(tag.utf16_column_range, 9..18);
}
#[test]
fn test_tags_ruby() {
let language = get_language("ruby");
let locals_query =
fs::read_to_string(get_language_queries_path("ruby").join("locals.scm")).unwrap();
let tags_config = TagsConfiguration::new(language, RUBY_TAG_QUERY, &locals_query).unwrap();
let source = strip_whitespace(
8,
"
b = 1
def foo=()
c = 1
# a is a method because it is not in scope
# b is a method because `b` doesn't capture variables from its containing scope
bar a, b, c
[1, 2, 3].each do |a|
# a is a parameter
# b is a method
# c is a variable, because the block captures variables from its containing scope.
baz a, b, c
end
end",
);
let mut tag_context = TagsContext::new();
let tags = tag_context
.generate_tags(&tags_config, source.as_bytes(), None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (
substr(source.as_bytes(), &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id),
(t.span.start.row, t.span.start.column),
))
.collect::<Vec<_>>(),
&[
("foo=", "method", (2, 4)),
("bar", "call", (7, 4)),
("a", "call", (7, 8)),
("b", "call", (7, 11)),
("each", "call", (9, 14)),
("baz", "call", (13, 8)),
("b", "call", (13, 15),),
]
);
}
#[test]
fn test_tags_cancellation() {
use std::sync::atomic::{AtomicUsize, Ordering};
allocations::record(|| {
// Large javascript document
let source = (0..500)
.map(|_| "/* hi */ class A { /* ok */ b() {} }\n")
.collect::<String>();
let cancellation_flag = AtomicUsize::new(0);
let language = get_language("javascript");
let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let tags = tag_context
.generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag))
.unwrap();
for (i, tag) in tags.0.enumerate() {
if i == 150 {
cancellation_flag.store(1, Ordering::SeqCst);
}
if let Err(e) = tag {
assert_eq!(e, Error::Cancelled);
return;
}
}
panic!("Expected to halt tagging with an error");
});
}
#[test]
fn test_invalid_capture() {
let language = get_language("python");
let e = TagsConfiguration::new(language, "(identifier) @method", "")
.expect_err("expected InvalidCapture error");
assert_eq!(e, Error::InvalidCapture("method".to_string()));
}
#[test]
fn test_tags_with_parse_error() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = br"
class Fine: pass
class Bad
";
let (tags, failed) = tag_context
.generate_tags(&tags_config, source, None)
.unwrap();
let newtags = tags.collect::<Result<Vec<_>, _>>().unwrap();
assert!(failed, "syntax error should have been detected");
assert_eq!(
newtags
.iter()
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[("Fine", "class"),]
);
}
#[test]
fn test_tags_via_c_api() {
allocations::record(|| {
let tagger = c::ts_tagger_new();
let buffer = c::ts_tags_buffer_new();
let scope_name = "source.js";
let language = get_language("javascript");
let source_code = strip_whitespace(
12,
"
var a = 1;
// one
// two
// three
function b() {
}
// four
// five
class C extends D {
}
b(a);",
);
let c_scope_name = CString::new(scope_name).unwrap();
let result = unsafe {
c::ts_tagger_add_language(
tagger,
c_scope_name.as_ptr(),
language,
JS_TAG_QUERY.as_ptr(),
ptr::null(),
JS_TAG_QUERY.len() as u32,
0,
)
};
assert_eq!(result, c::TSTagsError::Ok);
let result = unsafe {
c::ts_tagger_tag(
tagger,
c_scope_name.as_ptr(),
source_code.as_ptr(),
source_code.len() as u32,
buffer,
ptr::null(),
)
};
assert_eq!(result, c::TSTagsError::Ok);
let tags = unsafe {
slice::from_raw_parts(
c::ts_tags_buffer_tags(buffer),
c::ts_tags_buffer_tags_len(buffer) as usize,
)
};
let docs = str::from_utf8(unsafe {
slice::from_raw_parts(
c::ts_tags_buffer_docs(buffer).cast::<u8>(),
c::ts_tags_buffer_docs_len(buffer) as usize,
)
})
.unwrap();
let syntax_types = unsafe {
let mut len = 0;
let ptr = c::ts_tagger_syntax_kinds_for_scope_name(
tagger,
c_scope_name.as_ptr(),
&raw mut len,
);
slice::from_raw_parts(ptr, len as usize)
.iter()
.map(|i| CStr::from_ptr(*i).to_str().unwrap())
.collect::<Vec<_>>()
};
assert_eq!(
tags.iter()
.map(|tag| (
syntax_types[tag.syntax_type_id as usize],
&source_code[tag.name_start_byte as usize..tag.name_end_byte as usize],
&source_code[tag.line_start_byte as usize..tag.line_end_byte as usize],
&docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize],
))
.collect::<Vec<_>>(),
&[
("function", "b", "function b() {", "one\ntwo\nthree"),
("class", "C", "class C extends D {", "four\nfive"),
("call", "b", "b(a);", "")
]
);
unsafe {
c::ts_tags_buffer_delete(buffer);
c::ts_tagger_delete(tagger);
}
});
}
fn substr<'a>(source: &'a [u8], range: &std::ops::Range<usize>) -> &'a str {
std::str::from_utf8(&source[range.clone()]).unwrap()
}
fn strip_whitespace(indent: usize, s: &str) -> String {
s.lines()
.skip(1)
.map(|line| &line[line.len().min(indent)..])
.collect::<Vec<_>>()
.join("\n")
}

View file

@ -0,0 +1,70 @@
use tree_sitter::Parser;
use tree_sitter_highlight::{Highlight, Highlighter};
use super::helpers::fixtures::{get_highlight_config, get_language, test_loader};
use crate::{
query_testing::{parse_position_comments, Assertion, Utf8Point},
test_highlight::get_highlight_positions,
};
#[test]
fn test_highlight_test_with_basic_test() {
let language = get_language("javascript");
let config = get_highlight_config(
"javascript",
Some("injections.scm"),
&[
"function".to_string(),
"variable".to_string(),
"keyword".to_string(),
],
);
let source = [
"// hi",
"var abc = function(d) {",
" // ^ function",
" // ^^^ keyword",
" return d + e;",
" // ^ variable",
" // ^ !variable",
"};",
"var y̆y̆y̆y̆ = function() {}",
" // ^ function",
" // ^ keyword",
]
.join("\n");
let assertions =
parse_position_comments(&mut Parser::new(), &language, source.as_bytes()).unwrap();
assert_eq!(
assertions,
&[
Assertion::new(1, 5, 1, false, String::from("function")),
Assertion::new(1, 11, 3, false, String::from("keyword")),
Assertion::new(4, 9, 1, false, String::from("variable")),
Assertion::new(4, 11, 1, true, String::from("variable")),
Assertion::new(8, 5, 1, false, String::from("function")),
Assertion::new(8, 11, 1, false, String::from("keyword")),
]
);
let mut highlighter = Highlighter::new();
let highlight_positions =
get_highlight_positions(test_loader(), &mut highlighter, &config, source.as_bytes())
.unwrap();
assert_eq!(
highlight_positions,
&[
(Utf8Point::new(1, 0), Utf8Point::new(1, 3), Highlight(2)), // "var"
(Utf8Point::new(1, 4), Utf8Point::new(1, 7), Highlight(0)), // "abc"
(Utf8Point::new(1, 10), Utf8Point::new(1, 18), Highlight(2)), // "function"
(Utf8Point::new(1, 19), Utf8Point::new(1, 20), Highlight(1)), // "d"
(Utf8Point::new(4, 2), Utf8Point::new(4, 8), Highlight(2)), // "return"
(Utf8Point::new(4, 9), Utf8Point::new(4, 10), Highlight(1)), // "d"
(Utf8Point::new(4, 13), Utf8Point::new(4, 14), Highlight(1)), // "e"
(Utf8Point::new(8, 0), Utf8Point::new(8, 3), Highlight(2)), // "var"
(Utf8Point::new(8, 4), Utf8Point::new(8, 8), Highlight(0)), // "y̆y̆y̆y̆"
(Utf8Point::new(8, 11), Utf8Point::new(8, 19), Highlight(2)), // "function"
]
);
}

View file

@ -0,0 +1,62 @@
use tree_sitter::Parser;
use tree_sitter_tags::TagsContext;
use super::helpers::fixtures::{get_language, get_tags_config};
use crate::{
query_testing::{parse_position_comments, Assertion, Utf8Point},
test_tags::get_tag_positions,
};
#[test]
fn test_tags_test_with_basic_test() {
let language = get_language("python");
let config = get_tags_config("python");
let source = [
"# hi",
"def abc(d):",
" # <- definition.function",
" e = fgh(d)",
" # ^ reference.call",
" return d(e)",
" # ^ reference.call",
" # ^ !variable.parameter",
"",
]
.join("\n");
let assertions =
parse_position_comments(&mut Parser::new(), &language, source.as_bytes()).unwrap();
assert_eq!(
assertions,
&[
Assertion::new(1, 4, 1, false, String::from("definition.function")),
Assertion::new(3, 9, 1, false, String::from("reference.call")),
Assertion::new(5, 11, 1, false, String::from("reference.call")),
Assertion::new(5, 13, 1, true, String::from("variable.parameter")),
]
);
let mut tags_context = TagsContext::new();
let tag_positions = get_tag_positions(&mut tags_context, &config, source.as_bytes()).unwrap();
assert_eq!(
tag_positions,
&[
(
Utf8Point::new(1, 4),
Utf8Point::new(1, 7),
"definition.function".to_string()
),
(
Utf8Point::new(3, 8),
Utf8Point::new(3, 11),
"reference.call".to_string()
),
(
Utf8Point::new(5, 11),
Utf8Point::new(5, 12),
"reference.call".to_string()
),
]
);
}

View file

@ -0,0 +1,174 @@
use std::{iter, sync::Arc};
use streaming_iterator::StreamingIterator;
use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree};
use crate::tests::helpers::fixtures::get_language;
fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) {
let language = get_language("c");
let mut parser = Parser::new();
parser.set_language(&language).unwrap();
(parser.parse(text, None).unwrap(), language)
}
fn parse_text_with<T, F>(callback: &mut F) -> (Tree, Language)
where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let language = get_language("c");
let mut parser = Parser::new();
parser.set_language(&language).unwrap();
let tree = parser.parse_with_options(callback, None, None).unwrap();
// eprintln!("{}", tree.clone().root_node().to_sexp());
assert_eq!("comment", tree.root_node().child(0).unwrap().kind());
(tree, language)
}
fn tree_query<I: AsRef<[u8]>>(tree: &Tree, text: impl TextProvider<I>, language: &Language) {
let query = Query::new(language, "((comment) @c (#eq? @c \"// comment\"))").unwrap();
let mut cursor = QueryCursor::new();
let mut captures = cursor.captures(&query, tree.root_node(), text);
let (match_, idx) = captures.next().unwrap();
let capture = match_.captures[*idx];
assert_eq!(capture.index as usize, *idx);
assert_eq!("comment", capture.node.kind());
}
fn check_parsing<I: AsRef<[u8]>>(
parser_text: impl AsRef<[u8]>,
text_provider: impl TextProvider<I>,
) {
let (tree, language) = parse_text(parser_text);
tree_query(&tree, text_provider, &language);
}
fn check_parsing_callback<T, F, I: AsRef<[u8]>>(
parser_callback: &mut F,
text_provider: impl TextProvider<I>,
) where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let (tree, language) = parse_text_with(parser_callback);
tree_query(&tree, text_provider, &language);
}
#[test]
fn test_text_provider_for_str_slice() {
let text: &str = "// comment";
check_parsing(text, text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
}
#[test]
fn test_text_provider_for_string() {
let text: String = "// comment".to_owned();
check_parsing(text.clone(), text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<[u8]>>::as_ref(&text), text.as_bytes());
}
#[test]
fn test_text_provider_for_box_of_str_slice() {
let text = "// comment".to_owned().into_boxed_str();
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<str>>::as_ref(&text), text.as_bytes());
check_parsing(text.as_ref(), text.as_ref().as_bytes());
check_parsing(text.as_ref(), text.as_bytes());
}
#[test]
fn test_text_provider_for_box_of_bytes_slice() {
let text = "// comment".to_owned().into_boxed_str().into_boxed_bytes();
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.as_ref(), &*text);
check_parsing(&*text, &*text);
}
#[test]
fn test_text_provider_for_vec_of_bytes() {
let text = "// comment".to_owned().into_bytes();
check_parsing(&*text, &*text);
}
#[test]
fn test_text_provider_for_arc_of_bytes_slice() {
let text: Arc<[u8]> = Arc::from("// comment".to_owned().into_bytes());
check_parsing(&*text, &*text);
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.clone(), text.as_ref());
}
#[test]
fn test_text_provider_callback_with_str_slice() {
let text: &str = "// comment";
check_parsing(text, |_node: Node<'_>| iter::once(text));
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then_some(text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| iter::once(text),
);
}
#[test]
fn test_text_provider_callback_with_owned_string_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then_some(text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: String = text.to_owned();
iter::once(slice)
},
);
}
#[test]
fn test_text_provider_callback_with_owned_bytes_vec_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then_some(text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice = text.to_owned().into_bytes();
iter::once(slice)
},
);
}
#[test]
fn test_text_provider_callback_with_owned_arc_of_bytes_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then_some(text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: Arc<[u8]> = text.to_owned().into_bytes().into();
iter::once(slice)
},
);
}

View file

@ -0,0 +1,797 @@
use std::str;
use tree_sitter::{InputEdit, Parser, Point, Range, Tree};
use super::helpers::fixtures::get_language;
use crate::{
fuzz::edits::Edit,
parse::perform_edit,
tests::{helpers::fixtures::get_test_fixture_language, invert_edit},
};
#[test]
fn test_tree_edit() {
let mut parser = Parser::new();
parser.set_language(&get_language("javascript")).unwrap();
let tree = parser.parse(" abc !== def", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(program (expression_statement (binary_expression left: (identifier) right: (identifier))))"
);
// edit entirely within the tree's padding:
// resize the padding of the tree and its leftmost descendants.
{
let mut tree = tree.clone();
tree.edit(&InputEdit {
start_byte: 1,
old_end_byte: 1,
new_end_byte: 2,
start_position: Point::new(0, 1),
old_end_position: Point::new(0, 1),
new_end_position: Point::new(0, 2),
});
let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
let child1 = expr.child(0).unwrap();
let child2 = expr.child(1).unwrap();
assert!(expr.has_changes());
assert_eq!(expr.start_byte(), 3);
assert_eq!(expr.end_byte(), 16);
assert!(child1.has_changes());
assert_eq!(child1.start_byte(), 3);
assert_eq!(child1.end_byte(), 6);
assert!(!child2.has_changes());
assert_eq!(child2.start_byte(), 8);
assert_eq!(child2.end_byte(), 11);
}
// edit starting in the tree's padding but extending into its content:
// shrink the content to compensate for the expanded padding.
{
let mut tree = tree.clone();
tree.edit(&InputEdit {
start_byte: 1,
old_end_byte: 4,
new_end_byte: 5,
start_position: Point::new(0, 1),
old_end_position: Point::new(0, 5),
new_end_position: Point::new(0, 5),
});
let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
let child1 = expr.child(0).unwrap();
let child2 = expr.child(1).unwrap();
assert!(expr.has_changes());
assert_eq!(expr.start_byte(), 5);
assert_eq!(expr.end_byte(), 16);
assert!(child1.has_changes());
assert_eq!(child1.start_byte(), 5);
assert_eq!(child1.end_byte(), 6);
assert!(!child2.has_changes());
assert_eq!(child2.start_byte(), 8);
assert_eq!(child2.end_byte(), 11);
}
// insertion at the edge of a tree's padding:
// expand the tree's padding.
{
let mut tree = tree.clone();
tree.edit(&InputEdit {
start_byte: 2,
old_end_byte: 2,
new_end_byte: 4,
start_position: Point::new(0, 2),
old_end_position: Point::new(0, 2),
new_end_position: Point::new(0, 4),
});
let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
let child1 = expr.child(0).unwrap();
let child2 = expr.child(1).unwrap();
assert!(expr.has_changes());
assert_eq!(expr.byte_range(), 4..17);
assert!(child1.has_changes());
assert_eq!(child1.byte_range(), 4..7);
assert!(!child2.has_changes());
assert_eq!(child2.byte_range(), 9..12);
}
// replacement starting at the edge of the tree's padding:
// resize the content and not the padding.
{
let mut tree = tree.clone();
tree.edit(&InputEdit {
start_byte: 2,
old_end_byte: 2,
new_end_byte: 4,
start_position: Point::new(0, 2),
old_end_position: Point::new(0, 2),
new_end_position: Point::new(0, 4),
});
let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
let child1 = expr.child(0).unwrap();
let child2 = expr.child(1).unwrap();
assert!(expr.has_changes());
assert_eq!(expr.byte_range(), 4..17);
assert!(child1.has_changes());
assert_eq!(child1.byte_range(), 4..7);
assert!(!child2.has_changes());
assert_eq!(child2.byte_range(), 9..12);
}
// deletion that spans more than one child node:
// shrink subsequent child nodes.
{
let mut tree = tree.clone();
tree.edit(&InputEdit {
start_byte: 1,
old_end_byte: 11,
new_end_byte: 4,
start_position: Point::new(0, 1),
old_end_position: Point::new(0, 11),
new_end_position: Point::new(0, 4),
});
let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
let child1 = expr.child(0).unwrap();
let child2 = expr.child(1).unwrap();
let child3 = expr.child(2).unwrap();
assert!(expr.has_changes());
assert_eq!(expr.byte_range(), 4..8);
assert!(child1.has_changes());
assert_eq!(child1.byte_range(), 4..4);
assert!(child2.has_changes());
assert_eq!(child2.byte_range(), 4..4);
assert!(child3.has_changes());
assert_eq!(child3.byte_range(), 5..8);
}
// insertion at the end of the tree:
// extend the tree's content.
{
let mut tree = tree.clone();
tree.edit(&InputEdit {
start_byte: 15,
old_end_byte: 15,
new_end_byte: 16,
start_position: Point::new(0, 15),
old_end_position: Point::new(0, 15),
new_end_position: Point::new(0, 16),
});
let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
let child1 = expr.child(0).unwrap();
let child2 = expr.child(1).unwrap();
let child3 = expr.child(2).unwrap();
assert!(expr.has_changes());
assert_eq!(expr.byte_range(), 2..16);
assert!(!child1.has_changes());
assert_eq!(child1.byte_range(), 2..5);
assert!(!child2.has_changes());
assert_eq!(child2.byte_range(), 7..10);
assert!(child3.has_changes());
assert_eq!(child3.byte_range(), 12..16);
}
// replacement that starts within a token and extends beyond the end of the tree:
// resize the token and empty out any subsequent child nodes.
{
let mut tree = tree.clone();
tree.edit(&InputEdit {
start_byte: 3,
old_end_byte: 90,
new_end_byte: 4,
start_position: Point::new(0, 3),
old_end_position: Point::new(0, 90),
new_end_position: Point::new(0, 4),
});
let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
let child1 = expr.child(0).unwrap();
let child2 = expr.child(1).unwrap();
let child3 = expr.child(2).unwrap();
assert_eq!(expr.byte_range(), 2..4);
assert!(expr.has_changes());
assert_eq!(child1.byte_range(), 2..4);
assert!(child1.has_changes());
assert_eq!(child2.byte_range(), 4..4);
assert!(child2.has_changes());
assert_eq!(child3.byte_range(), 4..4);
assert!(child3.has_changes());
}
// replacement that starts in whitespace and extends beyond the end of the tree:
// shift the token's start position and empty out its content.
{
let mut tree = tree;
tree.edit(&InputEdit {
start_byte: 6,
old_end_byte: 90,
new_end_byte: 8,
start_position: Point::new(0, 6),
old_end_position: Point::new(0, 90),
new_end_position: Point::new(0, 8),
});
let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
let child1 = expr.child(0).unwrap();
let child2 = expr.child(1).unwrap();
let child3 = expr.child(2).unwrap();
assert_eq!(expr.byte_range(), 2..8);
assert!(expr.has_changes());
assert_eq!(child1.byte_range(), 2..5);
assert!(!child1.has_changes());
assert_eq!(child2.byte_range(), 8..8);
assert!(child2.has_changes());
assert_eq!(child3.byte_range(), 8..8);
assert!(child3.has_changes());
}
}
#[test]
fn test_tree_edit_with_included_ranges() {
let mut parser = Parser::new();
parser.set_language(&get_language("html")).unwrap();
let source = "<div><% if a %><span>a</span><% else %><span>b</span><% end %></div>";
let ranges = [0..5, 15..29, 39..53, 62..68];
parser
.set_included_ranges(
&ranges
.iter()
.map(|range| Range {
start_byte: range.start,
end_byte: range.end,
start_point: Point::new(0, range.start),
end_point: Point::new(0, range.end),
})
.collect::<Vec<_>>(),
)
.unwrap();
let mut tree = parser.parse(source, None).unwrap();
tree.edit(&InputEdit {
start_byte: 29,
old_end_byte: 53,
new_end_byte: 29,
start_position: Point::new(0, 29),
old_end_position: Point::new(0, 53),
new_end_position: Point::new(0, 29),
});
assert_eq!(
tree.included_ranges(),
&[
Range {
start_byte: 0,
end_byte: 5,
start_point: Point::new(0, 0),
end_point: Point::new(0, 5),
},
Range {
start_byte: 15,
end_byte: 29,
start_point: Point::new(0, 15),
end_point: Point::new(0, 29),
},
Range {
start_byte: 29,
end_byte: 29,
start_point: Point::new(0, 29),
end_point: Point::new(0, 29),
},
Range {
start_byte: 38,
end_byte: 44,
start_point: Point::new(0, 38),
end_point: Point::new(0, 44),
}
]
);
}
#[test]
fn test_tree_cursor() {
let mut parser = Parser::new();
parser.set_language(&get_language("rust")).unwrap();
let tree = parser
.parse(
"
struct Stuff {
a: A,
b: Option<B>,
}
",
None,
)
.unwrap();
let mut cursor = tree.walk();
assert_eq!(cursor.node().kind(), "source_file");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "struct_item");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "struct");
assert!(!cursor.node().is_named());
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "type_identifier");
assert!(cursor.node().is_named());
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "field_declaration_list");
assert!(cursor.node().is_named());
assert!(cursor.goto_last_child());
assert_eq!(cursor.node().kind(), "}");
assert!(!cursor.node().is_named());
assert_eq!(cursor.node().start_position(), Point { row: 4, column: 16 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), ",");
assert!(!cursor.node().is_named());
assert_eq!(cursor.node().start_position(), Point { row: 3, column: 32 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "field_declaration");
assert!(cursor.node().is_named());
assert_eq!(cursor.node().start_position(), Point { row: 3, column: 20 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), ",");
assert!(!cursor.node().is_named());
assert_eq!(cursor.node().start_position(), Point { row: 2, column: 24 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "field_declaration");
assert!(cursor.node().is_named());
assert_eq!(cursor.node().start_position(), Point { row: 2, column: 20 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "{");
assert!(!cursor.node().is_named());
assert_eq!(cursor.node().start_position(), Point { row: 1, column: 29 });
let mut copy = tree.walk();
copy.reset_to(&cursor);
assert_eq!(copy.node().kind(), "{");
assert!(!copy.node().is_named());
assert!(copy.goto_parent());
assert_eq!(copy.node().kind(), "field_declaration_list");
assert!(copy.node().is_named());
assert!(copy.goto_parent());
assert_eq!(copy.node().kind(), "struct_item");
}
#[test]
fn test_tree_cursor_previous_sibling_with_aliases() {
let mut parser = Parser::new();
parser
.set_language(&get_test_fixture_language("aliases_in_root"))
.unwrap();
let text = "# comment\n# \nfoo foo";
let tree = parser.parse(text, None).unwrap();
let mut cursor = tree.walk();
assert_eq!(cursor.node().kind(), "document");
cursor.goto_first_child();
assert_eq!(cursor.node().kind(), "comment");
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "comment");
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "bar");
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "comment");
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "comment");
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "comment");
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "bar");
}
#[test]
fn test_tree_cursor_previous_sibling() {
let mut parser = Parser::new();
parser.set_language(&get_language("rust")).unwrap();
let text = "
// Hi there
// This is fun!
// Another one!
";
let tree = parser.parse(text, None).unwrap();
let mut cursor = tree.walk();
assert_eq!(cursor.node().kind(), "source_file");
assert!(cursor.goto_last_child());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// Another one!"
);
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// This is fun!"
);
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// Hi there"
);
assert!(!cursor.goto_previous_sibling());
}
#[test]
fn test_tree_cursor_fields() {
let mut parser = Parser::new();
parser.set_language(&get_language("javascript")).unwrap();
let tree = parser
.parse("function /*1*/ bar /*2*/ () {}", None)
.unwrap();
let mut cursor = tree.walk();
assert_eq!(cursor.node().kind(), "program");
cursor.goto_first_child();
assert_eq!(cursor.node().kind(), "function_declaration");
assert_eq!(cursor.field_name(), None);
cursor.goto_first_child();
assert_eq!(cursor.node().kind(), "function");
assert_eq!(cursor.field_name(), None);
cursor.goto_next_sibling();
assert_eq!(cursor.node().kind(), "comment");
assert_eq!(cursor.field_name(), None);
cursor.goto_next_sibling();
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(cursor.field_name(), Some("name"));
cursor.goto_next_sibling();
assert_eq!(cursor.node().kind(), "comment");
assert_eq!(cursor.field_name(), None);
cursor.goto_next_sibling();
assert_eq!(cursor.node().kind(), "formal_parameters");
assert_eq!(cursor.field_name(), Some("parameters"));
}
#[test]
fn test_tree_cursor_child_for_point() {
let mut parser = Parser::new();
parser.set_language(&get_language("javascript")).unwrap();
let source = &"
[
one,
{
two: tree
},
four, five, six
];"[1..];
let tree = parser.parse(source, None).unwrap();
let mut c = tree.walk();
assert_eq!(c.node().kind(), "program");
assert_eq!(c.goto_first_child_for_point(Point::new(7, 0)), None);
assert_eq!(c.goto_first_child_for_point(Point::new(6, 7)), None);
assert_eq!(c.node().kind(), "program");
// descend to expression statement
assert_eq!(c.goto_first_child_for_point(Point::new(6, 5)), Some(0));
assert_eq!(c.node().kind(), "expression_statement");
// step into ';' and back up
assert_eq!(c.goto_first_child_for_point(Point::new(7, 0)), None);
assert_eq!(c.goto_first_child_for_point(Point::new(6, 6)), None);
assert_eq!(c.goto_first_child_for_point(Point::new(6, 5)), Some(1));
assert_eq!(
(c.node().kind(), c.node().start_position()),
(";", Point::new(6, 5))
);
assert!(c.goto_parent());
// descend into array
assert_eq!(c.goto_first_child_for_point(Point::new(6, 4)), Some(0));
assert_eq!(
(c.node().kind(), c.node().start_position()),
("array", Point::new(0, 4))
);
// step into '[' and back up
assert_eq!(c.goto_first_child_for_point(Point::new(0, 4)), Some(0));
assert_eq!(
(c.node().kind(), c.node().start_position()),
("[", Point::new(0, 4))
);
assert!(c.goto_parent());
// step into identifier 'one' and back up
assert_eq!(c.goto_first_child_for_point(Point::new(1, 0)), Some(1));
assert_eq!(
(c.node().kind(), c.node().start_position()),
("identifier", Point::new(1, 8))
);
assert!(c.goto_parent());
assert_eq!(c.goto_first_child_for_point(Point::new(1, 10)), Some(1));
assert_eq!(
(c.node().kind(), c.node().start_position()),
("identifier", Point::new(1, 8))
);
assert!(c.goto_parent());
// step into first ',' and back up
assert_eq!(c.goto_first_child_for_point(Point::new(1, 11)), Some(2));
assert_eq!(
(c.node().kind(), c.node().start_position()),
(",", Point::new(1, 11))
);
assert!(c.goto_parent());
// step into identifier 'four' and back up
assert_eq!(c.goto_first_child_for_point(Point::new(5, 0)), Some(5));
assert_eq!(
(c.node().kind(), c.node().start_position()),
("identifier", Point::new(5, 8))
);
assert!(c.goto_parent());
assert_eq!(c.goto_first_child_for_point(Point::new(5, 0)), Some(5));
assert_eq!(
(c.node().kind(), c.node().start_position()),
("identifier", Point::new(5, 8))
);
assert!(c.goto_parent());
// step into ']' and back up
assert_eq!(c.goto_first_child_for_point(Point::new(6, 0)), Some(10));
assert_eq!(
(c.node().kind(), c.node().start_position()),
("]", Point::new(6, 4))
);
assert!(c.goto_parent());
assert_eq!(c.goto_first_child_for_point(Point::new(6, 0)), Some(10));
assert_eq!(
(c.node().kind(), c.node().start_position()),
("]", Point::new(6, 4))
);
assert!(c.goto_parent());
// descend into object
assert_eq!(c.goto_first_child_for_point(Point::new(2, 0)), Some(3));
assert_eq!(
(c.node().kind(), c.node().start_position()),
("object", Point::new(2, 8))
);
}
#[test]
fn test_tree_node_equality() {
let mut parser = Parser::new();
parser.set_language(&get_language("rust")).unwrap();
let tree = parser.parse("struct A {}", None).unwrap();
let node1 = tree.root_node();
let node2 = tree.root_node();
assert_eq!(node1, node2);
assert_eq!(node1.child(0).unwrap(), node2.child(0).unwrap());
assert_ne!(node1.child(0).unwrap(), node2);
}
#[test]
fn test_get_changed_ranges() {
let source_code = b"{a: null};\n".to_vec();
let mut parser = Parser::new();
parser.set_language(&get_language("javascript")).unwrap();
let tree = parser.parse(&source_code, None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(program (expression_statement (object (pair key: (property_identifier) value: (null)))))"
);
// Updating one token
{
let mut tree = tree.clone();
let mut source_code = source_code.clone();
// Replace `null` with `nothing` - that token has changed syntax
let edit = Edit {
position: index_of(&source_code, "ull"),
deleted_length: 3,
inserted_text: b"othing".to_vec(),
};
let inverse_edit = invert_edit(&source_code, &edit);
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit);
assert_eq!(ranges, vec![range_of(&source_code, "nothing")]);
// Replace `nothing` with `null` - that token has changed syntax
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit);
assert_eq!(ranges, vec![range_of(&source_code, "null")]);
}
// Changing only leading whitespace
{
let mut tree = tree.clone();
let mut source_code = source_code.clone();
// Insert leading newline - no changed ranges
let edit = Edit {
position: 0,
deleted_length: 0,
inserted_text: b"\n".to_vec(),
};
let inverse_edit = invert_edit(&source_code, &edit);
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit);
assert_eq!(ranges, vec![]);
// Remove leading newline - no changed ranges
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit);
assert_eq!(ranges, vec![]);
}
// Inserting elements
{
let mut tree = tree.clone();
let mut source_code = source_code.clone();
// Insert a key-value pair before the `}` - those tokens are changed
let edit1 = Edit {
position: index_of(&source_code, "}"),
deleted_length: 0,
inserted_text: b", b: false".to_vec(),
};
let inverse_edit1 = invert_edit(&source_code, &edit1);
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1);
assert_eq!(ranges, vec![range_of(&source_code, ", b: false")]);
let edit2 = Edit {
position: index_of(&source_code, ", b"),
deleted_length: 0,
inserted_text: b", c: 1".to_vec(),
};
let inverse_edit2 = invert_edit(&source_code, &edit2);
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit2);
assert_eq!(ranges, vec![range_of(&source_code, ", c: 1")]);
// Remove the middle pair
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit2);
assert_eq!(ranges, vec![]);
// Remove the second pair
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1);
assert_eq!(ranges, vec![]);
}
// Wrapping elements in larger expressions
{
let mut tree = tree;
let mut source_code = source_code.clone();
// Replace `null` with the binary expression `b === null`
let edit1 = Edit {
position: index_of(&source_code, "null"),
deleted_length: 0,
inserted_text: b"b === ".to_vec(),
};
let inverse_edit1 = invert_edit(&source_code, &edit1);
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1);
assert_eq!(ranges, vec![range_of(&source_code, "b === null")]);
// Undo
let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1);
assert_eq!(ranges, vec![range_of(&source_code, "null")]);
}
}
#[test]
fn test_consistency_with_mid_codepoint_edit() {
let mut parser = Parser::new();
parser.set_language(&get_language("php/php")).unwrap();
let mut source_code =
b"\n<?php\n\n<<<'\xE5\xAD\x97\xE6\xBC\xA2'\n T\n\xE5\xAD\x97\xE6\xBC\xA2;".to_vec();
let mut tree = parser.parse(&source_code, None).unwrap();
let edit = Edit {
position: 17,
deleted_length: 0,
inserted_text: vec![46],
};
perform_edit(&mut tree, &mut source_code, &edit).unwrap();
let mut tree2 = parser.parse(&source_code, Some(&tree)).unwrap();
let inverted = invert_edit(&source_code, &edit);
perform_edit(&mut tree2, &mut source_code, &inverted).unwrap();
let tree3 = parser.parse(&source_code, Some(&tree2)).unwrap();
assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp());
}
#[test]
fn test_tree_cursor_on_aliased_root_with_extra_child() {
let source = r"
fn main() {
C/* hi */::<D>::E;
}
";
let mut parser = Parser::new();
parser.set_language(&get_language("rust")).unwrap();
let tree = parser.parse(source, None).unwrap();
let function = tree.root_node().child(0).unwrap();
let block = function.child(3).unwrap();
let expression_statement = block.child(1).unwrap();
let scoped_identifier = expression_statement.child(0).unwrap();
let generic_type = scoped_identifier.child(0).unwrap();
assert_eq!(generic_type.kind(), "generic_type");
let mut cursor = generic_type.walk();
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "type_identifier");
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "block_comment");
}
fn index_of(text: &[u8], substring: &str) -> usize {
str::from_utf8(text).unwrap().find(substring).unwrap()
}
fn range_of(text: &[u8], substring: &str) -> Range {
let start_byte = index_of(text, substring);
let end_byte = start_byte + substring.len();
Range {
start_byte,
end_byte,
start_point: Point::new(0, start_byte),
end_point: Point::new(0, end_byte),
}
}
fn get_changed_ranges(
parser: &mut Parser,
tree: &mut Tree,
source_code: &mut Vec<u8>,
edit: &Edit,
) -> Vec<Range> {
perform_edit(tree, source_code, edit).unwrap();
let new_tree = parser.parse(source_code, Some(tree)).unwrap();
let result = tree.changed_ranges(&new_tree).collect();
*tree = new_tree;
result
}

View file

@ -0,0 +1,273 @@
use std::{fs, sync::LazyLock};
use streaming_iterator::StreamingIterator;
use tree_sitter::{
wasmtime::Engine, Parser, Query, QueryCursor, WasmError, WasmErrorKind, WasmStore,
};
use crate::tests::helpers::{allocations, fixtures::WASM_DIR};
static ENGINE: LazyLock<Engine> = LazyLock::new(Engine::default);
#[test]
fn test_wasm_stdlib_symbols() {
let symbols = tree_sitter::wasm_stdlib_symbols().collect::<Vec<_>>();
assert_eq!(
symbols,
{
let mut symbols = symbols.clone();
symbols.sort_unstable();
symbols
},
"symbols aren't sorted"
);
assert!(symbols.contains(&"malloc"));
assert!(symbols.contains(&"free"));
assert!(symbols.contains(&"memset"));
assert!(symbols.contains(&"memcpy"));
}
#[test]
fn test_load_wasm_ruby_language() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let wasm = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
let language = store.load_language("ruby", &wasm).unwrap();
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser.parse("class A; end", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(program (class name: (constant)))"
);
});
}
#[test]
fn test_load_wasm_html_language() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let wasm = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
let language = store.load_language("html", &wasm).unwrap();
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser
.parse("<div><span></span><p></p></div>", None)
.unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(document (element (start_tag (tag_name)) (element (start_tag (tag_name)) (end_tag (tag_name))) (element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))))"
);
});
}
#[test]
fn test_load_wasm_rust_language() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
let language = store.load_language("rust", &wasm).unwrap();
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser.parse("fn main() {}", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))");
});
}
#[test]
fn test_load_wasm_javascript_language() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let wasm = fs::read(WASM_DIR.join("tree-sitter-javascript.wasm")).unwrap();
let language = store.load_language("javascript", &wasm).unwrap();
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser.parse("const a = b\nconst c = d", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(program (lexical_declaration (variable_declarator name: (identifier) value: (identifier))) (lexical_declaration (variable_declarator name: (identifier) value: (identifier))))");
});
}
#[test]
fn test_load_multiple_wasm_languages() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let wasm_cpp = fs::read(WASM_DIR.join("tree-sitter-cpp.wasm")).unwrap();
let wasm_rs = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
let wasm_rb = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap();
let language_rust = store.load_language("rust", &wasm_rs).unwrap();
let language_cpp = store.load_language("cpp", &wasm_cpp).unwrap();
let language_ruby = store.load_language("ruby", &wasm_rb).unwrap();
let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap();
parser.set_wasm_store(store).unwrap();
let mut parser2 = Parser::new();
parser2
.set_wasm_store(WasmStore::new(&ENGINE).unwrap())
.unwrap();
let mut query_cursor = QueryCursor::new();
// First, parse with the store that originally loaded the languages.
// Then parse with a new parser and wasm store, so that the languages
// are added one-by-one, in between parses.
for mut parser in [parser, parser2] {
for _ in 0..2 {
let query_rust = Query::new(&language_rust, "(const_item) @foo").unwrap();
let query_typescript =
Query::new(&language_typescript, "(class_declaration) @foo").unwrap();
parser.set_language(&language_cpp).unwrap();
let tree = parser.parse("A<B> c = d();", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(translation_unit (declaration type: (template_type name: (type_identifier) arguments: (template_argument_list (type_descriptor type: (type_identifier)))) declarator: (init_declarator declarator: (identifier) value: (call_expression function: (identifier) arguments: (argument_list)))))"
);
parser.set_language(&language_rust).unwrap();
let source = "const A: B = c();";
let tree = parser.parse(source, None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(source_file (const_item name: (identifier) type: (type_identifier) value: (call_expression function: (identifier) arguments: (arguments))))"
);
assert_eq!(
query_cursor
.matches(&query_rust, tree.root_node(), source.as_bytes())
.count(),
1
);
parser.set_language(&language_ruby).unwrap();
let tree = parser.parse("class A; end", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(program (class name: (constant)))"
);
parser.set_language(&language_typescript).unwrap();
let tree = parser.parse("class A {}", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(program (class_declaration name: (type_identifier) body: (class_body)))"
);
assert_eq!(
query_cursor
.matches(&query_typescript, tree.root_node(), source.as_bytes())
.count(),
1
);
}
}
});
}
#[test]
fn test_load_and_reload_wasm_language() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let wasm_rust = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap();
let language_rust = store.load_language("rust", &wasm_rust).unwrap();
let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap();
assert_eq!(store.language_count(), 2);
// When a language is dropped, stores can release their instances of that language.
drop(language_rust);
assert_eq!(store.language_count(), 1);
let language_rust = store.load_language("rust", &wasm_rust).unwrap();
assert_eq!(store.language_count(), 2);
drop(language_rust);
drop(language_typescript);
assert_eq!(store.language_count(), 0);
});
}
#[test]
fn test_reset_wasm_store() {
allocations::record(|| {
let mut language_store = WasmStore::new(&ENGINE).unwrap();
let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
let language = language_store.load_language("rust", &wasm).unwrap();
let mut parser = Parser::new();
let parser_store = WasmStore::new(&ENGINE).unwrap();
parser.set_wasm_store(parser_store).unwrap();
parser.set_language(&language).unwrap();
let tree = parser.parse("fn main() {}", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))");
let parser_store = WasmStore::new(&ENGINE).unwrap();
parser.set_wasm_store(parser_store).unwrap();
let tree = parser.parse("fn main() {}", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))");
});
}
#[test]
fn test_load_wasm_errors() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
let bad_wasm = &wasm[1..];
assert_eq!(
store.load_language("rust", bad_wasm).unwrap_err(),
WasmError {
kind: WasmErrorKind::Parse,
message: "failed to parse dylink section of wasm module".into(),
}
);
assert_eq!(
store.load_language("not_rust", &wasm).unwrap_err(),
WasmError {
kind: WasmErrorKind::Instantiate,
message: "module did not contain language function: tree_sitter_not_rust".into(),
}
);
let mut bad_wasm = wasm.clone();
bad_wasm[300..500].iter_mut().for_each(|b| *b = 0);
assert_eq!(
store.load_language("rust", &bad_wasm).unwrap_err().kind,
WasmErrorKind::Compile,
);
});
}
#[test]
fn test_wasm_oom() {
allocations::record(|| {
let mut store = WasmStore::new(&ENGINE).unwrap();
let mut parser = Parser::new();
let wasm = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
let language = store.load_language("html", &wasm).unwrap();
parser.set_wasm_store(store).unwrap();
parser.set_language(&language).unwrap();
let tag_name = "a-b".repeat(2 * 1024 * 1024);
let code = format!("<{tag_name}>hello world</{tag_name}>");
assert!(parser.parse(&code, None).is_none());
let tag_name = "a-b".repeat(20);
let code = format!("<{tag_name}>hello world</{tag_name}>");
parser.set_language(&language).unwrap();
let tree = parser.parse(&code, None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))"
);
});
}