Reorganize rust crates into a flat crates directory, simplify some CI steps (#4496)

* Move all rust crates (except lib) into crates dir, w/o nesting

* Remove stale path from .gitattributes

* Rename lib.rs files for easier navigation

* Rename mod.rs file for easier navigation

* Fix emscripten-version path

* Fix fixtures dir paths

* Use the default rustfmt settings

* Don't use nightly on CI
This commit is contained in:
Max Brunsfeld 2025-06-06 14:25:37 -07:00 committed by GitHub
parent a6e530b33d
commit 0fdf569571
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
163 changed files with 69 additions and 89 deletions

394
crates/cli/src/fuzz.rs Normal file
View file

@ -0,0 +1,394 @@
use std::{
collections::HashMap,
env, fs,
path::{Path, PathBuf},
sync::LazyLock,
};
use rand::Rng;
use regex::Regex;
use tree_sitter::{Language, Parser};
pub mod allocations;
pub mod corpus_test;
pub mod edits;
pub mod random;
pub mod scope_sequence;
use crate::{
fuzz::{
corpus_test::{
check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
},
edits::{get_random_edit, invert_edit},
random::Rand,
},
parse::perform_edit,
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
};
// Fuzzer configuration, read lazily from environment variables so behavior
// can be tuned without recompiling.

/// Enables parser logging when `TREE_SITTER_LOG` is set.
pub static LOG_ENABLED: LazyLock<bool> = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok());

/// Enables graph (HTML) logging when `TREE_SITTER_LOG_GRAPHS` is set.
pub static LOG_GRAPH_ENABLED: LazyLock<bool> =
    LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok());

/// Restricts fuzzing to a single language via `TREE_SITTER_LANGUAGE`.
pub static LANGUAGE_FILTER: LazyLock<Option<String>> =
    LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok());

/// Regex of example names to include (`TREE_SITTER_EXAMPLE_INCLUDE`).
pub static EXAMPLE_INCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"));

/// Regex of example names to exclude (`TREE_SITTER_EXAMPLE_EXCLUDE`).
pub static EXAMPLE_EXCLUDE: LazyLock<Option<Regex>> =
    LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"));

/// Initial random seed: `TREE_SITTER_SEED`, or a freshly generated value.
pub static START_SEED: LazyLock<usize> = LazyLock::new(new_seed);

/// Bound on random edits per trial (`TREE_SITTER_EDITS`, default 3).
pub static EDIT_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(3));

/// Number of edit/undo trials per test (`TREE_SITTER_ITERATIONS`, default 10).
pub static ITERATION_COUNT: LazyLock<usize> =
    LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10));
/// Reads the environment variable `name` and parses it as a `usize`,
/// yielding `None` when the variable is unset or not a valid integer.
fn int_env_var(name: &'static str) -> Option<usize> {
    match env::var(name) {
        Ok(value) => value.parse().ok(),
        Err(_) => None,
    }
}
/// Reads the environment variable `name` and compiles its value as a regex,
/// yielding `None` when the variable is unset or the pattern is invalid.
fn regex_env_var(name: &'static str) -> Option<Regex> {
    let pattern = env::var(name).ok()?;
    Regex::new(&pattern).ok()
}
/// Returns the fuzzing seed: an explicit `TREE_SITTER_SEED` from the
/// environment when present, otherwise a random seed that is printed to
/// stderr so failing runs can be reproduced.
#[must_use]
pub fn new_seed() -> usize {
    if let Some(seed) = int_env_var("TREE_SITTER_SEED") {
        seed
    } else {
        let seed = rand::thread_rng().gen::<usize>();
        eprintln!("Seed: {seed}");
        seed
    }
}
/// Runtime options for `fuzz_language_corpus`.
pub struct FuzzOptions {
    /// Test names to skip; `take`n by the fuzzer, which panics on entries
    /// that never match any test.
    pub skipped: Option<Vec<String>>,
    /// Optional subdirectory of the grammar dir that contains `test/corpus`.
    pub subdir: Option<PathBuf>,
    /// Number of random edits per trial.
    /// NOTE(review): `fuzz_language_corpus` currently reads the env-derived
    /// `EDIT_COUNT` static rather than this field — confirm intended.
    pub edits: usize,
    /// Number of edit/undo trials per test.
    pub iterations: usize,
    /// Only run tests whose full name matches this pattern.
    pub include: Option<Regex>,
    /// Skip tests whose full name matches (ignored when `include` is set).
    pub exclude: Option<Regex>,
    /// Dump parse graphs during fuzzing.
    pub log_graphs: bool,
    /// Enable parser logging.
    /// NOTE(review): not read in the code visible here — confirm usage.
    pub log: bool,
}
/// Fuzzes `language` against its corpus tests.
///
/// For every corpus example this (1) parses it from scratch and compares the
/// tree against the expected S-expression, then (2) for `options.iterations`
/// trials applies a random series of edits, reparses incrementally, undoes
/// the edits, reparses again, and checks that the tree returns to the
/// expected shape. All parsing runs under `allocations::record` so leaked
/// allocations are caught.
///
/// Panics at the end if any name in `options.skipped` never matched a test,
/// so stale skip definitions get cleaned up.
pub fn fuzz_language_corpus(
    language: &Language,
    language_name: &str,
    start_seed: usize,
    grammar_dir: &Path,
    options: &mut FuzzOptions,
) {
    // Keeps only tests that apply to `language_name`; groups that become
    // empty are dropped entirely.
    fn retain(entry: &mut TestEntry, language_name: &str) -> bool {
        match entry {
            TestEntry::Example { attributes, .. } => {
                // NOTE(review): assumes `attributes.languages` is non-empty;
                // an empty first entry appears to mean "any language" —
                // confirm against the test parser.
                attributes.languages[0].is_empty()
                    || attributes
                        .languages
                        .iter()
                        .any(|lang| lang.as_ref() == language_name)
            }
            TestEntry::Group {
                ref mut children, ..
            } => {
                children.retain_mut(|child| retain(child, language_name));
                !children.is_empty()
            }
        }
    }

    let subdir = options.subdir.take().unwrap_or_default();

    let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus");
    if !corpus_dir.exists() || !corpus_dir.is_dir() {
        eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file.");
        return;
    }
    if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 {
        eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory.");
        return;
    }

    let mut main_tests = parse_tests(&corpus_dir).unwrap();
    match main_tests {
        TestEntry::Group {
            ref mut children, ..
        } => {
            children.retain_mut(|child| retain(child, language_name));
        }
        // Parsing a corpus directory always yields a group at the root.
        TestEntry::Example { .. } => unreachable!(),
    }
    let tests = flatten_tests(
        main_tests,
        options.include.as_ref(),
        options.exclude.as_ref(),
    );

    let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name);

    // Skip-definition name -> number of times it matched; unmatched entries
    // are reported (and panicked on) at the end.
    let mut skipped = options
        .skipped
        .take()
        .unwrap_or_default()
        .into_iter()
        .chain(tests.iter().filter(|x| x.skip).map(get_test_name))
        .map(|x| (x, 0))
        .collect::<HashMap<String, usize>>();

    let mut failure_count = 0;

    let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();

    if log_seed {
        println!(" start seed: {start_seed}");
    }

    println!();
    for (test_index, test) in tests.iter().enumerate() {
        let test_name = get_test_name(test);
        if let Some(counter) = skipped.get_mut(test_name.as_str()) {
            println!(" {test_index}. {test_name} - SKIPPED");
            *counter += 1;
            continue;
        }

        println!(" {test_index}. {test_name}");

        // Phase 1: parse the pristine input and verify the expected tree,
        // with allocation-leak checking.
        let passed = allocations::record(|| {
            let mut log_session = None;
            let mut parser = get_parser(&mut log_session, "log.html");
            parser.set_language(language).unwrap();
            set_included_ranges(&mut parser, &test.input, test.template_delimiters);

            let tree = parser.parse(&test.input, None).unwrap();

            // Tests marked as error tests only assert that parsing finishes.
            if test.error {
                return true;
            }

            let mut actual_output = tree.root_node().to_sexp();
            if !test.has_fields {
                actual_output = strip_sexp_fields(&actual_output);
            }

            if actual_output != test.output {
                println!("Incorrect initial parse for {test_name}");
                print_diff_key();
                print_diff(&actual_output, &test.output, true);
                println!();
                return false;
            }

            true
        })
        .unwrap_or_else(|e| {
            eprintln!("Error: {e}");
            false
        });

        if !passed {
            failure_count += 1;
            continue;
        }

        // Reparse once outside of the recorder to get a baseline tree that
        // each trial below clones and mutates.
        let mut parser = Parser::new();
        parser.set_language(language).unwrap();
        let tree = parser.parse(&test.input, None).unwrap();
        drop(parser);

        // Phase 2: randomized edit/undo round-trips, one seed per trial.
        for trial in 0..options.iterations {
            let seed = start_seed + trial;
            let passed = allocations::record(|| {
                let mut rand = Rand::new(seed);
                let mut log_session = None;
                let mut parser = get_parser(&mut log_session, "log.html");
                parser.set_language(language).unwrap();
                let mut tree = tree.clone();
                let mut input = test.input.clone();

                if options.log_graphs {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }

                // Perform a random series of edits and reparse.
                let mut undo_stack = Vec::new();
                for _ in 0..=rand.unsigned(*EDIT_COUNT) {
                    let edit = get_random_edit(&mut rand, &input);
                    undo_stack.push(invert_edit(&input, &edit));
                    perform_edit(&mut tree, &mut input, &edit).unwrap();
                }

                if log_seed {
                    println!(" {test_index}.{trial:<2} seed: {seed}");
                }

                if dump_edits {
                    // Persist the edited input for post-mortem debugging.
                    fs::create_dir_all("fuzz").unwrap();
                    fs::write(
                        Path::new("fuzz")
                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
                        &input,
                    )
                    .unwrap();
                }

                if options.log_graphs {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }

                set_included_ranges(&mut parser, &input, test.template_delimiters);
                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();

                // Check that the new tree is consistent.
                check_consistent_sizes(&tree2, &input);
                if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
                    println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
                    return false;
                }

                // Undo all of the edits and re-parse again.
                while let Some(edit) = undo_stack.pop() {
                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
                }
                if options.log_graphs {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }
                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();

                // Verify that the final tree matches the expectation from the corpus.
                let mut actual_output = tree3.root_node().to_sexp();
                if !test.has_fields {
                    actual_output = strip_sexp_fields(&actual_output);
                }
                if actual_output != test.output && !test.error {
                    println!("Incorrect parse for {test_name} - seed {seed}");
                    print_diff_key();
                    print_diff(&actual_output, &test.output, true);
                    println!();
                    return false;
                }

                // Check that the edited tree is consistent.
                check_consistent_sizes(&tree3, &input);
                if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
                    println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
                    return false;
                }

                true
            })
            .unwrap_or_else(|e| {
                eprintln!("Error: {e}");
                false
            });

            if !passed {
                failure_count += 1;
                break;
            }
        }
    }

    if failure_count != 0 {
        eprintln!("{failure_count} {language_name} corpus tests failed fuzzing");
    }

    // Any skip definition that never matched a test is stale and must be
    // removed from the configuration.
    skipped.retain(|_, v| *v == 0);
    if !skipped.is_empty() {
        println!("Non matchable skip definitions:");
        for k in skipped.keys() {
            println!(" {k}");
        }
        panic!("Non matchable skip definitions needs to be removed");
    }
}
/// A corpus test flattened out of its group hierarchy, ready to fuzz.
pub struct FlattenedTest {
    /// Full name, including ancestor group names joined with " - ".
    pub name: String,
    /// Source text to parse.
    pub input: Vec<u8>,
    /// Expected S-expression for the parse tree.
    pub output: String,
    /// Languages the test applies to (an empty first entry means any).
    pub languages: Vec<Box<str>>,
    /// The test only asserts that parsing terminates, not the tree shape.
    pub error: bool,
    /// The test is marked to be skipped.
    pub skip: bool,
    /// Whether the expected output includes field names.
    pub has_fields: bool,
    /// Start/end delimiters for template-style included ranges; always
    /// `None` for corpus-derived tests (see `flatten_tests`).
    pub template_delimiters: Option<(&'static str, &'static str)>,
}
/// Flattens the corpus test tree into a list of runnable tests, joining
/// nested group names with " - " and applying the name filters.
///
/// When `include` is given it takes precedence and `exclude` is ignored;
/// otherwise tests whose full name matches `exclude` are dropped.
#[must_use]
pub fn flatten_tests(
    test: TestEntry,
    include: Option<&Regex>,
    exclude: Option<&Regex>,
) -> Vec<FlattenedTest> {
    fn helper(
        test: TestEntry,
        include: Option<&Regex>,
        exclude: Option<&Regex>,
        is_root: bool,
        prefix: &str,
        result: &mut Vec<FlattenedTest>,
    ) {
        match test {
            TestEntry::Example {
                mut name,
                input,
                output,
                has_fields,
                attributes,
                ..
            } => {
                // Prepend the accumulated group path to the example name.
                if !prefix.is_empty() {
                    name.insert_str(0, " - ");
                    name.insert_str(0, prefix);
                }
                // Filters are applied to the fully-qualified name.
                if let Some(include) = include {
                    if !include.is_match(&name) {
                        return;
                    }
                } else if let Some(exclude) = exclude {
                    if exclude.is_match(&name) {
                        return;
                    }
                }
                result.push(FlattenedTest {
                    name,
                    input,
                    output,
                    has_fields,
                    languages: attributes.languages,
                    error: attributes.error,
                    skip: attributes.skip,
                    // Corpus tests carry no template delimiters.
                    template_delimiters: None,
                });
            }
            TestEntry::Group {
                mut name, children, ..
            } => {
                // The root group's own name is not prefixed, but it becomes
                // the prefix for its children.
                if !is_root && !prefix.is_empty() {
                    name.insert_str(0, " - ");
                    name.insert_str(0, prefix);
                }
                for child in children {
                    helper(child, include, exclude, false, &name, result);
                }
            }
        }
    }
    let mut result = Vec::new();
    helper(test, include, exclude, true, "", &mut result);
    result
}

View file

@ -0,0 +1,122 @@
use std::{
collections::HashMap,
os::raw::c_void,
sync::{
atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
Mutex,
},
};
/// Runs before `main` (via the `ctor` crate) and installs the recording
/// wrappers below as tree-sitter's allocator, so every C-side allocation
/// can be tracked by `record`.
#[ctor::ctor]
unsafe fn initialize_allocation_recording() {
    tree_sitter::set_allocator(
        Some(ts_record_malloc),
        Some(ts_record_calloc),
        Some(ts_record_realloc),
        Some(ts_record_free),
    );
}
/// A raw pointer used purely as a hash-map key identifying one allocation.
/// The pointer is never dereferenced.
#[derive(Debug, PartialEq, Eq, Hash)]
struct Allocation(*const c_void);

// SAFETY: the wrapped pointer is only compared and hashed, never
// dereferenced, so sharing it across threads is sound.
unsafe impl Send for Allocation {}
unsafe impl Sync for Allocation {}

/// Per-thread bookkeeping for allocations made while recording is enabled.
#[derive(Default)]
struct AllocationRecorder {
    // Whether `record` is currently active on this thread.
    enabled: AtomicBool,
    // Monotonic index assigned to each allocation, used in leak reports.
    allocation_count: AtomicUsize,
    // Live allocations: pointer -> allocation index.
    outstanding_allocations: Mutex<HashMap<Allocation, usize>>,
}

thread_local! {
    static RECORDER: AllocationRecorder = AllocationRecorder::default();
}
// Bindings to the system C allocator, which the recording wrappers below
// delegate to.
extern "C" {
    fn malloc(size: usize) -> *mut c_void;
    fn calloc(count: usize, size: usize) -> *mut c_void;
    fn realloc(ptr: *mut c_void, size: usize) -> *mut c_void;
    fn free(ptr: *mut c_void);
}
/// Runs `f` with allocation recording enabled on the current thread, then
/// reports an error if any recorded allocation was never freed.
///
/// Returns `Ok` with `f`'s value when all allocations were balanced, or an
/// `Err` listing the indices (allocation order numbers) of leaked blocks.
pub fn record<T>(f: impl FnOnce() -> T) -> Result<T, String> {
    // Reset this thread's recorder and switch recording on.
    RECORDER.with(|recorder| {
        recorder.enabled.store(true, SeqCst);
        recorder.allocation_count.store(0, SeqCst);
        recorder.outstanding_allocations.lock().unwrap().clear();
    });

    let value = f();

    // Switch recording off and drain whatever is still outstanding.
    let outstanding_allocation_indices = RECORDER.with(|recorder| {
        recorder.enabled.store(false, SeqCst);
        recorder.allocation_count.store(0, SeqCst);
        recorder
            .outstanding_allocations
            .lock()
            .unwrap()
            .drain()
            .map(|e| e.1)
            .collect::<Vec<_>>()
    });
    if !outstanding_allocation_indices.is_empty() {
        return Err(format!(
            "Leaked allocation indices: {outstanding_allocation_indices:?}",
        ));
    }
    Ok(value)
}
/// Notes a new allocation in the current thread's recorder (no-op when
/// recording is disabled).
fn record_alloc(ptr: *mut c_void) {
    RECORDER.with(|recorder| {
        if recorder.enabled.load(SeqCst) {
            let count = recorder.allocation_count.fetch_add(1, SeqCst);
            recorder
                .outstanding_allocations
                .lock()
                .unwrap()
                .insert(Allocation(ptr), count);
        }
    });
}

/// Removes a freed pointer from the current thread's outstanding set
/// (no-op when recording is disabled or the pointer was never recorded).
fn record_dealloc(ptr: *mut c_void) {
    RECORDER.with(|recorder| {
        if recorder.enabled.load(SeqCst) {
            recorder
                .outstanding_allocations
                .lock()
                .unwrap()
                .remove(&Allocation(ptr));
        }
    });
}
// C-ABI shims installed by `initialize_allocation_recording`. Each one
// forwards to the system allocator and updates the per-thread recorder.

unsafe extern "C" fn ts_record_malloc(size: usize) -> *mut c_void {
    let result = malloc(size);
    record_alloc(result);
    result
}

unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void {
    let result = calloc(count, size);
    record_alloc(result);
    result
}

unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
    let result = realloc(ptr, size);
    if ptr.is_null() {
        // A null `ptr` makes realloc behave like malloc: a new allocation.
        record_alloc(result);
    } else if !core::ptr::eq(ptr, result) {
        // The block moved: the old pointer dies, the new one is now live.
        record_dealloc(ptr);
        record_alloc(result);
    }
    result
}

unsafe extern "C" fn ts_record_free(ptr: *mut c_void) {
    record_dealloc(ptr);
    free(ptr);
}

View file

@ -0,0 +1,147 @@
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
use super::{scope_sequence::ScopeSequence, LOG_ENABLED, LOG_GRAPH_ENABLED};
use crate::util;
/// Walks the whole tree asserting structural invariants: byte offsets agree
/// with row/column positions (via `line_offsets`), ranges are not inverted,
/// children are ordered and stay within their parent, the named-child count
/// matches, and `has_changes` propagates from child to parent.
pub fn check_consistent_sizes(tree: &Tree, input: &[u8]) {
    fn check(node: Node, line_offsets: &[usize]) {
        let start_byte = node.start_byte();
        let end_byte = node.end_byte();
        let start_point = node.start_position();
        let end_point = node.end_position();

        // The node's range must be non-inverted, and its byte offsets must
        // agree with its row/column positions.
        assert!(start_byte <= end_byte);
        assert!(start_point <= end_point);
        assert_eq!(
            start_byte,
            line_offsets[start_point.row] + start_point.column
        );
        assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column);

        let mut last_child_end_byte = start_byte;
        let mut last_child_end_point = start_point;
        let mut some_child_has_changes = false;
        let mut actual_named_child_count = 0;
        for i in 0..node.child_count() {
            let child = node.child(i).unwrap();
            // Children must be in order and non-overlapping.
            assert!(child.start_byte() >= last_child_end_byte);
            assert!(child.start_position() >= last_child_end_point);
            check(child, line_offsets);
            if child.has_changes() {
                some_child_has_changes = true;
            }
            if child.is_named() {
                actual_named_child_count += 1;
            }
            last_child_end_byte = child.end_byte();
            last_child_end_point = child.end_position();
        }

        assert_eq!(actual_named_child_count, node.named_child_count());

        // Children must not extend past the end of their parent.
        if node.child_count() > 0 {
            assert!(end_byte >= last_child_end_byte);
            assert!(end_point >= last_child_end_point);
        }

        // An edited child implies an edited parent.
        if some_child_has_changes {
            assert!(node.has_changes());
        }
    }

    // line_offsets[row] is the byte offset at which `row` begins.
    let mut line_offsets = vec![0];
    for (i, c) in input.iter().enumerate() {
        if *c == b'\n' {
            line_offsets.push(i + 1);
        }
    }

    check(tree.root_node(), &line_offsets);
}
/// Validates the ranges reported by `Tree::changed_ranges` between an old
/// and a new tree: each range must lie inside the union of the two trees'
/// extents, and every position whose scope stack differs between the trees
/// must be covered by a reported range (checked via `ScopeSequence`).
pub fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &[u8]) -> Result<(), String> {
    let changed_ranges = old_tree.changed_ranges(new_tree).collect::<Vec<_>>();
    let old_scope_sequence = ScopeSequence::new(old_tree);
    let new_scope_sequence = ScopeSequence::new(new_tree);

    // Bounding region spanning both trees, in bytes and in points.
    let old_range = old_tree.root_node().range();
    let new_range = new_tree.root_node().range();
    let byte_range =
        old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte);
    let point_range = old_range.start_point.min(new_range.start_point)
        ..old_range.end_point.max(new_range.end_point);

    // A changed range that extends beyond both trees is always a bug.
    for range in &changed_ranges {
        if range.end_byte > byte_range.end || range.end_point > point_range.end {
            return Err(format!(
                "changed range extends outside of the old and new trees {range:?}",
            ));
        }
    }

    old_scope_sequence.check_changes(&new_scope_sequence, input, &changed_ranges)
}
/// Configures `parser` to parse only the regions of `input` between each
/// `(start, end)` delimiter pair (template-style parsing), or resets it to
/// whole-document parsing when `delimiters` is `None`.
///
/// An unterminated final region extends to the end of the input.
pub fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) {
    if let Some((start, end)) = delimiters {
        // `windows(0)` would panic; an empty delimiter can never match,
        // which under the previous code meant "no included ranges".
        if start.is_empty() || end.is_empty() {
            parser.set_included_ranges(&[]).unwrap();
            return;
        }
        let mut ranges = Vec::new();
        let mut ix = 0;
        while ix < input.len() {
            // Find the next opening delimiter. This previously searched
            // with a hard-coded `windows(2)`, which silently never matched
            // delimiters whose byte length is not exactly 2; size the
            // search window from the delimiter itself instead.
            let Some(mut start_ix) = input[ix..]
                .windows(start.len())
                .position(|win| win == start.as_bytes())
            else {
                break;
            };
            start_ix += ix + start.len();
            // The included range runs up to the matching closing delimiter,
            // or to the end of the input when it is unterminated.
            let end_ix = input[start_ix..]
                .windows(end.len())
                .position(|win| win == end.as_bytes())
                .map_or(input.len(), |ix| start_ix + ix);
            ix = end_ix;

            ranges.push(Range {
                start_byte: start_ix,
                end_byte: end_ix,
                start_point: point_for_offset(input, start_ix),
                end_point: point_for_offset(input, end_ix),
            });
        }

        parser.set_included_ranges(&ranges).unwrap();
    } else {
        parser.set_included_ranges(&[]).unwrap();
    }
}
fn point_for_offset(text: &[u8], offset: usize) -> Point {
let mut point = Point::default();
for byte in &text[..offset] {
if *byte == b'\n' {
point.row += 1;
point.column = 0;
} else {
point.column += 1;
}
}
point
}
/// Creates a parser wired up according to the logging environment flags:
/// stderr logging when `TREE_SITTER_LOG` is set, and graph logging to
/// `log_filename` when `TREE_SITTER_LOG_GRAPHS` is set (the log session
/// handle is stored in `session` to keep it alive at the caller).
pub fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
    let mut parser = Parser::new();

    if *LOG_ENABLED {
        parser.set_logger(Some(Box::new(|log_type, msg| {
            // Indent lexer messages to distinguish them from parse messages.
            if log_type == LogType::Lex {
                eprintln!(" {msg}");
            } else {
                eprintln!("{msg}");
            }
        })));
    }

    if *LOG_GRAPH_ENABLED {
        *session = Some(util::log_graphs(&mut parser, log_filename, false).unwrap());
    }

    parser
}

View file

@ -0,0 +1,61 @@
use super::random::Rand;
/// A single text mutation: delete `deleted_length` bytes at `position`,
/// then insert `inserted_text` at that same position.
#[derive(Debug)]
pub struct Edit {
    pub position: usize,
    pub deleted_length: usize,
    pub inserted_text: Vec<u8>,
}

/// Builds the edit that undoes `edit` once applied to the post-edit text.
/// `input` must be the text as it was *before* `edit` was applied.
#[must_use]
pub fn invert_edit(input: &[u8], edit: &Edit) -> Edit {
    let Edit {
        position,
        deleted_length,
        ref inserted_text,
    } = *edit;
    // The inverse deletes what was inserted and restores what was removed.
    Edit {
        position,
        deleted_length: inserted_text.len(),
        inserted_text: input[position..position + deleted_length].to_vec(),
    }
}
/// Generates one random edit for `input`, choosing among four shapes:
/// append at the end, delete from the end, insert at a random position, or
/// replace a random span. Note that `Rand::unsigned(max)` is inclusive of
/// `max`, so positions may equal `input.len()` (an append).
pub fn get_random_edit(rand: &mut Rand, input: &[u8]) -> Edit {
    let choice = rand.unsigned(10);
    if choice < 2 {
        // Insert text at end
        let inserted_text = rand.words(3);
        Edit {
            position: input.len(),
            deleted_length: 0,
            inserted_text,
        }
    } else if choice < 5 {
        // Delete text from the end
        let deleted_length = rand.unsigned(30).min(input.len());
        Edit {
            position: input.len() - deleted_length,
            deleted_length,
            inserted_text: vec![],
        }
    } else if choice < 8 {
        // Insert at a random position
        let position = rand.unsigned(input.len());
        let word_count = 1 + rand.unsigned(3);
        let inserted_text = rand.words(word_count);
        Edit {
            position,
            deleted_length: 0,
            inserted_text,
        }
    } else {
        // Replace at random position
        let position = rand.unsigned(input.len());
        let deleted_length = rand.unsigned(input.len() - position);
        let word_count = 1 + rand.unsigned(3);
        let inserted_text = rand.words(word_count);
        Edit {
            position,
            deleted_length,
            inserted_text,
        }
    }
}

View file

@ -0,0 +1,44 @@
use rand::{
distributions::Alphanumeric,
prelude::{Rng, SeedableRng, StdRng},
};
/// Operator characters mixed into generated text; all are ASCII, so the
/// `as u8` cast in `words` is lossless.
const OPERATORS: &[char] = &[
    '+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.', '%',
];

/// A small deterministic PRNG wrapper that makes fuzzing reproducible from
/// a single integer seed.
pub struct Rand(StdRng);

impl Rand {
    #[must_use]
    pub fn new(seed: usize) -> Self {
        Self(StdRng::seed_from_u64(seed as u64))
    }

    /// Returns a uniformly random value in `0..=max` — inclusive of `max`.
    pub fn unsigned(&mut self, max: usize) -> usize {
        self.0.gen_range(0..=max)
    }

    /// Generates up to `max_count` whitespace-separated "words" (possibly
    /// zero), each either a single operator character or a short (possibly
    /// empty) run of alphanumeric characters.
    pub fn words(&mut self, max_count: usize) -> Vec<u8> {
        let mut result = Vec::new();
        let word_count = self.unsigned(max_count);
        for i in 0..word_count {
            if i > 0 {
                // Occasionally separate words with a newline instead of a space.
                if self.unsigned(5) == 0 {
                    result.push(b'\n');
                } else {
                    result.push(b' ');
                }
            }
            if self.unsigned(3) == 0 {
                let index = self.unsigned(OPERATORS.len() - 1);
                result.push(OPERATORS[index] as u8);
            } else {
                for _ in 0..self.unsigned(8) {
                    result.push(self.0.sample(Alphanumeric));
                }
            }
        }
        result
    }
}

View file

@ -0,0 +1,91 @@
use tree_sitter::{Point, Range, Tree};
/// For every byte of a source text, the stack of node kinds enclosing that
/// byte. Two trees over the same text should produce identical stacks at
/// every position outside of the ranges reported as changed.
#[derive(Debug)]
pub struct ScopeSequence(Vec<ScopeStack>);

/// Node kinds from the root down to the innermost node at one byte.
type ScopeStack = Vec<&'static str>;

impl ScopeSequence {
    /// Builds the per-byte scope stacks by walking `tree` with a cursor.
    #[must_use]
    pub fn new(tree: &Tree) -> Self {
        let mut result = Self(Vec::new());
        let mut scope_stack = Vec::new();

        let mut cursor = tree.walk();
        let mut visited_children = false;
        loop {
            let node = cursor.node();
            // Fill the gap before this node with the enclosing stack.
            for _ in result.0.len()..node.start_byte() {
                result.0.push(scope_stack.clone());
            }
            if visited_children {
                // On the way back up: fill this node's remaining bytes,
                // then pop it and move to a sibling or the parent.
                for _ in result.0.len()..node.end_byte() {
                    result.0.push(scope_stack.clone());
                }
                scope_stack.pop();
                if cursor.goto_next_sibling() {
                    visited_children = false;
                } else if !cursor.goto_parent() {
                    break;
                }
            } else {
                // On the way down: enter this node before descending.
                scope_stack.push(cursor.node().kind());
                if !cursor.goto_first_child() {
                    visited_children = true;
                }
            }
        }

        result
    }

    /// Compares two scope sequences over the same `text`, requiring every
    /// differing (non-newline) byte to fall inside one of
    /// `known_changed_ranges`. On violation, returns a diagnostic message
    /// showing the position, the offending line, and both scope stacks.
    pub fn check_changes(
        &self,
        other: &Self,
        text: &[u8],
        known_changed_ranges: &[Range],
    ) -> Result<(), String> {
        let mut position = Point { row: 0, column: 0 };
        for i in 0..(self.0.len().max(other.0.len())) {
            let stack = &self.0.get(i);
            let other_stack = &other.0.get(i);

            // Line-break bytes are exempt from the comparison.
            if *stack != *other_stack && ![b'\r', b'\n'].contains(&text[i]) {
                let containing_range = known_changed_ranges
                    .iter()
                    .find(|range| range.start_point <= position && position < range.end_point);
                if containing_range.is_none() {
                    // Extract the full line containing the mismatch for the
                    // error report; the caret below points at the column.
                    let line = &text[(i - position.column)..]
                        .split(|c| *c == b'\n')
                        .next()
                        .unwrap();
                    return Err(format!(
                        concat!(
                            "Position: {}\n",
                            "Byte offset: {}\n",
                            "Line: {}\n",
                            "{}^\n",
                            "Old scopes: {:?}\n",
                            "New scopes: {:?}\n",
                            "Invalidated ranges: {:?}",
                        ),
                        position,
                        i,
                        String::from_utf8_lossy(line),
                        String::from(" ").repeat(position.column + "Line: ".len()),
                        stack,
                        other_stack,
                        known_changed_ranges,
                    ));
                }
            }

            // Track row/column while scanning.
            if text[i] == b'\n' {
                position.row += 1;
                position.column = 0;
            } else {
                position.column += 1;
            }
        }
        Ok(())
    }
}

510
crates/cli/src/highlight.rs Normal file
View file

@ -0,0 +1,510 @@
use std::{
collections::{BTreeMap, HashSet},
fmt::Write,
fs,
io::{self, Write as _},
path::{self, Path, PathBuf},
str,
sync::{atomic::AtomicUsize, Arc},
time::Instant,
};
use ansi_colours::{ansi256_from_rgb, rgb_from_ansi256};
use anstyle::{Ansi256Color, AnsiColor, Color, Effects, RgbColor};
use anyhow::Result;
use serde::{ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer};
use serde_json::{json, Value};
use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer};
use tree_sitter_loader::Loader;
/// Opening HTML scaffolding (doctype, title, and base CSS) written before
/// the per-theme `<style>` rules.
pub const HTML_HEAD_HEADER: &str = "
<!doctype HTML>
<head>
<title>Tree-sitter Highlighting</title>
<style>
body {
font-family: monospace
}
.line-number {
user-select: none;
text-align: right;
color: rgba(27,31,35,.3);
padding: 0 10px;
}
.line {
white-space: pre;
}
</style>";

/// Closes the head and opens the body; written after the theme styles.
pub const HTML_BODY_HEADER: &str = "
</head>
<body>
";

/// Closing tags written after the highlighted table.
pub const HTML_FOOTER: &str = "
</body>
";
/// A single highlight style in both of its output forms: an ANSI terminal
/// style and (optionally) an equivalent CSS declaration string.
#[derive(Debug, Default)]
pub struct Style {
    pub ansi: anstyle::Style,
    pub css: Option<String>,
}

/// A highlight theme: `styles[i]` is the style for the capture named
/// `highlight_names[i]` (the two vectors are index-aligned).
#[derive(Debug)]
pub struct Theme {
    pub styles: Vec<Style>,
    pub highlight_names: Vec<String>,
}

/// Wrapper matching the on-disk configuration layout (a `theme` key).
#[derive(Default, Deserialize, Serialize)]
pub struct ThemeConfig {
    #[serde(default)]
    pub theme: Theme,
}
impl Theme {
    /// Loads a theme from a JSON file. A file that fails to parse yields
    /// the default theme rather than an error; only I/O failures are
    /// propagated.
    pub fn load(path: &path::Path) -> io::Result<Self> {
        let json = fs::read_to_string(path)?;
        Ok(serde_json::from_str(&json).unwrap_or_default())
    }

    /// The style used for text with no highlight capture (no color or
    /// effects).
    #[must_use]
    pub fn default_style(&self) -> Style {
        Style::default()
    }
}
impl<'de> Deserialize<'de> for Theme {
    // A theme document is a JSON object mapping highlight names to style
    // values; the two vectors are kept index-aligned. A document that is
    // not an object deserializes to an empty theme rather than an error.
    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let mut styles = Vec::new();
        let mut highlight_names = Vec::new();
        if let Ok(colors) = BTreeMap::<String, Value>::deserialize(deserializer) {
            highlight_names.reserve(colors.len());
            styles.reserve(colors.len());
            for (name, style_value) in colors {
                let mut style = Style::default();
                parse_style(&mut style, style_value);
                highlight_names.push(name);
                styles.push(style);
            }
        }
        Ok(Self {
            styles,
            highlight_names,
        })
    }
}
impl Serialize for Theme {
    // Serializes back to the same JSON shape `Deserialize` accepts: a map
    // from highlight name to either a bare color value or an object with
    // `color` / `bold` / `italic` / `underline` keys.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut map = serializer.serialize_map(Some(self.styles.len()))?;
        for (name, style) in self.highlight_names.iter().zip(&self.styles) {
            let style = &style.ansi;
            // Map the foreground color back to its JSON representation.
            let color = style.get_fg_color().map(|color| match color {
                Color::Ansi(color) => match color {
                    AnsiColor::Black => json!("black"),
                    AnsiColor::Blue => json!("blue"),
                    AnsiColor::Cyan => json!("cyan"),
                    AnsiColor::Green => json!("green"),
                    AnsiColor::Magenta => json!("purple"),
                    AnsiColor::Red => json!("red"),
                    AnsiColor::White => json!("white"),
                    AnsiColor::Yellow => json!("yellow"),
                    // Bright ANSI variants are never produced by
                    // `parse_color`, so they cannot occur here.
                    _ => unreachable!(),
                },
                Color::Ansi256(Ansi256Color(n)) => json!(n),
                Color::Rgb(RgbColor(r, g, b)) => json!(format!("#{r:x?}{g:x?}{b:x?}")),
            });
            let effects = style.get_effects();
            if effects.contains(Effects::BOLD)
                || effects.contains(Effects::ITALIC)
                || effects.contains(Effects::UNDERLINE)
            {
                // Any text effect forces the object form.
                let mut style_json = BTreeMap::new();
                if let Some(color) = color {
                    style_json.insert("color", color);
                }
                if effects.contains(Effects::BOLD) {
                    style_json.insert("bold", Value::Bool(true));
                }
                if effects.contains(Effects::ITALIC) {
                    style_json.insert("italic", Value::Bool(true));
                }
                if effects.contains(Effects::UNDERLINE) {
                    style_json.insert("underline", Value::Bool(true));
                }
                map.serialize_entry(&name, &style_json)?;
            } else if let Some(color) = color {
                map.serialize_entry(&name, &color)?;
            } else {
                map.serialize_entry(&name, &Value::Null)?;
            }
        }
        map.end()
    }
}
impl Default for Theme {
    // The built-in theme: ANSI-256 palette indices (with occasional bold /
    // italic / underline) for the standard highlight capture names.
    fn default() -> Self {
        serde_json::from_value(json!({
            "attribute": {"color": 124, "italic": true},
            "comment": {"color": 245, "italic": true},
            "constant": 94,
            "constant.builtin": {"color": 94, "bold": true},
            "constructor": 136,
            "embedded": null,
            "function": 26,
            "function.builtin": {"color": 26, "bold": true},
            "keyword": 56,
            "module": 136,
            "number": {"color": 94, "bold": true},
            "operator": {"color": 239, "bold": true},
            "property": 124,
            "property.builtin": {"color": 124, "bold": true},
            "punctuation": 239,
            "punctuation.bracket": 239,
            "punctuation.delimiter": 239,
            "punctuation.special": 239,
            "string": 28,
            "string.special": 30,
            "tag": 18,
            "type": 23,
            "type.builtin": {"color": 23, "bold": true},
            "variable": 252,
            "variable.builtin": {"color": 252, "bold": true},
            "variable.parameter": {"color": 252, "underline": true}
        }))
        .unwrap()
    }
}
/// Applies a JSON style value to `style` in place.
///
/// Accepts either an object with `color`/`bold`/`italic`/`underline` keys
/// or a bare color value; anything else clears the CSS representation.
/// The CSS string is computed before the truecolor downgrade below, so it
/// keeps the original RGB color even when the terminal style is reduced to
/// an ANSI-256 approximation.
fn parse_style(style: &mut Style, json: Value) {
    if let Value::Object(entries) = json {
        for (property_name, value) in entries {
            match property_name.as_str() {
                "bold" => {
                    if value == Value::Bool(true) {
                        style.ansi = style.ansi.bold();
                    }
                }
                "italic" => {
                    if value == Value::Bool(true) {
                        style.ansi = style.ansi.italic();
                    }
                }
                "underline" => {
                    if value == Value::Bool(true) {
                        style.ansi = style.ansi.underline();
                    }
                }
                "color" => {
                    if let Some(color) = parse_color(value) {
                        style.ansi = style.ansi.fg_color(Some(color));
                    }
                }
                // Unknown keys are ignored.
                _ => {}
            }
        }
        style.css = Some(style_to_css(style.ansi));
    } else if let Some(color) = parse_color(json) {
        style.ansi = style.ansi.fg_color(Some(color));
        style.css = Some(style_to_css(style.ansi));
    } else {
        style.css = None;
    }

    // For terminal output only, approximate RGB colors with the closest
    // ANSI-256 palette entry when the terminal lacks truecolor support.
    if let Some(Color::Rgb(RgbColor(red, green, blue))) = style.ansi.get_fg_color() {
        if !terminal_supports_truecolor() {
            let ansi256 = Color::Ansi256(Ansi256Color(ansi256_from_rgb((red, green, blue))));
            style.ansi = style.ansi.fg_color(Some(ansi256));
        }
    }
}
/// Converts a JSON theme value into a terminal color: an ANSI-256 palette
/// index (number), a named ANSI color, or a `#rrggbb` hex string.
/// Returns `None` for anything unrecognized.
fn parse_color(json: Value) -> Option<Color> {
    match json {
        // NOTE: values above 255 are truncated by `as u8`, matching the
        // previous behavior.
        Value::Number(n) => n.as_u64().map(|n| Color::Ansi256(Ansi256Color(n as u8))),
        Value::String(s) => match s.to_lowercase().as_str() {
            "black" => Some(Color::Ansi(AnsiColor::Black)),
            "blue" => Some(Color::Ansi(AnsiColor::Blue)),
            "cyan" => Some(Color::Ansi(AnsiColor::Cyan)),
            "green" => Some(Color::Ansi(AnsiColor::Green)),
            "purple" => Some(Color::Ansi(AnsiColor::Magenta)),
            "red" => Some(Color::Ansi(AnsiColor::Red)),
            "white" => Some(Color::Ansi(AnsiColor::White)),
            "yellow" => Some(Color::Ansi(AnsiColor::Yellow)),
            // Anything else is treated as `#rrggbb` hex notation; the
            // manual `if let … else None` was replaced with `Option::map`
            // (clippy `manual_map`).
            s => {
                hex_string_to_rgb(s).map(|(red, green, blue)| Color::Rgb(RgbColor(red, green, blue)))
            }
        },
        _ => None,
    }
}
/// Parses a `#rrggbb` hex color string into its RGB components.
///
/// Returns `None` when the string lacks a leading `#`, is too short, or
/// contains non-hex digits; characters after the six hex digits are
/// ignored, matching the previous behavior. Uses checked `str::get`
/// slicing so multi-byte (non-ASCII) input returns `None` instead of
/// panicking on a char-boundary violation, as the previous `&s[1..3]`
/// byte-index slicing could.
fn hex_string_to_rgb(s: &str) -> Option<(u8, u8, u8)> {
    let hex = s.strip_prefix('#')?;
    let red = u8::from_str_radix(hex.get(0..2)?, 16).ok()?;
    let green = u8::from_str_radix(hex.get(2..4)?, 16).ok()?;
    let blue = u8::from_str_radix(hex.get(4..6)?, 16).ok()?;
    Some((red, green, blue))
}
/// Renders the text effects and foreground color of `style` as a string of
/// CSS declarations (e.g. `font-weight: bold;color: #ff0000`).
fn style_to_css(style: anstyle::Style) -> String {
    let mut css = String::new();
    let effects = style.get_effects();
    if effects.contains(Effects::UNDERLINE) {
        css.push_str("text-decoration: underline;");
    }
    if effects.contains(Effects::BOLD) {
        css.push_str("font-weight: bold;");
    }
    if effects.contains(Effects::ITALIC) {
        css.push_str("font-style: italic;");
    }
    if let Some(color) = style.get_fg_color() {
        write_color(&mut css, color);
    }
    css
}
/// Appends a CSS `color: …` declaration for `color` to `buffer`.
///
/// ANSI-256 palette indices are expanded to their RGB hex values so the
/// CSS matches what a terminal would display.
fn write_color(buffer: &mut String, color: Color) {
    match color {
        Color::Ansi(color) => match color {
            AnsiColor::Black => write!(buffer, "color: black").unwrap(),
            AnsiColor::Red => write!(buffer, "color: red").unwrap(),
            AnsiColor::Green => write!(buffer, "color: green").unwrap(),
            AnsiColor::Yellow => write!(buffer, "color: yellow").unwrap(),
            AnsiColor::Blue => write!(buffer, "color: blue").unwrap(),
            AnsiColor::Magenta => write!(buffer, "color: purple").unwrap(),
            AnsiColor::Cyan => write!(buffer, "color: cyan").unwrap(),
            AnsiColor::White => write!(buffer, "color: white").unwrap(),
            // Bright variants are never produced by `parse_color`.
            _ => unreachable!(),
        },
        Color::Ansi256(Ansi256Color(n)) => {
            let (r, g, b) = rgb_from_ansi256(n);
            write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap();
        }
        Color::Rgb(RgbColor(r, g, b)) => write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap(),
    }
}
/// Reports whether the terminal advertises 24-bit color support through
/// the conventional `COLORTERM` environment variable.
fn terminal_supports_truecolor() -> bool {
    matches!(
        std::env::var("COLORTERM").as_deref(),
        Ok("truecolor" | "24bit")
    )
}
/// Options controlling the output and behavior of `highlight`.
pub struct HighlightOptions {
    /// Theme supplying per-capture styles.
    pub theme: Theme,
    /// Report non-standard capture names before highlighting.
    pub check: bool,
    /// Optional allow-list file of extra capture names used by `check`.
    pub captures_path: Option<PathBuf>,
    /// Emit `style='…'` attributes instead of CSS classes in HTML output.
    pub inline_styles: bool,
    /// Produce an HTML page instead of ANSI terminal output.
    pub html: bool,
    /// Suppress headers and page scaffolding.
    pub quiet: bool,
    /// Print elapsed highlighting time to stderr.
    pub print_time: bool,
    /// Shared flag handed to the highlighter so callers can cancel it.
    pub cancellation_flag: Arc<AtomicUsize>,
}
/// Highlights the file at `path` with `config` and writes the result to
/// stdout, either as ANSI-styled text or (with `opts.html`) as an HTML
/// table with one row per line.
///
/// When `opts.check` is set, first reports to stderr any capture names in
/// `config` outside the standard set (or outside the allow-list file at
/// `opts.captures_path`). `print_name` controls whether `name` is printed
/// as a header before the output.
///
/// # Errors
/// Returns an error if the source or captures file can't be read, if
/// highlighting fails, or on a stdout write failure.
pub fn highlight(
    loader: &Loader,
    path: &Path,
    name: &str,
    config: &HighlightConfiguration,
    print_name: bool,
    opts: &HighlightOptions,
) -> Result<()> {
    if opts.check {
        let names = if let Some(path) = opts.captures_path.as_deref() {
            // The captures file lists one capture name per line; blank
            // lines and `;`-comment lines are skipped, trailing `;`
            // comments and surrounding quotes are stripped.
            let file = fs::read_to_string(path)?;
            let capture_names = file
                .lines()
                .filter_map(|line| {
                    if line.trim().is_empty() || line.trim().starts_with(';') {
                        return None;
                    }
                    line.split(';').next().map(|s| s.trim().trim_matches('"'))
                })
                .collect::<HashSet<_>>();
            config.nonconformant_capture_names(&capture_names)
        } else {
            config.nonconformant_capture_names(&HashSet::new())
        };
        if names.is_empty() {
            eprintln!("All highlight captures conform to standards.");
        } else {
            eprintln!(
                "Non-standard highlight {} detected:",
                if names.len() > 1 {
                    "captures"
                } else {
                    "capture"
                }
            );
            for name in names {
                eprintln!("* {name}");
            }
        }
    }

    let source = fs::read(path)?;
    let stdout = io::stdout();
    let mut stdout = stdout.lock();
    let time = Instant::now();
    let mut highlighter = Highlighter::new();
    let events =
        highlighter.highlight(config, &source, Some(&opts.cancellation_flag), |string| {
            loader.highlight_config_for_injection_string(string)
        })?;
    let theme = &opts.theme;

    if !opts.quiet && print_name {
        writeln!(&mut stdout, "{name}")?;
    }

    if opts.html {
        if !opts.quiet {
            // Emit the page skeleton plus one CSS rule per themed capture.
            writeln!(&mut stdout, "{HTML_HEAD_HEADER}")?;
            writeln!(&mut stdout, " <style>")?;
            let names = theme.highlight_names.iter();
            let styles = theme.styles.iter();
            for (name, style) in names.zip(styles) {
                if let Some(css) = &style.css {
                    writeln!(&mut stdout, " .{name} {{ {css}; }}")?;
                }
            }
            writeln!(&mut stdout, " </style>")?;
            writeln!(&mut stdout, "{HTML_BODY_HEADER}")?;
        }

        let mut renderer = HtmlRenderer::new();
        renderer.render(events, &source, &move |highlight, output| {
            // `highlight.0` indexes the theme's parallel style arrays.
            if opts.inline_styles {
                output.extend(b"style='");
                output.extend(
                    theme.styles[highlight.0]
                        .css
                        .as_ref()
                        .map_or_else(|| "".as_bytes(), |css_style| css_style.as_bytes()),
                );
                output.extend(b"'");
            } else {
                // Dotted capture names become space-separated CSS classes.
                output.extend(b"class='");
                let mut parts = theme.highlight_names[highlight.0].split('.').peekable();
                while let Some(part) = parts.next() {
                    output.extend(part.as_bytes());
                    if parts.peek().is_some() {
                        output.extend(b" ");
                    }
                }
                output.extend(b"'");
            }
        })?;
        if !opts.quiet {
            writeln!(&mut stdout, "<table>")?;
            for (i, line) in renderer.lines().enumerate() {
                writeln!(
                    &mut stdout,
                    "<tr><td class=line-number>{}</td><td class=line>{line}</td></tr>",
                    i + 1,
                )?;
            }
            writeln!(&mut stdout, "</table>")?;
            writeln!(&mut stdout, "{HTML_FOOTER}")?;
        }
    } else {
        // ANSI output: keep a stack of styles so that nested highlights
        // restore the enclosing style when they end.
        let mut style_stack = vec![theme.default_style().ansi];
        for event in events {
            match event? {
                HighlightEvent::HighlightStart(highlight) => {
                    style_stack.push(theme.styles[highlight.0].ansi);
                }
                HighlightEvent::HighlightEnd => {
                    style_stack.pop();
                }
                HighlightEvent::Source { start, end } => {
                    let style = style_stack.last().unwrap();
                    write!(&mut stdout, "{style}").unwrap();
                    stdout.write_all(&source[start..end])?;
                    // `{style:#}` renders the style's reset sequence.
                    write!(&mut stdout, "{style:#}").unwrap();
                }
            }
        }
    }

    if opts.print_time {
        eprintln!("Time: {}ms", time.elapsed().as_millis());
    }

    Ok(())
}
#[cfg(test)]
mod tests {
    use std::env;

    use super::*;

    const JUNGLE_GREEN: &str = "#26A69A";
    const DARK_CYAN: &str = "#00AF87";

    // Exercises `parse_style`'s RGB-to-ANSI-256 downgrade, which depends on
    // the COLORTERM environment variable; the original value is restored
    // at the end so other tests aren't affected.
    #[test]
    fn test_parse_style() {
        let original_environment_variable = env::var("COLORTERM");

        let mut style = Style::default();
        assert_eq!(style.ansi.get_fg_color(), None);
        assert_eq!(style.css, None);

        // darkcyan is an ANSI color and is preserved
        env::set_var("COLORTERM", "");
        parse_style(&mut style, Value::String(DARK_CYAN.to_string()));
        assert_eq!(
            style.ansi.get_fg_color(),
            Some(Color::Ansi256(Ansi256Color(36)))
        );
        assert_eq!(style.css, Some("color: #00af87".to_string()));

        // junglegreen is not an ANSI color and is preserved when the terminal supports it
        env::set_var("COLORTERM", "truecolor");
        parse_style(&mut style, Value::String(JUNGLE_GREEN.to_string()));
        assert_eq!(
            style.ansi.get_fg_color(),
            Some(Color::Rgb(RgbColor(38, 166, 154)))
        );
        assert_eq!(style.css, Some("color: #26a69a".to_string()));

        // junglegreen gets approximated as cadetblue when the terminal does not support it
        env::set_var("COLORTERM", "");
        parse_style(&mut style, Value::String(JUNGLE_GREEN.to_string()));
        assert_eq!(
            style.ansi.get_fg_color(),
            Some(Color::Ansi256(Ansi256Color(72)))
        );
        assert_eq!(style.css, Some("color: #26a69a".to_string()));

        // Restore the caller's COLORTERM.
        if let Ok(environment_variable) = original_environment_variable {
            env::set_var("COLORTERM", environment_variable);
        } else {
            env::remove_var("COLORTERM");
        }
    }
}

1052
crates/cli/src/init.rs Normal file

File diff suppressed because it is too large Load diff

187
crates/cli/src/input.rs Normal file
View file

@ -0,0 +1,187 @@
use std::{
fs,
io::{Read, Write},
path::{Path, PathBuf},
sync::{
atomic::{AtomicUsize, Ordering},
mpsc, Arc,
},
};
use anyhow::{anyhow, bail, Context, Result};
use glob::glob;
use crate::test::{parse_tests, TestEntry};
/// The resolved input for a CLI command: a list of file paths, the contents
/// of a single corpus test, or raw bytes read from stdin.
pub enum CliInput {
    /// Operate on this explicit list of files.
    Paths(Vec<PathBuf>),
    /// Operate on one corpus test, selected by number.
    Test {
        /// The test's name from its corpus file.
        name: String,
        /// The test's input source code.
        contents: Vec<u8>,
        /// Languages declared in the test's attributes (may be empty).
        languages: Vec<Box<str>>,
    },
    /// Bytes read from standard input.
    Stdin(Vec<u8>),
}
/// Resolve the input for a CLI command, in decreasing order of precedence:
///
/// 1. `paths_file` — a file listing one path per line;
/// 2. `test_number` — a corpus test selected by its 1-based number;
/// 3. `paths` — explicit paths and/or glob patterns, where a leading `!`
///    removes previously-matched paths;
/// 4. otherwise, bytes read from stdin (cancellable via `cancellation_flag`,
///    which a ctrl-c handler sets to 1).
pub fn get_input(
    paths_file: Option<&Path>,
    paths: Option<Vec<PathBuf>>,
    test_number: Option<u32>,
    cancellation_flag: &Arc<AtomicUsize>,
) -> Result<CliInput> {
    // Highest precedence: a file that lists one path per line.
    if let Some(paths_file) = paths_file {
        return Ok(CliInput::Paths(
            fs::read_to_string(paths_file)
                .with_context(|| format!("Failed to read paths file {}", paths_file.display()))?
                .trim()
                .lines()
                .map(PathBuf::from)
                .collect::<Vec<_>>(),
        ));
    }

    // Next: a single corpus test, looked up by number under `test/corpus`.
    if let Some(test_number) = test_number {
        let current_dir = std::env::current_dir().unwrap();
        let test_dir = current_dir.join("test").join("corpus");
        if !test_dir.exists() {
            return Err(anyhow!(
                "Test corpus directory not found in current directory, see https://tree-sitter.github.io/tree-sitter/creating-parsers/5-writing-tests"
            ));
        }

        let test_entry = parse_tests(&test_dir)?;
        let mut test_num = 0;
        // Test numbers are 1-based on the command line; `max(1) - 1` converts
        // to the 0-based index used during traversal (and tolerates 0).
        let Some((name, contents, languages)) =
            get_test_info(&test_entry, test_number.max(1) - 1, &mut test_num)
        else {
            return Err(anyhow!("Failed to fetch contents of test #{test_number}"));
        };

        return Ok(CliInput::Test {
            name,
            contents,
            languages,
        });
    }

    // Next: explicit paths and glob patterns.
    if let Some(paths) = paths {
        let mut result = Vec::new();

        // A negated path removes an earlier match instead of adding one.
        let mut incorporate_path = |path: PathBuf, positive| {
            if positive {
                result.push(path);
            } else if let Some(index) = result.iter().position(|p| *p == path) {
                result.remove(index);
            }
        };

        for mut path in paths {
            let mut positive = true;
            // NOTE(review): `Path::starts_with` compares whole path
            // components, so this only treats the argument as negated when
            // its first component is exactly "!" — confirm that a bare
            // "!foo" (single component "!foo") is intended to pass through
            // un-negated.
            if path.starts_with("!") {
                positive = false;
                path = path.strip_prefix("!").unwrap().to_path_buf();
            }

            if path.exists() {
                incorporate_path(path, positive);
            } else {
                // Not an existing file: treat the argument as a glob pattern.
                let Some(path_str) = path.to_str() else {
                    bail!("Invalid path: {}", path.display());
                };
                let paths = glob(path_str)
                    .with_context(|| format!("Invalid glob pattern {}", path.display()))?;
                for path in paths {
                    incorporate_path(path?, positive);
                }
            }
        }

        if result.is_empty() {
            return Err(anyhow!(
                "No files were found at or matched by the provided pathname/glob"
            ));
        }

        return Ok(CliInput::Paths(result));
    }

    // Fallback: read stdin on a background thread so the main thread can keep
    // polling the ctrl-c cancellation flag while the read blocks.
    let reader_flag = cancellation_flag.clone();
    let (tx, rx) = mpsc::channel();

    // Spawn a thread to read from stdin, until ctrl-c or EOF is received
    std::thread::spawn(move || {
        let mut input = Vec::new();
        let stdin = std::io::stdin();
        let mut handle = stdin.lock();

        // Read in chunks, so we can check the ctrl-c flag
        loop {
            if reader_flag.load(Ordering::Relaxed) == 1 {
                break;
            }

            let mut buffer = [0; 1024];
            match handle.read(&mut buffer) {
                Ok(0) | Err(_) => break,
                Ok(n) => input.extend_from_slice(&buffer[..n]),
            }
        }

        // Signal to the main thread that we're done
        tx.send(input).ok();
    });

    loop {
        // If we've received a ctrl-c signal, exit
        // (the bare "\n" makes the CLI exit with no extra message text).
        if cancellation_flag.load(Ordering::Relaxed) == 1 {
            bail!("\n");
        }

        // If we're done receiving input from stdin, return it
        if let Ok(input) = rx.try_recv() {
            return Ok(CliInput::Stdin(input));
        }

        std::thread::sleep(std::time::Duration::from_millis(50));
    }
}
#[allow(clippy::type_complexity)]
/// Depth-first search for the `target_test`-th example (0-based) in a test
/// tree, counting visited examples in `test_num`. Returns the example's
/// name, input bytes, and declared languages when found.
pub fn get_test_info(
    test_entry: &TestEntry,
    target_test: u32,
    test_num: &mut u32,
) -> Option<(String, Vec<u8>, Vec<Box<str>>)> {
    match test_entry {
        TestEntry::Example {
            name,
            input,
            attributes,
            ..
        } => {
            if *test_num == target_test {
                // Found the requested example.
                Some((name.clone(), input.clone(), attributes.languages.clone()))
            } else {
                // Not the one we want; count it and keep searching.
                *test_num += 1;
                None
            }
        }
        // Recurse into groups, stopping at the first child that yields a hit.
        TestEntry::Group { children, .. } => children
            .iter()
            .find_map(|child| get_test_info(child, target_test, test_num)),
    }
}
/// Writes `contents` to a temporary file and returns the path to that file.
pub fn get_tmp_source_file(contents: &[u8]) -> Result<PathBuf> {
let parse_path = std::env::temp_dir().join(".tree-sitter-temp");
let mut parse_file = std::fs::File::create(&parse_path)?;
parse_file.write_all(contents)?;
Ok(parse_path)
}

30
crates/cli/src/logger.rs Normal file
View file

@ -0,0 +1,30 @@
use log::{LevelFilter, Log, Metadata, Record};
// A minimal logger that forwards every record to stderr (see the `Log`
// impl below).
#[allow(dead_code)]
struct Logger {
    // Reserved for filtering output by module path; currently never read
    // (hence the `dead_code` allow above).
    pub filter: Option<String>,
}
impl Log for Logger {
    /// Every record is enabled; no level or target filtering is performed.
    fn enabled(&self, _metadata: &Metadata) -> bool {
        true
    }

    /// Write the record to stderr as `[module::path] message`, with the
    /// CLI crate's own prefix stripped from the module path.
    fn log(&self, record: &Record) {
        let module = record
            .module_path()
            .unwrap_or_default()
            .trim_start_matches("rust_tree_sitter_cli::");
        eprintln!("[{}] {}", module, record.args());
    }

    /// stderr is unbuffered here, so there is nothing to flush.
    fn flush(&self) {}
}
/// Install the global logger at `Info` level.
///
/// Panics (via `unwrap`) if a global logger has already been installed,
/// since `set_boxed_logger` returns an error in that case.
pub fn init() {
    log::set_boxed_logger(Box::new(Logger { filter: None })).unwrap();
    log::set_max_level(LevelFilter::Info);
}

1803
crates/cli/src/main.rs Normal file

File diff suppressed because it is too large Load diff

1110
crates/cli/src/parse.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,410 @@
<head>
<meta charset="utf-8">
<title>tree-sitter THE_LANGUAGE_NAME</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/codemirror/6.65.7/codemirror.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.19.0/clusterize.min.css">
<link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-32x32.png"
sizes="32x32" />
<link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-16x16.png"
sizes="16x16" />
</head>
<body>
<div id="playground-container" style="visibility: hidden;">
<header>
<div class="header-item">
<span class="language-name">Language: THE_LANGUAGE_NAME</span>
</div>
<div class="header-item">
<input id="logging-checkbox" type="checkbox">
<label for="logging-checkbox">log</label>
</div>
<div class="header-item">
<input id="anonymous-nodes-checkbox" type="checkbox">
<label for="anonymous-nodes-checkbox">show anonymous nodes</label>
</div>
<div class="header-item">
<input id="query-checkbox" type="checkbox">
<label for="query-checkbox">query</label>
</div>
<div class="header-item">
<input id="accessibility-checkbox" type="checkbox">
<label for="accessibility-checkbox">accessibility</label>
</div>
<div class="header-item">
<label for="update-time">parse time: </label>
<span id="update-time"></span>
</div>
<div class="header-item">
<a href="https://tree-sitter.github.io/tree-sitter/7-playground.html#about">(?)</a>
</div>
<select id="language-select" style="display: none;">
<option value="parser">Parser</option>
</select>
<div class="header-item">
<button id="theme-toggle" class="theme-toggle" aria-label="Toggle theme">
<svg class="sun-icon" viewBox="0 0 24 24" width="16" height="16">
<path fill="currentColor"
d="M12 17.5a5.5 5.5 0 1 0 0-11 5.5 5.5 0 0 0 0 11zm0 1.5a7 7 0 1 1 0-14 7 7 0 0 1 0 14zm0-16a1 1 0 0 1 1 1v2a1 1 0 1 1-2 0V4a1 1 0 0 1 1-1zm0 15a1 1 0 0 1 1 1v2a1 1 0 1 1-2 0v-2a1 1 0 0 1 1-1zm9-9a1 1 0 0 1-1 1h-2a1 1 0 1 1 0-2h2a1 1 0 0 1 1 1zM4 12a1 1 0 0 1-1 1H1a1 1 0 1 1 0-2h2a1 1 0 0 1 1 1z" />
</svg>
<svg class="moon-icon" viewBox="0 0 24 24" width="16" height="16">
<path fill="currentColor"
d="M12.1 22c-5.5 0-10-4.5-10-10s4.5-10 10-10c.2 0 .3 0 .5.1-1.3 1.4-2 3.2-2 5.2 0 4.1 3.4 7.5 7.5 7.5 2 0 3.8-.7 5.2-2 .1.2.1.3.1.5 0 5.4-4.5 9.7-10 9.7z" />
</svg>
</button>
</div>
</header>
<main>
<div id="input-pane">
<div class="panel-header">Code</div>
<div id="code-container">
<textarea id="code-input"></textarea>
</div>
<div id="query-container" style="visibility: hidden; position: absolute;">
<div class="panel-header">Query</div>
<textarea id="query-input"></textarea>
</div>
</div>
<div id="output-container-scroll">
<div class="panel-header">Tree</div>
<pre id="output-container" class="highlight"></pre>
</div>
</main>
</div>
<script src="https://code.jquery.com/jquery-3.3.1.min.js" crossorigin="anonymous">
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/codemirror/6.65.7/codemirror.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.19.0/clusterize.min.js"></script>
<script>LANGUAGE_BASE_URL = "";</script>
<script type="module" src="playground.js"></script>
<script type="module">
import * as TreeSitter from './web-tree-sitter.js';
window.TreeSitter = TreeSitter;
setTimeout(() => window.initializePlayground({local: true}), 1)
</script>
<style>
/* Base Variables */
:root {
--light-bg: #f9f9f9;
--light-border: #e0e0e0;
--light-text: #333;
--light-hover-border: #c1c1c1;
--light-scrollbar-track: #f1f1f1;
--light-scrollbar-thumb: #c1c1c1;
--light-scrollbar-thumb-hover: #a8a8a8;
--dark-bg: #1d1f21;
--dark-border: #2d2d2d;
--dark-text: #c5c8c6;
--dark-panel-bg: #252526;
--dark-code-bg: #1e1e1e;
--dark-scrollbar-track: #25282c;
--dark-scrollbar-thumb: #4a4d51;
--dark-scrollbar-thumb-hover: #5a5d61;
--primary-color: #0550ae;
--primary-color-alpha: rgba(5, 80, 174, 0.1);
--primary-color-alpha-dark: rgba(121, 192, 255, 0.1);
--selection-color: rgba(39, 95, 255, 0.3);
}
/* Theme Colors */
[data-theme="dark"] {
--bg-color: var(--dark-bg);
--border-color: var(--dark-border);
--text-color: var(--dark-text);
--panel-bg: var(--dark-panel-bg);
--code-bg: var(--dark-code-bg);
}
[data-theme="light"] {
--bg-color: var(--light-bg);
--border-color: var(--light-border);
--text-color: var(--light-text);
--panel-bg: white;
--code-bg: white;
}
/* Base Styles */
body {
margin: 0;
padding: 0;
font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
background-color: var(--bg-color);
color: var(--text-color);
}
/* Layout */
#playground-container {
width: 100%;
height: 100vh;
display: flex;
flex-direction: column;
background-color: var(--bg-color);
}
header {
padding: 16px 24px;
border-bottom: 1px solid var(--border-color);
display: flex;
align-items: center;
gap: 20px;
background-color: var(--panel-bg);
font-size: 14px;
}
.header-item {
display: flex;
align-items: center;
gap: 8px;
}
.language-name {
font-weight: 600;
}
main {
flex: 1;
display: flex;
overflow: hidden;
}
#input-pane {
width: 50%;
display: flex;
flex-direction: column;
border-right: 1px solid var(--border-color);
background-color: var(--panel-bg);
overflow: hidden;
}
#code-container {
flex: 1;
min-height: 0;
position: relative;
border-bottom: 1px solid var(--border-color);
display: flex;
flex-direction: column;
}
#query-container:not([style*="visibility: hidden"]) {
flex: 1;
min-height: 0;
display: flex;
flex-direction: column;
}
#query-container .panel-header {
flex: 0 0 auto;
}
#query-container .CodeMirror {
flex: 1;
position: relative;
min-height: 0;
}
#output-container-scroll {
width: 50%;
overflow: auto;
background-color: var(--panel-bg);
padding: 0;
display: flex;
flex-direction: column;
}
#output-container {
font-family: ui-monospace, "SF Mono", Menlo, Consolas, monospace;
line-height: 1.5;
margin: 0;
padding: 16px;
}
.panel-header {
padding: 8px 16px;
font-weight: 600;
font-size: 14px;
border-bottom: 1px solid var(--border-color);
background-color: var(--panel-bg);
}
.CodeMirror {
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
height: 100%;
font-family: ui-monospace, "SF Mono", Menlo, Consolas, monospace;
font-size: 14px;
line-height: 1.6;
background-color: var(--code-bg) !important;
color: var(--text-color) !important;
}
.query-error {
text-decoration: underline red dashed;
-webkit-text-decoration: underline red dashed;
}
/* Scrollbars */
::-webkit-scrollbar {
width: 8px;
height: 8px;
}
::-webkit-scrollbar-track {
border-radius: 4px;
background: var(--light-scrollbar-track);
}
::-webkit-scrollbar-thumb {
border-radius: 4px;
background: var(--light-scrollbar-thumb);
}
::-webkit-scrollbar-thumb:hover {
background: var(--light-scrollbar-thumb-hover);
}
[data-theme="dark"] {
::-webkit-scrollbar-track {
background: var(--dark-scrollbar-track) !important;
}
::-webkit-scrollbar-thumb {
background: var(--dark-scrollbar-thumb) !important;
}
::-webkit-scrollbar-thumb:hover {
background: var(--dark-scrollbar-thumb-hover) !important;
}
}
/* Theme Toggle */
.theme-toggle {
background: none;
border: 1px solid var(--border-color);
border-radius: 4px;
padding: 6px;
cursor: pointer;
color: var(--text-color);
}
.theme-toggle:hover {
background-color: var(--primary-color-alpha);
}
[data-theme="light"] .moon-icon,
[data-theme="dark"] .sun-icon {
display: none;
}
/* Form Elements */
input[type="checkbox"] {
margin-right: 6px;
vertical-align: middle;
}
label {
font-size: 14px;
margin-right: 16px;
cursor: pointer;
}
#output-container a {
cursor: pointer;
text-decoration: none;
color: #040404;
padding: 2px;
}
#output-container a:hover {
text-decoration: underline;
}
#output-container a.node-link.named {
color: #0550ae;
}
#output-container a.node-link.anonymous {
color: #116329;
}
#output-container a.node-link.anonymous:before {
content: '"';
}
#output-container a.node-link.anonymous:after {
content: '"';
}
#output-container a.node-link.error {
color: #cf222e;
}
#output-container a.highlighted {
background-color: #d9d9d9;
color: red;
border-radius: 3px;
text-decoration: underline;
}
/* Dark Theme Node Colors */
[data-theme="dark"] {
& #output-container a {
color: #d4d4d4;
}
& #output-container a.node-link.named {
color: #79c0ff;
}
& #output-container a.node-link.anonymous {
color: #7ee787;
}
& #output-container a.node-link.error {
color: #ff7b72;
}
& #output-container a.highlighted {
background-color: #373b41;
color: red;
}
& .CodeMirror {
background-color: var(--dark-code-bg) !important;
color: var(--dark-text) !important;
}
& .CodeMirror-gutters {
background-color: var(--dark-panel-bg) !important;
border-color: var(--dark-border) !important;
}
& .CodeMirror-cursor {
border-color: var(--dark-text) !important;
}
& .CodeMirror-selected {
background-color: rgba(255, 255, 255, 0.1) !important;
}
}
</style>
</body>

View file

@ -0,0 +1,144 @@
use std::{
borrow::Cow,
env, fs,
net::TcpListener,
path::{Path, PathBuf},
str::{self, FromStr as _},
};
use anyhow::{anyhow, Context, Result};
use tiny_http::{Header, Response, Server};
use super::wasm;
// Defines a function that loads a playground asset. When `tree_sitter_dir`
// is provided, the asset is always read from that checkout at runtime. The
// fallback depends on the build: with the TREE_SITTER_EMBED_WASM_BINDING
// cfg the asset is embedded into the binary at compile time; without it an
// empty slice is returned, which callers treat as "not available" and
// redirect to the hosted copy instead.
macro_rules! optional_resource {
    ($name:tt, $path:tt) => {
        #[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
        fn $name(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> {
            if let Some(tree_sitter_dir) = tree_sitter_dir {
                Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
            } else {
                Cow::Borrowed(include_bytes!(concat!("../../", $path)))
            }
        }

        #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
        fn $name(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> {
            if let Some(tree_sitter_dir) = tree_sitter_dir {
                Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
            } else {
                Cow::Borrowed(&[])
            }
        }
    };
}
optional_resource!(get_playground_js, "docs/src/assets/js/playground.js");
optional_resource!(get_lib_js, "lib/binding_web/web-tree-sitter.js");
optional_resource!(get_lib_wasm, "lib/binding_web/web-tree-sitter.wasm");

/// Load the playground HTML page: read it from the tree-sitter checkout when
/// `tree_sitter_dir` is set, otherwise use the copy embedded at compile
/// time.
///
/// NOTE(review): the checkout-relative path still reads `cli/src/…` — confirm
/// it matches the repository layout after the move to `crates/cli`.
fn get_main_html(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> {
    tree_sitter_dir.map_or(
        Cow::Borrowed(include_bytes!("playground.html")),
        |tree_sitter_dir| {
            Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap())
        },
    )
}
/// Serve the playground for the grammar at `grammar_path` over HTTP,
/// blocking on the request loop for the lifetime of the process.
pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> {
    let server = get_server()?;
    let (grammar_name, language_wasm) = wasm::load_language_wasm_file(grammar_path)?;
    let url = format!("http://{}", server.server_addr());
    println!("Started playground on: {url}");
    if open_in_browser && webbrowser::open(&url).is_err() {
        eprintln!("Failed to open '{url}' in a web browser");
    }

    // When TREE_SITTER_BASE_DIR is set, all assets are read from that
    // checkout at startup instead of using the compiled-in copies.
    let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok();
    // Substitute the grammar name into the page template once, up front.
    let main_html = str::from_utf8(&get_main_html(tree_sitter_dir.as_deref()))
        .unwrap()
        .replace("THE_LANGUAGE_NAME", &grammar_name)
        .into_bytes();
    let playground_js = get_playground_js(tree_sitter_dir.as_deref());
    let lib_js = get_lib_js(tree_sitter_dir.as_deref());
    let lib_wasm = get_lib_wasm(tree_sitter_dir.as_deref());

    let html_header = Header::from_str("Content-Type: text/html").unwrap();
    let js_header = Header::from_str("Content-Type: application/javascript").unwrap();
    let wasm_header = Header::from_str("Content-Type: application/wasm").unwrap();

    for request in server.incoming_requests() {
        let res = match request.url() {
            "/" => response(&main_html, &html_header),
            "/tree-sitter-parser.wasm" => response(&language_wasm, &wasm_header),
            // Empty asset bytes mean the asset wasn't embedded in this build;
            // fall back to redirecting to the hosted copy.
            "/playground.js" => {
                if playground_js.is_empty() {
                    redirect("https://tree-sitter.github.io/tree-sitter/assets/js/playground.js")
                } else {
                    response(&playground_js, &js_header)
                }
            }
            "/web-tree-sitter.js" => {
                if lib_js.is_empty() {
                    redirect("https://tree-sitter.github.io/web-tree-sitter.js")
                } else {
                    response(&lib_js, &js_header)
                }
            }
            "/web-tree-sitter.wasm" => {
                if lib_wasm.is_empty() {
                    redirect("https://tree-sitter.github.io/web-tree-sitter.wasm")
                } else {
                    response(&lib_wasm, &wasm_header)
                }
            }
            _ => response(b"Not found", &html_header).with_status_code(404),
        };
        request
            .respond(res)
            .with_context(|| "Failed to write HTTP response")?;
    }

    Ok(())
}
/// Build an empty HTTP 302 response that redirects the client to `url`.
fn redirect(url: &str) -> Response<&[u8]> {
    let location = Header::from_bytes("Location", url.as_bytes()).unwrap();
    Response::empty(302)
        .with_data("".as_bytes(), Some(0))
        .with_header(location)
}
/// Build an HTTP 200 response carrying `data` with the given content-type
/// header attached.
fn response<'a>(data: &'a [u8], header: &Header) -> Response<&'a [u8]> {
    let length = data.len();
    Response::empty(200)
        .with_header(header.clone())
        .with_data(data, Some(length))
}
/// Create the playground's HTTP server, honoring two environment variables:
/// TREE_SITTER_PLAYGROUND_ADDR (default `127.0.0.1`) and
/// TREE_SITTER_PLAYGROUND_PORT (default: the first free port in 8000..12000).
fn get_server() -> Result<Server> {
    let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or_else(|_| "127.0.0.1".to_owned());
    // `port` is Option<Result<u16>>: an unset env var becomes `None`, while a
    // set-but-unparsable one keeps the inner error so `port?` below can
    // report "Invalid port specification".
    let port = env::var("TREE_SITTER_PLAYGROUND_PORT")
        .map(|v| {
            v.parse::<u16>()
                .with_context(|| "Invalid port specification")
        })
        .ok();
    let listener = match port {
        Some(port) => {
            bind_to(&addr, port?).with_context(|| "Failed to bind to the specified port")?
        }
        None => get_listener_on_available_port(&addr)
            .with_context(|| "Failed to find a free port to bind to it")?,
    };
    let server =
        Server::from_listener(listener, None).map_err(|_| anyhow!("Failed to start web server"))?;
    Ok(server)
}
/// Scan ports 8000..12000 on `addr` and return the first listener that
/// binds successfully, or `None` if every port in the range is taken.
fn get_listener_on_available_port(addr: &str) -> Option<TcpListener> {
    for port in 8000..12000 {
        if let Some(listener) = bind_to(addr, port) {
            return Some(listener);
        }
    }
    None
}
/// Try to bind a TCP listener to `addr:port`, returning `None` on failure
/// (e.g. the port is already in use).
fn bind_to(addr: &str, port: u16) -> Option<TcpListener> {
    // `(&str, u16)` implements `ToSocketAddrs`, so the pair can be passed
    // directly without allocating an intermediate "addr:port" string.
    TcpListener::bind((addr, port)).ok()
}

148
crates/cli/src/query.rs Normal file
View file

@ -0,0 +1,148 @@
use std::{
fs,
io::{self, Write},
ops::Range,
path::Path,
time::Instant,
};
use anstyle::AnsiColor;
use anyhow::{Context, Result};
use streaming_iterator::StreamingIterator;
use tree_sitter::{Language, Parser, Point, Query, QueryCursor};
use crate::{
query_testing::{self, to_utf8_point},
test::paint,
};
/// Run the query in `query_path` against the file at `path`, printing each
/// match (or each capture in source order, with `ordered_captures`) to
/// stdout. With `should_test`, the collected captures are instead checked
/// against the file's inline assertion comments.
#[allow(clippy::too_many_arguments)]
pub fn query_file_at_path(
    language: &Language,
    path: &Path,
    name: &str,
    query_path: &Path,
    ordered_captures: bool,
    byte_range: Option<Range<usize>>,
    point_range: Option<Range<Point>>,
    should_test: bool,
    quiet: bool,
    print_time: bool,
    stdin: bool,
) -> Result<()> {
    // Lock stdout once instead of per write.
    let stdout = io::stdout();
    let mut stdout = stdout.lock();

    let query_source = fs::read_to_string(query_path)
        .with_context(|| format!("Error reading query file {}", query_path.display()))?;
    let query = Query::new(language, &query_source).with_context(|| "Query compilation failed")?;

    // Optionally restrict the query to a byte range or point range.
    let mut query_cursor = QueryCursor::new();
    if let Some(range) = byte_range {
        query_cursor.set_byte_range(range);
    }
    if let Some(range) = point_range {
        query_cursor.set_point_range(range);
    }

    let mut parser = Parser::new();
    parser.set_language(language)?;

    // Captures collected for assertion checking below.
    let mut results = Vec::new();

    if !should_test && !stdin {
        writeln!(&mut stdout, "{name}")?;
    }

    let source_code =
        fs::read(path).with_context(|| format!("Error reading source file {}", path.display()))?;
    let tree = parser.parse(&source_code, None).unwrap();

    let start = Instant::now();
    if ordered_captures {
        // Stream captures in source order.
        let mut captures = query_cursor.captures(&query, tree.root_node(), source_code.as_slice());
        while let Some((mat, capture_index)) = captures.next() {
            let capture = mat.captures[*capture_index];
            let capture_name = &query.capture_names()[capture.index as usize];
            if !quiet && !should_test {
                writeln!(
                    &mut stdout,
                    " pattern: {:>2}, capture: {} - {capture_name}, start: {}, end: {}, text: `{}`",
                    mat.pattern_index,
                    capture.index,
                    capture.node.start_position(),
                    capture.node.end_position(),
                    capture.node.utf8_text(&source_code).unwrap_or("")
                )?;
            }
            results.push(query_testing::CaptureInfo {
                name: (*capture_name).to_string(),
                start: to_utf8_point(capture.node.start_position(), source_code.as_slice()),
                end: to_utf8_point(capture.node.end_position(), source_code.as_slice()),
            });
        }
    } else {
        // Stream whole matches, then print each match's captures.
        let mut matches = query_cursor.matches(&query, tree.root_node(), source_code.as_slice());
        while let Some(m) = matches.next() {
            if !quiet && !should_test {
                writeln!(&mut stdout, " pattern: {}", m.pattern_index)?;
            }
            for capture in m.captures {
                let start = capture.node.start_position();
                let end = capture.node.end_position();
                let capture_name = &query.capture_names()[capture.index as usize];
                if !quiet && !should_test {
                    // Single-line captures include the matched text; multi-line
                    // captures print only their span.
                    if end.row == start.row {
                        writeln!(
                            &mut stdout,
                            " capture: {} - {capture_name}, start: {start}, end: {end}, text: `{}`",
                            capture.index,
                            capture.node.utf8_text(&source_code).unwrap_or("")
                        )?;
                    } else {
                        writeln!(
                            &mut stdout,
                            " capture: {capture_name}, start: {start}, end: {end}",
                        )?;
                    }
                }
                results.push(query_testing::CaptureInfo {
                    name: (*capture_name).to_string(),
                    start: to_utf8_point(capture.node.start_position(), source_code.as_slice()),
                    end: to_utf8_point(capture.node.end_position(), source_code.as_slice()),
                });
            }
        }
    }
    if query_cursor.did_exceed_match_limit() {
        writeln!(
            &mut stdout,
            " WARNING: Query exceeded maximum number of in-progress captures!"
        )?;
    }
    if should_test {
        let path_name = if stdin {
            "stdin"
        } else {
            // NOTE(review): `path` is already a `&Path`, so `Path::new(&path)`
            // is a redundant (but harmless) re-wrap.
            Path::new(&path).file_name().unwrap().to_str().unwrap()
        };
        match query_testing::assert_expected_captures(&results, path, &mut parser, language) {
            Ok(assertion_count) => {
                println!(
                    " ✓ {} ({} assertions)",
                    paint(Some(AnsiColor::Green), path_name),
                    assertion_count
                );
            }
            Err(e) => {
                println!("{}", paint(Some(AnsiColor::Red), path_name));
                return Err(e);
            }
        }
    }
    if print_time {
        writeln!(&mut stdout, "{:?}", start.elapsed())?;
    }
    Ok(())
}

View file

@ -0,0 +1,254 @@
use std::{fs, path::Path, sync::LazyLock};
use anyhow::{anyhow, Result};
use bstr::{BStr, ByteSlice};
use regex::Regex;
use tree_sitter::{Language, Parser, Point};
// Matches a capture name in an assertion comment: word characters plus
// `_`, `-`, and `.` (e.g. `function.method`).
static CAPTURE_NAME_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new("[\\w_\\-.]+").unwrap());
/// A (row, column) position whose column is measured in graphemes (see
/// `to_utf8_point`) rather than bytes, for comparison against editor-style
/// positions in assertion comments.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Utf8Point {
    pub row: usize,
    pub column: usize,
}

impl std::fmt::Display for Utf8Point {
    // Render as `(row, column)` for error messages.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "({}, {})", self.row, self.column)
    }
}

impl Utf8Point {
    #[must_use]
    pub const fn new(row: usize, column: usize) -> Self {
        Self { row, column }
    }
}
/// Convert a byte-column `Point` into a `Utf8Point` whose column counts
/// graphemes on that line.
///
/// NOTE(review): panics (`.nth(...).unwrap()`) if `point.row` is beyond the
/// last line of `source`; callers appear to pass points produced by parsing
/// `source` itself, which keeps the row in range — confirm.
#[must_use]
pub fn to_utf8_point(point: Point, source: &[u8]) -> Utf8Point {
    if point.column == 0 {
        return Utf8Point::new(point.row, 0);
    }

    let bstr = BStr::new(source);
    let line = bstr.lines_with_terminator().nth(point.row).unwrap();
    let mut utf8_column = 0;

    // Count graphemes up to and including the one that contains the byte
    // offset `point.column`.
    for (_, grapheme_end, _) in line.grapheme_indices() {
        utf8_column += 1;
        if grapheme_end >= point.column {
            break;
        }
    }

    Utf8Point {
        row: point.row,
        column: utf8_column,
    }
}
/// A capture produced by running a query: its name and grapheme-based span.
#[derive(Debug, Eq, PartialEq)]
pub struct CaptureInfo {
    pub name: String,
    pub start: Utf8Point,
    pub end: Utf8Point,
}

/// One assertion parsed from a positional comment (`^` / `<-` arrows
/// followed by an expected capture name).
#[derive(Debug, PartialEq, Eq)]
pub struct Assertion {
    /// The position in the source that the assertion points at.
    pub position: Utf8Point,
    /// The number of `^` arrows, i.e. how many columns the assertion spans.
    pub length: usize,
    /// True when the assertion is marked with `!` after the arrows.
    pub negative: bool,
    pub expected_capture_name: String,
}
impl Assertion {
    /// Construct an assertion at (`row`, `col`) spanning `length` columns.
    #[must_use]
    pub const fn new(
        row: usize,
        col: usize,
        length: usize,
        negative: bool,
        expected_capture_name: String,
    ) -> Self {
        Self {
            position: Utf8Point::new(row, col),
            length,
            negative,
            expected_capture_name,
        }
    }
}
/// Parse the given source code, finding all of the comments that contain
/// highlighting assertions. Return a vector of (position, expected highlight name)
/// pairs.
///
/// # Errors
///
/// Fails when an assertion can't be mapped to any line of code above it.
pub fn parse_position_comments(
    parser: &mut Parser,
    language: &Language,
    source: &[u8],
) -> Result<Vec<Assertion>> {
    let mut result = Vec::new();
    // Spans of comment nodes that contained assertions, used for the row
    // adjustment pass below.
    let mut assertion_ranges = Vec::new();

    // Parse the code.
    parser.set_included_ranges(&[]).unwrap();
    parser.set_language(language).unwrap();
    let tree = parser.parse(source, None).unwrap();

    // Walk the tree, finding comment nodes that contain assertions.
    let mut ascending = false;
    let mut cursor = tree.root_node().walk();
    loop {
        if ascending {
            let node = cursor.node();

            // Find every comment node (any node kind containing "comment").
            if node.kind().to_lowercase().contains("comment") {
                if let Ok(text) = node.utf8_text(source) {
                    let mut position = node.start_position();
                    // Assertions on the first row have nothing above them to
                    // refer to, so they are skipped.
                    if position.row > 0 {
                        // Find the arrow character ("^" or "<-") in the comment. A left arrow
                        // refers to the column where the comment node starts. An up arrow refers
                        // to its own column.
                        let mut has_left_caret = false;
                        let mut has_arrow = false;
                        let mut negative = false;
                        let mut arrow_end = 0;
                        let mut arrow_count = 1;
                        for (i, c) in text.char_indices() {
                            arrow_end = i + 1;
                            if c == '-' && has_left_caret {
                                has_arrow = true;
                                break;
                            }
                            if c == '^' {
                                has_arrow = true;
                                position.column += i;
                                // Continue counting remaining arrows and update their end column
                                for (_, c) in text[arrow_end..].char_indices() {
                                    if c != '^' {
                                        arrow_end += arrow_count - 1;
                                        break;
                                    }
                                    arrow_count += 1;
                                }
                                break;
                            }
                            has_left_caret = c == '<';
                        }

                        // find any ! after arrows but before capture name
                        if has_arrow {
                            for (i, c) in text[arrow_end..].char_indices() {
                                if c == '!' {
                                    negative = true;
                                    arrow_end += i + 1;
                                    break;
                                } else if !c.is_whitespace() {
                                    break;
                                }
                            }
                        }

                        // If the comment node contains an arrow and a highlight name, record the
                        // highlight name and the position.
                        if let (true, Some(mat)) =
                            (has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..]))
                        {
                            assertion_ranges.push((node.start_position(), node.end_position()));
                            result.push(Assertion {
                                position: to_utf8_point(position, source),
                                length: arrow_count,
                                negative,
                                expected_capture_name: mat.as_str().to_string(),
                            });
                        }
                    }
                }
            }

            // Continue walking the tree.
            if cursor.goto_next_sibling() {
                ascending = false;
            } else if !cursor.goto_parent() {
                break;
            }
        } else if !cursor.goto_first_child() {
            ascending = true;
        }
    }

    // Adjust the row number in each assertion's position to refer to the line of
    // code *above* the assertion. There can be multiple lines of assertion comments and empty
    // lines, so the positions may have to be decremented by more than one row.
    let mut i = 0;
    let lines = source.lines_with_terminator().collect::<Vec<_>>();
    for assertion in &mut result {
        let original_position = assertion.position;
        loop {
            let on_assertion_line = assertion_ranges[i..]
                .iter()
                .any(|(start, _)| start.row == assertion.position.row);
            let on_empty_line = lines[assertion.position.row].len() <= assertion.position.column;
            if on_assertion_line || on_empty_line {
                if assertion.position.row > 0 {
                    assertion.position.row -= 1;
                } else {
                    return Err(anyhow!(
                        "Error: could not find a line that corresponds to the assertion `{}` located at {original_position}",
                        assertion.expected_capture_name
                    ));
                }
            } else {
                // Skip past assertion ranges that lie entirely above this
                // row; `i` only ever moves forward because assertions are
                // visited in tree (source) order.
                while i < assertion_ranges.len()
                    && assertion_ranges[i].0.row < assertion.position.row
                {
                    i += 1;
                }
                break;
            }
        }
    }

    // The assertions can end up out of order due to the line adjustments.
    result.sort_unstable_by_key(|a| a.position);
    Ok(result)
}
/// Parse the assertion comments in the file at `path` and check each one
/// against the actual captures in `infos`. Returns the number of parsed
/// assertions on success.
pub fn assert_expected_captures(
    infos: &[CaptureInfo],
    path: &Path,
    parser: &mut Parser,
    language: &Language,
) -> Result<usize> {
    let contents = fs::read_to_string(path)?;
    let pairs = parse_position_comments(parser, language, contents.as_bytes())?;

    for assertion in &pairs {
        // Find a capture whose span contains the asserted position, taking
        // the assertion's arrow length into account for the end column.
        if let Some(found) = &infos.iter().find(|p| {
            assertion.position >= p.start
                && (assertion.position.row < p.end.row
                    || assertion.position.column + assertion.length - 1 < p.end.column)
        }) {
            // NOTE(review): captures literally named "name" are exempted
            // from the comparison here — confirm that special case is
            // intentional.
            if assertion.expected_capture_name != found.name && found.name != "name" {
                return Err(anyhow!(
                    "Assertion failed: at {}, found {}, expected {}",
                    found.start,
                    found.name,
                    assertion.expected_capture_name,
                ));
            }
        } else {
            return Err(anyhow!(
                "Assertion failed: could not match {} at row {}, column {}",
                assertion.expected_capture_name,
                assertion.position.row,
                assertion.position.column + assertion.length - 1,
            ));
        }
    }
    Ok(pairs.len())
}

78
crates/cli/src/tags.rs Normal file
View file

@ -0,0 +1,78 @@
use std::{
fs,
io::{self, Write},
path::Path,
str,
sync::{atomic::AtomicUsize, Arc},
time::Instant,
};
use anyhow::Result;
use tree_sitter_tags::{TagsConfiguration, TagsContext};
/// Options controlling tag generation and output.
pub struct TagsOptions {
    // Not read by `generate_tags` in this file; presumably consumed by the
    // caller when resolving the tags configuration — confirm.
    pub scope: Option<String>,
    // Suppress per-tag output lines.
    pub quiet: bool,
    // Print elapsed time after generating tags.
    pub print_time: bool,
    // Set to 1 (e.g. by a ctrl-c handler) to cancel tag generation.
    pub cancellation_flag: Arc<AtomicUsize>,
}
/// Generate and print tags for the file at `path` using `config`.
///
/// With `indent`, the file's `name` is printed first and every tag line is
/// prefixed with a tab (used when multiple files are processed).
pub fn generate_tags(
    path: &Path,
    name: &str,
    config: &TagsConfiguration,
    indent: bool,
    opts: &TagsOptions,
) -> Result<()> {
    let mut context = TagsContext::new();
    // Lock stdout once instead of per write.
    let stdout = io::stdout();
    let mut stdout = stdout.lock();

    let indent_str = if indent {
        if !opts.quiet {
            writeln!(&mut stdout, "{name}")?;
        }
        "\t"
    } else {
        ""
    };

    let source = fs::read(path)?;
    let start = Instant::now();
    for tag in context
        .generate_tags(config, &source, Some(&opts.cancellation_flag))?
        .0
    {
        let tag = tag?;
        if !opts.quiet {
            // Columns: tag name | syntax type | def/ref | span | source line.
            write!(
                &mut stdout,
                "{indent_str}{:<10}\t | {:<8}\t{} {} - {} `{}`",
                str::from_utf8(&source[tag.name_range]).unwrap_or(""),
                &config.syntax_type_name(tag.syntax_type_id),
                if tag.is_definition { "def" } else { "ref" },
                tag.span.start,
                tag.span.end,
                str::from_utf8(&source[tag.line_range]).unwrap_or(""),
            )?;
            // Append docs, truncated to their first 120 bytes.
            if let Some(docs) = tag.docs {
                if docs.len() > 120 {
                    write!(&mut stdout, "\t{:?}...", docs.get(0..120).unwrap_or(""))?;
                } else {
                    write!(&mut stdout, "\t{:?}", &docs)?;
                }
            }
            writeln!(&mut stdout)?;
        }
    }

    if opts.print_time {
        writeln!(
            &mut stdout,
            "{indent_str}time: {}ms",
            start.elapsed().as_millis(),
        )?;
    }

    Ok(())
}

View file

@ -0,0 +1,46 @@
root = true
[*]
charset = utf-8
[*.{json,toml,yml,gyp}]
indent_style = space
indent_size = 2
[*.js]
indent_style = space
indent_size = 2
[*.scm]
indent_style = space
indent_size = 2
[*.{c,cc,h}]
indent_style = space
indent_size = 4
[*.rs]
indent_style = space
indent_size = 4
[*.{py,pyi}]
indent_style = space
indent_size = 4
[*.swift]
indent_style = space
indent_size = 4
[*.go]
indent_style = tab
indent_size = 8
[Makefile]
indent_style = tab
indent_size = 8
[parser.c]
indent_size = 2
[{alloc,array,parser}.h]
indent_size = 2

View file

@ -0,0 +1,16 @@
/* Public C header template for a generated grammar. The UPPER_PARSER_NAME
 * and PARSER_NAME placeholders are substituted when the bindings are
 * generated. */
#ifndef TREE_SITTER_UPPER_PARSER_NAME_H_
#define TREE_SITTER_UPPER_PARSER_NAME_H_

/* Opaque language type from the tree-sitter runtime. */
typedef struct TSLanguage TSLanguage;

#ifdef __cplusplus
extern "C" {
#endif

/* Returns the language object for this grammar. */
const TSLanguage *tree_sitter_PARSER_NAME(void);

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_UPPER_PARSER_NAME_H_

View file

@ -0,0 +1,10 @@
prefix=@CMAKE_INSTALL_PREFIX@
libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
Name: tree-sitter-PARSER_NAME
Description: @PROJECT_DESCRIPTION@
URL: @PROJECT_HOMEPAGE_URL@
Version: @PROJECT_VERSION@
Libs: -L${libdir} -ltree-sitter-PARSER_NAME
Cflags: -I${includedir}

View file

@ -0,0 +1,42 @@
"""PARSER_DESCRIPTION"""
from importlib.resources import files as _files
from ._binding import language
def _get_query(name, file):
query = _files(f"{__package__}.queries") / file
globals()[name] = query.read_text()
return globals()[name]
def __getattr__(name):
# NOTE: uncomment these to include any queries that this grammar contains:
# if name == "HIGHLIGHTS_QUERY":
# return _get_query("HIGHLIGHTS_QUERY", "highlights.scm")
# if name == "INJECTIONS_QUERY":
# return _get_query("INJECTIONS_QUERY", "injections.scm")
# if name == "LOCALS_QUERY":
# return _get_query("LOCALS_QUERY", "locals.scm")
# if name == "TAGS_QUERY":
# return _get_query("TAGS_QUERY", "tags.scm")
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
__all__ = [
"language",
# "HIGHLIGHTS_QUERY",
# "INJECTIONS_QUERY",
# "LOCALS_QUERY",
# "TAGS_QUERY",
]
def __dir__():
return sorted(__all__ + [
"__all__", "__builtins__", "__cached__", "__doc__", "__file__",
"__loader__", "__name__", "__package__", "__path__", "__spec__",
])

View file

@ -0,0 +1,10 @@
from typing import Final
# NOTE: uncomment these to include any queries that this grammar contains:
# HIGHLIGHTS_QUERY: Final[str]
# INJECTIONS_QUERY: Final[str]
# LOCALS_QUERY: Final[str]
# TAGS_QUERY: Final[str]
def language() -> object: ...

View file

@ -0,0 +1,34 @@
[package]
name = "tree-sitter-PARSER_NAME"
description = "PARSER_DESCRIPTION"
version = "PARSER_VERSION"
authors = ["PARSER_AUTHOR_NAME PARSER_AUTHOR_EMAIL"]
license = "PARSER_LICENSE"
readme = "README.md"
keywords = ["incremental", "parsing", "tree-sitter", "PARSER_NAME"]
categories = ["parser-implementations", "parsing", "text-editors"]
repository = "PARSER_URL"
edition = "2021"
autoexamples = false
build = "bindings/rust/build.rs"
include = [
"bindings/rust/*",
"grammar.js",
"queries/*",
"src/*",
"tree-sitter.json",
"LICENSE",
]
[lib]
path = "bindings/rust/lib.rs"
[dependencies]
tree-sitter-language = "0.1"
[build-dependencies]
cc = "1.2"
[dev-dependencies]
tree-sitter = "RUST_BINDING_VERSION"

View file

@ -0,0 +1,15 @@
package tree_sitter_LOWER_PARSER_NAME
// #cgo CFLAGS: -std=c11 -fPIC
// #include "../../src/parser.c"
// #if __has_include("../../src/scanner.c")
// #include "../../src/scanner.c"
// #endif
import "C"
import "unsafe"
// Get the tree-sitter Language for this grammar.
func Language() unsafe.Pointer {
return unsafe.Pointer(C.tree_sitter_LOWER_PARSER_NAME())
}

View file

@ -0,0 +1,35 @@
{
"targets": [
{
"target_name": "tree_sitter_PARSER_NAME_binding",
"dependencies": [
"<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
],
"include_dirs": [
"src",
],
"sources": [
"bindings/node/binding.cc",
"src/parser.c",
],
"variables": {
"has_scanner": "<!(node -p \"fs.existsSync('src/scanner.c')\")"
},
"conditions": [
["has_scanner=='true'", {
"sources+": ["src/scanner.c"],
}],
["OS!='win'", {
"cflags_c": [
"-std=c11",
],
}, { # OS == "win"
"cflags_c": [
"/std:c11",
"/utf-8",
],
}],
],
}
]
}

View file

@ -0,0 +1,15 @@
package tree_sitter_LOWER_PARSER_NAME_test
import (
"testing"
tree_sitter "github.com/tree-sitter/go-tree-sitter"
tree_sitter_LOWER_PARSER_NAME "PARSER_URL_STRIPPED/bindings/go"
)
func TestCanLoadGrammar(t *testing.T) {
language := tree_sitter.NewLanguage(tree_sitter_LOWER_PARSER_NAME.Language())
if language == nil {
t.Errorf("Error loading TITLE_PARSER_NAME grammar")
}
}

View file

@ -0,0 +1,9 @@
const assert = require("node:assert");
const { test } = require("node:test");
const Parser = require("tree-sitter");
test("can load grammar", () => {
const parser = new Parser();
assert.doesNotThrow(() => parser.setLanguage(require(".")));
});

View file

@ -0,0 +1,21 @@
fn main() {
    // The generated parser (and optional hand-written scanner) live in `src`.
    let src_dir = std::path::Path::new("src");

    let mut c_config = cc::Build::new();
    c_config.std("c11").include(src_dir);

    // MSVC needs an explicit flag to treat the sources as UTF-8.
    #[cfg(target_env = "msvc")]
    c_config.flag("-utf-8");

    let parser_path = src_dir.join("parser.c");
    c_config.file(&parser_path);
    println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());

    // Compile the external scanner only if the grammar provides one.
    let scanner_path = src_dir.join("scanner.c");
    if scanner_path.exists() {
        c_config.file(&scanner_path);
        println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
    }

    c_config.compile("tree-sitter-KEBAB_PARSER_NAME");
}

View file

@ -0,0 +1,79 @@
const std = @import("std");
pub fn build(b: *std.Build) !void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
const shared = b.option(bool, "build-shared", "Build a shared library") orelse true;
const reuse_alloc = b.option(bool, "reuse-allocator", "Reuse the library allocator") orelse false;
const lib: *std.Build.Step.Compile = if (shared) b.addSharedLibrary(.{
.name = "tree-sitter-PARSER_NAME",
.pic = true,
.target = target,
.optimize = optimize,
.link_libc = true,
}) else b.addStaticLibrary(.{
.name = "tree-sitter-PARSER_NAME",
.target = target,
.optimize = optimize,
.link_libc = true,
});
lib.addCSourceFile(.{
.file = b.path("src/parser.c"),
.flags = &.{"-std=c11"},
});
if (hasScanner(b.build_root.handle)) {
lib.addCSourceFile(.{
.file = b.path("src/scanner.c"),
.flags = &.{"-std=c11"},
});
}
if (reuse_alloc) {
lib.root_module.addCMacro("TREE_SITTER_REUSE_ALLOCATOR", "");
}
if (optimize == .Debug) {
lib.root_module.addCMacro("TREE_SITTER_DEBUG", "");
}
lib.addIncludePath(b.path("src"));
b.installArtifact(lib);
b.installFile("src/node-types.json", "node-types.json");
b.installDirectory(.{ .source_dir = b.path("queries"), .install_dir = .prefix, .install_subdir = "queries", .include_extensions = &.{"scm"} });
const module = b.addModule("tree-sitter-PARSER_NAME", .{
.root_source_file = b.path("bindings/zig/root.zig"),
.target = target,
.optimize = optimize,
});
module.linkLibrary(lib);
const ts_dep = b.dependency("tree-sitter", .{});
const ts_mod = ts_dep.module("tree-sitter");
module.addImport("tree-sitter", ts_mod);
//
// Tests
//
const tests = b.addTest(.{
.root_source_file = b.path("bindings/zig/root.zig"),
.target = target,
.optimize = optimize,
});
tests.linkLibrary(lib);
tests.root_module.addImport("tree-sitter", ts_mod);
const run_tests = b.addRunArtifact(tests);
const test_step = b.step("test", "Run unit tests");
test_step.dependOn(&run_tests.step);
}
inline fn hasScanner(dir: std.fs.Dir) bool {
dir.access("src/scanner.c", .{}) catch return false;
return true;
}

View file

@ -0,0 +1,17 @@
.{
.name = "tree-sitter-PARSER_NAME",
.version = "PARSER_VERSION",
.dependencies = .{ .@"tree-sitter" = .{
.url = "https://github.com/tree-sitter/zig-tree-sitter/archive/refs/tags/v0.25.0.tar.gz",
.hash = "12201a8d5e840678bbbf5128e605519c4024af422295d68e2ba2090e675328e5811d",
} },
.paths = .{
"build.zig",
"build.zig.zon",
"bindings/zig",
"src",
"queries",
"LICENSE",
"README.md",
},
}

View file

@ -0,0 +1,66 @@
cmake_minimum_required(VERSION 3.13)
project(tree-sitter-KEBAB_PARSER_NAME
VERSION "PARSER_VERSION"
DESCRIPTION "PARSER_DESCRIPTION"
HOMEPAGE_URL "PARSER_URL"
LANGUAGES C)
option(BUILD_SHARED_LIBS "Build using shared libraries" ON)
option(TREE_SITTER_REUSE_ALLOCATOR "Reuse the library allocator" OFF)
set(TREE_SITTER_ABI_VERSION ABI_VERSION_MAX CACHE STRING "Tree-sitter ABI version")
if(NOT ${TREE_SITTER_ABI_VERSION} MATCHES "^[0-9]+$")
unset(TREE_SITTER_ABI_VERSION CACHE)
message(FATAL_ERROR "TREE_SITTER_ABI_VERSION must be an integer")
endif()
include(GNUInstallDirs)
find_program(TREE_SITTER_CLI tree-sitter DOC "Tree-sitter CLI")
add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/src/parser.c"
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/grammar.json"
COMMAND "${TREE_SITTER_CLI}" generate src/grammar.json
--abi=${TREE_SITTER_ABI_VERSION}
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
COMMENT "Generating parser.c")
add_library(tree-sitter-KEBAB_PARSER_NAME src/parser.c)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/src/scanner.c)
target_sources(tree-sitter-KEBAB_PARSER_NAME PRIVATE src/scanner.c)
endif()
target_include_directories(tree-sitter-KEBAB_PARSER_NAME
PRIVATE src
INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/bindings/c>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
target_compile_definitions(tree-sitter-KEBAB_PARSER_NAME PRIVATE
$<$<BOOL:${TREE_SITTER_REUSE_ALLOCATOR}>:TREE_SITTER_REUSE_ALLOCATOR>
$<$<CONFIG:Debug>:TREE_SITTER_DEBUG>)
set_target_properties(tree-sitter-KEBAB_PARSER_NAME
PROPERTIES
C_STANDARD 11
POSITION_INDEPENDENT_CODE ON
SOVERSION "${TREE_SITTER_ABI_VERSION}.${PROJECT_VERSION_MAJOR}"
DEFINE_SYMBOL "")
configure_file(bindings/c/tree-sitter-KEBAB_PARSER_NAME.pc.in
"${CMAKE_CURRENT_BINARY_DIR}/tree-sitter-KEBAB_PARSER_NAME.pc" @ONLY)
install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bindings/c/tree_sitter"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
FILES_MATCHING PATTERN "*.h")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter-KEBAB_PARSER_NAME.pc"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
install(TARGETS tree-sitter-KEBAB_PARSER_NAME
LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}")
file(GLOB QUERIES queries/*.scm)
install(FILES ${QUERIES}
DESTINATION "${CMAKE_INSTALL_DATADIR}/tree-sitter/queries/KEBAB_PARSER_NAME")
add_custom_target(ts-test "${TREE_SITTER_CLI}" test
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
COMMENT "tree-sitter test")

View file

@ -0,0 +1,41 @@
* text=auto eol=lf
# Generated source files
src/*.json linguist-generated
src/parser.c linguist-generated
src/tree_sitter/* linguist-generated
# C bindings
bindings/c/** linguist-generated
CMakeLists.txt linguist-generated
Makefile linguist-generated
# Rust bindings
bindings/rust/* linguist-generated
Cargo.toml linguist-generated
Cargo.lock linguist-generated
# Node.js bindings
bindings/node/* linguist-generated
binding.gyp linguist-generated
package.json linguist-generated
package-lock.json linguist-generated
# Python bindings
bindings/python/** linguist-generated
setup.py linguist-generated
pyproject.toml linguist-generated
# Go bindings
bindings/go/* linguist-generated
go.mod linguist-generated
go.sum linguist-generated
# Swift bindings
bindings/swift/** linguist-generated
Package.swift linguist-generated
Package.resolved linguist-generated
# Zig bindings
build.zig linguist-generated
build.zig.zon linguist-generated

View file

@ -0,0 +1,50 @@
# Rust artifacts
target/
Cargo.lock
# Node artifacts
build/
prebuilds/
node_modules/
package-lock.json
# Swift artifacts
.build/
Package.resolved
# Go artifacts
_obj/
# Python artifacts
.venv/
dist/
*.egg-info
*.whl
# C artifacts
*.a
*.so
*.so.*
*.dylib
*.dll
*.pc
*.exp
*.lib
# Zig artifacts
.zig-cache/
zig-cache/
zig-out/
# Example dirs
/examples/*/
# Grammar volatiles
*.wasm
*.obj
*.o
# Archives
*.tar.gz
*.tgz
*.zip

View file

@ -0,0 +1,5 @@
module PARSER_URL_STRIPPED
go 1.22
require github.com/tree-sitter/go-tree-sitter v0.24.0

View file

@ -0,0 +1,17 @@
/**
* @file PARSER_DESCRIPTION
* @author PARSER_AUTHOR_NAME PARSER_AUTHOR_EMAIL
* @license PARSER_LICENSE
*/
/// <reference types="tree-sitter-cli/dsl" />
// @ts-check
module.exports = grammar({
name: "LOWER_PARSER_NAME",
rules: {
// TODO: add the actual grammar rules
source_file: $ => "hello"
}
});

27
crates/cli/src/templates/index.d.ts vendored Normal file
View file

@ -0,0 +1,27 @@
type BaseNode = {
type: string;
named: boolean;
};
type ChildNode = {
multiple: boolean;
required: boolean;
types: BaseNode[];
};
type NodeInfo =
| (BaseNode & {
subtypes: BaseNode[];
})
| (BaseNode & {
fields: { [name: string]: ChildNode };
children: ChildNode[];
});
type Language = {
language: unknown;
nodeTypeInfo: NodeInfo[];
};
declare const language: Language;
export = language;

View file

@ -0,0 +1,11 @@
const root = require("path").join(__dirname, "..", "..");
module.exports =
typeof process.versions.bun === "string"
// Support `bun build --compile` by being statically analyzable enough to find the .node file at build-time
? require(`../../prebuilds/${process.platform}-${process.arch}/tree-sitter-KEBAB_PARSER_NAME.node`)
: require("node-gyp-build")(root);
try {
module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}

View file

@ -0,0 +1,19 @@
#include <napi.h>
typedef struct TSLanguage TSLanguage;
extern "C" TSLanguage *tree_sitter_PARSER_NAME();
// "tree-sitter", "language" hashed with BLAKE2
const napi_type_tag LANGUAGE_TYPE_TAG = {
0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
};
Napi::Object Init(Napi::Env env, Napi::Object exports) {
auto language = Napi::External<TSLanguage>::New(env, tree_sitter_PARSER_NAME());
language.TypeTag(&LANGUAGE_TYPE_TAG);
exports["language"] = language;
return exports;
}
NODE_API_MODULE(tree_sitter_PARSER_NAME_binding, Init)

View file

@ -0,0 +1,51 @@
//! This crate provides CAMEL_PARSER_NAME language support for the [tree-sitter] parsing library.
//!
//! Typically, you will use the [`LANGUAGE`] constant to add this language to a
//! tree-sitter [`Parser`], and then use the parser to parse some code:
//!
//! ```
//! let code = r#"
//! "#;
//! let mut parser = tree_sitter::Parser::new();
//! let language = tree_sitter_PARSER_NAME::LANGUAGE;
//! parser
//! .set_language(&language.into())
//! .expect("Error loading TITLE_PARSER_NAME parser");
//! let tree = parser.parse(code, None).unwrap();
//! assert!(!tree.root_node().has_error());
//! ```
//!
//! [`Parser`]: https://docs.rs/tree-sitter/RUST_BINDING_VERSION/tree_sitter/struct.Parser.html
//! [tree-sitter]: https://tree-sitter.github.io/
use tree_sitter_language::LanguageFn;
extern "C" {
fn tree_sitter_PARSER_NAME() -> *const ();
}
/// The tree-sitter [`LanguageFn`] for this grammar.
pub const LANGUAGE: LanguageFn = unsafe { LanguageFn::from_raw(tree_sitter_PARSER_NAME) };
/// The content of the [`node-types.json`] file for this grammar.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
// NOTE: uncomment these to include any queries that this grammar contains:
// pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");
// pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm");
// pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm");
// pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");
#[cfg(test)]
mod tests {
#[test]
fn test_can_load_grammar() {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&super::LANGUAGE.into())
.expect("Error loading TITLE_PARSER_NAME parser");
}
}

View file

@ -0,0 +1,99 @@
LANGUAGE_NAME := tree-sitter-KEBAB_PARSER_NAME
HOMEPAGE_URL := PARSER_URL
VERSION := PARSER_VERSION
# repository
SRC_DIR := src
TS ?= tree-sitter
# install directory layout
PREFIX ?= /usr/local
DATADIR ?= $(PREFIX)/share
INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
PCLIBDIR ?= $(LIBDIR)/pkgconfig
# source/object files
PARSER := $(SRC_DIR)/parser.c
EXTRAS := $(filter-out $(PARSER),$(wildcard $(SRC_DIR)/*.c))
OBJS := $(patsubst %.c,%.o,$(PARSER) $(EXTRAS))
# flags
ARFLAGS ?= rcs
override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC
# ABI versioning
SONAME_MAJOR = $(shell sed -n 's/\#define LANGUAGE_VERSION //p' $(PARSER))
SONAME_MINOR = $(word 1,$(subst ., ,$(VERSION)))
# OS-specific bits
ifeq ($(shell uname),Darwin)
SOEXT = dylib
SOEXTVER_MAJOR = $(SONAME_MAJOR).$(SOEXT)
SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).$(SOEXT)
LINKSHARED = -dynamiclib -Wl,-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SOEXTVER),-rpath,@executable_path/../Frameworks
else ifneq ($(findstring mingw32,$(shell $(CC) -dumpmachine)),)
SOEXT = dll
LINKSHARED += -s -shared -Wl,--out-implib,$(@:dll=lib)
lib$(LANGUAGE_NAME).lib: lib$(LANGUAGE_NAME).$(SOEXT)
else
SOEXT = so
SOEXTVER_MAJOR = $(SOEXT).$(SONAME_MAJOR)
SOEXTVER = $(SOEXT).$(SONAME_MAJOR).$(SONAME_MINOR)
LINKSHARED = -shared -Wl,-soname,lib$(LANGUAGE_NAME).$(SOEXTVER)
endif
ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),)
PCLIBDIR := $(PREFIX)/libdata/pkgconfig
endif
all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc
lib$(LANGUAGE_NAME).a: $(OBJS)
$(AR) $(ARFLAGS) $@ $^
lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS)
$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
ifneq ($(STRIP),)
$(STRIP) $@
endif
$(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in
sed -e 's|@PROJECT_VERSION@|$(VERSION)|' \
-e 's|@CMAKE_INSTALL_LIBDIR@|$(LIBDIR:$(PREFIX)/%=%)|' \
-e 's|@CMAKE_INSTALL_INCLUDEDIR@|$(INCLUDEDIR:$(PREFIX)/%=%)|' \
-e 's|@PROJECT_DESCRIPTION@|$(DESCRIPTION)|' \
-e 's|@PROJECT_HOMEPAGE_URL@|$(HOMEPAGE_URL)|' \
-e 's|@CMAKE_INSTALL_PREFIX@|$(PREFIX)|' $< > $@
$(PARSER): $(SRC_DIR)/grammar.json
$(TS) generate $^
install: all
install -d '$(DESTDIR)$(DATADIR)'/tree-sitter/queries/KEBAB_PARSER_NAME '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)'
install -m644 bindings/c/tree_sitter/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h
install -m644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc
install -m644 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a
install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER)
ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR)
ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT)
ifneq ($(wildcard queries/*.scm),)
install -m644 queries/*.scm '$(DESTDIR)$(DATADIR)'/tree-sitter/queries/KEBAB_PARSER_NAME
endif
uninstall:
$(RM) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a \
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) \
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) \
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) \
'$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h \
'$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc
$(RM) -r '$(DESTDIR)$(DATADIR)'/tree-sitter/queries/KEBAB_PARSER_NAME
clean:
$(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) lib$(LANGUAGE_NAME).lib
test:
$(TS) test
.PHONY: all install uninstall clean test

View file

@ -0,0 +1,53 @@
{
"name": "tree-sitter-PARSER_NAME",
"version": "PARSER_VERSION",
"description": "PARSER_DESCRIPTION",
"repository": "PARSER_URL",
"funding": "FUNDING_URL",
"license": "PARSER_LICENSE",
"author": {
"name": "PARSER_AUTHOR_NAME",
"email": "PARSER_AUTHOR_EMAIL",
"url": "PARSER_AUTHOR_URL"
},
"main": "bindings/node",
"types": "bindings/node",
"keywords": [
"incremental",
"parsing",
"tree-sitter",
"LOWER_PARSER_NAME"
],
"files": [
"grammar.js",
"tree-sitter.json",
"binding.gyp",
"prebuilds/**",
"bindings/node/*",
"queries/*",
"src/**",
"*.wasm"
],
"dependencies": {
"node-addon-api": "^8.2.1",
"node-gyp-build": "^4.8.2"
},
"devDependencies": {
"prebuildify": "^6.0.1",
"tree-sitter-cli": "^CLI_VERSION"
},
"peerDependencies": {
"tree-sitter": "^0.21.1"
},
"peerDependenciesMeta": {
"tree-sitter": {
"optional": true
}
},
"scripts": {
"install": "node-gyp-build",
"prestart": "tree-sitter build --wasm",
"start": "tree-sitter playground",
"test": "node --test bindings/node/*_test.js"
}
}

View file

@ -0,0 +1,41 @@
// swift-tools-version:5.3
import Foundation
import PackageDescription
var sources = ["src/parser.c"]
if FileManager.default.fileExists(atPath: "src/scanner.c") {
sources.append("src/scanner.c")
}
let package = Package(
name: "PARSER_CLASS_NAME",
products: [
.library(name: "PARSER_CLASS_NAME", targets: ["PARSER_CLASS_NAME"]),
],
dependencies: [
.package(url: "https://github.com/tree-sitter/swift-tree-sitter", from: "0.8.0"),
],
targets: [
.target(
name: "PARSER_CLASS_NAME",
dependencies: [],
path: ".",
sources: sources,
resources: [
.copy("queries")
],
publicHeadersPath: "bindings/swift",
cSettings: [.headerSearchPath("src")]
),
.testTarget(
name: "PARSER_CLASS_NAMETests",
dependencies: [
"SwiftTreeSitter",
"PARSER_CLASS_NAME",
],
path: "bindings/swift/PARSER_CLASS_NAMETests"
)
],
cLanguageStandard: .c11
)

View file

@ -0,0 +1,35 @@
#include <Python.h>
typedef struct TSLanguage TSLanguage;
TSLanguage *tree_sitter_LOWER_PARSER_NAME(void);
static PyObject* _binding_language(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) {
return PyCapsule_New(tree_sitter_LOWER_PARSER_NAME(), "tree_sitter.Language", NULL);
}
static struct PyModuleDef_Slot slots[] = {
#ifdef Py_GIL_DISABLED
{Py_mod_gil, Py_MOD_GIL_NOT_USED},
#endif
{0, NULL}
};
static PyMethodDef methods[] = {
{"language", _binding_language, METH_NOARGS,
"Get the tree-sitter language for this grammar."},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "_binding",
.m_doc = NULL,
.m_size = 0,
.m_methods = methods,
.m_slots = slots,
};
PyMODINIT_FUNC PyInit__binding(void) {
return PyModuleDef_Init(&module);
}

View file

@ -0,0 +1,30 @@
[build-system]
requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "tree-sitter-PARSER_NAME"
description = "PARSER_DESCRIPTION"
version = "PARSER_VERSION"
keywords = ["incremental", "parsing", "tree-sitter", "PARSER_NAME"]
classifiers = [
"Intended Audience :: Developers",
"Topic :: Software Development :: Compilers",
"Topic :: Text Processing :: Linguistic",
"Typing :: Typed",
]
authors = [{ name = "PARSER_AUTHOR_NAME", email = "PARSER_AUTHOR_EMAIL" }]
requires-python = ">=3.10"
license.text = "PARSER_LICENSE"
readme = "README.md"
[project.urls]
Homepage = "PARSER_URL"
Funding = "FUNDING_URL"
[project.optional-dependencies]
core = ["tree-sitter~=0.24"]
[tool.cibuildwheel]
build = "cp310-*"
build-frontend = "build"

View file

@ -0,0 +1,19 @@
const testing = @import("std").testing;
const ts = @import("tree-sitter");
const Language = ts.Language;
const Parser = ts.Parser;
pub extern fn tree_sitter_PARSER_NAME() callconv(.C) *const Language;
pub export fn language() *const Language {
return tree_sitter_PARSER_NAME();
}
test "can load grammar" {
const parser = Parser.create();
defer parser.destroy();
try testing.expectEqual(parser.setLanguage(language()), void{});
try testing.expectEqual(parser.getLanguage(), tree_sitter_PARSER_NAME());
}

View file

@ -0,0 +1,77 @@
from os import path
from platform import system
from sysconfig import get_config_var
from setuptools import Extension, find_packages, setup
from setuptools.command.build import build
from setuptools.command.egg_info import egg_info
from wheel.bdist_wheel import bdist_wheel
sources = [
"bindings/python/tree_sitter_LOWER_PARSER_NAME/binding.c",
"src/parser.c",
]
if path.exists("src/scanner.c"):
sources.append("src/scanner.c")
macros: list[tuple[str, str | None]] = [
("PY_SSIZE_T_CLEAN", None),
("TREE_SITTER_HIDE_SYMBOLS", None),
]
if limited_api := not get_config_var("Py_GIL_DISABLED"):
macros.append(("Py_LIMITED_API", "0x030A0000"))
if system() != "Windows":
cflags = ["-std=c11", "-fvisibility=hidden"]
else:
cflags = ["/std:c11", "/utf-8"]
class Build(build):
def run(self):
if path.isdir("queries"):
dest = path.join(self.build_lib, "tree_sitter_PARSER_NAME", "queries")
self.copy_tree("queries", dest)
super().run()
class BdistWheel(bdist_wheel):
def get_tag(self):
python, abi, platform = super().get_tag()
if python.startswith("cp"):
python, abi = "cp310", "abi3"
return python, abi, platform
class EggInfo(egg_info):
def find_sources(self):
super().find_sources()
self.filelist.recursive_include("queries", "*.scm")
self.filelist.include("src/tree_sitter/*.h")
setup(
packages=find_packages("bindings/python"),
package_dir={"": "bindings/python"},
package_data={
"tree_sitter_LOWER_PARSER_NAME": ["*.pyi", "py.typed"],
"tree_sitter_LOWER_PARSER_NAME.queries": ["*.scm"],
},
ext_package="tree_sitter_LOWER_PARSER_NAME",
ext_modules=[
Extension(
name="_binding",
sources=sources,
extra_compile_args=cflags,
define_macros=macros,
include_dirs=["src"],
py_limited_api=limited_api,
)
],
cmdclass={
"build": Build,
"bdist_wheel": BdistWheel,
"egg_info": EggInfo,
},
zip_safe=False
)

View file

@ -0,0 +1,12 @@
from unittest import TestCase
import tree_sitter
import tree_sitter_LOWER_PARSER_NAME
class TestLanguage(TestCase):
def test_can_load_grammar(self):
try:
tree_sitter.Language(tree_sitter_LOWER_PARSER_NAME.language())
except Exception:
self.fail("Error loading TITLE_PARSER_NAME grammar")

View file

@ -0,0 +1,12 @@
import XCTest
import SwiftTreeSitter
import PARSER_CLASS_NAME
final class PARSER_CLASS_NAMETests: XCTestCase {
func testCanLoadGrammar() throws {
let parser = Parser()
let language = Language(language: tree_sitter_LOWER_PARSER_NAME())
XCTAssertNoThrow(try parser.setLanguage(language),
"Error loading TITLE_PARSER_NAME grammar")
}
}

1584
crates/cli/src/test.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,276 @@
use std::{fs, path::Path};
use anstyle::AnsiColor;
use anyhow::{anyhow, Result};
use tree_sitter::Point;
use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter};
use tree_sitter_loader::{Config, Loader};
use super::{
query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point},
test::paint,
util,
};
/// A single failed highlighting assertion: where it failed, which highlight
/// was expected, and which highlights were actually found at that position.
#[derive(Debug)]
pub struct Failure {
    row: usize,
    column: usize,
    expected_highlight: String,
    actual_highlights: Vec<String>,
}

impl std::error::Error for Failure {}

impl std::fmt::Display for Failure {
    /// Renders the failure as one human-readable line, e.g.
    /// `Failure - row: 1, column: 2, expected highlight 'x', actual highlights: 'a', 'b'`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(
            f,
            "Failure - row: {}, column: {}, expected highlight '{}', actual highlights: ",
            self.row, self.column, self.expected_highlight
        )?;
        if self.actual_highlights.is_empty() {
            return write!(f, "none.");
        }
        let found = self
            .actual_highlights
            .iter()
            .map(|name| format!("'{name}'"))
            .collect::<Vec<_>>()
            .join(", ");
        write!(f, "{found}")
    }
}
/// Runs all highlighting assertion tests beneath `directory`, printing a
/// per-file pass/fail report to stdout.
///
/// Thin wrapper around [`test_highlights_indented`] that prints the section
/// header and starts the recursion at indent level 2.
pub fn test_highlights(
    loader: &Loader,
    loader_config: &Config,
    highlighter: &mut Highlighter,
    directory: &Path,
    use_color: bool,
) -> Result<()> {
    println!("syntax highlighting:");
    test_highlights_indented(loader, loader_config, highlighter, directory, use_color, 2)
}
/// Recursive worker for [`test_highlights`].
///
/// Walks `directory`: each non-empty subdirectory is printed as a section
/// header and recursed into with one extra indent level, while each file is
/// highlighted and checked against its inline assertions, printing a ✓/✗ line.
///
/// Returns `Err` (with an empty message — details were already printed) if
/// any file in the subtree failed, so callers can propagate overall failure.
fn test_highlights_indented(
    loader: &Loader,
    loader_config: &Config,
    highlighter: &mut Highlighter,
    directory: &Path,
    use_color: bool,
    indent_level: usize,
) -> Result<()> {
    let mut failed = false;
    for highlight_test_file in fs::read_dir(directory)? {
        let highlight_test_file = highlight_test_file?;
        let test_file_path = highlight_test_file.path();
        let test_file_name = highlight_test_file.file_name();
        // Two spaces of output indentation per nesting level.
        print!(
            "{indent:indent_level$}",
            indent = "",
            indent_level = indent_level * 2
        );
        if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() {
            // Non-empty directory: print its name as a header and recurse.
            println!("{}:", test_file_name.to_string_lossy());
            if test_highlights_indented(
                loader,
                loader_config,
                highlighter,
                &test_file_path,
                use_color,
                indent_level + 1,
            )
            .is_err()
            {
                failed = true;
            }
        } else {
            // Regular file: resolve its language and highlight configuration,
            // then run the assertions embedded in the file's comments.
            let (language, language_config) = loader
                .language_configuration_for_file_name(&test_file_path)?
                .ok_or_else(|| {
                    anyhow!(
                        "{}",
                        util::lang_not_found_for_path(test_file_path.as_path(), loader_config)
                    )
                })?;
            let highlight_config = language_config
                .highlight_config(language, None)?
                .ok_or_else(|| anyhow!("No highlighting config found for {test_file_path:?}"))?;
            match test_highlight(
                loader,
                highlighter,
                highlight_config,
                fs::read(&test_file_path)?.as_slice(),
            ) {
                Ok(assertion_count) => {
                    println!(
                        "✓ {} ({assertion_count} assertions)",
                        paint(
                            use_color.then_some(AnsiColor::Green),
                            test_file_name.to_string_lossy().as_ref()
                        ),
                    );
                }
                Err(e) => {
                    println!(
                        "✗ {}",
                        paint(
                            use_color.then_some(AnsiColor::Red),
                            test_file_name.to_string_lossy().as_ref()
                        )
                    );
                    // Print the failure detail indented under the file name.
                    println!(
                        "{indent:indent_level$} {e}",
                        indent = "",
                        indent_level = indent_level * 2
                    );
                    failed = true;
                }
            }
        }
    }
    if failed {
        // Details were printed above; an empty error just signals failure.
        Err(anyhow!(""))
    } else {
        Ok(())
    }
}
/// Checks each highlighting [`Assertion`] against the `highlights` actually
/// produced for a file, returning the number of assertions on success.
///
/// Both slices are expected to be ordered by position (the skip-ahead below
/// relies on this). On the first assertion no highlight satisfies, returns a
/// [`Failure`] listing the highlights that actually covered that position.
pub fn iterate_assertions(
    assertions: &[Assertion],
    highlights: &[(Utf8Point, Utf8Point, Highlight)],
    highlight_names: &[String],
) -> Result<usize> {
    // Iterate through all of the highlighting assertions, checking each one against the
    // actual highlights.
    let mut i = 0;
    let mut actual_highlights = Vec::new();
    for Assertion {
        position,
        length,
        negative,
        expected_capture_name: expected_highlight,
    } in assertions
    {
        let mut passed = false;
        let mut end_column = position.column + length - 1;
        actual_highlights.clear();

        // The assertions are ordered by position, so skip past all of the highlights that
        // end at or before this assertion's position.
        'highlight_loop: while let Some(highlight) = highlights.get(i) {
            if highlight.1 <= *position {
                i += 1;
                continue;
            }

            // Iterate through all of the highlights that start at or before this assertion's
            // position, looking for one that matches the assertion.
            let mut j = i;
            while let (false, Some(highlight)) = (passed, highlights.get(j)) {
                end_column = position.column + length - 1;
                if highlight.0.row >= position.row && highlight.0.column > end_column {
                    break 'highlight_loop;
                }

                // If the highlight matches the assertion, or if the highlight doesn't
                // match the assertion but it's negative, this test passes. Otherwise,
                // add this highlight to the list of actual highlights that span the
                // assertion's position, in order to generate an error message in the event
                // of a failure.
                let highlight_name = &highlight_names[(highlight.2).0];
                if (*highlight_name == *expected_highlight) == *negative {
                    actual_highlights.push(highlight_name);
                } else {
                    passed = true;
                    break 'highlight_loop;
                }
                j += 1;
            }
        }
        if !passed {
            return Err(Failure {
                row: position.row,
                column: end_column,
                expected_highlight: expected_highlight.clone(),
                actual_highlights: actual_highlights.into_iter().cloned().collect(),
            }
            .into());
        }
    }
    Ok(assertions.len())
}
/// Highlights `source`, parses the inline position-comment assertions out of
/// it, and verifies those assertions against the computed highlights.
///
/// Returns the number of assertions checked on success.
pub fn test_highlight(
    loader: &Loader,
    highlighter: &mut Highlighter,
    highlight_config: &HighlightConfiguration,
    source: &[u8],
) -> Result<usize> {
    // Highlight the file, and parse out all of the highlighting assertions.
    let highlight_names = loader.highlight_names();
    let highlights = get_highlight_positions(loader, highlighter, highlight_config, source)?;
    let assertions =
        parse_position_comments(highlighter.parser(), &highlight_config.language, source)?;
    iterate_assertions(&assertions, &highlights, &highlight_names)
}
/// Runs the highlighter over `source` and returns the innermost active
/// highlight for each highlighted span, as `(start, end, highlight)` triples
/// in UTF-8 points.
///
/// The source is lossily converted to UTF-8, and the byte offsets carried by
/// `HighlightEvent::Source` are translated to row/column positions by walking
/// the text's characters in lockstep with the event stream.
pub fn get_highlight_positions(
    loader: &Loader,
    highlighter: &mut Highlighter,
    highlight_config: &HighlightConfiguration,
    source: &[u8],
) -> Result<Vec<(Utf8Point, Utf8Point, Highlight)>> {
    // Current row/column/byte position while walking the source text.
    let mut row = 0;
    let mut column = 0;
    let mut byte_offset = 0;
    // Whether the previously consumed character was a newline; the row
    // increment is deferred until the *next* character is consumed.
    let mut was_newline = false;
    let mut result = Vec::new();
    // Stack of currently-open highlights; the innermost is the last element.
    let mut highlight_stack = Vec::new();
    let source = String::from_utf8_lossy(source);
    let mut char_indices = source.char_indices();
    for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| {
        loader.highlight_config_for_injection_string(string)
    })? {
        match event? {
            HighlightEvent::HighlightStart(h) => highlight_stack.push(h),
            HighlightEvent::HighlightEnd => {
                highlight_stack.pop();
            }
            HighlightEvent::Source { start, end } => {
                // Walk the characters covered by this source event, capturing
                // the point at which the event's byte range begins.
                let mut start_position = Point::new(row, column);
                while byte_offset < end {
                    if byte_offset <= start {
                        start_position = Point::new(row, column);
                    }
                    if let Some((i, c)) = char_indices.next() {
                        if was_newline {
                            row += 1;
                            column = 0;
                        } else {
                            // Advance by the byte width of the previous char.
                            column += i - byte_offset;
                        }
                        was_newline = c == '\n';
                        byte_offset = i;
                    } else {
                        break;
                    }
                }
                // Record the span only while some highlight is active; the
                // most recently opened (innermost) highlight wins.
                if let Some(highlight) = highlight_stack.last() {
                    let utf8_start_position = to_utf8_point(start_position, source.as_bytes());
                    let utf8_end_position =
                        to_utf8_point(Point::new(row, column), source.as_bytes());
                    result.push((utf8_start_position, utf8_end_position, *highlight));
                }
            }
        }
    }
    Ok(result)
}

229
crates/cli/src/test_tags.rs Normal file
View file

@ -0,0 +1,229 @@
use std::{fs, path::Path};
use anstyle::AnsiColor;
use anyhow::{anyhow, Result};
use tree_sitter_loader::{Config, Loader};
use tree_sitter_tags::{TagsConfiguration, TagsContext};
use super::{
query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point},
test::paint,
util,
};
/// A single failed tag assertion: where it failed, which tag was expected,
/// and which tags were actually found at that position.
#[derive(Debug)]
pub struct Failure {
    row: usize,
    column: usize,
    expected_tag: String,
    actual_tags: Vec<String>,
}

impl std::error::Error for Failure {}

impl std::fmt::Display for Failure {
    /// Renders the failure as one human-readable line, e.g.
    /// `Failure - row: 1, column: 2, expected tag: 'x', actual tag: 'a', 'b'`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(
            f,
            "Failure - row: {}, column: {}, expected tag: '{}', actual tag: ",
            self.row, self.column, self.expected_tag
        )?;
        if self.actual_tags.is_empty() {
            return write!(f, "none.");
        }
        let found = self
            .actual_tags
            .iter()
            .map(|tag| format!("'{tag}'"))
            .collect::<Vec<_>>()
            .join(", ");
        write!(f, "{found}")
    }
}
/// Runs every tag test under `directory`, printing a `tags:` header first.
///
/// Thin entry point: all of the real work happens in [`test_tags_indented`],
/// which is started at an indent depth of 2.
pub fn test_tags(
    loader: &Loader,
    loader_config: &Config,
    tags_context: &mut TagsContext,
    directory: &Path,
    use_color: bool,
) -> Result<()> {
    println!("tags:");
    let initial_indent = 2;
    test_tags_indented(
        loader,
        loader_config,
        tags_context,
        directory,
        use_color,
        initial_indent,
    )
}
/// Recursively runs tag tests for every file under `directory`, printing a
/// ✓/✗ line per file at `indent_level` and descending into non-empty
/// subdirectories one level deeper.
///
/// Returns `Err` (with an empty message; the details were already printed)
/// if any file in the subtree failed.
pub fn test_tags_indented(
    loader: &Loader,
    loader_config: &Config,
    tags_context: &mut TagsContext,
    directory: &Path,
    use_color: bool,
    indent_level: usize,
) -> Result<()> {
    let mut failed = false;
    for tag_test_file in fs::read_dir(directory)? {
        let tag_test_file = tag_test_file?;
        let test_file_path = tag_test_file.path();
        let test_file_name = tag_test_file.file_name();
        // Two spaces of indentation per level.
        print!(
            "{indent:indent_level$}",
            indent = "",
            indent_level = indent_level * 2
        );
        if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() {
            println!("{}:", test_file_name.to_string_lossy());
            if test_tags_indented(
                loader,
                loader_config,
                tags_context,
                &test_file_path,
                use_color,
                indent_level + 1,
            )
            .is_err()
            {
                failed = true;
            }
        } else {
            // Resolve the language and its tags query from the file name.
            let (language, language_config) = loader
                .language_configuration_for_file_name(&test_file_path)?
                .ok_or_else(|| {
                    anyhow!(
                        "{}",
                        util::lang_not_found_for_path(test_file_path.as_path(), loader_config)
                    )
                })?;
            let tags_config = language_config
                .tags_config(language)?
                .ok_or_else(|| anyhow!("No tags config found for {test_file_path:?}"))?;
            match test_tag(
                tags_context,
                tags_config,
                fs::read(&test_file_path)?.as_slice(),
            ) {
                Ok(assertion_count) => {
                    println!(
                        "✓ {} ({assertion_count} assertions)",
                        paint(
                            use_color.then_some(AnsiColor::Green),
                            test_file_name.to_string_lossy().as_ref()
                        ),
                    );
                }
                Err(e) => {
                    println!(
                        "✗ {}",
                        paint(
                            use_color.then_some(AnsiColor::Red),
                            test_file_name.to_string_lossy().as_ref()
                        )
                    );
                    println!(
                        "{indent:indent_level$} {e}",
                        indent = "",
                        indent_level = indent_level * 2
                    );
                    failed = true;
                }
            }
        }
    }
    if failed {
        Err(anyhow!(""))
    } else {
        Ok(())
    }
}
/// Checks every position-comment assertion in `source` against the tags the
/// tagger actually produces, returning the number of assertions on success
/// or a [`Failure`] describing the first mismatch.
pub fn test_tag(
    tags_context: &mut TagsContext,
    tags_config: &TagsConfiguration,
    source: &[u8],
) -> Result<usize> {
    let tags = get_tag_positions(tags_context, tags_config, source)?;
    let assertions = parse_position_comments(tags_context.parser(), &tags_config.language, source)?;
    // Iterate through all of the assertions, checking against the actual tags.
    // `i` advances monotonically: tags and assertions are both in source order.
    let mut i = 0;
    let mut actual_tags = Vec::<&String>::new();
    for Assertion {
        position,
        length,
        negative,
        expected_capture_name: expected_tag,
    } in &assertions
    {
        let mut passed = false;
        let mut end_column = position.column + length - 1;
        'tag_loop: while let Some(tag) = tags.get(i) {
            // Skip tags that end before this assertion's position.
            if tag.1 <= *position {
                i += 1;
                continue;
            }
            // Iterate through all of the tags that start at or before this assertion's
            // position, looking for one that matches the assertion
            let mut j = i;
            while let (false, Some(tag)) = (passed, tags.get(j)) {
                end_column = position.column + length - 1;
                if tag.0.column > end_column {
                    break 'tag_loop;
                }
                let tag_name = &tag.2;
                // For a positive assertion, a name mismatch is recorded as an
                // "actual" tag; for a negative one, a match is.
                if (*tag_name == *expected_tag) == *negative {
                    actual_tags.push(tag_name);
                } else {
                    passed = true;
                    break 'tag_loop;
                }
                j += 1;
                if tag == tags.last().unwrap() {
                    break 'tag_loop;
                }
            }
        }
        if !passed {
            return Err(Failure {
                row: position.row,
                column: end_column,
                expected_tag: expected_tag.clone(),
                actual_tags: actual_tags.into_iter().cloned().collect(),
            }
            .into());
        }
    }
    Ok(assertions.len())
}
/// Runs the tagger over `source` and converts each produced tag into a
/// `(start, end, "definition.X" | "reference.X")` triple, with both
/// endpoints expressed as UTF-8 code-point positions.
pub fn get_tag_positions(
    tags_context: &mut TagsContext,
    tags_config: &TagsConfiguration,
    source: &[u8],
) -> Result<Vec<(Utf8Point, Utf8Point, String)>> {
    let (tags_iter, _has_error) = tags_context.generate_tags(tags_config, source, None)?;
    let mut tag_positions = Vec::new();
    // Per-tag errors are skipped; only successfully-produced tags are kept.
    for tag in tags_iter.flatten() {
        let tag_postfix = tags_config.syntax_type_name(tag.syntax_type_id).to_string();
        let tag_name = if tag.is_definition {
            format!("definition.{tag_postfix}")
        } else {
            format!("reference.{tag_postfix}")
        };
        tag_positions.push((
            to_utf8_point(tag.span.start, source),
            to_utf8_point(tag.span.end, source),
            tag_name,
        ));
    }
    Ok(tag_positions)
}

34
crates/cli/src/tests.rs Normal file
View file

@ -0,0 +1,34 @@
mod async_context_test;
mod corpus_test;
mod detect_language;
mod helpers;
mod highlight_test;
mod language_test;
mod node_test;
mod parser_hang_test;
mod parser_test;
mod pathological_test;
mod query_test;
mod tags_test;
mod test_highlight_test;
mod test_tags_test;
mod text_provider_test;
mod tree_test;
#[cfg(feature = "wasm")]
mod wasm_language_test;
use tree_sitter_generate::GenerateResult;
pub use crate::fuzz::{
allocations,
edits::{get_random_edit, invert_edit},
random::Rand,
ITERATION_COUNT,
};
/// This is a simple wrapper around [`tree_sitter_generate::generate_parser_for_grammar`], because
/// our tests do not need to pass in a version number, only the grammar JSON.
///
/// The version is pinned to `(0, 0, 0)` so generated output is stable across
/// test runs.
fn generate_parser(grammar_json: &str) -> GenerateResult<(String, String)> {
    tree_sitter_generate::generate_parser_for_grammar(grammar_json, Some((0, 0, 0)))
}

View file

@ -0,0 +1,278 @@
use std::{
future::Future,
pin::{pin, Pin},
ptr,
task::{self, Context, Poll, RawWaker, RawWakerVTable, Waker},
};
use tree_sitter::Parser;
use super::helpers::fixtures::get_language;
// Verifies that a `Node` captured by value and by reference can be used across
// await points, both in async closures ("fabrics") and plain async blocks.
// `pended` counts how many times the task suspended (one per `yield_now`).
#[test]
fn test_node_in_fut() {
    let (ret, pended) = tokio_like_spawn(async {
        let mut parser = Parser::new();
        let language = get_language("bash");
        parser.set_language(&language).unwrap();
        let tree = parser.parse("#", None).unwrap();
        let root = tree.root_node();
        let root_ref = &root;
        let fut_val_fn = || async {
            yield_now().await;
            root.child(0).unwrap().kind()
        };
        yield_now().await;
        let fut_ref_fn = || async {
            yield_now().await;
            root_ref.child(0).unwrap().kind()
        };
        let f1 = fut_val_fn().await;
        let f2 = fut_ref_fn().await;
        assert_eq!(f1, f2);
        let fut_val = async {
            yield_now().await;
            root.child(0).unwrap().kind()
        };
        let fut_ref = async {
            yield_now().await;
            root_ref.child(0).unwrap().kind()
        };
        let f1 = fut_val.await;
        let f2 = fut_ref.await;
        assert_eq!(f1, f2);
        f1
    })
    .join();
    // "#" parses to a single comment node in bash; 5 yield_now calls ran.
    assert_eq!(ret, "comment");
    assert_eq!(pended, 5);
}
// Verifies that a `Node` and a `&mut TreeCursor` can both be held across await
// points within a single task, including after the inner futures complete.
#[test]
fn test_node_and_cursor_ref_in_fut() {
    let ((), pended) = tokio_like_spawn(async {
        let mut parser = Parser::new();
        let language = get_language("c");
        parser.set_language(&language).unwrap();
        let tree = parser.parse("#", None).unwrap();
        let root = tree.root_node();
        let root_ref = &root;
        let mut cursor = tree.walk();
        let cursor_ref = &mut cursor;
        cursor_ref.goto_first_child();
        let fut_val = async {
            yield_now().await;
            let _ = root.to_sexp();
        };
        yield_now().await;
        let fut_ref = async {
            yield_now().await;
            let _ = root_ref.to_sexp();
            cursor_ref.goto_first_child();
        };
        fut_val.await;
        fut_ref.await;
        // Cursor is still usable after being borrowed by the futures.
        cursor_ref.goto_first_child();
    })
    .join();
    assert_eq!(pended, 3);
}
// Like the test above, but the futures come from closures ("fabrics") so the
// value-capturing one can be awaited more than once; the ref-capturing one is
// `async move` and consumes its captures.
#[test]
fn test_node_and_cursor_ref_in_fut_with_fut_fabrics() {
    let ((), pended) = tokio_like_spawn(async {
        let mut parser = Parser::new();
        let language = get_language("javascript");
        parser.set_language(&language).unwrap();
        let tree = parser.parse("#", None).unwrap();
        let root = tree.root_node();
        let root_ref = &root;
        let mut cursor = tree.walk();
        let cursor_ref = &mut cursor;
        cursor_ref.goto_first_child();
        let fut_val = || async {
            yield_now().await;
            let _ = root.to_sexp();
        };
        yield_now().await;
        let fut_ref = || async move {
            yield_now().await;
            let _ = root_ref.to_sexp();
            cursor_ref.goto_first_child();
        };
        fut_val().await;
        fut_val().await;
        fut_ref().await;
    })
    .join();
    assert_eq!(pended, 4);
}
// Verifies that cloned `Tree`s can be moved into nested spawned tasks, each
// creating its own node/cursor, while the outer task keeps using its cursor.
// `ret` sums the pend counts of the two inner tasks (one yield each).
#[test]
fn test_node_and_cursor_ref_in_fut_with_inner_spawns() {
    let (ret, pended) = tokio_like_spawn(async {
        let mut parser = Parser::new();
        let language = get_language("rust");
        parser.set_language(&language).unwrap();
        let tree = parser.parse("#", None).unwrap();
        let mut cursor = tree.walk();
        let cursor_ref = &mut cursor;
        cursor_ref.goto_first_child();
        let fut_val = || {
            let tree = tree.clone();
            async move {
                let root = tree.root_node();
                let mut cursor = tree.walk();
                let cursor_ref = &mut cursor;
                yield_now().await;
                let _ = root.to_sexp();
                cursor_ref.goto_first_child();
            }
        };
        yield_now().await;
        let fut_ref = || {
            let tree = tree.clone();
            async move {
                let root = tree.root_node();
                let root_ref = &root;
                let mut cursor = tree.walk();
                let cursor_ref = &mut cursor;
                yield_now().await;
                let _ = root_ref.to_sexp();
                cursor_ref.goto_first_child();
            }
        };
        let ((), p1) = tokio_like_spawn(fut_val()).await.unwrap();
        let ((), p2) = tokio_like_spawn(fut_ref()).await.unwrap();
        cursor_ref.goto_first_child();
        fut_val().await;
        fut_val().await;
        fut_ref().await;
        cursor_ref.goto_first_child();
        p1 + p2
    })
    .join();
    assert_eq!(pended, 4);
    assert_eq!(ret, 2);
}
/// Drives `future` to completion on the current thread by busy-polling it
/// with a no-op waker — a stand-in for `tokio::spawn` that needs no runtime.
///
/// The returned handle resolves to the future's output paired with the number
/// of times it returned `Pending` before completing.
fn tokio_like_spawn<T>(future: T) -> JoinHandle<(T::Output, usize)>
where
    T: Future + Send + 'static,
    T::Output: Send + 'static,
{
    let waker = noop_waker();
    let mut cx = task::Context::from_waker(&waker);
    let mut future = pin!(future);
    let mut pend_count = 0;
    let output = loop {
        if let Poll::Ready(value) = future.as_mut().poll(&mut cx) {
            break value;
        }
        // Not ready yet: count the suspension and immediately poll again.
        pend_count += 1;
    };
    JoinHandle::new((output, pend_count))
}
/// Suspends the current async fn exactly once, resuming on the next poll.
async fn yield_now() {
    /// A future that is `Pending` on its first poll and `Ready` afterwards.
    struct YieldOnce {
        polled_before: bool,
    }
    impl Future for YieldOnce {
        type Output = ();
        fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
            // Request an immediate re-poll so executors without timers still
            // make progress.
            cx.waker().wake_by_ref();
            if !self.polled_before {
                self.polled_before = true;
                Poll::Pending
            } else {
                Poll::Ready(())
            }
        }
    }
    YieldOnce {
        polled_before: false,
    }
    .await;
}
/// Builds a `Waker` whose wake operations are all no-ops, for polling futures
/// by hand in tests where no real executor exists.
pub const fn noop_waker() -> Waker {
    // All vtable entries ignore their data pointer, which is always null.
    fn wake_noop(_: *const ()) {}
    fn clone_noop(_: *const ()) -> RawWaker {
        NOOP_RAW
    }
    const NOOP_VTABLE: RawWakerVTable =
        RawWakerVTable::new(clone_noop, wake_noop, wake_noop, wake_noop);
    const NOOP_RAW: RawWaker = RawWaker::new(ptr::null(), &NOOP_VTABLE);
    // SAFETY: every vtable function is a no-op (or returns another no-op
    // waker), so the RawWaker contract is trivially upheld.
    unsafe { Waker::from_raw(NOOP_RAW) }
}
/// Minimal stand-in for an executor's join handle: the result is stored
/// inline and handed out exactly once, either via [`JoinHandle::join`] or by
/// awaiting the handle.
struct JoinHandle<T> {
    value: Option<T>,
}
impl<T> JoinHandle<T> {
    #[must_use]
    const fn new(value: T) -> Self {
        Self { value: Some(value) }
    }
    /// Takes the stored result; panics if it was already taken.
    fn join(&mut self) -> T {
        self.value.take().unwrap()
    }
}
impl<T: Unpin> Future for JoinHandle<T> {
    type Output = std::result::Result<T, ()>;
    fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Self::Output> {
        // Resolves immediately; panics on a second poll, like join().
        Poll::Ready(Ok(self.get_mut().join()))
    }
}

View file

@ -0,0 +1,439 @@
use std::{collections::HashMap, env, fs};
use tree_sitter::Parser;
use tree_sitter_proc_macro::test_with_seed;
use crate::{
fuzz::{
corpus_test::{
check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges,
},
edits::{get_random_edit, invert_edit},
flatten_tests, new_seed,
random::Rand,
EDIT_COUNT, EXAMPLE_EXCLUDE, EXAMPLE_INCLUDE, ITERATION_COUNT, LANGUAGE_FILTER,
LOG_GRAPH_ENABLED, START_SEED,
},
parse::perform_edit,
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields},
tests::{
allocations,
helpers::fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
},
};
// One fuzz entry point per bundled grammar. Each parses the language's corpus
// and applies randomized edits driven by `seed`; `retry=10` re-runs with a
// fresh seed from `new_seed` on failure. See `test_language_corpus` below.
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_bash_language(seed: usize) {
    test_language_corpus(
        "bash",
        seed,
        Some(&[
            // Fragile tests where edit customization changes
            // lead to significant parse tree structure changes.
            "bash - corpus - commands - Nested Heredocs",
            "bash - corpus - commands - Quoted Heredocs",
            "bash - corpus - commands - Heredocs with weird characters",
        ]),
        None,
    );
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_c_language(seed: usize) {
    test_language_corpus("c", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_cpp_language(seed: usize) {
    test_language_corpus("cpp", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_embedded_template_language(seed: usize) {
    test_language_corpus("embedded-template", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_go_language(seed: usize) {
    test_language_corpus("go", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_html_language(seed: usize) {
    test_language_corpus("html", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_java_language(seed: usize) {
    test_language_corpus(
        "java",
        seed,
        Some(&["java - corpus - expressions - switch with unnamed pattern variable"]),
        None,
    );
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_javascript_language(seed: usize) {
    test_language_corpus("javascript", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_json_language(seed: usize) {
    test_language_corpus("json", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_php_language(seed: usize) {
    test_language_corpus("php", seed, None, Some("php"));
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_python_language(seed: usize) {
    test_language_corpus("python", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_ruby_language(seed: usize) {
    test_language_corpus("ruby", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_rust_language(seed: usize) {
    test_language_corpus("rust", seed, None, None);
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_typescript_language(seed: usize) {
    test_language_corpus("typescript", seed, None, Some("typescript"));
}
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_tsx_language(seed: usize) {
    test_language_corpus("typescript", seed, None, Some("tsx"));
}
/// Fuzzes one grammar against its corpus: parses every example, then for each
/// of `ITERATION_COUNT` trials applies random edits, reparses incrementally,
/// undoes the edits, and verifies the tree returns to the expected
/// S-expression while sizes and changed ranges stay consistent.
///
/// `skipped` names fragile test entries to bypass (each must match at least
/// once); `language_dir` selects a sub-grammar directory (e.g. "tsx").
pub fn test_language_corpus(
    language_name: &str,
    start_seed: usize,
    skipped: Option<&[&str]>,
    language_dir: Option<&str>,
) {
    // Honor the TREE_SITTER_LANGUAGE env filter.
    if let Some(filter) = LANGUAGE_FILTER.as_ref() {
        if language_name != filter {
            return;
        }
    }
    let language_dir = language_dir.unwrap_or_default();
    let grammars_dir = fixtures_dir().join("grammars");
    let error_corpus_dir = fixtures_dir().join("error_corpus");
    let template_corpus_dir = fixtures_dir().join("template_corpus");
    let corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");
    println!("Testing {language_name} corpus @ {}", corpus_dir.display());
    let error_corpus_file = error_corpus_dir.join(format!("{language_name}_errors.txt"));
    let template_corpus_file = template_corpus_dir.join(format!("{language_name}_templates.txt"));
    // Error and template corpora are optional per language.
    let main_tests = parse_tests(&corpus_dir).unwrap();
    let error_tests = parse_tests(&error_corpus_file).unwrap_or_default();
    let template_tests = parse_tests(&template_corpus_file).unwrap_or_default();
    let mut tests = flatten_tests(
        main_tests,
        EXAMPLE_INCLUDE.as_ref(),
        EXAMPLE_EXCLUDE.as_ref(),
    );
    tests.extend(flatten_tests(
        error_tests,
        EXAMPLE_INCLUDE.as_ref(),
        EXAMPLE_EXCLUDE.as_ref(),
    ));
    // Template examples are parsed only between <% ... %> delimiters.
    tests.extend(
        flatten_tests(
            template_tests,
            EXAMPLE_INCLUDE.as_ref(),
            EXAMPLE_EXCLUDE.as_ref(),
        )
        .into_iter()
        .map(|mut t| {
            t.template_delimiters = Some(("<%", "%>"));
            t
        }),
    );
    tests.retain(|t| t.languages[0].is_empty() || t.languages.contains(&Box::from(language_dir)));
    // Count how often each skip entry matches so stale entries can be flagged.
    let mut skipped = skipped.map(|x| x.iter().map(|x| (*x, 0)).collect::<HashMap<&str, usize>>());
    let language_path = if language_dir.is_empty() {
        language_name.to_string()
    } else {
        format!("{language_name}/{language_dir}")
    };
    let language = get_language(&language_path);
    let mut failure_count = 0;
    let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
    if log_seed {
        println!("  start seed: {start_seed}");
    }
    println!();
    for (test_index, test) in tests.iter().enumerate() {
        let test_name = format!("{language_name} - {}", test.name);
        if let Some(skipped) = skipped.as_mut() {
            if let Some(counter) = skipped.get_mut(test_name.as_str()) {
                println!("  {test_index}. {test_name} - SKIPPED");
                *counter += 1;
                continue;
            }
        }
        println!("  {test_index}. {test_name}");
        // First, a plain (non-incremental) parse must match the corpus,
        // with allocation-leak checking enabled.
        let passed = allocations::record(|| {
            let mut log_session = None;
            let mut parser = get_parser(&mut log_session, "log.html");
            parser.set_language(&language).unwrap();
            set_included_ranges(&mut parser, &test.input, test.template_delimiters);
            let tree = parser.parse(&test.input, None).unwrap();
            let mut actual_output = tree.root_node().to_sexp();
            if !test.has_fields {
                actual_output = strip_sexp_fields(&actual_output);
            }
            if actual_output != test.output {
                println!("Incorrect initial parse for {test_name}");
                print_diff_key();
                print_diff(&actual_output, &test.output, true);
                println!();
                return false;
            }
            true
        })
        .unwrap();
        if !passed {
            failure_count += 1;
            continue;
        }
        let mut parser = Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(&test.input, None).unwrap();
        drop(parser);
        for trial in 0..*ITERATION_COUNT {
            let seed = start_seed + trial;
            let passed = allocations::record(|| {
                let mut rand = Rand::new(seed);
                let mut log_session = None;
                let mut parser = get_parser(&mut log_session, "log.html");
                parser.set_language(&language).unwrap();
                let mut tree = tree.clone();
                let mut input = test.input.clone();
                if *LOG_GRAPH_ENABLED {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }
                // Perform a random series of edits and reparse.
                let mut undo_stack = Vec::new();
                for _ in 0..=rand.unsigned(*EDIT_COUNT) {
                    let edit = get_random_edit(&mut rand, &input);
                    undo_stack.push(invert_edit(&input, &edit));
                    perform_edit(&mut tree, &mut input, &edit).unwrap();
                }
                if log_seed {
                    println!("   {test_index}.{trial:<2} seed: {seed}");
                }
                if dump_edits {
                    fs::write(
                        SCRATCH_BASE_DIR
                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
                        &input,
                    )
                    .unwrap();
                }
                if *LOG_GRAPH_ENABLED {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }
                set_included_ranges(&mut parser, &input, test.template_delimiters);
                let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
                // Check that the new tree is consistent.
                check_consistent_sizes(&tree2, &input);
                if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
                    println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
                    return false;
                }
                // Undo all of the edits and re-parse again.
                while let Some(edit) = undo_stack.pop() {
                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
                }
                if *LOG_GRAPH_ENABLED {
                    eprintln!("{}\n", String::from_utf8_lossy(&input));
                }
                set_included_ranges(&mut parser, &test.input, test.template_delimiters);
                let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
                // Verify that the final tree matches the expectation from the corpus.
                let mut actual_output = tree3.root_node().to_sexp();
                if !test.has_fields {
                    actual_output = strip_sexp_fields(&actual_output);
                }
                if actual_output != test.output {
                    println!("Incorrect parse for {test_name} - seed {seed}");
                    print_diff_key();
                    print_diff(&actual_output, &test.output, true);
                    println!();
                    return false;
                }
                // Check that the edited tree is consistent.
                check_consistent_sizes(&tree3, &input);
                if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
                    println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
                    return false;
                }
                true
            }).unwrap();
            if !passed {
                failure_count += 1;
                break;
            }
        }
    }
    assert!(
        failure_count == 0,
        "{failure_count} {language_name} corpus tests failed"
    );
    // Every skip entry must have matched at least one test name.
    if let Some(skipped) = skipped.as_mut() {
        skipped.retain(|_, v| *v == 0);
        if !skipped.is_empty() {
            println!("Non matchable skip definitions:");
            for k in skipped.keys() {
                println!("  {k}");
            }
            panic!("Non matchable skip definitions needs to be removed");
        }
    }
}
/// Walks the `test_grammars` fixture directory, generating a parser for each
/// grammar: grammars with an `expected_error.txt` must fail generation with
/// exactly that message, while all others must generate successfully and pass
/// every example in their `corpus.txt`.
#[test]
fn test_feature_corpus_files() {
    let test_grammars_dir = fixtures_dir().join("test_grammars");
    let mut failure_count = 0;
    for entry in fs::read_dir(test_grammars_dir).unwrap() {
        let entry = entry.unwrap();
        if !entry.metadata().unwrap().is_dir() {
            continue;
        }
        let language_name = entry.file_name();
        let language_name = language_name.to_str().unwrap();
        if let Some(filter) = LANGUAGE_FILTER.as_ref() {
            if language_name != filter {
                continue;
            }
        }
        let test_path = entry.path();
        // Grammars may be provided as JS or as pre-processed JSON.
        let mut grammar_path = test_path.join("grammar.js");
        if !grammar_path.exists() {
            grammar_path = test_path.join("grammar.json");
        }
        let error_message_path = test_path.join("expected_error.txt");
        let grammar_json = tree_sitter_generate::load_grammar_file(&grammar_path, None).unwrap();
        let generate_result =
            tree_sitter_generate::generate_parser_for_grammar(&grammar_json, Some((0, 0, 0)));
        if error_message_path.exists() {
            if EXAMPLE_INCLUDE.is_some() || EXAMPLE_EXCLUDE.is_some() {
                continue;
            }
            eprintln!("test language: {language_name:?}");
            // Normalize line endings so the comparison is OS-independent.
            let expected_message = fs::read_to_string(&error_message_path)
                .unwrap()
                .replace("\r\n", "\n");
            if let Err(e) = generate_result {
                let actual_message = e.to_string().replace("\r\n", "\n");
                if expected_message != actual_message {
                    eprintln!(
                        "Unexpected error message.\n\nExpected:\n\n{expected_message}\nActual:\n\n{actual_message}\n",
                    );
                    failure_count += 1;
                }
            } else {
                eprintln!("Expected error message but got none for test grammar '{language_name}'",);
                failure_count += 1;
            }
        } else {
            if let Err(e) = &generate_result {
                eprintln!("Unexpected error for test grammar '{language_name}':\n{e}",);
                failure_count += 1;
                continue;
            }
            let corpus_path = test_path.join("corpus.txt");
            let c_code = generate_result.unwrap().1;
            let language = get_test_language(language_name, &c_code, Some(&test_path));
            let test = parse_tests(&corpus_path).unwrap();
            let tests = flatten_tests(test, EXAMPLE_INCLUDE.as_ref(), EXAMPLE_EXCLUDE.as_ref());
            if !tests.is_empty() {
                eprintln!("test language: {language_name:?}");
            }
            for test in tests {
                eprintln!("  example: {:?}", test.name);
                // Parse under allocation-leak checking and compare S-expressions.
                let passed = allocations::record(|| {
                    let mut log_session = None;
                    let mut parser = get_parser(&mut log_session, "log.html");
                    parser.set_language(&language).unwrap();
                    let tree = parser.parse(&test.input, None).unwrap();
                    let mut actual_output = tree.root_node().to_sexp();
                    if !test.has_fields {
                        actual_output = strip_sexp_fields(&actual_output);
                    }
                    if actual_output == test.output {
                        true
                    } else {
                        print_diff_key();
                        print_diff(&actual_output, &test.output, true);
                        println!();
                        false
                    }
                })
                .unwrap();
                if !passed {
                    failure_count += 1;
                }
            }
        }
    }
    assert!(failure_count == 0, "{failure_count} corpus tests failed");
}

View file

@ -0,0 +1,254 @@
use std::{fs, path::Path};
use tree_sitter_loader::Loader;
use crate::tests::helpers::fixtures::scratch_dir;
// Checks that a grammar's `first-line-regex` is used to detect the language of
// a file whose extension matches no grammar, and that an explicit `file-types`
// match on another grammar takes precedence over the regex.
#[test]
fn detect_language_by_first_line_regex() {
    let strace_dir = tree_sitter_dir(
        r#"{
  "grammars": [
    {
      "name": "strace",
      "path": ".",
      "scope": "source.strace",
      "file-types": [
        "strace"
      ],
      "first-line-regex": "[0-9:.]* *execve"
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#,
        "strace",
    );
    let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
    let config = loader
        .find_language_configurations_at_path(strace_dir.path(), false)
        .unwrap();
    // this is just to validate that we can read the tree-sitter.json correctly
    assert_eq!(config[0].scope.as_ref().unwrap(), "source.strace");
    let file_name = strace_dir.path().join("strace.log");
    fs::write(&file_name, "execve\nworld").unwrap();
    assert_eq!(
        get_lang_scope(&loader, &file_name),
        Some("source.strace".into())
    );
    let file_name = strace_dir.path().join("strace.log");
    fs::write(&file_name, "447845 execve\nworld").unwrap();
    assert_eq!(
        get_lang_scope(&loader, &file_name),
        Some("source.strace".into())
    );
    // The regex must match the *first* line only.
    let file_name = strace_dir.path().join("strace.log");
    fs::write(&file_name, "hello\nexecve").unwrap();
    assert!(get_lang_scope(&loader, &file_name).is_none());
    let file_name = strace_dir.path().join("strace.log");
    fs::write(&file_name, "").unwrap();
    assert!(get_lang_scope(&loader, &file_name).is_none());
    let dummy_dir = tree_sitter_dir(
        r#"{
  "grammars": [
    {
      "name": "dummy",
      "scope": "source.dummy",
      "path": ".",
      "file-types": [
        "dummy"
      ]
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#,
        "dummy",
    );
    // file-type takes precedence over first-line-regex
    loader
        .find_language_configurations_at_path(dummy_dir.path(), false)
        .unwrap();
    let file_name = dummy_dir.path().join("strace.dummy");
    fs::write(&file_name, "execve").unwrap();
    assert_eq!(
        get_lang_scope(&loader, &file_name),
        Some("source.dummy".into())
    );
}
// Checks that multi-part "double barrel" extensions like `.blade.php` are
// matched as a whole in `file-types`.
// NOTE(review): "langauge" in the function name is a typo for "language";
// renaming is left out here to keep this change documentation-only.
#[test]
fn detect_langauge_by_double_barrel_file_extension() {
    let blade_dir = tree_sitter_dir(
        r#"{
  "grammars": [
    {
      "name": "blade",
      "path": ".",
      "scope": "source.blade",
      "file-types": [
        "blade.php"
      ]
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#,
        "blade",
    );
    let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
    let config = loader
        .find_language_configurations_at_path(blade_dir.path(), false)
        .unwrap();
    // this is just to validate that we can read the tree-sitter.json correctly
    assert_eq!(config[0].scope.as_ref().unwrap(), "source.blade");
    let file_name = blade_dir.path().join("foo.blade.php");
    fs::write(&file_name, "").unwrap();
    assert_eq!(
        get_lang_scope(&loader, &file_name),
        Some("source.blade".into())
    );
}
// Checks that a bare dotfile name like `.gitignore` can be listed directly in
// `file-types` and detected even though the file has no stem before the dot.
#[test]
fn detect_language_without_filename() {
    let gitignore_dir = tree_sitter_dir(
        r#"{
  "grammars": [
    {
      "name": "gitignore",
      "path": ".",
      "scope": "source.gitignore",
      "file-types": [
        ".gitignore"
      ]
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#,
        "gitignore",
    );
    let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
    let config = loader
        .find_language_configurations_at_path(gitignore_dir.path(), false)
        .unwrap();
    // this is just to validate that we can read the tree-sitter.json correctly
    assert_eq!(config[0].scope.as_ref().unwrap(), "source.gitignore");
    let file_name = gitignore_dir.path().join(".gitignore");
    fs::write(&file_name, "").unwrap();
    assert_eq!(
        get_lang_scope(&loader, &file_name),
        Some("source.gitignore".into())
    );
}
// Checks that a full extension-less file name like `ssh_config` can be listed
// in `file-types` and detected by exact name match.
#[test]
fn detect_language_without_file_extension() {
    let ssh_config_dir = tree_sitter_dir(
        r#"{
  "grammars": [
    {
      "name": "ssh_config",
      "path": ".",
      "scope": "source.ssh_config",
      "file-types": [
        "ssh_config"
      ]
    }
  ],
  "metadata": {
    "version": "0.0.1"
  }
}
"#,
        "ssh_config",
    );
    let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
    let config = loader
        .find_language_configurations_at_path(ssh_config_dir.path(), false)
        .unwrap();
    // this is just to validate that we can read the tree-sitter.json correctly
    assert_eq!(config[0].scope.as_ref().unwrap(), "source.ssh_config");
    let file_name = ssh_config_dir.path().join("ssh_config");
    fs::write(&file_name, "").unwrap();
    assert_eq!(
        get_lang_scope(&loader, &file_name),
        Some("source.ssh_config".into())
    );
}
/// Creates a throw-away grammar directory containing the given
/// `tree-sitter.json`, a minimal `src/grammar.json`, a stub `src/parser.c`
/// exporting `tree_sitter_<name>`, and the real `parser.h`, so the loader can
/// treat it as a compilable grammar.
fn tree_sitter_dir(tree_sitter_json: &str, name: &str) -> tempfile::TempDir {
    let temp_dir = tempfile::tempdir().unwrap();
    fs::write(temp_dir.path().join("tree-sitter.json"), tree_sitter_json).unwrap();
    fs::create_dir_all(temp_dir.path().join("src/tree_sitter")).unwrap();
    fs::write(
        temp_dir.path().join("src/grammar.json"),
        format!(r#"{{"name":"{name}"}}"#),
    )
    .unwrap();
    // Stub parser: exports the expected symbol but returns nothing usable.
    fs::write(
        temp_dir.path().join("src/parser.c"),
        format!(
            r#"
#include "tree_sitter/parser.h"
#ifdef _WIN32
#define TS_PUBLIC __declspec(dllexport)
#else
#define TS_PUBLIC __attribute__((visibility("default")))
#endif
TS_PUBLIC const TSLanguage *tree_sitter_{name}() {{}}
"#
        ),
    )
    .unwrap();
    fs::write(
        temp_dir.path().join("src/tree_sitter/parser.h"),
        include_str!("../../../../lib/src/parser.h"),
    )
    .unwrap();
    temp_dir
}
// If we manage to get the language scope, it means we correctly detected the file-type
fn get_lang_scope(loader: &Loader, file_name: &Path) -> Option<String> {
    // Prefer detection by file name; fall back to the first-line regex.
    match loader.language_configuration_for_file_name(file_name) {
        Ok(Some((_, config))) => config.scope.clone(),
        Ok(None) => match loader.language_configuration_for_first_line_regex(file_name) {
            Ok(Some((_, config))) => config.scope.clone(),
            _ => None,
        },
        Err(_) => None,
    }
}

View file

@ -0,0 +1,4 @@
pub mod allocations;
pub mod edits;
pub(super) mod fixtures;
pub(super) mod query_helpers;

View file

@ -0,0 +1,121 @@
use std::{
collections::HashMap,
os::raw::c_void,
sync::{
atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
Mutex,
},
};
/// Installs the allocation-recording hooks into tree-sitter's allocator before
/// `main` runs (`ctor` makes this a pre-main constructor), so every
/// tree-sitter allocation in this test binary goes through the recorder.
#[ctor::ctor]
unsafe fn initialize_allocation_recording() {
    tree_sitter::set_allocator(
        Some(ts_record_malloc),
        Some(ts_record_calloc),
        Some(ts_record_realloc),
        Some(ts_record_free),
    );
}
/// Wrapper around a raw allocation pointer so it can serve as a hash-map key.
#[derive(Debug, PartialEq, Eq, Hash)]
struct Allocation(*const c_void);
// SAFETY: the pointer is only used as an opaque identity (hashed and
// compared), never dereferenced, so sharing it across threads is sound.
unsafe impl Send for Allocation {}
unsafe impl Sync for Allocation {}
/// Per-thread allocation bookkeeping used by [`record`].
#[derive(Default)]
struct AllocationRecorder {
    // Whether allocations on this thread are currently being tracked.
    enabled: AtomicBool,
    // Monotonic index assigned to each allocation while recording.
    allocation_count: AtomicUsize,
    // Live allocations mapped to the index they were created with.
    outstanding_allocations: Mutex<HashMap<Allocation, usize>>,
}
thread_local! {
    static RECORDER: AllocationRecorder = AllocationRecorder::default();
}
// The real C allocator entry points, which the recording wrappers below
// delegate to.
extern "C" {
    fn malloc(size: usize) -> *mut c_void;
    fn calloc(count: usize, size: usize) -> *mut c_void;
    fn realloc(ptr: *mut c_void, size: usize) -> *mut c_void;
    fn free(ptr: *mut c_void);
}
/// Runs `f` while recording every allocation made through the tree-sitter
/// allocator on this thread, and panics if any allocation made inside `f`
/// was never freed.
pub fn record<T>(f: impl FnOnce() -> T) -> T {
    // Arm the recorder with a clean slate before running the closure.
    RECORDER.with(|state| {
        state.enabled.store(true, SeqCst);
        state.allocation_count.store(0, SeqCst);
        state.outstanding_allocations.lock().unwrap().clear();
    });
    let result = f();
    // Disarm, then collect the indices of allocations that were never freed.
    let outstanding_allocation_indices = RECORDER.with(|state| {
        state.enabled.store(false, SeqCst);
        state.allocation_count.store(0, SeqCst);
        let mut live = state.outstanding_allocations.lock().unwrap();
        live.drain().map(|(_, index)| index).collect::<Vec<_>>()
    });
    assert!(
        outstanding_allocation_indices.is_empty(),
        "Leaked allocation indices: {outstanding_allocation_indices:?}"
    );
    result
}
/// Notes `ptr` as a live allocation if recording is active on this thread.
fn record_alloc(ptr: *mut c_void) {
    RECORDER.with(|state| {
        if !state.enabled.load(SeqCst) {
            return;
        }
        // Tag the pointer with a monotonically increasing allocation index.
        let index = state.allocation_count.fetch_add(1, SeqCst);
        state
            .outstanding_allocations
            .lock()
            .unwrap()
            .insert(Allocation(ptr), index);
    });
}
/// Removes `ptr` from the live-allocation table if recording is active.
fn record_dealloc(ptr: *mut c_void) {
    RECORDER.with(|state| {
        if !state.enabled.load(SeqCst) {
            return;
        }
        state
            .outstanding_allocations
            .lock()
            .unwrap()
            .remove(&Allocation(ptr));
    });
}
/// `malloc` wrapper that records the new allocation.
unsafe extern "C" fn ts_record_malloc(size: usize) -> *mut c_void {
    let result = malloc(size);
    record_alloc(result);
    result
}
/// `calloc` wrapper that records the new allocation.
unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void {
    let result = calloc(count, size);
    record_alloc(result);
    result
}
/// `realloc` wrapper that keeps the live-allocation table in sync: a null
/// `ptr` acts like `malloc`; a moved block is recorded as a free of the old
/// pointer plus a new allocation; an in-place resize leaves the table as-is.
unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
    let result = realloc(ptr, size);
    if ptr.is_null() {
        record_alloc(result);
    } else if !core::ptr::eq(ptr, result) {
        record_dealloc(ptr);
        record_alloc(result);
    }
    result
}
/// `free` wrapper that forgets the allocation before releasing it.
unsafe extern "C" fn ts_record_free(ptr: *mut c_void) {
    record_dealloc(ptr);
    free(ptr);
}

View file

@ -0,0 +1,65 @@
/// Repository root: two levels above this crate's manifest directory.
pub static ROOT_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .parent()
        .unwrap()
        .parent()
        .unwrap()
        .to_owned()
});
/// `<repo>/test/fixtures` — grammars, corpora, and test grammars.
pub static FIXTURES_DIR: LazyLock<PathBuf> =
    LazyLock::new(|| ROOT_DIR.join("test").join("fixtures"));
/// `<repo>/lib/include` — the public C headers.
pub static HEADER_DIR: LazyLock<PathBuf> = LazyLock::new(|| ROOT_DIR.join("lib").join("include"));
/// `<repo>/test/fixtures/grammars` — the bundled real-world grammars.
pub static GRAMMARS_DIR: LazyLock<PathBuf> =
    LazyLock::new(|| ROOT_DIR.join("test").join("fixtures").join("grammars"));
/// `<repo>/target/scratch` — created eagerly on first access.
pub static SCRATCH_BASE_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
    let result = ROOT_DIR.join("target").join("scratch");
    fs::create_dir_all(&result).unwrap();
    result
});
#[cfg(feature = "wasm")]
pub static WASM_DIR: LazyLock<PathBuf> = LazyLock::new(|| ROOT_DIR.join("target").join("release"));
/// Per-target scratch subdirectory, keyed by arch/OS/vendor/env/endianness so
/// artifacts from different toolchains don't collide.
pub static SCRATCH_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
    // https://doc.rust-lang.org/reference/conditional-compilation.html
    let vendor = if cfg!(target_vendor = "apple") {
        "apple"
    } else if cfg!(target_vendor = "fortanix") {
        "fortanix"
    } else if cfg!(target_vendor = "pc") {
        "pc"
    } else {
        "unknown"
    };
    let env = if cfg!(target_env = "gnu") {
        "gnu"
    } else if cfg!(target_env = "msvc") {
        "msvc"
    } else if cfg!(target_env = "musl") {
        "musl"
    } else if cfg!(target_env = "sgx") {
        "sgx"
    } else {
        "unknown"
    };
    let endian = if cfg!(target_endian = "little") {
        "little"
    } else if cfg!(target_endian = "big") {
        "big"
    } else {
        "unknown"
    };
    let machine = format!(
        "{}-{}-{vendor}-{env}-{endian}",
        std::env::consts::ARCH,
        std::env::consts::OS
    );
    let result = SCRATCH_BASE_DIR.join(machine);
    fs::create_dir_all(&result).unwrap();
    result
});

View file

@ -0,0 +1,49 @@
use std::{ops::Range, str};
/// Wraps a byte buffer and records which byte offsets have been read,
/// so tests can verify exactly what portions of a document were consumed.
#[derive(Debug)]
pub struct ReadRecorder<'a> {
    content: &'a [u8],
    // Sorted, deduplicated byte offsets that have been read so far.
    indices_read: Vec<usize>,
}

impl<'a> ReadRecorder<'a> {
    #[must_use]
    pub const fn new(content: &'a [u8]) -> Self {
        Self {
            content,
            indices_read: Vec::new(),
        }
    }

    /// Returns the single byte at `offset` (or an empty slice past the end),
    /// recording the offset so that `strings_read` can report coverage.
    pub fn read(&mut self, offset: usize) -> &'a [u8] {
        if offset < self.content.len() {
            // Insert at the binary-search position to keep `indices_read`
            // sorted and free of duplicates.
            if let Err(i) = self.indices_read.binary_search(&offset) {
                self.indices_read.insert(i, offset);
            }
            &self.content[offset..(offset + 1)]
        } else {
            &[]
        }
    }

    /// Returns each contiguous run of read bytes as a UTF-8 string,
    /// in ascending offset order.
    pub fn strings_read(&self) -> Vec<&'a str> {
        let mut result = Vec::new();
        let mut last_range = Option::<Range<usize>>::None;
        for &index in &self.indices_read {
            match &mut last_range {
                // Extends the current contiguous run.
                Some(range) if range.end == index => range.end += 1,
                // Gap: flush the finished run and start a new run at `index`.
                // (Previously this reset `last_range` to `None`, silently
                // dropping the first byte of every run after a gap.)
                Some(range) => {
                    result.push(str::from_utf8(&self.content[range.clone()]).unwrap());
                    last_range = Some(index..(index + 1));
                }
                None => last_range = Some(index..(index + 1)),
            }
        }
        if let Some(range) = last_range {
            result.push(str::from_utf8(&self.content[range]).unwrap());
        }
        result
    }
}

View file

@ -0,0 +1,140 @@
use std::{
env, fs,
path::{Path, PathBuf},
sync::LazyLock,
};
use anyhow::Context;
use tree_sitter::Language;
use tree_sitter_generate::{load_grammar_file, ALLOC_HEADER, ARRAY_HEADER};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_loader::{CompileConfig, Loader};
use tree_sitter_tags::TagsConfiguration;
use crate::tests::generate_parser;
include!("./dirs.rs");
// Shared loader that compiles fixture grammars into SCRATCH_DIR; built once
// per process. Debug builds are enabled via TREE_SITTER_GRAMMAR_DEBUG.
static TEST_LOADER: LazyLock<Loader> = LazyLock::new(|| {
    let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
    if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() {
        loader.debug_build(true);
    }
    loader
});
// Accessor for the process-wide fixture loader.
pub fn test_loader() -> &'static Loader {
    &TEST_LOADER
}
// Accessor for the checked-in fixtures directory.
pub fn fixtures_dir() -> &'static Path {
    &FIXTURES_DIR
}
// Accessor for the per-target scratch directory.
pub fn scratch_dir() -> &'static Path {
    &SCRATCH_DIR
}
// Compiles (or reuses a cached build of) the fixture grammar `name` and
// returns the loaded language.
pub fn get_language(name: &str) -> Language {
    let src_dir = GRAMMARS_DIR.join(name).join("src");
    let mut config = CompileConfig::new(&src_dir, None, None);
    config.header_paths.push(&HEADER_DIR);
    TEST_LOADER.load_language_at_path(config).unwrap()
}
// Generates a parser from a test grammar's `grammar.js` and loads it,
// including its `scanner.c` if the grammar directory has one.
pub fn get_test_fixture_language(name: &str) -> Language {
    let grammar_dir_path = fixtures_dir().join("test_grammars").join(name);
    let grammar_json = load_grammar_file(&grammar_dir_path.join("grammar.js"), None).unwrap();
    let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap();
    get_test_language(&parser_name, &parser_code, Some(&grammar_dir_path))
}
// Path to a fixture grammar's `queries` directory.
pub fn get_language_queries_path(language_name: &str) -> PathBuf {
    GRAMMARS_DIR.join(language_name).join("queries")
}
/// Builds a configured `HighlightConfiguration` for the fixture grammar
/// `language_name`, reading its query files from the grammar's `queries` dir.
/// `injection_query_filename` selects an optional injections query; locals
/// are optional and default to empty when `locals.scm` is absent.
pub fn get_highlight_config(
    language_name: &str,
    injection_query_filename: Option<&str>,
    highlight_names: &[String],
) -> HighlightConfiguration {
    let language = get_language(language_name);
    let queries_path = get_language_queries_path(language_name);
    let highlights_query = fs::read_to_string(queries_path.join("highlights.scm")).unwrap();
    let injections_query = match injection_query_filename {
        Some(filename) => fs::read_to_string(queries_path.join(filename)).unwrap(),
        None => String::new(),
    };
    let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or_default();
    let mut config = HighlightConfiguration::new(
        language,
        language_name,
        &highlights_query,
        &injections_query,
        &locals_query,
    )
    .unwrap();
    config.configure(highlight_names);
    config
}
// Builds a `TagsConfiguration` for the fixture grammar, using its `tags.scm`
// and (optionally) `locals.scm` query files.
pub fn get_tags_config(language_name: &str) -> TagsConfiguration {
    let language = get_language(language_name);
    let queries_path = get_language_queries_path(language_name);
    let tags_query = fs::read_to_string(queries_path.join("tags.scm")).unwrap();
    // Locals are optional; fall back to an empty query when the file is absent.
    let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or_default();
    TagsConfiguration::new(language, &tags_query, &locals_query).unwrap()
}
// Writes a generated parser (and optional scanner) into a per-grammar scratch
// directory, lays down the tree-sitter support headers, and compiles/loads the
// resulting language. Files are only rewritten when their content changed, so
// the loader's build cache stays warm across runs.
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
    let src_dir = scratch_dir().join("src").join(name);
    fs::create_dir_all(&src_dir).unwrap();
    // Avoid touching parser.c when unchanged to preserve build caching.
    let parser_path = src_dir.join("parser.c");
    if !fs::read_to_string(&parser_path).is_ok_and(|content| content == parser_code) {
        fs::write(&parser_path, parser_code).unwrap();
    }
    // Copy the grammar's external scanner into the scratch dir, if present.
    let scanner_path = if let Some(path) = path {
        let scanner_path = path.join("scanner.c");
        if scanner_path.exists() {
            let scanner_code = fs::read_to_string(&scanner_path).unwrap();
            let scanner_copy_path = src_dir.join("scanner.c");
            if !fs::read_to_string(&scanner_copy_path).is_ok_and(|content| content == scanner_code)
            {
                fs::write(&scanner_copy_path, scanner_code).unwrap();
            }
            Some(scanner_copy_path)
        } else {
            None
        }
    } else {
        None
    };
    // Provide the headers generated parsers #include as "tree_sitter/...".
    let header_path = src_dir.join("tree_sitter");
    fs::create_dir_all(&header_path).unwrap();
    for (file, content) in [
        ("alloc.h", ALLOC_HEADER),
        ("array.h", ARRAY_HEADER),
        ("parser.h", tree_sitter::PARSER_HEADER),
    ] {
        let file = header_path.join(file);
        fs::write(&file, content)
            .with_context(|| format!("Failed to write {:?}", file.file_name().unwrap()))
            .unwrap();
    }
    // The loader rebuilds only when one of these sources is newer than the lib.
    let paths_to_check = if let Some(scanner_path) = &scanner_path {
        vec![parser_path, scanner_path.clone()]
    } else {
        vec![parser_path]
    };
    let mut config = CompileConfig::new(&src_dir, Some(&paths_to_check), None);
    config.header_paths = vec![&HEADER_DIR];
    config.name = name.to_string();
    TEST_LOADER.load_language_at_path_with_name(config).unwrap()
}

View file

@ -0,0 +1,363 @@
use std::{cmp::Ordering, fmt::Write, ops::Range};
use rand::prelude::Rng;
use streaming_iterator::{IntoStreamingIterator, StreamingIterator};
use tree_sitter::{
Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, Tree, TreeCursor,
};
// A randomly generated tree-query pattern, mirroring the S-expression query
// syntax: optional node kind, named/anonymous flag, optional field name,
// optional capture, and ordered child patterns.
#[derive(Debug)]
pub struct Pattern {
    kind: Option<&'static str>,
    named: bool,
    field: Option<&'static str>,
    capture: Option<String>,
    children: Vec<Pattern>,
}
// A query match: the captured (name, node) pairs, plus the last node matched
// (used only for ordering; cleared before deduplication).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Match<'a, 'tree> {
    pub captures: Vec<(&'a str, Node<'tree>)>,
    pub last_node: Option<Node<'tree>>,
}
// Pool of capture names used by randomly generated patterns.
const CAPTURE_NAMES: &[&str] = &[
    "one", "two", "three", "four", "five", "six", "seven", "eight",
];
impl Pattern {
    /// Generates a random pattern anchored at a random node of `tree`,
    /// returning the pattern and the range of positions it was built from.
    pub fn random_pattern_in_tree(tree: &Tree, rng: &mut impl Rng) -> (Self, Range<Point>) {
        let mut cursor = tree.walk();
        // Descend to the node at a random byte offset and depth.
        let mut max_depth = 0;
        let byte_offset = rng.gen_range(0..cursor.node().end_byte());
        while cursor.goto_first_child_for_byte(byte_offset).is_some() {
            max_depth += 1;
        }
        let depth = rng.gen_range(0..=max_depth);
        for _ in 0..depth {
            cursor.goto_parent();
        }
        // Build a pattern that matches that node.
        // Sometimes include subsequent siblings of the node.
        let pattern_start = cursor.node().start_position();
        let mut roots = vec![Self::random_pattern_for_node(&mut cursor, rng)];
        while roots.len() < 5 && cursor.goto_next_sibling() {
            if rng.gen_bool(0.2) {
                roots.push(Self::random_pattern_for_node(&mut cursor, rng));
            }
        }
        let pattern_end = cursor.node().end_position();
        // Wrap the sibling patterns in an anonymous parenthesized list.
        let mut pattern = Self {
            kind: None,
            named: true,
            field: None,
            capture: None,
            children: roots,
        };
        // NOTE(review): when there are multiple roots and the first is an
        // anonymous `_`, this pops only the *last* root — presumably
        // intentional for fuzzing, but worth confirming.
        if pattern.children.len() == 1 ||
            // In a parenthesized list of sibling patterns, the first
            // sibling can't be an anonymous `_` wildcard.
            (pattern.children[0].kind == Some("_") && !pattern.children[0].named)
        {
            pattern = pattern.children.pop().unwrap();
        }
        // In a parenthesized list of sibling patterns, the first
        // sibling can't have a field name.
        else {
            pattern.children[0].field = None;
        }
        (pattern, pattern_start..pattern_end)
    }
    /// Builds a random pattern for the node under `cursor`, randomly choosing
    /// kind vs. wildcard, field, capture, and a subset of child patterns.
    fn random_pattern_for_node(cursor: &mut TreeCursor, rng: &mut impl Rng) -> Self {
        let node = cursor.node();
        // Sometimes specify the node's type, sometimes use a wildcard.
        let (kind, named) = if rng.gen_bool(0.9) {
            (Some(node.kind()), node.is_named())
        } else {
            (Some("_"), node.is_named() && rng.gen_bool(0.8))
        };
        // Sometimes specify the node's field.
        let field = if rng.gen_bool(0.75) {
            cursor.field_name()
        } else {
            None
        };
        // Sometimes capture the node.
        let capture = if rng.gen_bool(0.7) {
            Some(CAPTURE_NAMES[rng.gen_range(0..CAPTURE_NAMES.len())].to_string())
        } else {
            None
        };
        // Walk the children and include child patterns for some of them.
        // NOTE(review): the first child is always skipped because
        // `goto_next_sibling` runs before any child is considered — confirm
        // that this is intended randomness rather than an oversight.
        let mut children = Vec::new();
        if named && cursor.goto_first_child() {
            let max_children = rng.gen_range(0..4);
            while cursor.goto_next_sibling() {
                if rng.gen_bool(0.6) {
                    let child_ast = Self::random_pattern_for_node(cursor, rng);
                    children.push(child_ast);
                    if children.len() >= max_children {
                        break;
                    }
                }
            }
            cursor.goto_parent();
        }
        Self {
            kind,
            named,
            field,
            capture,
            children,
        }
    }
    /// Renders this pattern as query S-expression syntax into `string`,
    /// indenting nested children by two spaces per level.
    fn write_to_string(&self, string: &mut String, indent: usize) {
        if let Some(field) = self.field {
            write!(string, "{field}: ").unwrap();
        }
        if self.named {
            string.push('(');
            let mut has_contents = false;
            if let Some(kind) = &self.kind {
                write!(string, "{kind}").unwrap();
                has_contents = true;
            }
            for child in &self.children {
                let indent = indent + 2;
                if has_contents {
                    string.push('\n');
                    string.push_str(&" ".repeat(indent));
                }
                child.write_to_string(string, indent);
                has_contents = true;
            }
            string.push(')');
        } else if self.kind == Some("_") {
            string.push('_');
        } else {
            // Anonymous leaf: render as a quoted literal, escaping quotes.
            write!(string, "\"{}\"", self.kind.unwrap().replace('\"', "\\\"")).unwrap();
        }
        if let Some(capture) = &self.capture {
            write!(string, " @{capture}").unwrap();
        }
    }
    /// Returns all matches of this pattern anywhere in `tree`, sorted and
    /// deduplicated — the reference result the real query engine is checked
    /// against.
    pub fn matches_in_tree<'tree>(&self, tree: &'tree Tree) -> Vec<Match<'_, 'tree>> {
        let mut matches = Vec::new();
        // Compute the matches naively: walk the tree and
        // retry the entire pattern for each node.
        let mut cursor = tree.walk();
        let mut ascending = false;
        loop {
            if ascending {
                if cursor.goto_next_sibling() {
                    ascending = false;
                } else if !cursor.goto_parent() {
                    break;
                }
            } else {
                let matches_here = self.match_node(&mut cursor);
                matches.extend_from_slice(&matches_here);
                if !cursor.goto_first_child() {
                    ascending = true;
                }
            }
        }
        // `last_node` only exists for ordering; clear it before deduping so
        // matches with identical captures compare equal.
        matches.sort_unstable();
        matches.iter_mut().for_each(|m| m.last_node = None);
        matches.dedup();
        matches
    }
    /// Tries to match this pattern against the node under `cursor`, returning
    /// every distinct combination of child-pattern-to-child-node assignments.
    pub fn match_node<'tree>(&self, cursor: &mut TreeCursor<'tree>) -> Vec<Match<'_, 'tree>> {
        let node = cursor.node();
        // If a kind is specified, check that it matches the node.
        if let Some(kind) = self.kind {
            if kind == "_" {
                if self.named && !node.is_named() {
                    return Vec::new();
                }
            } else if kind != node.kind() || self.named != node.is_named() {
                return Vec::new();
            }
        }
        // If a field is specified, check that it matches the node.
        if let Some(field) = self.field {
            if cursor.field_name() != Some(field) {
                return Vec::new();
            }
        }
        // Create a match for the current node.
        let mat = Match {
            captures: self
                .capture
                .as_ref()
                .map_or_else(Vec::new, |name| vec![(name.as_str(), node)]),
            last_node: Some(node),
        };
        // If there are no child patterns to match, then return this single match.
        if self.children.is_empty() {
            return vec![mat];
        }
        // Find every matching combination of child patterns and child nodes.
        // `match_states` holds partial matches paired with the index of the
        // next child pattern they still need to satisfy.
        let mut finished_matches = Vec::<Match>::new();
        if cursor.goto_first_child() {
            let mut match_states = vec![(0, mat)];
            loop {
                let mut new_match_states = Vec::new();
                for (pattern_index, mat) in &match_states {
                    let child_pattern = &self.children[*pattern_index];
                    let child_matches = child_pattern.match_node(cursor);
                    for child_match in child_matches {
                        let mut combined_match = mat.clone();
                        combined_match.last_node = child_match.last_node;
                        combined_match
                            .captures
                            .extend_from_slice(&child_match.captures);
                        if pattern_index + 1 < self.children.len() {
                            new_match_states.push((*pattern_index + 1, combined_match));
                        } else {
                            // All child patterns satisfied; merge with any
                            // existing match that has the same captures.
                            let mut existing = false;
                            for existing_match in &mut finished_matches {
                                if existing_match.captures == combined_match.captures {
                                    if child_pattern.capture.is_some() {
                                        existing_match.last_node = combined_match.last_node;
                                    }
                                    existing = true;
                                }
                            }
                            if !existing {
                                finished_matches.push(combined_match);
                            }
                        }
                    }
                }
                match_states.extend_from_slice(&new_match_states);
                if !cursor.goto_next_sibling() {
                    break;
                }
            }
            cursor.goto_parent();
        }
        finished_matches
    }
}
impl std::fmt::Display for Pattern {
    /// Renders the pattern as query S-expression text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut rendered = String::new();
        self.write_to_string(&mut rendered, 0);
        f.write_str(&rendered)
    }
}
impl PartialOrd for Match<'_, '_> {
    // Delegates to `Ord`, as required for consistency.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for Match<'_, '_> {
    // Tree-sitter returns matches in the order that they terminate
    // during a depth-first walk of the tree. If multiple matches
    // terminate on the same node, those matches are produced in the
    // order that their captures were discovered.
    fn cmp(&self, other: &Self) -> Ordering {
        // Primary key: the node each match terminated on (when both are set).
        if let Some((last_node_a, last_node_b)) = self.last_node.zip(other.last_node) {
            let cmp = compare_depth_first(last_node_a, last_node_b);
            if cmp.is_ne() {
                return cmp;
            }
        }
        // Secondary key: pairwise capture order, then capture count.
        for (a, b) in self.captures.iter().zip(other.captures.iter()) {
            let cmp = compare_depth_first(a.1, b.1);
            if !cmp.is_eq() {
                return cmp;
            }
        }
        self.captures.len().cmp(&other.captures.len())
    }
}
/// Orders two nodes by depth-first discovery: earlier start byte first;
/// on ties, the longer (enclosing) node first.
fn compare_depth_first(a: Node, b: Node) -> Ordering {
    let (range_a, range_b) = (a.byte_range(), b.byte_range());
    range_a
        .start
        .cmp(&range_b.start)
        .then_with(|| range_b.end.cmp(&range_a.end))
}
/// Parses `source` with `language`, runs `query` over the whole tree, and
/// asserts that the collected (pattern index, captures) pairs equal `expected`.
///
/// # Panics
/// Panics if parsing fails, the results differ, or the cursor hit its match
/// limit (which would make the comparison meaningless).
pub fn assert_query_matches(
    language: &Language,
    query: &Query,
    source: &str,
    expected: &[(usize, Vec<(&str, &str)>)],
) {
    let mut parser = Parser::new();
    parser.set_language(language).unwrap();
    let tree = parser.parse(source, None).unwrap();
    let mut cursor = QueryCursor::new();
    let matches = cursor.matches(query, tree.root_node(), source.as_bytes());
    pretty_assertions::assert_eq!(expected, collect_matches(matches, query, source));
    // Idiomatic boolean assertion (was `assert_eq!(false, ...)`).
    assert!(!cursor.did_exceed_match_limit());
}
// Drains a streaming iterator of query matches into plain
// (pattern index, formatted captures) pairs for easy assertion.
pub fn collect_matches<'a>(
    mut matches: impl StreamingIterator<Item = QueryMatch<'a, 'a>>,
    query: &'a Query,
    source: &'a str,
) -> Vec<(usize, Vec<(&'a str, &'a str)>)> {
    let mut result = Vec::new();
    // StreamingIterator has no `for` support; drain with `while let`.
    while let Some(m) = matches.next() {
        result.push((
            m.pattern_index,
            format_captures(m.captures.iter().into_streaming_iter_ref(), query, source),
        ));
    }
    result
}
// Formats the single indexed capture of each (match, capture-index) pair.
pub fn collect_captures<'a>(
    captures: impl StreamingIterator<Item = (QueryMatch<'a, 'a>, usize)>,
    query: &'a Query,
    source: &'a str,
) -> Vec<(&'a str, &'a str)> {
    format_captures(captures.map(|(m, i)| m.captures[*i]), query, source)
}
// Maps each capture to (capture name, captured source text).
fn format_captures<'a>(
    mut captures: impl StreamingIterator<Item = QueryCapture<'a>>,
    query: &'a Query,
    source: &'a str,
) -> Vec<(&'a str, &'a str)> {
    let mut result = Vec::new();
    while let Some(capture) = captures.next() {
        result.push((
            query.capture_names()[capture.index as usize],
            capture.node.utf8_text(source.as_bytes()).unwrap(),
        ));
    }
    result
}

View file

@ -0,0 +1,786 @@
use std::{
ffi::CString,
fs,
os::raw::c_char,
ptr, slice, str,
sync::{
atomic::{AtomicUsize, Ordering},
LazyLock,
},
};
use tree_sitter_highlight::{
c, Error, Highlight, HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer,
};
use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path};
// Lazily-built highlight configurations for the fixture grammars used below.
static JS_HIGHLIGHT: LazyLock<HighlightConfiguration> =
    LazyLock::new(|| get_highlight_config("javascript", Some("injections.scm"), &HIGHLIGHT_NAMES));
static JSDOC_HIGHLIGHT: LazyLock<HighlightConfiguration> =
    LazyLock::new(|| get_highlight_config("jsdoc", None, &HIGHLIGHT_NAMES));
static HTML_HIGHLIGHT: LazyLock<HighlightConfiguration> =
    LazyLock::new(|| get_highlight_config("html", Some("injections.scm"), &HIGHLIGHT_NAMES));
static EJS_HIGHLIGHT: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
    get_highlight_config(
        "embedded-template",
        Some("injections-ejs.scm"),
        &HIGHLIGHT_NAMES,
    )
});
static RUST_HIGHLIGHT: LazyLock<HighlightConfiguration> =
    LazyLock::new(|| get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES));
// The recognized highlight names; indices into this list are the
// `Highlight` values produced by the configurations above.
static HIGHLIGHT_NAMES: LazyLock<Vec<String>> = LazyLock::new(|| {
    [
        "attribute",
        "boolean",
        "carriage-return",
        "comment",
        "constant",
        "constant.builtin",
        "constructor",
        "embedded",
        "function",
        "function.builtin",
        "keyword",
        "module",
        "number",
        "operator",
        "property",
        "property.builtin",
        "punctuation",
        "punctuation.bracket",
        "punctuation.delimiter",
        "punctuation.special",
        "string",
        "string.special",
        "tag",
        "type",
        "type.builtin",
        "variable",
        "variable.builtin",
        "variable.parameter",
    ]
    .iter()
    .copied()
    .map(String::from)
    .collect()
});
// HTML attribute text (`class=...`) emitted for each highlight index.
static HTML_ATTRS: LazyLock<Vec<String>> = LazyLock::new(|| {
    HIGHLIGHT_NAMES
        .iter()
        .map(|s| format!("class={s}"))
        .collect()
});
// Plain JS source: keywords, operators, punctuation, and variables
// all resolve to the expected highlight names.
#[test]
fn test_highlighting_javascript() {
    let source = "const a = function(b) { return b + c; }";
    assert_eq!(
        &to_token_vector(source, &JS_HIGHLIGHT).unwrap(),
        &[vec![
            ("const", vec!["keyword"]),
            (" ", vec![]),
            ("a", vec!["function"]),
            (" ", vec![]),
            ("=", vec!["operator"]),
            (" ", vec![]),
            ("function", vec!["keyword"]),
            ("(", vec!["punctuation.bracket"]),
            ("b", vec!["variable"]),
            (")", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("{", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("return", vec!["keyword"]),
            (" ", vec![]),
            ("b", vec!["variable"]),
            (" ", vec![]),
            ("+", vec!["operator"]),
            (" ", vec![]),
            ("c", vec!["variable"]),
            (";", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("}", vec!["punctuation.bracket"]),
        ]]
    );
}
// HTML injected into a JS template literal: inner tokens carry both the
// outer ("string") and inner highlight names.
#[test]
fn test_highlighting_injected_html_in_javascript() {
    let source = ["const s = html `<div>${a < b}</div>`;"].join("\n");
    assert_eq!(
        &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
        &[vec![
            ("const", vec!["keyword"]),
            (" ", vec![]),
            ("s", vec!["variable"]),
            (" ", vec![]),
            ("=", vec!["operator"]),
            (" ", vec![]),
            ("html", vec!["function"]),
            (" ", vec![]),
            ("`", vec!["string"]),
            ("<", vec!["string", "punctuation.bracket"]),
            ("div", vec!["string", "tag"]),
            (">", vec!["string", "punctuation.bracket"]),
            ("${", vec!["string", "embedded", "punctuation.special"]),
            ("a", vec!["string", "embedded", "variable"]),
            (" ", vec!["string", "embedded"]),
            ("<", vec!["string", "embedded", "operator"]),
            (" ", vec!["string", "embedded"]),
            ("b", vec!["string", "embedded", "variable"]),
            ("}", vec!["string", "embedded", "punctuation.special"]),
            ("</", vec!["string", "punctuation.bracket"]),
            ("div", vec!["string", "tag"]),
            (">", vec!["string", "punctuation.bracket"]),
            ("`", vec!["string"]),
            (";", vec!["punctuation.delimiter"]),
        ]]
    );
}
// JS injected into an HTML <script> tag, single-line case.
#[test]
fn test_highlighting_injected_javascript_in_html_mini() {
    let source = "<script>const x = new Thing();</script>";
    assert_eq!(
        &to_token_vector(source, &HTML_HIGHLIGHT).unwrap(),
        &[vec![
            ("<", vec!["punctuation.bracket"]),
            ("script", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
            ("const", vec!["keyword"]),
            (" ", vec![]),
            ("x", vec!["variable"]),
            (" ", vec![]),
            ("=", vec!["operator"]),
            (" ", vec![]),
            ("new", vec!["keyword"]),
            (" ", vec![]),
            ("Thing", vec!["constructor"]),
            ("(", vec!["punctuation.bracket"]),
            (")", vec!["punctuation.bracket"]),
            (";", vec!["punctuation.delimiter"]),
            ("</", vec!["punctuation.bracket"]),
            ("script", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
        ],]
    );
}
// Multi-line HTML with an injected <script>; tokens are grouped per line.
#[test]
fn test_highlighting_injected_javascript_in_html() {
    let source = [
        "<body>",
        "  <script>",
        "    const x = new Thing();",
        "  </script>",
        "</body>",
    ]
    .join("\n");
    assert_eq!(
        &to_token_vector(&source, &HTML_HIGHLIGHT).unwrap(),
        &[
            vec![
                ("<", vec!["punctuation.bracket"]),
                ("body", vec!["tag"]),
                (">", vec!["punctuation.bracket"]),
            ],
            vec![
                ("  ", vec![]),
                ("<", vec!["punctuation.bracket"]),
                ("script", vec!["tag"]),
                (">", vec!["punctuation.bracket"]),
            ],
            vec![
                ("    ", vec![]),
                ("const", vec!["keyword"]),
                (" ", vec![]),
                ("x", vec!["variable"]),
                (" ", vec![]),
                ("=", vec!["operator"]),
                (" ", vec![]),
                ("new", vec!["keyword"]),
                (" ", vec![]),
                ("Thing", vec!["constructor"]),
                ("(", vec!["punctuation.bracket"]),
                (")", vec!["punctuation.bracket"]),
                (";", vec!["punctuation.delimiter"]),
            ],
            vec![
                ("  ", vec![]),
                ("</", vec!["punctuation.bracket"]),
                ("script", vec!["tag"]),
                (">", vec!["punctuation.bracket"]),
            ],
            vec![
                ("</", vec!["punctuation.bracket"]),
                ("body", vec!["tag"]),
                (">", vec!["punctuation.bracket"]),
            ],
        ]
    );
}
// HTML rendering: spans that cross line boundaries are closed and reopened
// so that every output line is self-contained.
#[test]
fn test_highlighting_multiline_nodes_to_html() {
    let source = [
        "const SOMETHING = `",
        "  one ${",
        "    two()",
        "  } three",
        "`",
        "",
    ]
    .join("\n");
    assert_eq!(
        &to_html(&source, &JS_HIGHLIGHT).unwrap(),
        &[
            "<span class=keyword>const</span> <span class=constant>SOMETHING</span> <span class=operator>=</span> <span class=string>`</span>\n".to_string(),
            "<span class=string>  one <span class=embedded><span class=punctuation.special>${</span></span></span>\n".to_string(),
            "<span class=string><span class=embedded>    <span class=function>two</span><span class=punctuation.bracket>(</span><span class=punctuation.bracket>)</span></span></span>\n".to_string(),
            "<span class=string><span class=embedded>  <span class=punctuation.special>}</span></span> three</span>\n".to_string(),
            "<span class=string>`</span>\n".to_string(),
        ]
    );
}
// Locals tracking: a local definition of `module` shadows the builtin, and
// `b` stays highlighted as the parameter defined above.
#[test]
fn test_highlighting_with_local_variable_tracking() {
    let source = [
        "module.exports = function a(b) {",
        "  const module = c;",
        "  console.log(module, b);",
        "}",
    ]
    .join("\n");
    assert_eq!(
        &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
        &[
            vec![
                ("module", vec!["variable.builtin"]),
                (".", vec!["punctuation.delimiter"]),
                ("exports", vec!["function"]),
                (" ", vec![]),
                ("=", vec!["operator"]),
                (" ", vec![]),
                ("function", vec!["keyword"]),
                (" ", vec![]),
                ("a", vec!["function"]),
                ("(", vec!["punctuation.bracket"]),
                ("b", vec!["variable"]),
                (")", vec!["punctuation.bracket"]),
                (" ", vec![]),
                ("{", vec!["punctuation.bracket"])
            ],
            vec![
                ("  ", vec![]),
                ("const", vec!["keyword"]),
                (" ", vec![]),
                ("module", vec!["variable"]),
                (" ", vec![]),
                ("=", vec!["operator"]),
                (" ", vec![]),
                ("c", vec!["variable"]),
                (";", vec!["punctuation.delimiter"])
            ],
            vec![
                ("  ", vec![]),
                ("console", vec!["variable.builtin"]),
                (".", vec!["punctuation.delimiter"]),
                ("log", vec!["function"]),
                ("(", vec!["punctuation.bracket"]),
                // Not a builtin, because `module` was defined as a variable above.
                ("module", vec!["variable"]),
                (",", vec!["punctuation.delimiter"]),
                (" ", vec![]),
                // A parameter, because `b` was defined as a parameter above.
                ("b", vec!["variable"]),
                (")", vec!["punctuation.bracket"]),
                (";", vec!["punctuation.delimiter"]),
            ],
            vec![("}", vec!["punctuation.bracket"])]
        ],
    );
}
// Empty lines in the source must appear as bare "\n" lines in the HTML
// output, with no spurious spans.
#[test]
fn test_highlighting_empty_lines() {
    let source = [
        "class A {",
        "",
        "  b(c) {",
        "",
        "    d(e)",
        "",
        "  }",
        "",
        "}",
    ]
    .join("\n");
    assert_eq!(
        &to_html(&source, &JS_HIGHLIGHT).unwrap(),
        &[
            "<span class=keyword>class</span> <span class=constructor>A</span> <span class=punctuation.bracket>{</span>\n".to_string(),
            "\n".to_string(),
            "  <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
            "\n".to_string(),
            "    <span class=function>d</span><span class=punctuation.bracket>(</span><span class=variable>e</span><span class=punctuation.bracket>)</span>\n".to_string(),
            "\n".to_string(),
            "  <span class=punctuation.bracket>}</span>\n".to_string(),
            "\n".to_string(),
            "<span class=punctuation.bracket>}</span>\n".to_string(),
        ]
    );
}
// Carriage returns get their own (empty) highlight span so renderers can
// make them visible; "\r\n" still counts as one line break.
#[test]
fn test_highlighting_carriage_returns() {
    let source = "a = \"a\rb\"\r\nb\r";
    assert_eq!(
        &to_html(source, &JS_HIGHLIGHT).unwrap(),
        &[
            "<span class=variable>a</span> <span class=operator>=</span> <span class=string>&quot;a<span class=carriage-return></span><span class=variable>b</span>&quot;</span>\n",
            "<span class=variable>b</span><span class=carriage-return></span>\n",
        ],
    );
}
// Two levels of injection in one document: EJS hosts both HTML and JS.
#[test]
fn test_highlighting_ejs_with_html_and_javascript() {
    let source = ["<div><% foo() %></div><script> bar() </script>"].join("\n");
    assert_eq!(
        &to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(),
        &[[
            ("<", vec!["punctuation.bracket"]),
            ("div", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
            ("<%", vec!["keyword"]),
            (" ", vec![]),
            ("foo", vec!["function"]),
            ("(", vec!["punctuation.bracket"]),
            (")", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("%>", vec!["keyword"]),
            ("</", vec!["punctuation.bracket"]),
            ("div", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
            ("<", vec!["punctuation.bracket"]),
            ("script", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("bar", vec!["function"]),
            ("(", vec!["punctuation.bracket"]),
            (")", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("</", vec!["punctuation.bracket"]),
            ("script", vec!["tag"]),
            (">", vec!["punctuation.bracket"]),
        ]],
    );
}
#[test]
fn test_highlighting_javascript_with_jsdoc() {
    // Regression test: the middle comment has no highlights. This should not prevent
    // later injections from highlighting properly.
    let source = ["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n");
    assert_eq!(
        &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
        &[[
            ("a", vec!["variable"]),
            (" ", vec![]),
            ("/* ", vec!["comment"]),
            ("@see", vec!["comment", "keyword"]),
            (" a */", vec!["comment"]),
            (" ", vec![]),
            ("b", vec!["variable"]),
            (";", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("/* nothing */", vec!["comment"]),
            (" ", vec![]),
            ("c", vec!["variable"]),
            (";", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("/* ", vec!["comment"]),
            ("@see", vec!["comment", "keyword"]),
            (" b */", vec!["comment"])
        ]],
    );
}
// Rust highlighting where a macro's content children are included in the
// highlighted output.
#[test]
fn test_highlighting_with_content_children_included() {
    let source = ["assert!(", "    a.b.c() < D::e::<F>()", ");"].join("\n");
    assert_eq!(
        &to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(),
        &[
            vec![
                ("assert", vec!["function"]),
                ("!", vec!["function"]),
                ("(", vec!["punctuation.bracket"]),
            ],
            vec![
                ("    a", vec![]),
                (".", vec!["punctuation.delimiter"]),
                ("b", vec!["property"]),
                (".", vec!["punctuation.delimiter"]),
                ("c", vec!["function"]),
                ("(", vec!["punctuation.bracket"]),
                (")", vec!["punctuation.bracket"]),
                (" < ", vec![]),
                ("D", vec!["type"]),
                ("::", vec!["punctuation.delimiter"]),
                ("e", vec!["function"]),
                ("::", vec!["punctuation.delimiter"]),
                ("<", vec!["punctuation.bracket"]),
                ("F", vec!["type"]),
                (">", vec!["punctuation.bracket"]),
                ("(", vec!["punctuation.bracket"]),
                (")", vec!["punctuation.bracket"]),
            ],
            vec![
                (")", vec!["punctuation.bracket"]),
                (";", vec!["punctuation.delimiter"]),
            ]
        ],
    );
}
// Cancellation: setting the flag from the injection callback must surface
// as `Error::Cancelled` while iterating events, not as a panic.
#[test]
fn test_highlighting_cancellation() {
    // An HTML document with a large injected JavaScript document:
    let mut source = "<script>\n".to_string();
    for _ in 0..500 {
        source += "function a() { console.log('hi'); }\n";
    }
    source += "</script>\n";
    // Cancel the highlighting before parsing the injected document.
    let cancellation_flag = AtomicUsize::new(0);
    let injection_callback = |name: &str| {
        cancellation_flag.store(1, Ordering::SeqCst);
        test_language_for_injection_string(name)
    };
    // The initial `highlight` call, which eagerly parses the outer document, should not fail.
    let mut highlighter = Highlighter::new();
    let events = highlighter
        .highlight(
            &HTML_HIGHLIGHT,
            source.as_bytes(),
            Some(&cancellation_flag),
            injection_callback,
        )
        .unwrap();
    // Iterating the scopes should not panic. It should return an error once the
    // cancellation is detected.
    for event in events {
        if let Err(e) = event {
            assert_eq!(e, Error::Cancelled);
            return;
        }
    }
    panic!("Expected an error while iterating highlighter");
}
// Exercises the C API end to end: register two languages, highlight an HTML
// document with injected JS, and reassemble lines from the raw buffer.
#[test]
fn test_highlighting_via_c_api() {
    // NUL-terminated attribute strings; the name is the part after "class=".
    let highlights = [
        "class=tag\0",
        "class=function\0",
        "class=string\0",
        "class=keyword\0",
    ];
    let highlight_names = highlights
        .iter()
        .map(|h| h["class=".len()..].as_ptr().cast::<c_char>())
        .collect::<Vec<_>>();
    let highlight_attrs = highlights
        .iter()
        .map(|h| h.as_bytes().as_ptr().cast::<c_char>())
        .collect::<Vec<_>>();
    let highlighter = unsafe {
        c::ts_highlighter_new(
            std::ptr::addr_of!(highlight_names[0]),
            std::ptr::addr_of!(highlight_attrs[0]),
            highlights.len() as u32,
        )
    };
    let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");
    // Register JavaScript with its highlight/injection/locals queries.
    let js_scope = c_string("source.js");
    let js_injection_regex = c_string("^javascript");
    let language = get_language("javascript");
    let lang_name = c_string("javascript");
    let queries = get_language_queries_path("javascript");
    let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
    let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
    let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap();
    unsafe {
        c::ts_highlighter_add_language(
            highlighter,
            lang_name.as_ptr(),
            js_scope.as_ptr(),
            js_injection_regex.as_ptr(),
            language,
            highlights_query.as_ptr().cast::<c_char>(),
            injections_query.as_ptr().cast::<c_char>(),
            locals_query.as_ptr().cast::<c_char>(),
            highlights_query.len() as u32,
            injections_query.len() as u32,
            locals_query.len() as u32,
        );
    }
    // Register HTML; it has no locals query (null pointer, zero length).
    let html_scope = c_string("text.html.basic");
    let html_injection_regex = c_string("^html");
    let language = get_language("html");
    let lang_name = c_string("html");
    let queries = get_language_queries_path("html");
    let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
    let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
    unsafe {
        c::ts_highlighter_add_language(
            highlighter,
            lang_name.as_ptr(),
            html_scope.as_ptr(),
            html_injection_regex.as_ptr(),
            language,
            highlights_query.as_ptr().cast::<c_char>(),
            injections_query.as_ptr().cast::<c_char>(),
            ptr::null(),
            highlights_query.len() as u32,
            injections_query.len() as u32,
            0,
        );
    }
    let buffer = c::ts_highlight_buffer_new();
    unsafe {
        c::ts_highlighter_highlight(
            highlighter,
            html_scope.as_ptr(),
            source_code.as_ptr(),
            source_code.as_bytes().len() as u32,
            buffer,
            ptr::null_mut(),
        );
    }
    // Rebuild per-line strings from the flat output buffer and line offsets.
    let output_bytes = unsafe { c::ts_highlight_buffer_content(buffer) };
    let output_line_offsets = unsafe { c::ts_highlight_buffer_line_offsets(buffer) };
    let output_len = unsafe { c::ts_highlight_buffer_len(buffer) };
    let output_line_count = unsafe { c::ts_highlight_buffer_line_count(buffer) };
    let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) };
    let output_line_offsets =
        unsafe { slice::from_raw_parts(output_line_offsets, output_line_count as usize) };
    let mut lines = Vec::new();
    for i in 0..(output_line_count as usize) {
        let line_start = output_line_offsets[i] as usize;
        let line_end = output_line_offsets
            .get(i + 1)
            .map_or(output_bytes.len(), |x| *x as usize);
        lines.push(str::from_utf8(&output_bytes[line_start..line_end]).unwrap());
    }
    assert_eq!(
        lines,
        vec![
            "&lt;<span class=tag>script</span>&gt;\n",
            "<span class=keyword>const</span> a = <span class=function>b</span>(<span class=string>&#39;c&#39;</span>);\n",
            "c.<span class=function>d</span>();\n",
            "&lt;/<span class=tag>script</span>&gt;\n",
        ]
    );
    // Release the C-side objects to keep the allocation tracker happy.
    unsafe {
        c::ts_highlighter_delete(highlighter);
        c::ts_highlight_buffer_delete(buffer);
    }
}
// With an ad-hoc query where several patterns can hit the same node, every
// applicable capture must still be applied (e.g. parameters beat plain
// variables).
#[test]
fn test_highlighting_with_all_captures_applied() {
    let source = "fn main(a: u32, b: u32) -> { let c = a + b; }";
    let language = get_language("rust");
    let highlights_query = indoc::indoc! {"
        [
            \"fn\"
            \"let\"
        ] @keyword
        (identifier) @variable
        (function_item name: (identifier) @function)
        (parameter pattern: (identifier) @variable.parameter)
        (primitive_type) @type.builtin
        \"=\" @operator
        [ \"->\" \":\" \";\" ] @punctuation.delimiter
        [ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket
    "};
    let mut rust_highlight_reverse =
        HighlightConfiguration::new(language, "rust", highlights_query, "", "").unwrap();
    rust_highlight_reverse.configure(&HIGHLIGHT_NAMES);
    assert_eq!(
        &to_token_vector(source, &rust_highlight_reverse).unwrap(),
        &[[
            ("fn", vec!["keyword"]),
            (" ", vec![]),
            ("main", vec!["function"]),
            ("(", vec!["punctuation.bracket"]),
            ("a", vec!["variable.parameter"]),
            (":", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("u32", vec!["type.builtin"]),
            (", ", vec![]),
            ("b", vec!["variable.parameter"]),
            (":", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("u32", vec!["type.builtin"]),
            (")", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("->", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("{", vec!["punctuation.bracket"]),
            (" ", vec![]),
            ("let", vec!["keyword"]),
            (" ", vec![]),
            ("c", vec!["variable"]),
            (" ", vec![]),
            ("=", vec!["operator"]),
            (" ", vec![]),
            ("a", vec!["variable"]),
            (" + ", vec![]),
            ("b", vec!["variable"]),
            (";", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("}", vec!["punctuation.bracket"])
        ]],
    );
}
// LossyUtf8 splits a byte slice into alternating valid-UTF-8 chunks and
// U+FFFD replacement characters (one per invalid byte).
#[test]
fn test_decode_utf8_lossy() {
    use tree_sitter::LossyUtf8;
    let parts = LossyUtf8::new(b"hi").collect::<Vec<_>>();
    assert_eq!(parts, vec!["hi"]);
    let parts = LossyUtf8::new(b"hi\xc0\xc1bye").collect::<Vec<_>>();
    assert_eq!(parts, vec!["hi", "\u{fffd}", "\u{fffd}", "bye"]);
    let parts = LossyUtf8::new(b"\xc0\xc1bye").collect::<Vec<_>>();
    assert_eq!(parts, vec!["\u{fffd}", "\u{fffd}", "bye"]);
    let parts = LossyUtf8::new(b"hello\xc0\xc1").collect::<Vec<_>>();
    assert_eq!(parts, vec!["hello", "\u{fffd}", "\u{fffd}"]);
}
/// Converts a string slice into an owned `CString` for passing across FFI.
///
/// Panics if `s` contains an interior NUL byte, which test inputs never do.
fn c_string(s: &str) -> CString {
    // `CString::new` accepts `&str` directly; the previous
    // `s.as_bytes().to_vec()` intermediate copy was redundant.
    CString::new(s).unwrap()
}
// Injection callback shared by the highlight tests: maps an injected
// language name to its pre-built highlight configuration, or `None` for
// unknown names.
fn test_language_for_injection_string<'a>(string: &str) -> Option<&'a HighlightConfiguration> {
    if string == "javascript" {
        Some(&JS_HIGHLIGHT)
    } else if string == "html" {
        Some(&HTML_HIGHLIGHT)
    } else if string == "rust" {
        Some(&RUST_HIGHLIGHT)
    } else if string == "jsdoc" {
        Some(&JSDOC_HIGHLIGHT)
    } else {
        None
    }
}
// Renders `src` with the given highlight configuration through the HTML
// renderer (using the shared injection callback) and returns one HTML
// string per source line.
fn to_html<'a>(
src: &'a str,
language_config: &'a HighlightConfiguration,
) -> Result<Vec<String>, Error> {
let src = src.as_bytes();
let mut renderer = HtmlRenderer::new();
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(
language_config,
src,
None,
&test_language_for_injection_string,
)?;
// Wire the carriage-return highlight to the "carriage-return" entry of
// HIGHLIGHT_NAMES, if that name is configured.
renderer.set_carriage_return_highlight(
HIGHLIGHT_NAMES
.iter()
.position(|s| s == "carriage-return")
.map(Highlight),
);
// Attribute text for each highlight comes from the parallel HTML_ATTRS
// table, indexed by highlight id.
renderer
.render(events, src, &|highlight, output| {
output.extend(HTML_ATTRS[highlight.0].as_bytes());
})
.unwrap();
Ok(renderer
.lines()
.map(std::string::ToString::to_string)
.collect())
}
#[allow(clippy::type_complexity)]
// Runs the highlighter over `src` and flattens the event stream into, per
// line, a list of (text, active highlight names) pairs. Start/end events
// maintain a stack of active highlight names; source events are split on
// newlines (trailing `\r` trimmed) and empty segments are dropped.
fn to_token_vector<'a>(
src: &'a str,
language_config: &'a HighlightConfiguration,
) -> Result<Vec<Vec<(&'a str, Vec<&'static str>)>>, Error> {
let src = src.as_bytes();
let mut highlighter = Highlighter::new();
let mut lines = Vec::new();
let mut highlights = Vec::new();
let mut line = Vec::new();
let events = highlighter.highlight(
language_config,
src,
None,
&test_language_for_injection_string,
)?;
for event in events {
match event? {
HighlightEvent::HighlightStart(s) => highlights.push(HIGHLIGHT_NAMES[s.0].as_str()),
HighlightEvent::HighlightEnd => {
highlights.pop();
}
HighlightEvent::Source { start, end } => {
let s = str::from_utf8(&src[start..end]).unwrap();
// A source chunk may span several lines; every `\n` inside
// it finishes the current output line.
for (i, l) in s.split('\n').enumerate() {
let l = l.trim_end_matches('\r');
if i > 0 {
lines.push(std::mem::take(&mut line));
}
if !l.is_empty() {
line.push((l, highlights.clone()));
}
}
}
}
}
// Flush a trailing line that was not terminated by a newline.
if !line.is_empty() {
lines.push(line);
}
Ok(lines)
}

View file

@ -0,0 +1,199 @@
use tree_sitter::{self, Parser};
use super::helpers::fixtures::get_language;
#[test]
// Exercises the lookahead-iterator API: for the parse state reached after
// the `struct` keyword, the iterator must list exactly the symbols that are
// valid next tokens, and both `reset_state` and `reset` must rewind it.
fn test_lookahead_iterator() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(&language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
// `next_parse_state` must agree with the explicit state-transition API.
assert_eq!(
next_state,
language.next_state(cursor.node().parse_state(), cursor.node().grammar_id())
);
assert!((next_state as usize) < language.parse_state_count());
assert!(cursor.goto_next_sibling()); // type_identifier
assert_eq!(next_state, cursor.node().parse_state());
assert_eq!(cursor.node().grammar_name(), "identifier");
assert_ne!(cursor.node().grammar_id(), cursor.node().kind_id());
let expected_symbols = ["//", "/*", "identifier", "line_comment", "block_comment"];
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
assert_eq!(*lookahead.language(), language);
assert!(lookahead.iter_names().eq(expected_symbols));
// Both reset flavors must restart iteration from the beginning.
lookahead.reset_state(next_state);
assert!(lookahead.iter_names().eq(expected_symbols));
lookahead.reset(&language, next_state);
assert!(lookahead
.map(|s| language.node_kind_for_id(s).unwrap())
.eq(expected_symbols));
}
#[test]
// Primarily a compile-time check: advancing a lookahead iterator (or its
// names iterator) requires a `mut` binding. Also drives both iterators one
// step at runtime to make sure that doesn't panic.
fn test_lookahead_iterator_modifiable_only_by_mut() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(&language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
let _ = lookahead.next();
let mut names = lookahead.iter_names();
let _ = names.next();
}
#[test]
// Walks every node kind in the Rust grammar and spot-checks the symbol
// metadata predicates: supertype flags for hidden rules, named-ness of
// item kinds, and visibility of anonymous punctuation tokens.
fn test_symbol_metadata_checks() {
let language = get_language("rust");
for i in 0..language.node_kind_count() {
let sym = i as u16;
let name = language.node_kind_for_id(sym).unwrap();
match name {
"_type"
| "_expression"
| "_pattern"
| "_literal"
| "_literal_pattern"
| "_declaration_statement" => assert!(language.node_kind_is_supertype(sym)),
// Hidden rules that are not supertypes must not be reported as such.
"_raw_string_literal_start"
| "_raw_string_literal_end"
| "_line_doc_comment"
| "_error_sentinel" => assert!(!language.node_kind_is_supertype(sym)),
"enum_item" | "struct_item" | "type_item" => {
assert!(language.node_kind_is_named(sym));
}
"=>" | "[" | "]" | "(" | ")" | "{" | "}" => {
assert!(language.node_kind_is_visible(sym));
}
_ => {}
}
}
}
#[test]
// Checks the supertype introspection API against the Rust grammar: the
// exact set of supertype symbols, and the sorted, deduplicated subtype
// list for each supertype.
fn test_supertypes() {
let language = get_language("rust");
let supertypes = language.supertypes();
// Supertype metadata is only available for ABI 15+ grammars.
if language.abi_version() < 15 {
return;
}
assert_eq!(supertypes.len(), 5);
assert_eq!(
supertypes
.iter()
.filter_map(|&s| language.node_kind_for_id(s))
.map(|s| s.to_string())
.collect::<Vec<String>>(),
vec![
"_expression",
"_literal",
"_literal_pattern",
"_pattern",
"_type"
]
);
for &supertype in supertypes {
let mut subtypes = language
.subtypes_for_supertype(supertype)
.iter()
.filter_map(|symbol| language.node_kind_for_id(*symbol))
.collect::<Vec<&str>>();
// Sort + dedup so the comparison below is order- and
// duplicate-insensitive.
subtypes.sort_unstable();
subtypes.dedup();
match language.node_kind_for_id(supertype) {
Some("_literal") => {
assert_eq!(
subtypes,
&[
"boolean_literal",
"char_literal",
"float_literal",
"integer_literal",
"raw_string_literal",
"string_literal"
]
);
}
Some("_pattern") => {
assert_eq!(
subtypes,
&[
"_",
"_literal_pattern",
"captured_pattern",
"const_block",
"generic_pattern",
"identifier",
"macro_invocation",
"mut_pattern",
"or_pattern",
"range_pattern",
"ref_pattern",
"reference_pattern",
"remaining_field_pattern",
"scoped_identifier",
"slice_pattern",
"struct_pattern",
"tuple_pattern",
"tuple_struct_pattern",
]
);
}
Some("_type") => {
assert_eq!(
subtypes,
&[
"abstract_type",
"array_type",
"bounded_type",
"dynamic_type",
"function_type",
"generic_type",
"macro_invocation",
"metavariable",
"never_type",
"pointer_type",
"primitive_type",
"reference_type",
"removed_trait_bound",
"scoped_type_identifier",
"tuple_type",
"type_identifier",
"unit_type"
]
);
}
// The other two supertypes are covered by the count and name
// assertions above.
_ => {}
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,104 @@
// For some reason, `Command::spawn` doesn't work in CI environments for many exotic arches.
#![cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
use std::{
env::VarError,
process::{Command, Stdio},
};
use tree_sitter::Parser;
use tree_sitter_generate::load_grammar_file;
use super::generate_parser;
use crate::tests::helpers::fixtures::{fixtures_dir, get_test_language};
// The `sanitizing` cfg is required to avoid running these tests under specific sanitizers,
// because they don't work well with subprocesses _(it's an assumption)_.
//
// Below are two alternative examples of how to disable tests for some arches
// if a way with excluding the whole mod from compilation wouldn't work well.
//
// XXX: Also, it may make sense to keep such tests ignored by default
// to avoid surprises, and to enable them on CI by passing an extra option explicitly:
//
// > cargo test -- --include-ignored
//
// #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
// #[cfg_attr(not(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing))), ignore)]
//
#[test]
// Re-spawns this test binary as a child process (selected via an env var)
// that runs `hang_test`, which parses input expected to make the parser
// hang rather than segfault. The parent sleeps briefly, asserts the child
// is still alive (i.e. hanging, not crashed or finished), then kills it.
fn test_grammar_that_should_hang_and_not_segfault() {
let parent_sleep_millis = 1000;
let test_name = "test_grammar_that_should_hang_and_not_segfault";
let test_var = "CARGO_HANG_TEST";
eprintln!(" {test_name}");
let tests_exec_path = std::env::args()
.next()
.expect("Failed to get tests executable path");
match std::env::var(test_var) {
// Child branch: the env var marks this process as the hanging child.
Ok(v) if v == test_name => {
eprintln!(" child process id {}", std::process::id());
hang_test();
}
// Parent branch: spawn ourselves with the env var set.
Err(VarError::NotPresent) => {
eprintln!(" parent process id {}", std::process::id());
let mut command = Command::new(tests_exec_path);
command.arg(test_name).env(test_var, test_name);
// Forward --nocapture so child output is visible when requested;
// otherwise silence the child entirely.
if std::env::args().any(|x| x == "--nocapture") {
command.arg("--nocapture");
} else {
command.stdout(Stdio::null()).stderr(Stdio::null());
}
match command.spawn() {
Ok(mut child) => {
std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis));
// If the child exited within the grace period, it did not
// hang — that is the failure this test exists to catch.
match child.try_wait() {
Ok(Some(status)) if status.success() => {
panic!("Child didn't hang and exited successfully")
}
Ok(Some(status)) => panic!(
"Child didn't hang and exited with status code: {:?}",
status.code()
),
_ => (),
}
if let Err(e) = child.kill() {
eprintln!(
"Failed to kill hang test's process id: {}, error: {e}",
child.id()
);
}
}
Err(e) => panic!("{e}"),
}
}
Err(e) => panic!("Env var error: {e}"),
// `Ok` with any other value cannot occur: the var is only ever set
// to `test_name` above.
_ => unreachable!(),
}
}
// Child-process half of the hang test: generates a parser from the
// `get_col_should_hang_not_crash` fixture grammar and parses input that is
// expected to hang the parser indefinitely (the parent kills this process).
fn hang_test() {
let test_grammar_dir = fixtures_dir()
.join("test_grammars")
.join("get_col_should_hang_not_crash");
let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap();
let (parser_name, parser_code) = generate_parser(grammar_json.as_str()).unwrap();
let language = get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path()));
let mut parser = Parser::new();
parser.set_language(&language).unwrap();
// This parse call is expected to never return.
let code_that_should_hang = "\nHello";
parser.parse(code_that_should_hang, None).unwrap();
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,15 @@
use tree_sitter::Parser;
use super::helpers::{allocations, fixtures::get_language};
#[test]
// Regression test: this adversarial C++ snippet previously exhibited
// pathological behavior. Parsing must complete, succeed, and leak no
// allocations (checked by the `allocations::record` wrapper).
fn test_pathological_example_1() {
    let source = r#"*ss<s"ss<sqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<qssqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<sqss<sqss<s._<s<sq>(qqX<sqss<s.ss<sqsssq<(qss<sq&=ss<s<sqss<s._<s<sq<(qqX<sqss<s.ss<sqs"#;
    allocations::record(|| {
        let mut parser = Parser::new();
        parser.set_language(&get_language("cpp")).unwrap();
        parser.parse(source, None).unwrap();
    });
}

View file

@ -0,0 +1,18 @@
# Internal proc-macro helper crate for the tree-sitter test suite
# (provides the `#[retry]` and `#[test_with_seed]` attributes).
[package]
name = "tree-sitter-tests-proc-macro"
version = "0.0.0"
edition.workspace = true
rust-version.workspace = true
# Test-only helper; never published to crates.io.
publish = false
[lints]
workspace = true
[lib]
# Required so the crate can export attribute macros.
proc-macro = true
[dependencies]
proc-macro2 = "1.0.93"
quote = "1.0.38"
rand = "0.8.5"
syn = { version = "2.0.96", features = ["full"] }

View file

@ -0,0 +1,135 @@
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::quote;
use syn::{
parse::{Parse, ParseStream},
parse_macro_input, Error, Expr, Ident, ItemFn, LitInt, Token,
};
/// Attribute macro that re-runs a failing test up to `N` extra times.
///
/// Usage: `#[retry(2)] fn flaky() { … }`. The generated wrapper calls the
/// original function (nested inside it), returns on the first success, and
/// propagates the final attempt's panic otherwise.
#[proc_macro_attribute]
pub fn retry(args: TokenStream, input: TokenStream) -> TokenStream {
    let count = parse_macro_input!(args as LitInt);
    let input = parse_macro_input!(input as ItemFn);
    let attrs = &input.attrs;
    let name = &input.sig.ident;
    TokenStream::from(quote! {
        // Attributes are whitespace-separated; a `,` repetition separator
        // (`#(#attrs),*`) would generate invalid code when the annotated
        // function carries more than one attribute.
        #(#attrs)*
        fn #name() {
            #input
            for i in 0..=#count {
                let result = std::panic::catch_unwind(|| {
                    #name();
                });
                if result.is_ok() {
                    return;
                }
                if i == #count {
                    std::panic::resume_unwind(result.unwrap_err());
                }
            }
        }
    })
}
/// Attribute macro for randomized tests: runs the annotated function with a
/// seed, retrying on failure with a fresh seed.
///
/// Parameters (comma-separated `name = value` pairs):
/// - `seed` (required): expression producing the initial seed.
/// - `retry` (optional, default `0`): number of extra attempts.
/// - `seed_fn` (optional): function called to produce a new seed per retry.
///
/// The annotated function must take the seed as its only argument.
#[proc_macro_attribute]
pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream {
    struct Args {
        retry: LitInt,
        seed: Expr,
        seed_fn: Option<Ident>,
    }
    impl Parse for Args {
        fn parse(input: ParseStream) -> syn::Result<Self> {
            let mut retry = None;
            let mut seed = None;
            let mut seed_fn = None;
            while !input.is_empty() {
                let name = input.parse::<Ident>()?;
                match name.to_string().as_str() {
                    "retry" => {
                        input.parse::<Token![=]>()?;
                        retry.replace(input.parse()?);
                    }
                    "seed" => {
                        input.parse::<Token![=]>()?;
                        seed.replace(input.parse()?);
                    }
                    "seed_fn" => {
                        input.parse::<Token![=]>()?;
                        seed_fn.replace(input.parse()?);
                    }
                    x => {
                        return Err(Error::new(
                            name.span(),
                            format!("Unsupported parameter `{x}`"),
                        ))
                    }
                }
                if !input.is_empty() {
                    input.parse::<Token![,]>()?;
                }
            }
            // `retry` is optional and defaults to zero extra attempts, so
            // the `expect` below can never fire for it.
            if retry.is_none() {
                retry.replace(LitInt::new("0", Span::mixed_site()));
            }
            Ok(Self {
                retry: retry.expect("`retry` parameter is required"),
                seed: seed.expect("`seed` parameter is required"),
                seed_fn,
            })
        }
    }
    let Args {
        retry,
        seed,
        seed_fn,
    } = parse_macro_input!(args as Args);
    // Iterating over the `Option` lets `quote!` emit the reseeding call
    // zero or one times.
    let seed_fn = seed_fn.iter();
    let func = parse_macro_input!(input as ItemFn);
    let attrs = &func.attrs;
    let name = &func.sig.ident;
    TokenStream::from(quote! {
        #[test]
        // Attributes are whitespace-separated; a `,` repetition separator
        // (`#(#attrs),*`) would generate invalid code when the annotated
        // function carries more than one attribute.
        #(#attrs)*
        fn #name() {
            #func
            let mut seed = #seed;
            for i in 0..=#retry {
                let result = std::panic::catch_unwind(|| {
                    #name(seed);
                });
                if result.is_ok() {
                    return;
                }
                if i == #retry {
                    std::panic::resume_unwind(result.unwrap_err());
                }
                #(
                    seed = #seed_fn();
                )*
                if i < #retry {
                    println!("\nRetry {}/{} with a new seed {}", i + 1, #retry, seed);
                }
            }
        }
    })
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,448 @@
use std::{
ffi::{CStr, CString},
fs, ptr, slice, str,
};
use tree_sitter::Point;
use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext};
use super::helpers::{
allocations,
fixtures::{get_language, get_language_queries_path},
};
/// Tags query for Python used throughout these tests: captures function and
/// class definitions (attaching adjacent docstrings, stripped of quotes via
/// `#strip!`) plus plain and attribute call sites.
const PYTHON_TAG_QUERY: &str = r#"
(
(function_definition
name: (identifier) @name
body: (block . (expression_statement (string) @doc))) @definition.function
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(function_definition
name: (identifier) @name) @definition.function
(
(class_definition
name: (identifier) @name
body: (block
. (expression_statement (string) @doc))) @definition.class
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(class_definition
name: (identifier) @name) @definition.class
(call
function: (identifier) @name) @reference.call
(call
function: (attribute
attribute: (identifier) @name)) @reference.call
"#;
/// Tags query for JavaScript: captures class, method, and function
/// definitions with their adjacent doc comments (`#select-adjacent!` drops
/// non-adjacent comments; `#strip!` removes comment punctuation), plus
/// call references.
const JS_TAG_QUERY: &str = r#"
(
(comment)* @doc .
(class_declaration
name: (identifier) @name) @definition.class
(#select-adjacent! @doc @definition.class)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(method_definition
name: (property_identifier) @name) @definition.method
(#select-adjacent! @doc @definition.method)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(function_declaration
name: (identifier) @name) @definition.function
(#select-adjacent! @doc @definition.function)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(call_expression
function: (identifier) @name) @reference.call
"#;
/// Tags query for Ruby: method definitions and call references. Bare
/// identifiers count as calls only when the locals query has not resolved
/// them to a local variable (`#is-not? local`); setter targets are ignored.
const RUBY_TAG_QUERY: &str = r"
(method
name: (_) @name) @definition.method
(call
method: (identifier) @name) @reference.call
(setter (identifier) @ignore)
((identifier) @name @reference.call
(#is-not? local))
";
#[test]
// End-to-end tag generation for Python: class, method, and call tags are
// produced in source order, line ranges cover the definition line, and
// adjacent docstrings are attached with quotes stripped.
fn test_tags_python() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
// NOTE(review): the stray `}` near the end of this fixture is not valid
// Python — presumably intentional, to exercise error tolerance; confirm.
let source = br#"
class Customer:
"""
Data about a customer
"""
def age(self):
'''
Get the customer's age
'''
compute_age(self.id)
}
"#;
let tags = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", "class"),
("age", "function"),
("compute_age", "call"),
]
);
assert_eq!(substr(source, &tags[0].line_range), "class Customer:");
assert_eq!(substr(source, &tags[1].line_range), "def age(self):");
assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer");
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
}
#[test]
// JavaScript tag generation: doc comments directly adjacent to a
// definition are attached and joined with newlines, non-adjacent comments
// (like the leading "// hi") are dropped, and spans are row/column points.
fn test_tags_javascript() {
let language = get_language("javascript");
let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap();
let source = br"
// hi
// Data about a customer.
// bla bla bla
class Customer {
/*
* Get the customer's age
*/
getAge() {
}
}
// ok
class Agent {
}
";
let mut tag_context = TagsContext::new();
let tags = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (
substr(source, &t.name_range),
t.span.clone(),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", Point::new(5, 10)..Point::new(5, 18), "class",),
("getAge", Point::new(9, 8)..Point::new(9, 14), "method",),
("Agent", Point::new(15, 10)..Point::new(15, 15), "class",)
]
);
assert_eq!(
tags[0].docs.as_ref().unwrap(),
"Data about a customer.\nbla bla bla"
);
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
// "// ok" is not adjacent to `class Agent`, so Agent has no docs.
assert_eq!(tags[2].docs, None);
}
#[test]
// Tags report both byte-based spans and UTF-16 column ranges; the
// multi-byte emoji prefix makes the two diverge, which this test pins.
fn test_tags_columns_measured_in_utf16_code_units() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes();
let tag = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.next()
.unwrap()
.unwrap();
assert_eq!(substr(source, &tag.name_range), "hello_α");
// `span` columns are in bytes; `utf16_column_range` is in UTF-16 code
// units, hence the smaller numbers.
assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32));
assert_eq!(tag.utf16_column_range, 9..18);
}
#[test]
// Ruby tagging with the fixture locals query: identifiers that resolve to
// local variables or block parameters are excluded from `reference.call`
// tags, as spelled out by the comments inside the fixture source.
fn test_tags_ruby() {
let language = get_language("ruby");
let locals_query =
fs::read_to_string(get_language_queries_path("ruby").join("locals.scm")).unwrap();
let tags_config = TagsConfiguration::new(language, RUBY_TAG_QUERY, &locals_query).unwrap();
let source = strip_whitespace(
8,
"
b = 1
def foo=()
c = 1
# a is a method because it is not in scope
# b is a method because `b` doesn't capture variables from its containing scope
bar a, b, c
[1, 2, 3].each do |a|
# a is a parameter
# b is a method
# c is a variable, because the block captures variables from its containing scope.
baz a, b, c
end
end",
);
let mut tag_context = TagsContext::new();
let tags = tag_context
.generate_tags(&tags_config, source.as_bytes(), None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (
substr(source.as_bytes(), &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id),
(t.span.start.row, t.span.start.column),
))
.collect::<Vec<_>>(),
&[
("foo=", "method", (2, 4)),
("bar", "call", (7, 4)),
("a", "call", (7, 8)),
("b", "call", (7, 11)),
("each", "call", (9, 14)),
("baz", "call", (13, 8)),
("b", "call", (13, 15),),
]
);
}
#[test]
// Setting the cancellation flag while the tag iterator is being consumed
// must make it yield `Error::Cancelled` instead of running to completion;
// the allocation recorder additionally checks nothing leaks on that path.
fn test_tags_cancellation() {
use std::sync::atomic::{AtomicUsize, Ordering};
allocations::record(|| {
// Large javascript document
let source = (0..500)
.map(|_| "/* hi */ class A { /* ok */ b() {} }\n")
.collect::<String>();
let cancellation_flag = AtomicUsize::new(0);
let language = get_language("javascript");
let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let tags = tag_context
.generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag))
.unwrap();
for (i, tag) in tags.0.enumerate() {
// Flip the flag partway through iteration; a later item must
// then surface the cancellation error.
if i == 150 {
cancellation_flag.store(1, Ordering::SeqCst);
}
if let Err(e) = tag {
assert_eq!(e, Error::Cancelled);
return;
}
}
panic!("Expected to halt tagging with an error");
});
}
#[test]
// A capture name that the tagger does not recognize must be rejected at
// configuration time with `Error::InvalidCapture` naming the capture.
fn test_invalid_capture() {
    let error = TagsConfiguration::new(get_language("python"), "(identifier) @method", "")
        .expect_err("expected InvalidCapture error");
    assert_eq!(error, Error::InvalidCapture("method".to_string()));
}
#[test]
// A syntax error after a valid definition still yields tags for the valid
// prefix, and the boolean returned alongside the iterator reports that an
// error node was encountered.
fn test_tags_with_parse_error() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = br"
class Fine: pass
class Bad
";
let (tags, failed) = tag_context
.generate_tags(&tags_config, source, None)
.unwrap();
let newtags = tags.collect::<Result<Vec<_>, _>>().unwrap();
assert!(failed, "syntax error should have been detected");
// Only the well-formed `Fine` class produces a tag.
assert_eq!(
newtags
.iter()
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[("Fine", "class"),]
);
}
#[test]
// Drives the tags C API end to end — registering a language for a scope,
// tagging a buffer, and reading tags/docs/syntax kinds back out of the
// buffer — inside the allocation recorder to catch leaks in the FFI layer.
fn test_tags_via_c_api() {
allocations::record(|| {
let tagger = c::ts_tagger_new();
let buffer = c::ts_tags_buffer_new();
let scope_name = "source.js";
let language = get_language("javascript");
let source_code = strip_whitespace(
12,
"
var a = 1;
// one
// two
// three
function b() {
}
// four
// five
class C extends D {
}
b(a);",
);
let c_scope_name = CString::new(scope_name).unwrap();
let result = unsafe {
c::ts_tagger_add_language(
tagger,
c_scope_name.as_ptr(),
language,
JS_TAG_QUERY.as_ptr(),
ptr::null(),
JS_TAG_QUERY.len() as u32,
0,
)
};
assert_eq!(result, c::TSTagsError::Ok);
let result = unsafe {
c::ts_tagger_tag(
tagger,
c_scope_name.as_ptr(),
source_code.as_ptr(),
source_code.len() as u32,
buffer,
ptr::null(),
)
};
assert_eq!(result, c::TSTagsError::Ok);
// The raw-parts reconstructions below rely on the lengths reported by
// the corresponding buffer-API length functions.
let tags = unsafe {
slice::from_raw_parts(
c::ts_tags_buffer_tags(buffer),
c::ts_tags_buffer_tags_len(buffer) as usize,
)
};
let docs = str::from_utf8(unsafe {
slice::from_raw_parts(
c::ts_tags_buffer_docs(buffer).cast::<u8>(),
c::ts_tags_buffer_docs_len(buffer) as usize,
)
})
.unwrap();
let syntax_types = unsafe {
let mut len = 0;
let ptr = c::ts_tagger_syntax_kinds_for_scope_name(
tagger,
c_scope_name.as_ptr(),
&raw mut len,
);
slice::from_raw_parts(ptr, len as usize)
.iter()
.map(|i| CStr::from_ptr(*i).to_str().unwrap())
.collect::<Vec<_>>()
};
// Tags carry byte offsets into the source and into the shared docs
// buffer; resolve both and compare against the expected triples.
assert_eq!(
tags.iter()
.map(|tag| (
syntax_types[tag.syntax_type_id as usize],
&source_code[tag.name_start_byte as usize..tag.name_end_byte as usize],
&source_code[tag.line_start_byte as usize..tag.line_end_byte as usize],
&docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize],
))
.collect::<Vec<_>>(),
&[
("function", "b", "function b() {", "one\ntwo\nthree"),
("class", "C", "class C extends D {", "four\nfive"),
("call", "b", "b(a);", "")
]
);
unsafe {
c::ts_tags_buffer_delete(buffer);
c::ts_tagger_delete(tagger);
}
});
}
/// Borrows the UTF-8 text of `source` covered by `range`.
/// Panics if the bytes in that range are not valid UTF-8.
fn substr<'a>(source: &'a [u8], range: &std::ops::Range<usize>) -> &'a str {
    let bytes = &source[range.start..range.end];
    std::str::from_utf8(bytes).unwrap()
}
/// Removes the first line of `s` and blindly strips up to `indent` leading
/// characters from each remaining line (lines shorter than `indent` become
/// empty), rejoining the result with `\n`.
fn strip_whitespace(indent: usize, s: &str) -> String {
    let mut result = String::new();
    for (i, line) in s.lines().enumerate().skip(1) {
        if i > 1 {
            result.push('\n');
        }
        let cut = indent.min(line.len());
        result.push_str(&line[cut..]);
    }
    result
}

View file

@ -0,0 +1,70 @@
use tree_sitter::Parser;
use tree_sitter_highlight::{Highlight, Highlighter};
use super::helpers::fixtures::{get_highlight_config, get_language, test_loader};
use crate::{
query_testing::{parse_position_comments, Assertion, Utf8Point},
test_highlight::get_highlight_positions,
};
#[test]
// Parses `^`/`!` position-comment assertions from annotated JavaScript and
// checks both the parsed assertions and the actual highlight positions.
// Highlight indices refer to the configured name list: 0 = function,
// 1 = variable, 2 = keyword.
fn test_highlight_test_with_basic_test() {
let language = get_language("javascript");
let config = get_highlight_config(
"javascript",
Some("injections.scm"),
&[
"function".to_string(),
"variable".to_string(),
"keyword".to_string(),
],
);
let source = [
"// hi",
"var abc = function(d) {",
" // ^ function",
" // ^^^ keyword",
" return d + e;",
" // ^ variable",
" // ^ !variable",
"};",
"var y̆y̆y̆y̆ = function() {}",
" // ^ function",
" // ^ keyword",
]
.join("\n");
let assertions =
parse_position_comments(&mut Parser::new(), &language, source.as_bytes()).unwrap();
assert_eq!(
assertions,
&[
Assertion::new(1, 5, 1, false, String::from("function")),
Assertion::new(1, 11, 3, false, String::from("keyword")),
Assertion::new(4, 9, 1, false, String::from("variable")),
Assertion::new(4, 11, 1, true, String::from("variable")),
Assertion::new(8, 5, 1, false, String::from("function")),
Assertion::new(8, 11, 1, false, String::from("keyword")),
]
);
let mut highlighter = Highlighter::new();
let highlight_positions =
get_highlight_positions(test_loader(), &mut highlighter, &config, source.as_bytes())
.unwrap();
// Positions are UTF-8 points; the multi-byte `y̆` identifiers shift the
// columns on row 8.
assert_eq!(
highlight_positions,
&[
(Utf8Point::new(1, 0), Utf8Point::new(1, 3), Highlight(2)), // "var"
(Utf8Point::new(1, 4), Utf8Point::new(1, 7), Highlight(0)), // "abc"
(Utf8Point::new(1, 10), Utf8Point::new(1, 18), Highlight(2)), // "function"
(Utf8Point::new(1, 19), Utf8Point::new(1, 20), Highlight(1)), // "d"
(Utf8Point::new(4, 2), Utf8Point::new(4, 8), Highlight(2)), // "return"
(Utf8Point::new(4, 9), Utf8Point::new(4, 10), Highlight(1)), // "d"
(Utf8Point::new(4, 13), Utf8Point::new(4, 14), Highlight(1)), // "e"
(Utf8Point::new(8, 0), Utf8Point::new(8, 3), Highlight(2)), // "var"
(Utf8Point::new(8, 4), Utf8Point::new(8, 8), Highlight(0)), // "y̆y̆y̆y̆"
(Utf8Point::new(8, 11), Utf8Point::new(8, 19), Highlight(2)), // "function"
]
);
}

View file

@ -0,0 +1,62 @@
use tree_sitter::Parser;
use tree_sitter_tags::TagsContext;
use super::helpers::fixtures::{get_language, get_tags_config};
use crate::{
query_testing::{parse_position_comments, Assertion, Utf8Point},
test_tags::get_tag_positions,
};
#[test]
// Same assertion-comment mechanism as the highlight test, but for tags:
// `<-` and `^` comments in the Python fixture pin expected tag kinds
// (and `!` negates) at source positions.
fn test_tags_test_with_basic_test() {
let language = get_language("python");
let config = get_tags_config("python");
let source = [
"# hi",
"def abc(d):",
" # <- definition.function",
" e = fgh(d)",
" # ^ reference.call",
" return d(e)",
" # ^ reference.call",
" # ^ !variable.parameter",
"",
]
.join("\n");
let assertions =
parse_position_comments(&mut Parser::new(), &language, source.as_bytes()).unwrap();
assert_eq!(
assertions,
&[
Assertion::new(1, 4, 1, false, String::from("definition.function")),
Assertion::new(3, 9, 1, false, String::from("reference.call")),
Assertion::new(5, 11, 1, false, String::from("reference.call")),
Assertion::new(5, 13, 1, true, String::from("variable.parameter")),
]
);
let mut tags_context = TagsContext::new();
let tag_positions = get_tag_positions(&mut tags_context, &config, source.as_bytes()).unwrap();
assert_eq!(
tag_positions,
&[
(
Utf8Point::new(1, 4),
Utf8Point::new(1, 7),
"definition.function".to_string()
),
(
Utf8Point::new(3, 8),
Utf8Point::new(3, 11),
"reference.call".to_string()
),
(
Utf8Point::new(5, 11),
Utf8Point::new(5, 12),
"reference.call".to_string()
),
]
);
}

View file

@ -0,0 +1,174 @@
use std::{iter, sync::Arc};
use streaming_iterator::StreamingIterator;
use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree};
use crate::tests::helpers::fixtures::get_language;
/// Parses `text` as C source and returns the resulting tree together with
/// the language, for use by the text-provider query helpers below.
fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) {
    let language = get_language("c");
    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();
    let tree = parser.parse(text, None).unwrap();
    (tree, language)
}
// Parses C source delivered incrementally by `callback` (which returns the
// chunk for a given byte offset / point). Asserts the fixture's first node
// is a comment, guarding against a callback that returns wrong chunks.
fn parse_text_with<T, F>(callback: &mut F) -> (Tree, Language)
where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let language = get_language("c");
let mut parser = Parser::new();
parser.set_language(&language).unwrap();
let tree = parser.parse_with_options(callback, None, None).unwrap();
// eprintln!("{}", tree.clone().root_node().to_sexp());
assert_eq!("comment", tree.root_node().child(0).unwrap().kind());
(tree, language)
}
// Runs the shared `// comment` equality query against `tree`, pulling node
// text through the given `TextProvider`, and checks the single expected
// capture — this is what actually exercises each provider implementation.
fn tree_query<I: AsRef<[u8]>>(tree: &Tree, text: impl TextProvider<I>, language: &Language) {
let query = Query::new(language, "((comment) @c (#eq? @c \"// comment\"))").unwrap();
let mut cursor = QueryCursor::new();
let mut captures = cursor.captures(&query, tree.root_node(), text);
let (match_, idx) = captures.next().unwrap();
let capture = match_.captures[*idx];
assert_eq!(capture.index as usize, *idx);
assert_eq!("comment", capture.node.kind());
}
// Convenience wrapper: parse `parser_text` as C, then run the standard
// comment query against it via the given text provider.
fn check_parsing<I: AsRef<[u8]>>(
    parser_text: impl AsRef<[u8]>,
    text_provider: impl TextProvider<I>,
) {
    let (parsed_tree, lang) = parse_text(parser_text);
    tree_query(&parsed_tree, text_provider, &lang);
}
// Like `check_parsing`, but the source is supplied through a chunked parse
// callback instead of a single buffer.
fn check_parsing_callback<T, F, I: AsRef<[u8]>>(
parser_callback: &mut F,
text_provider: impl TextProvider<I>,
) where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let (tree, language) = parse_text_with(parser_callback);
tree_query(&tree, text_provider, &language);
}
#[test]
// `&str` and `&[u8]` both work as parser input and as `TextProvider`.
fn test_text_provider_for_str_slice() {
let text: &str = "// comment";
check_parsing(text, text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
}
#[test]
// Owned `String` input, both moved and borrowed via `AsRef<[u8]>`.
fn test_text_provider_for_string() {
let text: String = "// comment".to_owned();
check_parsing(text.clone(), text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<[u8]>>::as_ref(&text), text.as_bytes());
}
#[test]
// `Box<str>` input through its various borrow conversions.
fn test_text_provider_for_box_of_str_slice() {
let text = "// comment".to_owned().into_boxed_str();
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<str>>::as_ref(&text), text.as_bytes());
check_parsing(text.as_ref(), text.as_ref().as_bytes());
check_parsing(text.as_ref(), text.as_bytes());
}
#[test]
// `Box<[u8]>` input via both `as_ref` and deref coercion.
fn test_text_provider_for_box_of_bytes_slice() {
let text = "// comment".to_owned().into_boxed_str().into_boxed_bytes();
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.as_ref(), &*text);
check_parsing(&*text, &*text);
}
#[test]
// `Vec<u8>` input, dereferenced to a byte slice.
fn test_text_provider_for_vec_of_bytes() {
let text = "// comment".to_owned().into_bytes();
check_parsing(&*text, &*text);
}
#[test]
// `Arc<[u8]>` input: by deref, by `as_ref`, and by cloned handle.
fn test_text_provider_for_arc_of_bytes_slice() {
let text: Arc<[u8]> = Arc::from("// comment".to_owned().into_bytes());
check_parsing(&*text, &*text);
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.clone(), text.as_ref());
}
#[test]
// A closure yielding `&str` works as a per-node text provider, both with a
// whole-buffer parse and with a chunked parse callback (which must return
// an empty chunk once the offset passes the end of the text).
fn test_text_provider_callback_with_str_slice() {
let text: &str = "// comment";
check_parsing(text, |_node: Node<'_>| iter::once(text));
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then_some(text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| iter::once(text),
);
}
#[test]
// The text-provider closure may yield an owned `String` per node.
fn test_text_provider_callback_with_owned_string_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then_some(text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: String = text.to_owned();
iter::once(slice)
},
);
}
#[test]
// The text-provider closure may yield an owned `Vec<u8>` per node.
fn test_text_provider_callback_with_owned_bytes_vec_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then_some(text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice = text.to_owned().into_bytes();
iter::once(slice)
},
);
}
#[test]
// The text-provider closure may yield an owned `Arc<[u8]>` per node.
fn test_text_provider_callback_with_owned_arc_of_bytes_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then_some(text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: Arc<[u8]> = text.to_owned().into_bytes().into();
iter::once(slice)
},
);
}

View file

@ -0,0 +1,797 @@
use std::str;
use tree_sitter::{InputEdit, Parser, Point, Range, Tree};
use super::helpers::fixtures::get_language;
use crate::{
fuzz::edits::Edit,
parse::perform_edit,
tests::{helpers::fixtures::get_test_fixture_language, invert_edit},
};
#[test]
fn test_tree_edit() {
    // Exercises `Tree::edit` position arithmetic: each case below clones the
    // same parsed tree, applies one `InputEdit`, and checks how node byte
    // ranges and `has_changes` flags are adjusted — no reparse is performed.
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    // NOTE(review): the byte-range assertions below (abc at 2..5, !== at
    // 7..10, def at 12..15) require double spaces between the tokens; the
    // literal is written accordingly (single spaces would not satisfy them).
    let tree = parser.parse("  abc  !==  def", None).unwrap();

    assert_eq!(
        tree.root_node().to_sexp(),
        "(program (expression_statement (binary_expression left: (identifier) right: (identifier))))"
    );

    // edit entirely within the tree's padding:
    // resize the padding of the tree and its leftmost descendants.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 1,
            old_end_byte: 1,
            new_end_byte: 2,
            start_position: Point::new(0, 1),
            old_end_position: Point::new(0, 1),
            new_end_position: Point::new(0, 2),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.start_byte(), 3);
        assert_eq!(expr.end_byte(), 16);
        assert!(child1.has_changes());
        assert_eq!(child1.start_byte(), 3);
        assert_eq!(child1.end_byte(), 6);
        assert!(!child2.has_changes());
        assert_eq!(child2.start_byte(), 8);
        assert_eq!(child2.end_byte(), 11);
    }

    // edit starting in the tree's padding but extending into its content:
    // shrink the content to compensate for the expanded padding.
    // NOTE(review): `old_end_byte` is 4 but `old_end_position` is column 5 —
    // these should normally agree for single-line ASCII input; confirm
    // against the upstream test.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 1,
            old_end_byte: 4,
            new_end_byte: 5,
            start_position: Point::new(0, 1),
            old_end_position: Point::new(0, 5),
            new_end_position: Point::new(0, 5),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.start_byte(), 5);
        assert_eq!(expr.end_byte(), 16);
        assert!(child1.has_changes());
        assert_eq!(child1.start_byte(), 5);
        assert_eq!(child1.end_byte(), 6);
        assert!(!child2.has_changes());
        assert_eq!(child2.start_byte(), 8);
        assert_eq!(child2.end_byte(), 11);
    }

    // insertion at the edge of a tree's padding:
    // expand the tree's padding.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 2,
            old_end_byte: 2,
            new_end_byte: 4,
            start_position: Point::new(0, 2),
            old_end_position: Point::new(0, 2),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.byte_range(), 4..17);
        assert!(child1.has_changes());
        assert_eq!(child1.byte_range(), 4..7);
        assert!(!child2.has_changes());
        assert_eq!(child2.byte_range(), 9..12);
    }

    // replacement starting at the edge of the tree's padding:
    // resize the content and not the padding.
    // NOTE(review): this edit and all of its assertions are byte-identical
    // to the "insertion at the edge" case above (old_end_byte equals
    // start_byte, i.e. a pure insertion, not a replacement). As written this
    // case adds no coverage — confirm the intended replacement edit wasn't
    // lost.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 2,
            old_end_byte: 2,
            new_end_byte: 4,
            start_position: Point::new(0, 2),
            old_end_position: Point::new(0, 2),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.byte_range(), 4..17);
        assert!(child1.has_changes());
        assert_eq!(child1.byte_range(), 4..7);
        assert!(!child2.has_changes());
        assert_eq!(child2.byte_range(), 9..12);
    }

    // deletion that spans more than one child node:
    // shrink subsequent child nodes.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 1,
            old_end_byte: 11,
            new_end_byte: 4,
            start_position: Point::new(0, 1),
            old_end_position: Point::new(0, 11),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.byte_range(), 4..8);
        assert!(child1.has_changes());
        assert_eq!(child1.byte_range(), 4..4);
        assert!(child2.has_changes());
        assert_eq!(child2.byte_range(), 4..4);
        assert!(child3.has_changes());
        assert_eq!(child3.byte_range(), 5..8);
    }

    // insertion at the end of the tree:
    // extend the tree's content.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 15,
            old_end_byte: 15,
            new_end_byte: 16,
            start_position: Point::new(0, 15),
            old_end_position: Point::new(0, 15),
            new_end_position: Point::new(0, 16),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();

        assert!(expr.has_changes());
        assert_eq!(expr.byte_range(), 2..16);
        assert!(!child1.has_changes());
        assert_eq!(child1.byte_range(), 2..5);
        assert!(!child2.has_changes());
        assert_eq!(child2.byte_range(), 7..10);
        assert!(child3.has_changes());
        assert_eq!(child3.byte_range(), 12..16);
    }

    // replacement that starts within a token and extends beyond the end of the tree:
    // resize the token and empty out any subsequent child nodes.
    {
        let mut tree = tree.clone();
        tree.edit(&InputEdit {
            start_byte: 3,
            old_end_byte: 90,
            new_end_byte: 4,
            start_position: Point::new(0, 3),
            old_end_position: Point::new(0, 90),
            new_end_position: Point::new(0, 4),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();

        assert_eq!(expr.byte_range(), 2..4);
        assert!(expr.has_changes());
        assert_eq!(child1.byte_range(), 2..4);
        assert!(child1.has_changes());
        assert_eq!(child2.byte_range(), 4..4);
        assert!(child2.has_changes());
        assert_eq!(child3.byte_range(), 4..4);
        assert!(child3.has_changes());
    }

    // replacement that starts in whitespace and extends beyond the end of the tree:
    // shift the token's start position and empty out its content.
    {
        let mut tree = tree;
        tree.edit(&InputEdit {
            start_byte: 6,
            old_end_byte: 90,
            new_end_byte: 8,
            start_position: Point::new(0, 6),
            old_end_position: Point::new(0, 90),
            new_end_position: Point::new(0, 8),
        });

        let expr = tree.root_node().child(0).unwrap().child(0).unwrap();
        let child1 = expr.child(0).unwrap();
        let child2 = expr.child(1).unwrap();
        let child3 = expr.child(2).unwrap();

        assert_eq!(expr.byte_range(), 2..8);
        assert!(expr.has_changes());
        assert_eq!(child1.byte_range(), 2..5);
        assert!(!child1.has_changes());
        assert_eq!(child2.byte_range(), 8..8);
        assert!(child2.has_changes());
        assert_eq!(child3.byte_range(), 8..8);
        assert!(child3.has_changes());
    }
}
#[test]
fn test_tree_edit_with_included_ranges() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("html")).unwrap();

    // An HTML document interleaved with ERB-style directives; only the plain
    // HTML spans are handed to the parser as included ranges.
    let source = "<div><% if a %><span>a</span><% else %><span>b</span><% end %></div>";

    let included = [0..5, 15..29, 39..53, 62..68]
        .into_iter()
        .map(|span| Range {
            start_byte: span.start,
            end_byte: span.end,
            start_point: Point::new(0, span.start),
            end_point: Point::new(0, span.end),
        })
        .collect::<Vec<_>>();
    parser.set_included_ranges(&included).unwrap();

    let mut tree = parser.parse(source, None).unwrap();

    // Delete the bytes 29..53 and verify how the tree clips and shifts its
    // stored included ranges in response.
    tree.edit(&InputEdit {
        start_byte: 29,
        old_end_byte: 53,
        new_end_byte: 29,
        start_position: Point::new(0, 29),
        old_end_position: Point::new(0, 53),
        new_end_position: Point::new(0, 29),
    });

    let expected = [(0, 5), (15, 29), (29, 29), (38, 44)].map(|(start, end)| Range {
        start_byte: start,
        end_byte: end,
        start_point: Point::new(0, start),
        end_point: Point::new(0, end),
    });
    assert_eq!(tree.included_ranges(), &expected);
}
#[test]
fn test_tree_cursor() {
    // Walks forwards and backwards through a small Rust struct, checking the
    // cursor's node kinds, named-ness, and start positions, then verifies
    // that `reset_to` copies another cursor's position.
    let mut parser = Parser::new();
    parser.set_language(&get_language("rust")).unwrap();

    // NOTE(review): the indentation inside this literal is reconstructed
    // from the `Point` assertions below (`{` at column 29, fields at
    // column 20, `}` at column 16) — confirm against the upstream test.
    let tree = parser
        .parse(
            "
                struct Stuff {
                    a: A,
                    b: Option<B>,
                }
            ",
            None,
        )
        .unwrap();

    let mut cursor = tree.walk();
    assert_eq!(cursor.node().kind(), "source_file");

    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "struct_item");

    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "struct");
    assert!(!cursor.node().is_named());

    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "type_identifier");
    assert!(cursor.node().is_named());

    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "field_declaration_list");
    assert!(cursor.node().is_named());

    // Jump to the last child, then walk backwards through all siblings.
    assert!(cursor.goto_last_child());
    assert_eq!(cursor.node().kind(), "}");
    assert!(!cursor.node().is_named());
    assert_eq!(cursor.node().start_position(), Point { row: 4, column: 16 });

    assert!(cursor.goto_previous_sibling());
    assert_eq!(cursor.node().kind(), ",");
    assert!(!cursor.node().is_named());
    assert_eq!(cursor.node().start_position(), Point { row: 3, column: 32 });

    assert!(cursor.goto_previous_sibling());
    assert_eq!(cursor.node().kind(), "field_declaration");
    assert!(cursor.node().is_named());
    assert_eq!(cursor.node().start_position(), Point { row: 3, column: 20 });

    assert!(cursor.goto_previous_sibling());
    assert_eq!(cursor.node().kind(), ",");
    assert!(!cursor.node().is_named());
    assert_eq!(cursor.node().start_position(), Point { row: 2, column: 24 });

    assert!(cursor.goto_previous_sibling());
    assert_eq!(cursor.node().kind(), "field_declaration");
    assert!(cursor.node().is_named());
    assert_eq!(cursor.node().start_position(), Point { row: 2, column: 20 });

    assert!(cursor.goto_previous_sibling());
    assert_eq!(cursor.node().kind(), "{");
    assert!(!cursor.node().is_named());
    assert_eq!(cursor.node().start_position(), Point { row: 1, column: 29 });

    // A second cursor reset to the first one lands on the same node and can
    // navigate upwards from there.
    let mut copy = tree.walk();
    copy.reset_to(&cursor);

    assert_eq!(copy.node().kind(), "{");
    assert!(!copy.node().is_named());

    assert!(copy.goto_parent());
    assert_eq!(copy.node().kind(), "field_declaration_list");
    assert!(copy.node().is_named());

    assert!(copy.goto_parent());
    assert_eq!(copy.node().kind(), "struct_item");
}
#[test]
fn test_tree_cursor_previous_sibling_with_aliases() {
    let mut parser = Parser::new();
    parser
        .set_language(&get_test_fixture_language("aliases_in_root"))
        .unwrap();

    // Two comments followed by content that the grammar aliases to `bar`.
    let text = "# comment\n# \nfoo foo";
    let tree = parser.parse(text, None).unwrap();

    let mut cursor = tree.walk();
    assert_eq!(cursor.node().kind(), "document");
    cursor.goto_first_child();
    assert_eq!(cursor.node().kind(), "comment");

    // Walk forward to the aliased node, back to the first comment, and
    // forward again — the sibling links must be symmetric in both
    // directions, even across the aliased root child.
    for kind in ["comment", "bar"] {
        assert!(cursor.goto_next_sibling());
        assert_eq!(cursor.node().kind(), kind);
    }
    for kind in ["comment", "comment"] {
        assert!(cursor.goto_previous_sibling());
        assert_eq!(cursor.node().kind(), kind);
    }
    for kind in ["comment", "bar"] {
        assert!(cursor.goto_next_sibling());
        assert_eq!(cursor.node().kind(), kind);
    }
}
#[test]
fn test_tree_cursor_previous_sibling() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("rust")).unwrap();
    let text = "
    // Hi there
    // This is fun!
    // Another one!
    ";
    let tree = parser.parse(text, None).unwrap();

    let mut cursor = tree.walk();
    assert_eq!(cursor.node().kind(), "source_file");

    // Start at the last comment and walk backwards through all three,
    // checking the node kind and the comment text at each step.
    let expected_texts = ["// Another one!", "// This is fun!", "// Hi there"];
    assert!(cursor.goto_last_child());
    for (i, expected) in expected_texts.iter().enumerate() {
        if i > 0 {
            assert!(cursor.goto_previous_sibling());
        }
        assert_eq!(cursor.node().kind(), "line_comment");
        assert_eq!(cursor.node().utf8_text(text.as_bytes()).unwrap(), *expected);
    }

    // There is nothing before the first comment.
    assert!(!cursor.goto_previous_sibling());
}
#[test]
fn test_tree_cursor_fields() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    let tree = parser
        .parse("function /*1*/ bar /*2*/ () {}", None)
        .unwrap();

    let mut cursor = tree.walk();
    assert_eq!(cursor.node().kind(), "program");
    cursor.goto_first_child();
    assert_eq!(cursor.node().kind(), "function_declaration");
    assert_eq!(cursor.field_name(), None);

    // The comments interleaved with the declaration's children carry no
    // field name, while the identifier and parameter list do.
    let expected = [
        ("function", None),
        ("comment", None),
        ("identifier", Some("name")),
        ("comment", None),
        ("formal_parameters", Some("parameters")),
    ];
    for (i, (kind, field)) in expected.into_iter().enumerate() {
        if i == 0 {
            cursor.goto_first_child();
        } else {
            cursor.goto_next_sibling();
        }
        assert_eq!(cursor.node().kind(), kind);
        assert_eq!(cursor.field_name(), field);
    }
}
#[test]
fn test_tree_cursor_child_for_point() {
    // Exercises `goto_first_child_for_point`: it returns the index of the
    // child containing (or after) the point, or `None` when the point is
    // past the node's children, and moves the cursor only on `Some`.
    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    // NOTE(review): the indentation of this literal is reconstructed from
    // the `Point` assertions below (`[` at column 4, `one` at column 8,
    // etc.) — confirm against the upstream test. The leading newline is
    // stripped by the `[1..]` slice.
    let source = &"
    [
        one,
        {
            two: tree
        },
        four, five, six
    ];"[1..];
    let tree = parser.parse(source, None).unwrap();
    let mut c = tree.walk();
    assert_eq!(c.node().kind(), "program");

    // Points past the end of the program: cursor does not move.
    assert_eq!(c.goto_first_child_for_point(Point::new(7, 0)), None);
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 7)), None);
    assert_eq!(c.node().kind(), "program");

    // descend to expression statement
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 5)), Some(0));
    assert_eq!(c.node().kind(), "expression_statement");

    // step into ';' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(7, 0)), None);
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 6)), None);
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 5)), Some(1));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        (";", Point::new(6, 5))
    );
    assert!(c.goto_parent());

    // descend into array
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 4)), Some(0));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("array", Point::new(0, 4))
    );

    // step into '[' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(0, 4)), Some(0));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("[", Point::new(0, 4))
    );
    assert!(c.goto_parent());

    // step into identifier 'one' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(1, 0)), Some(1));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("identifier", Point::new(1, 8))
    );
    assert!(c.goto_parent());
    assert_eq!(c.goto_first_child_for_point(Point::new(1, 10)), Some(1));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("identifier", Point::new(1, 8))
    );
    assert!(c.goto_parent());

    // step into first ',' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(1, 11)), Some(2));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        (",", Point::new(1, 11))
    );
    assert!(c.goto_parent());

    // step into identifier 'four' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(5, 0)), Some(5));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("identifier", Point::new(5, 8))
    );
    assert!(c.goto_parent());
    assert_eq!(c.goto_first_child_for_point(Point::new(5, 0)), Some(5));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("identifier", Point::new(5, 8))
    );
    assert!(c.goto_parent());

    // step into ']' and back up
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 0)), Some(10));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("]", Point::new(6, 4))
    );
    assert!(c.goto_parent());
    assert_eq!(c.goto_first_child_for_point(Point::new(6, 0)), Some(10));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("]", Point::new(6, 4))
    );
    assert!(c.goto_parent());

    // descend into object
    assert_eq!(c.goto_first_child_for_point(Point::new(2, 0)), Some(3));
    assert_eq!(
        (c.node().kind(), c.node().start_position()),
        ("object", Point::new(2, 8))
    );
}
#[test]
fn test_tree_node_equality() {
    let mut parser = Parser::new();
    parser.set_language(&get_language("rust")).unwrap();
    let tree = parser.parse("struct A {}", None).unwrap();

    // Two handles to the same underlying node compare equal; distinct nodes
    // from the same tree do not.
    let root_a = tree.root_node();
    let root_b = tree.root_node();
    assert_eq!(root_a, root_b);

    let child_a = root_a.child(0).unwrap();
    let child_b = root_b.child(0).unwrap();
    assert_eq!(child_a, child_b);
    assert_ne!(child_a, root_b);
}
#[test]
fn test_get_changed_ranges() {
    // Verifies `Tree::changed_ranges` across four categories of edit:
    // token-level changes, whitespace-only changes, inserting/removing
    // elements, and wrapping an existing node in a larger expression.
    // Each section applies an edit, asserts the changed ranges, then applies
    // the inverse edit (the tree is mutated in place by
    // `get_changed_ranges`, so order matters).
    let source_code = b"{a: null};\n".to_vec();

    let mut parser = Parser::new();
    parser.set_language(&get_language("javascript")).unwrap();
    let tree = parser.parse(&source_code, None).unwrap();

    assert_eq!(
        tree.root_node().to_sexp(),
        "(program (expression_statement (object (pair key: (property_identifier) value: (null)))))"
    );

    // Updating one token
    {
        let mut tree = tree.clone();
        let mut source_code = source_code.clone();

        // Replace `null` with `nothing` - that token has changed syntax
        let edit = Edit {
            position: index_of(&source_code, "ull"),
            deleted_length: 3,
            inserted_text: b"othing".to_vec(),
        };
        let inverse_edit = invert_edit(&source_code, &edit);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit);
        assert_eq!(ranges, vec![range_of(&source_code, "nothing")]);

        // Replace `nothing` with `null` - that token has changed syntax
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit);
        assert_eq!(ranges, vec![range_of(&source_code, "null")]);
    }

    // Changing only leading whitespace
    {
        let mut tree = tree.clone();
        let mut source_code = source_code.clone();

        // Insert leading newline - no changed ranges
        let edit = Edit {
            position: 0,
            deleted_length: 0,
            inserted_text: b"\n".to_vec(),
        };
        let inverse_edit = invert_edit(&source_code, &edit);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit);
        assert_eq!(ranges, vec![]);

        // Remove leading newline - no changed ranges
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit);
        assert_eq!(ranges, vec![]);
    }

    // Inserting elements
    {
        let mut tree = tree.clone();
        let mut source_code = source_code.clone();

        // Insert a key-value pair before the `}` - those tokens are changed
        let edit1 = Edit {
            position: index_of(&source_code, "}"),
            deleted_length: 0,
            inserted_text: b", b: false".to_vec(),
        };
        let inverse_edit1 = invert_edit(&source_code, &edit1);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1);
        assert_eq!(ranges, vec![range_of(&source_code, ", b: false")]);

        // Insert a second pair in front of the one just added.
        let edit2 = Edit {
            position: index_of(&source_code, ", b"),
            deleted_length: 0,
            inserted_text: b", c: 1".to_vec(),
        };
        let inverse_edit2 = invert_edit(&source_code, &edit2);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit2);
        assert_eq!(ranges, vec![range_of(&source_code, ", c: 1")]);

        // Remove the middle pair
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit2);
        assert_eq!(ranges, vec![]);

        // Remove the second pair
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1);
        assert_eq!(ranges, vec![]);
    }

    // Wrapping elements in larger expressions
    {
        let mut tree = tree;
        let mut source_code = source_code.clone();

        // Replace `null` with the binary expression `b === null`
        let edit1 = Edit {
            position: index_of(&source_code, "null"),
            deleted_length: 0,
            inserted_text: b"b === ".to_vec(),
        };
        let inverse_edit1 = invert_edit(&source_code, &edit1);
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1);
        assert_eq!(ranges, vec![range_of(&source_code, "b === null")]);

        // Undo
        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1);
        assert_eq!(ranges, vec![range_of(&source_code, "null")]);
    }
}
#[test]
fn test_consistency_with_mid_codepoint_edit() {
    // Regression test: the PHP source below contains a heredoc whose
    // delimiter is made of multi-byte UTF-8 characters. An edit is applied
    // at byte 17, which (per the test name) falls inside one of those
    // multi-byte codepoints; after editing, reparsing, undoing, and
    // reparsing again, the tree must match the original parse.
    let mut parser = Parser::new();
    parser.set_language(&get_language("php/php")).unwrap();
    let mut source_code =
        b"\n<?php\n\n<<<'\xE5\xAD\x97\xE6\xBC\xA2'\n T\n\xE5\xAD\x97\xE6\xBC\xA2;".to_vec();
    let mut tree = parser.parse(&source_code, None).unwrap();

    // Insert a single '.' byte (46) at position 17.
    let edit = Edit {
        position: 17,
        deleted_length: 0,
        inserted_text: vec![46],
    };
    perform_edit(&mut tree, &mut source_code, &edit).unwrap();
    let mut tree2 = parser.parse(&source_code, Some(&tree)).unwrap();

    // Undo the edit and reparse incrementally; the result must agree with
    // the original tree.
    let inverted = invert_edit(&source_code, &edit);
    perform_edit(&mut tree2, &mut source_code, &inverted).unwrap();
    let tree3 = parser.parse(&source_code, Some(&tree2)).unwrap();
    assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp());
}
#[test]
fn test_tree_cursor_on_aliased_root_with_extra_child() {
    // The block comment inside the scoped path is an "extra" node; the
    // cursor must still reach it as a sibling of the aliased type
    // identifier inside the generic type.
    let source = r"
    fn main() {
        C/* hi */::<D>::E;
    }
    ";
    let mut parser = Parser::new();
    parser.set_language(&get_language("rust")).unwrap();
    let tree = parser.parse(source, None).unwrap();

    // Navigate: function_item -> block -> expression_statement ->
    // scoped_identifier -> generic_type.
    let function = tree.root_node().child(0).unwrap();
    let block = function.child(3).unwrap();
    let expression_statement = block.child(1).unwrap();
    let scoped_identifier = expression_statement.child(0).unwrap();
    let generic_type = scoped_identifier.child(0).unwrap();
    assert_eq!(generic_type.kind(), "generic_type");

    let mut cursor = generic_type.walk();
    assert!(cursor.goto_first_child());
    assert_eq!(cursor.node().kind(), "type_identifier");
    assert!(cursor.goto_next_sibling());
    assert_eq!(cursor.node().kind(), "block_comment");
}
/// Returns the byte offset of the first occurrence of `substring` in `text`.
///
/// Panics if `text` is not valid UTF-8 or does not contain `substring`.
fn index_of(text: &[u8], substring: &str) -> usize {
    let haystack = std::str::from_utf8(text).unwrap();
    haystack.find(substring).unwrap()
}
/// Builds the single-row `Range` covering the first occurrence of
/// `substring` within `text`.
fn range_of(text: &[u8], substring: &str) -> Range {
    let start = index_of(text, substring);
    let end = start + substring.len();
    Range {
        start_byte: start,
        end_byte: end,
        start_point: Point::new(0, start),
        end_point: Point::new(0, end),
    }
}
/// Applies `edit` to `source_code` and `tree`, reparses incrementally, and
/// returns the ranges that changed between the old and new trees.
///
/// On return, `tree` has been replaced by the newly parsed tree so the
/// caller can chain further edits.
fn get_changed_ranges(
    parser: &mut Parser,
    tree: &mut Tree,
    source_code: &mut Vec<u8>,
    edit: &Edit,
) -> Vec<Range> {
    perform_edit(tree, source_code, edit).unwrap();
    let reparsed = parser.parse(source_code, Some(tree)).unwrap();
    let changed = reparsed
        .changed_ranges(&reparsed)
        .collect::<Vec<_>>();
    let changed = tree.changed_ranges(&reparsed).collect::<Vec<_>>();
    *tree = reparsed;
    changed
}

View file

@ -0,0 +1,273 @@
use std::{fs, sync::LazyLock};
use streaming_iterator::StreamingIterator;
use tree_sitter::{
wasmtime::Engine, Parser, Query, QueryCursor, WasmError, WasmErrorKind, WasmStore,
};
use crate::tests::helpers::{allocations, fixtures::WASM_DIR};
/// A single wasmtime `Engine` shared by every test in this module, created
/// lazily on first use.
static ENGINE: LazyLock<Engine> = LazyLock::new(Engine::default);
#[test]
fn test_wasm_stdlib_symbols() {
    let symbols = tree_sitter::wasm_stdlib_symbols().collect::<Vec<_>>();

    // The exported symbol list must already be in sorted order.
    let sorted = {
        let mut sorted = symbols.clone();
        sorted.sort_unstable();
        sorted
    };
    assert_eq!(symbols, sorted, "symbols aren't sorted");

    // A handful of libc allocation/memory routines must be present.
    for required in ["malloc", "free", "memset", "memcpy"] {
        assert!(symbols.contains(&required));
    }
}
#[test]
fn test_load_wasm_ruby_language() {
    allocations::record(|| {
        // Load the Ruby grammar from its wasm binary, then parse with it.
        let wasm = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let language = store.load_language("ruby", &wasm).unwrap();

        let mut parser = Parser::new();
        parser.set_wasm_store(store).unwrap();
        parser.set_language(&language).unwrap();

        let tree = parser.parse("class A; end", None).unwrap();
        assert_eq!(
            tree.root_node().to_sexp(),
            "(program (class name: (constant)))"
        );
    });
}
#[test]
fn test_load_wasm_html_language() {
    allocations::record(|| {
        // Load the HTML grammar from its wasm binary, then parse with it.
        let wasm = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let language = store.load_language("html", &wasm).unwrap();

        let mut parser = Parser::new();
        parser.set_wasm_store(store).unwrap();
        parser.set_language(&language).unwrap();

        let tree = parser
            .parse("<div><span></span><p></p></div>", None)
            .unwrap();
        assert_eq!(
            tree.root_node().to_sexp(),
            "(document (element (start_tag (tag_name)) (element (start_tag (tag_name)) (end_tag (tag_name))) (element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))))"
        );
    });
}
#[test]
fn test_load_wasm_rust_language() {
    allocations::record(|| {
        // Load the Rust grammar from its wasm binary, then parse with it.
        let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let language = store.load_language("rust", &wasm).unwrap();

        let mut parser = Parser::new();
        parser.set_wasm_store(store).unwrap();
        parser.set_language(&language).unwrap();

        let tree = parser.parse("fn main() {}", None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))");
    });
}
#[test]
fn test_load_wasm_javascript_language() {
    allocations::record(|| {
        // Load the JavaScript grammar from its wasm binary, then parse with it.
        let wasm = fs::read(WASM_DIR.join("tree-sitter-javascript.wasm")).unwrap();
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let language = store.load_language("javascript", &wasm).unwrap();

        let mut parser = Parser::new();
        parser.set_wasm_store(store).unwrap();
        parser.set_language(&language).unwrap();

        let tree = parser.parse("const a = b\nconst c = d", None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), "(program (lexical_declaration (variable_declarator name: (identifier) value: (identifier))) (lexical_declaration (variable_declarator name: (identifier) value: (identifier))))");
    });
}
#[test]
fn test_load_multiple_wasm_languages() {
    // Loads four wasm grammars into one store, then parses with each of
    // them — first with the parser that shares the loading store, then with
    // a parser whose fresh store acquires the languages lazily between
    // parses. Each parser runs two rounds to exercise language switching.
    allocations::record(|| {
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let mut parser = Parser::new();

        let wasm_cpp = fs::read(WASM_DIR.join("tree-sitter-cpp.wasm")).unwrap();
        let wasm_rs = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let wasm_rb = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
        let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap();

        let language_rust = store.load_language("rust", &wasm_rs).unwrap();
        let language_cpp = store.load_language("cpp", &wasm_cpp).unwrap();
        let language_ruby = store.load_language("ruby", &wasm_rb).unwrap();
        let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap();
        parser.set_wasm_store(store).unwrap();

        let mut parser2 = Parser::new();
        parser2
            .set_wasm_store(WasmStore::new(&ENGINE).unwrap())
            .unwrap();
        let mut query_cursor = QueryCursor::new();

        // First, parse with the store that originally loaded the languages.
        // Then parse with a new parser and wasm store, so that the languages
        // are added one-by-one, in between parses.
        for mut parser in [parser, parser2] {
            for _ in 0..2 {
                let query_rust = Query::new(&language_rust, "(const_item) @foo").unwrap();
                let query_typescript =
                    Query::new(&language_typescript, "(class_declaration) @foo").unwrap();

                parser.set_language(&language_cpp).unwrap();
                let tree = parser.parse("A<B> c = d();", None).unwrap();
                assert_eq!(
                    tree.root_node().to_sexp(),
                    "(translation_unit (declaration type: (template_type name: (type_identifier) arguments: (template_argument_list (type_descriptor type: (type_identifier)))) declarator: (init_declarator declarator: (identifier) value: (call_expression function: (identifier) arguments: (argument_list)))))"
                );

                parser.set_language(&language_rust).unwrap();
                let source = "const A: B = c();";
                let tree = parser.parse(source, None).unwrap();
                assert_eq!(
                    tree.root_node().to_sexp(),
                    "(source_file (const_item name: (identifier) type: (type_identifier) value: (call_expression function: (identifier) arguments: (arguments))))"
                );
                assert_eq!(
                    query_cursor
                        .matches(&query_rust, tree.root_node(), source.as_bytes())
                        .count(),
                    1
                );

                parser.set_language(&language_ruby).unwrap();
                let tree = parser.parse("class A; end", None).unwrap();
                assert_eq!(
                    tree.root_node().to_sexp(),
                    "(program (class name: (constant)))"
                );

                parser.set_language(&language_typescript).unwrap();
                let tree = parser.parse("class A {}", None).unwrap();
                assert_eq!(
                    tree.root_node().to_sexp(),
                    "(program (class_declaration name: (type_identifier) body: (class_body)))"
                );
                // NOTE(review): `source` here is still the Rust snippet, not
                // the TypeScript one — the count is unaffected because no
                // text predicates are evaluated, but confirm this is
                // intentional.
                assert_eq!(
                    query_cursor
                        .matches(&query_typescript, tree.root_node(), source.as_bytes())
                        .count(),
                    1
                );
            }
        }
    });
}
#[test]
fn test_load_and_reload_wasm_language() {
    // Checks that a `WasmStore`'s language count tracks the lifetime of the
    // `Language` handles it hands out: dropping a handle releases the
    // store's instance, and the same wasm can be loaded again afterwards.
    allocations::record(|| {
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let wasm_rust = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap();
        let language_rust = store.load_language("rust", &wasm_rust).unwrap();
        let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap();
        assert_eq!(store.language_count(), 2);

        // When a language is dropped, stores can release their instances of that language.
        drop(language_rust);
        assert_eq!(store.language_count(), 1);

        // Reloading the same wasm restores the count.
        let language_rust = store.load_language("rust", &wasm_rust).unwrap();
        assert_eq!(store.language_count(), 2);

        // Dropping all handles empties the store.
        drop(language_rust);
        drop(language_typescript);
        assert_eq!(store.language_count(), 0);
    });
}
#[test]
fn test_reset_wasm_store() {
    allocations::record(|| {
        // Load the Rust grammar through one store.
        let mut language_store = WasmStore::new(&ENGINE).unwrap();
        let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
        let language = language_store.load_language("rust", &wasm).unwrap();

        let expected_sexp = "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))";

        // Parse once with the parser's initial wasm store.
        let mut parser = Parser::new();
        parser
            .set_wasm_store(WasmStore::new(&ENGINE).unwrap())
            .unwrap();
        parser.set_language(&language).unwrap();
        let tree = parser.parse("fn main() {}", None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), expected_sexp);

        // Swap in a brand-new wasm store; the previously assigned language
        // must keep working without being set again.
        parser
            .set_wasm_store(WasmStore::new(&ENGINE).unwrap())
            .unwrap();
        let tree = parser.parse("fn main() {}", None).unwrap();
        assert_eq!(tree.root_node().to_sexp(), expected_sexp);
    });
}
#[test]
fn test_load_wasm_errors() {
    // Exercises the three `WasmErrorKind`s: a truncated module fails to
    // parse, a wrong language name fails to instantiate, and a corrupted
    // module body fails to compile.
    allocations::record(|| {
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();

        // Dropping the first byte breaks the wasm container format.
        let bad_wasm = &wasm[1..];
        assert_eq!(
            store.load_language("rust", bad_wasm).unwrap_err(),
            WasmError {
                kind: WasmErrorKind::Parse,
                message: "failed to parse dylink section of wasm module".into(),
            }
        );

        // A valid module that doesn't export the requested language symbol.
        assert_eq!(
            store.load_language("not_rust", &wasm).unwrap_err(),
            WasmError {
                kind: WasmErrorKind::Instantiate,
                message: "module did not contain language function: tree_sitter_not_rust".into(),
            }
        );

        // Zeroing a stretch of the module body corrupts its code section.
        let mut bad_wasm = wasm.clone();
        bad_wasm[300..500].iter_mut().for_each(|b| *b = 0);
        assert_eq!(
            store.load_language("rust", &bad_wasm).unwrap_err().kind,
            WasmErrorKind::Compile,
        );
    });
}
#[test]
fn test_wasm_oom() {
    allocations::record(|| {
        let mut store = WasmStore::new(&ENGINE).unwrap();
        let mut parser = Parser::new();
        let wasm = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
        let language = store.load_language("html", &wasm).unwrap();
        parser.set_wasm_store(store).unwrap();
        parser.set_language(&language).unwrap();

        // A multi-megabyte tag name is expected to exhaust the wasm
        // instance's memory; the parse must fail (`None`) rather than abort
        // the process.
        let tag_name = "a-b".repeat(2 * 1024 * 1024);
        let code = format!("<{tag_name}>hello world</{tag_name}>");
        assert!(parser.parse(&code, None).is_none());

        // After the failed parse, the parser must still work on a
        // reasonably sized document.
        let tag_name = "a-b".repeat(20);
        let code = format!("<{tag_name}>hello world</{tag_name}>");
        parser.set_language(&language).unwrap();
        let tree = parser.parse(&code, None).unwrap();
        assert_eq!(
            tree.root_node().to_sexp(),
            "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))"
        );
    });
}

View file

@ -0,0 +1,25 @@
#![doc = include_str!("../README.md")]

// Public subcommand/support modules of the tree-sitter CLI library.
pub mod fuzz;
pub mod highlight;
pub mod init;
pub mod input;
pub mod logger;
pub mod parse;
pub mod playground;
pub mod query;
pub mod query_testing;
pub mod tags;
pub mod test;
pub mod test_highlight;
pub mod test_tags;
pub mod util;
pub mod version;
pub mod wasm;

#[cfg(test)]
mod tests;

// To run compile fail tests
// NOTE: `cfg(test)` and `cfg(doctest)` are not enabled in the same
// compilation pass, so this second `mod tests` declaration does not
// conflict with the one above.
#[cfg(doctest)]
mod tests;

130
crates/cli/src/util.rs Normal file
View file

@ -0,0 +1,130 @@
use std::{
path::{Path, PathBuf},
process::{Child, ChildStdin, Command, Stdio},
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
},
};
use anyhow::{anyhow, Context, Result};
use indoc::indoc;
use tree_sitter::{Parser, Tree};
use tree_sitter_config::Config;
use tree_sitter_loader::Config as LoaderConfig;
/// Preamble written to the output file before `dot`'s SVG output is
/// appended, so the generated file renders as a full-width HTML page.
const HTML_HEADER: &[u8] = b"
<!DOCTYPE html>
<style>
svg { width: 100%; }
</style>
";
/// Builds the multi-line error message shown when no grammar matches
/// `path`, listing the parser directories from the user's loader
/// configuration and pointing at the config file (or how to create one).
#[must_use]
pub fn lang_not_found_for_path(path: &Path, loader_config: &LoaderConfig) -> String {
    let path = path.display();
    format!(
        indoc! {"
            No language found for path `{}`
            If a language should be associated with this file extension, please ensure the path to `{}` is inside one of the following directories as specified by your 'config.json':\n\n{}\n
            If the directory that contains the relevant grammar for `{}` is not listed above, please add the directory to the list of directories in your config file, {}
        "},
        path,
        path,
        loader_config
            .parser_directories
            .iter()
            .enumerate()
            .map(|(i, d)| format!("  {}. {}", i + 1, d.display()))
            .collect::<Vec<_>>()
            .join("  \n"),
        path,
        if let Ok(Some(config_path)) = Config::find_config_file() {
            format!("located at {}", config_path.display())
        } else {
            String::from("which you need to create by running `tree-sitter init-config`")
        }
    )
}
/// Installs a Ctrl-C handler and returns a shared counter that the handler
/// sets to 1 when the signal arrives; callers poll it to cancel work.
#[must_use]
pub fn cancel_on_signal() -> Arc<AtomicUsize> {
    let flag = Arc::new(AtomicUsize::new(0));
    let handler_flag = flag.clone();
    ctrlc::set_handler(move || handler_flag.store(1, Ordering::Relaxed))
        .expect("Error setting Ctrl-C handler");
    flag
}
/// Handle for an in-progress `dot`-rendered log file; dropping it closes
/// the `dot` process's stdin, waits for it to finish, and optionally opens
/// the result in a browser.
pub struct LogSession {
    // Path of the HTML file receiving the rendered SVG output.
    path: PathBuf,
    // The spawned `dot` process; taken out (and waited on) in `Drop`.
    dot_process: Option<Child>,
    // `dot`'s stdin, to which DOT-format graphs are written; dropped first
    // in `Drop` to signal end of input.
    dot_process_stdin: Option<ChildStdin>,
    // Whether to open the rendered file in a browser once `dot` succeeds.
    open_log: bool,
}
/// Renders `tree` as an SVG graph (via `dot`) into the HTML file at `path`.
///
/// NOTE(review): the `quiet` flag is forwarded as `LogSession::new`'s
/// `open_log` argument — confirm the polarity is intended (`quiet = true`
/// would open the rendered file in a browser when the session drops).
pub fn print_tree_graph(tree: &Tree, path: &str, quiet: bool) -> Result<()> {
    let session = LogSession::new(path, quiet)?;
    tree.print_dot_graph(session.dot_process_stdin.as_ref().unwrap());
    Ok(())
}
/// Starts streaming the parser's debug graphs (rendered by `dot`) into the
/// HTML file at `path`; the returned session keeps the `dot` process alive
/// until the caller drops it.
pub fn log_graphs(parser: &mut Parser, path: &str, open_log: bool) -> Result<LogSession> {
    let session = LogSession::new(path, open_log)?;
    let dot_stdin = session.dot_process_stdin.as_ref().unwrap();
    parser.print_dot_graphs(dot_stdin);
    Ok(session)
}
impl LogSession {
    /// Creates the output file at `path`, writes the HTML preamble, and
    /// spawns a `dot -Tsvg` process whose stdout is the file, so SVG output
    /// is appended after the preamble.
    ///
    /// Fails if the file cannot be created or the `dot` executable (from
    /// graphviz) cannot be spawned.
    fn new(path: &str, open_log: bool) -> Result<Self> {
        use std::io::Write;

        let mut dot_file = std::fs::File::create(path)?;
        dot_file.write_all(HTML_HEADER)?;
        let mut dot_process = Command::new("dot")
            .arg("-Tsvg")
            .stdin(Stdio::piped())
            .stdout(dot_file)
            .spawn()
            .with_context(|| {
                "Failed to run the `dot` command. Check that graphviz is installed."
            })?;
        let dot_stdin = dot_process
            .stdin
            .take()
            .ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?;
        Ok(Self {
            path: PathBuf::from(path),
            dot_process: Some(dot_process),
            dot_process_stdin: Some(dot_stdin),
            open_log,
        })
    }
}
impl Drop for LogSession {
    fn drop(&mut self) {
        use std::fs;

        // Closing `dot`'s stdin signals end-of-input so the process can
        // finish rendering; then wait for it to exit.
        drop(self.dot_process_stdin.take().unwrap());
        let output = self.dot_process.take().unwrap().wait_with_output().unwrap();
        if output.status.success() {
            // Only open the result in a browser if something beyond the
            // static HTML header was actually written.
            if self.open_log && fs::metadata(&self.path).unwrap().len() > HTML_HEADER.len() as u64 {
                webbrowser::open(&self.path.to_string_lossy()).unwrap();
            }
        } else {
            eprintln!(
                "Dot failed: {} {}",
                String::from_utf8_lossy(&output.stdout),
                String::from_utf8_lossy(&output.stderr)
            );
        }
    }
}

264
crates/cli/src/version.rs Normal file
View file

@ -0,0 +1,264 @@
use std::{fs, path::PathBuf, process::Command};
use anyhow::{anyhow, Context, Result};
use regex::Regex;
use tree_sitter_loader::TreeSitterJSON;
/// Rewrites the version number declared in a grammar repository's manifest
/// files (tree-sitter.json, Cargo.toml, package.json, Makefile,
/// CMakeLists.txt, pyproject.toml).
pub struct Version {
    /// The new version string to write into each manifest.
    pub version: String,
    /// Root directory of the grammar repository whose manifests are updated.
    pub current_dir: PathBuf,
}
impl Version {
    /// Creates an updater that will rewrite the manifests under
    /// `current_dir` to declare `version`.
    #[must_use]
    pub const fn new(version: String, current_dir: PathBuf) -> Self {
        Self {
            version,
            current_dir,
        }
    }

    /// Updates the version number in every recognized manifest file that
    /// exists in the grammar repository.
    ///
    /// # Errors
    ///
    /// Returns an error if tree-sitter.json cannot be read or parsed, or if
    /// rewriting any manifest (or regenerating a lockfile) fails.
    pub fn run(self) -> Result<()> {
        let tree_sitter_json = self.current_dir.join("tree-sitter.json");
        let tree_sitter_json =
            serde_json::from_str::<TreeSitterJSON>(&fs::read_to_string(tree_sitter_json)?)?;

        let is_multigrammar = tree_sitter_json.grammars.len() > 1;

        self.update_treesitter_json().with_context(|| {
            format!(
                "Failed to update tree-sitter.json at {}",
                self.current_dir.display()
            )
        })?;
        self.update_cargo_toml().with_context(|| {
            format!(
                "Failed to update Cargo.toml at {}",
                self.current_dir.display()
            )
        })?;
        self.update_package_json().with_context(|| {
            format!(
                "Failed to update package.json at {}",
                self.current_dir.display()
            )
        })?;
        self.update_makefile(is_multigrammar).with_context(|| {
            format!(
                "Failed to update Makefile at {}",
                self.current_dir.display()
            )
        })?;
        self.update_cmakelists_txt().with_context(|| {
            format!(
                "Failed to update CMakeLists.txt at {}",
                self.current_dir.display()
            )
        })?;
        self.update_pyproject_toml().with_context(|| {
            format!(
                "Failed to update pyproject.toml at {}",
                self.current_dir.display()
            )
        })?;

        Ok(())
    }

    /// Replaces the string value of each `"version":` field in `contents`
    /// with `self.version`, line by line.
    ///
    /// A textual rewrite is used instead of a JSON round-trip so that key
    /// order and formatting are left untouched. Shared by the
    /// tree-sitter.json and package.json updates, which previously
    /// duplicated this logic verbatim.
    fn replace_json_version_fields(&self, contents: &str) -> String {
        contents
            .lines()
            .map(|line| {
                if line.contains("\"version\":") {
                    // Locate the opening and closing quotes of the value
                    // following the `"version":` key.
                    let prefix_index = line.find("\"version\":").unwrap() + "\"version\":".len();
                    let start_quote = line[prefix_index..].find('"').unwrap() + prefix_index + 1;
                    let end_quote = line[start_quote + 1..].find('"').unwrap() + start_quote + 1;
                    format!(
                        "{}{}{}",
                        &line[..start_quote],
                        self.version,
                        &line[end_quote..]
                    )
                } else {
                    line.to_string()
                }
            })
            .collect::<Vec<_>>()
            .join("\n")
            + "\n"
    }

    /// Replaces every line of `contents` that starts with `prefix` with
    /// `replacement`, keeping all other lines intact. Shared by the
    /// Cargo.toml, Makefile, and pyproject.toml updates.
    fn replace_prefixed_lines(contents: &str, prefix: &str, replacement: &str) -> String {
        contents
            .lines()
            .map(|line| {
                if line.starts_with(prefix) {
                    replacement.to_string()
                } else {
                    line.to_string()
                }
            })
            .collect::<Vec<_>>()
            .join("\n")
            + "\n"
    }

    /// Rewrites the `"version"` field of tree-sitter.json.
    fn update_treesitter_json(&self) -> Result<()> {
        let path = self.current_dir.join("tree-sitter.json");
        let contents = fs::read_to_string(&path)?;
        fs::write(path, self.replace_json_version_fields(&contents))?;
        Ok(())
    }

    /// Rewrites the `version` field of Cargo.toml (if present) and
    /// regenerates Cargo.lock when one exists and `cargo` is runnable.
    fn update_cargo_toml(&self) -> Result<()> {
        if !self.current_dir.join("Cargo.toml").exists() {
            return Ok(());
        }

        let cargo_toml = fs::read_to_string(self.current_dir.join("Cargo.toml"))?;
        let cargo_toml = Self::replace_prefixed_lines(
            &cargo_toml,
            "version =",
            &format!("version = \"{}\"", self.version),
        );

        fs::write(self.current_dir.join("Cargo.toml"), cargo_toml)?;

        if self.current_dir.join("Cargo.lock").exists() {
            let Ok(cmd) = Command::new("cargo")
                .arg("generate-lockfile")
                .arg("--offline")
                .current_dir(&self.current_dir)
                .output()
            else {
                return Ok(()); // cargo is not `executable`, ignore
            };

            if !cmd.status.success() {
                let stderr = String::from_utf8_lossy(&cmd.stderr);
                return Err(anyhow!(
                    "Failed to run `cargo generate-lockfile`:\n{stderr}"
                ));
            }
        }

        Ok(())
    }

    /// Rewrites the `"version"` field of package.json (if present) and
    /// refreshes package-lock.json when one exists and `npm` is runnable.
    fn update_package_json(&self) -> Result<()> {
        if !self.current_dir.join("package.json").exists() {
            return Ok(());
        }

        let package_json = fs::read_to_string(self.current_dir.join("package.json"))?;
        fs::write(
            self.current_dir.join("package.json"),
            self.replace_json_version_fields(&package_json),
        )?;

        if self.current_dir.join("package-lock.json").exists() {
            let Ok(cmd) = Command::new("npm")
                .arg("install")
                .arg("--package-lock-only")
                .current_dir(&self.current_dir)
                .output()
            else {
                return Ok(()); // npm is not `executable`, ignore
            };

            if !cmd.status.success() {
                let stderr = String::from_utf8_lossy(&cmd.stderr);
                return Err(anyhow!("Failed to run `npm install`:\n{stderr}"));
            }
        }

        Ok(())
    }

    /// Rewrites the `VERSION` variable in the Makefile, if present.
    fn update_makefile(&self, is_multigrammar: bool) -> Result<()> {
        let makefile = if is_multigrammar {
            // NOTE(review): this branch checks for `common/common.mak` but
            // then reads and writes `Makefile` — confirm whether
            // `common.mak` was meant to be updated instead.
            if !self.current_dir.join("common").join("common.mak").exists() {
                return Ok(());
            }
            fs::read_to_string(self.current_dir.join("Makefile"))?
        } else {
            if !self.current_dir.join("Makefile").exists() {
                return Ok(());
            }
            fs::read_to_string(self.current_dir.join("Makefile"))?
        };

        let makefile = Self::replace_prefixed_lines(
            &makefile,
            "VERSION",
            &format!("VERSION := {}", self.version),
        );

        fs::write(self.current_dir.join("Makefile"), makefile)?;

        Ok(())
    }

    /// Rewrites the `VERSION "x.y.z"` argument in CMakeLists.txt, if present.
    fn update_cmakelists_txt(&self) -> Result<()> {
        if !self.current_dir.join("CMakeLists.txt").exists() {
            return Ok(());
        }

        let cmake = fs::read_to_string(self.current_dir.join("CMakeLists.txt"))?;
        // `Regex::replace` only replaces the first occurrence.
        let re = Regex::new(r#"(\s*VERSION\s+)"[0-9]+\.[0-9]+\.[0-9]+""#)?;
        let cmake = re.replace(&cmake, format!(r#"$1"{}""#, self.version));

        fs::write(self.current_dir.join("CMakeLists.txt"), cmake.as_bytes())?;

        Ok(())
    }

    /// Rewrites the `version` field of pyproject.toml, if present.
    fn update_pyproject_toml(&self) -> Result<()> {
        if !self.current_dir.join("pyproject.toml").exists() {
            return Ok(());
        }

        let pyproject_toml = fs::read_to_string(self.current_dir.join("pyproject.toml"))?;
        let pyproject_toml = Self::replace_prefixed_lines(
            &pyproject_toml,
            "version =",
            &format!("version = \"{}\"", self.version),
        );

        fs::write(self.current_dir.join("pyproject.toml"), pyproject_toml)?;

        Ok(())
    }
}

117
crates/cli/src/wasm.rs Normal file
View file

@ -0,0 +1,117 @@
use std::{
fs,
path::{Path, PathBuf},
};
use anyhow::{anyhow, Context, Result};
use tree_sitter::wasm_stdlib_symbols;
use tree_sitter_generate::parse_grammar::GrammarJSON;
use tree_sitter_loader::Loader;
use wasmparser::Parser;
/// Reads the compiled `tree-sitter-<name>.wasm` module from `language_dir`,
/// returning the grammar name together with the module bytes.
///
/// # Errors
///
/// Returns an error if the grammar name cannot be determined or the wasm
/// file cannot be read (e.g. `tree-sitter build --wasm` was never run).
pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec<u8>)> {
    // Propagate the error instead of panicking: the previous
    // `.with_context(..).unwrap()` defeated this function's `Result`
    // return type.
    let grammar_name =
        get_grammar_name(language_dir).with_context(|| "Failed to get wasm filename")?;
    let wasm_filename = format!("tree-sitter-{grammar_name}.wasm");
    let contents = fs::read(language_dir.join(&wasm_filename)).with_context(|| {
        format!("Failed to read {wasm_filename}. Run `tree-sitter build --wasm` first.")
    })?;
    Ok((grammar_name, contents))
}
/// Returns the grammar's name as declared in `<language_dir>/src/grammar.json`.
///
/// # Errors
///
/// Returns an error if grammar.json cannot be read or parsed.
pub fn get_grammar_name(language_dir: &Path) -> Result<String> {
    let grammar_json_path = language_dir.join("src").join("grammar.json");
    let contents = fs::read_to_string(&grammar_json_path).with_context(|| {
        format!(
            "Failed to read grammar file {}",
            grammar_json_path.display()
        )
    })?;
    let grammar: GrammarJSON = serde_json::from_str(&contents).with_context(|| {
        format!(
            "Failed to parse grammar file {}",
            grammar_json_path.display()
        )
    })?;
    Ok(grammar.name)
}
/// Compiles the grammar in `language_dir` to a wasm module, then verifies
/// that the module only imports symbols that are available to wasm parsers.
///
/// The output is written to `output_file` if given, otherwise to
/// `tree-sitter-<name>.wasm` inside `output_dir`.
///
/// # Errors
///
/// Returns an error if the grammar name cannot be determined, compilation
/// fails, the produced module cannot be read or parsed, or the external
/// scanner imports symbols unavailable to wasm parsers.
pub fn compile_language_to_wasm(
    loader: &Loader,
    root_dir: Option<&Path>,
    language_dir: &Path,
    output_dir: &Path,
    output_file: Option<PathBuf>,
) -> Result<()> {
    let grammar_name = get_grammar_name(language_dir)?;
    let output_filename =
        output_file.unwrap_or_else(|| output_dir.join(format!("tree-sitter-{grammar_name}.wasm")));
    let src_path = language_dir.join("src");
    let scanner_path = loader.get_scanner_path(&src_path);
    loader.compile_parser_to_wasm(
        &grammar_name,
        root_dir,
        &src_path,
        // Pass only the scanner's file name, resolved relative to `src`.
        // (`and_then(|p| Some(..?))` simplified to the equivalent `map`.)
        scanner_path
            .as_ref()
            .and_then(|p| p.file_name().map(Path::new)),
        &output_filename,
    )?;

    // Exit with an error if the external scanner uses symbols from the
    // C or C++ standard libraries that aren't available to wasm parsers.
    let stdlib_symbols = wasm_stdlib_symbols().collect::<Vec<_>>();
    // Symbols supplied by the wasm dynamic-linking convention.
    // (A duplicate "__table_base" entry was removed; membership checks are
    // unaffected.)
    let dylink_symbols = [
        "__indirect_function_table",
        "__memory_base",
        "__stack_pointer",
        "__table_base",
        "memory",
    ];
    // Symbols provided by the tree-sitter wasm runtime itself.
    let builtin_symbols = [
        "__assert_fail",
        "__cxa_atexit",
        "abort",
        "emscripten_notify_memory_growth",
        "tree_sitter_debug_message",
        "proc_exit",
    ];

    let mut missing_symbols = Vec::new();
    let wasm_bytes = fs::read(&output_filename)?;
    let parser = Parser::new(0);
    for payload in parser.parse_all(&wasm_bytes) {
        if let wasmparser::Payload::ImportSection(imports) = payload? {
            for import in imports {
                let import = import?.name;
                if !builtin_symbols.contains(&import)
                    && !stdlib_symbols.contains(&import)
                    && !dylink_symbols.contains(&import)
                {
                    missing_symbols.push(import);
                }
            }
        }
    }

    if !missing_symbols.is_empty() {
        Err(anyhow!(
            concat!(
                "This external scanner uses a symbol that isn't available to wasm parsers.\n",
                "\n",
                "Missing symbols:\n",
                " {}\n",
                "\n",
                "Available symbols:\n",
                " {}",
            ),
            missing_symbols.join("\n "),
            stdlib_symbols.join("\n ")
        ))?;
    }

    Ok(())
}