Merge branch 'master' into wasm-language

Max Brunsfeld 2023-10-27 11:57:04 +01:00
commit f4e2f68f14
161 changed files with 10293 additions and 4253 deletions

View file

@ -1,47 +1,53 @@
[package]
name = "tree-sitter-cli"
description = "CLI tool for developing, testing, and using Tree-sitter parsers"
version = "0.20.7"
version = "0.20.8"
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
edition = "2018"
edition = "2021"
license = "MIT"
readme = "README.md"
keywords = ["incremental", "parsing"]
categories = ["command-line-utilities", "parsing"]
repository = "https://github.com/tree-sitter/tree-sitter"
rust-version.workspace = true
[[bin]]
name = "tree-sitter"
path = "src/main.rs"
doc = false
[[bench]]
name = "benchmark"
harness = false
[dependencies]
ansi_term = "0.12"
anyhow = "1.0"
atty = "0.2"
ansi_term = "0.12.1"
anyhow = "1.0.72"
atty = "0.2.14"
clap = "2.32"
difference = "2.0"
dirs = "3.0"
glob = "0.3.0"
html-escape = "0.2.6"
indexmap = "1"
lazy_static = "1.2.0"
regex = "1"
regex-syntax = "0.6.4"
rustc-hash = "1"
semver = "1.0"
serde = { version = "1.0.130", features = ["derive"] }
ctrlc = { version = "3.4.0", features = ["termination"] }
difference = "2.0.0"
dirs = "5.0.1"
glob = "0.3.1"
html-escape = "0.2.13"
indexmap = "2.0.0"
lazy_static = "1.4.0"
memchr = "2.6.3"
path-slash = "0.2.1"
regex = "1.9.1"
regex-syntax = "0.7.4"
rustc-hash = "1.1.0"
semver = "1.0.18"
# Due to https://github.com/serde-rs/serde/issues/2538
serde = { version = "1.0, < 1.0.172", features = ["derive"] }
smallbitvec = "2.5.1"
tiny_http = "0.8"
walkdir = "2.3"
webbrowser = "0.5.1"
which = "4.1.0"
tiny_http = "0.12.0"
walkdir = "2.3.3"
webbrowser = "0.8.10"
which = "4.4.0"
[dependencies.tree-sitter]
version = "0.20.3"
version = "0.20.10"
path = "../lib"
features = ["wasm"]
@ -67,14 +73,18 @@ version = "1.0"
features = ["preserve_order"]
[dependencies.log]
version = "0.4.6"
version = "0.4.19"
features = ["std"]
[dev-dependencies]
rand = "0.8"
tempfile = "3"
pretty_assertions = "0.7.2"
ctor = "0.1"
tree_sitter_proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" }
rand = "0.8.5"
tempfile = "3.6.0"
pretty_assertions = "1.4.0"
ctor = "0.2.4"
unindent = "0.2.2"
indoc = "2.0.3"
[build-dependencies]
toml = "0.5"
toml = "0.7.6"

View file

@ -1,9 +1,11 @@
Tree-sitter CLI
===============
# Tree-sitter CLI
[![Build Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter)
[![Build status](https://ci.appveyor.com/api/projects/status/vtmbd6i92e97l55w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter/branch/master)
[![Crates.io](https://img.shields.io/crates/v/tree-sitter-cli.svg)](https://crates.io/crates/tree-sitter-cli)
[![crates.io badge]][crates.io] [![npmjs.com badge]][npmjs.com]
[crates.io]: https://crates.io/crates/tree-sitter-cli
[crates.io badge]: https://img.shields.io/crates/v/tree-sitter-cli.svg?color=%23B48723
[npmjs.com]: https://www.npmjs.org/package/tree-sitter-cli
[npmjs.com badge]: https://img.shields.io/npm/v/tree-sitter-cli.svg?color=%23BF4A4A
The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on macOS, Linux, and Windows.
@ -21,7 +23,7 @@ or with `npm`:
npm install tree-sitter-cli
```
You can also download a pre-built binary for your platform from [the releases page](https://github.com/tree-sitter/tree-sitter/releases/latest).
You can also download a pre-built binary for your platform from [the releases page].
### Dependencies
@ -32,8 +34,11 @@ The `tree-sitter` binary itself has no dependencies, but specific commands have
### Commands
* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information.
* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation] for more information.
* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information.
* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation] for more information.
* `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers.
[the documentation]: https://tree-sitter.github.io/tree-sitter/creating-parsers
[the releases page]: https://github.com/tree-sitter/tree-sitter/releases/latest
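As a sketch of driving these commands from a Rust tool (hypothetical grammar directory; assumes a `tree-sitter` binary on `PATH`):

```rust
use std::process::Command;

fn main() -> std::io::Result<()> {
    // Regenerate the parser, then run the grammar's unit tests.
    for subcommand in ["generate", "test"] {
        let status = Command::new("tree-sitter")
            .arg(subcommand)
            .current_dir("path/to/tree-sitter-javascript") // hypothetical path
            .status()?;
        assert!(status.success(), "`tree-sitter {subcommand}` failed");
    }
    Ok(())
}
```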

View file

@ -104,6 +104,7 @@ fn main() {
parse(&path, max_path_length, |source| {
Query::new(language, str::from_utf8(source).unwrap())
.with_context(|| format!("Query file path: {path:?}"))
.expect("Failed to parse query");
});
}

View file

@ -1,3 +1,4 @@
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::{env, fs};
@ -60,7 +61,39 @@ fn read_git_sha() -> Option<String> {
// If we're on a branch, read the SHA from the ref file.
if head_content.starts_with("ref: ") {
head_content.replace_range(0.."ref: ".len(), "");
let ref_filename = git_dir_path.join(&head_content);
let ref_filename = {
// Go to real non-worktree gitdir
let git_dir_path = git_dir_path
.parent()
.map(|p| {
p.file_name()
.map(|n| n == OsStr::new("worktrees"))
.and_then(|x| x.then(|| p.parent()))
})
.flatten()
.flatten()
.unwrap_or(&git_dir_path);
let file = git_dir_path.join(&head_content);
if file.is_file() {
file
} else {
let packed_refs = git_dir_path.join("packed-refs");
if let Ok(packed_refs_content) = fs::read_to_string(&packed_refs) {
for line in packed_refs_content.lines() {
if let Some((hash, r#ref)) = line.split_once(' ') {
if r#ref == head_content {
if let Some(path) = packed_refs.to_str() {
println!("cargo:rerun-if-changed={}", path);
}
return Some(hash.to_string());
}
}
}
}
return None;
}
};
if let Some(path) = ref_filename.to_str() {
println!("cargo:rerun-if-changed={}", path);
}
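For reference, a `packed-refs` file stores one `<sha> <refname>` pair per line. A self-contained sketch of the scan performed above, run against hypothetical file contents:

```rust
// Standalone sketch of the packed-refs lookup in the hunk above,
// using hypothetical contents.
fn lookup_packed_ref(packed_refs: &str, head_ref: &str) -> Option<String> {
    packed_refs.lines().find_map(|line| {
        let (hash, r#ref) = line.split_once(' ')?;
        (r#ref == head_ref).then(|| hash.to_string())
    })
}

fn main() {
    let contents = "abc123 refs/heads/master\ndef456 refs/heads/wasm-language";
    assert_eq!(
        lookup_packed_ref(contents, "refs/heads/wasm-language").as_deref(),
        Some("def456")
    );
}
```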

View file

@ -9,12 +9,14 @@ readme = "README.md"
keywords = ["incremental", "parsing"]
categories = ["command-line-utilities", "parsing"]
repository = "https://github.com/tree-sitter/tree-sitter"
rust-version.workspace = true
[dependencies]
anyhow = "1.0"
dirs = "3.0"
serde = { version = "1.0.130", features = ["derive"] }
# Due to https://github.com/serde-rs/serde/issues/2538
serde = { version = "1.0, < 1.0.172", features = ["derive"] }
[dependencies.serde_json]
version = "1.0.45"
version = "1.0"
features = ["preserve_order"]

View file

@ -1,5 +1,7 @@
# `tree-sitter-config`
# Tree-sitter Config
Manages Tree-sitter's configuration file.
You can use a configuration file to control the behavior of the `tree-sitter`
command-line program. This crate implements the logic for finding and
parsing the contents of the configuration file.
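As a short sketch of consuming this crate (assuming the `Config::initial` constructor shown in this commit and a public `location` field):

```rust
use anyhow::Result;
use tree_sitter_config::Config;

fn main() -> Result<()> {
    // Builds an empty configuration; per this commit, the location honors
    // TREE_SITTER_DIR when set and falls back to the XDG config directory.
    let config = Config::initial()?;
    println!("configuration file: {}", config.location.display());
    Ok(())
}
```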

View file

@ -1,4 +1,4 @@
//! Manages tree-sitter's configuration file.
#![doc = include_str!("../README.md")]
use anyhow::{anyhow, Context, Result};
use serde::{Deserialize, Serialize};
@ -25,6 +25,9 @@ impl Config {
if let Ok(path) = env::var("TREE_SITTER_DIR") {
let mut path = PathBuf::from(path);
path.push("config.json");
if !path.exists() {
return Ok(None);
}
if path.is_file() {
return Ok(Some(path));
}
@ -37,7 +40,8 @@ impl Config {
let legacy_path = dirs::home_dir()
.ok_or(anyhow!("Cannot determine home directory"))?
.join(".tree-sitter/config.json");
.join(".tree-sitter")
.join("config.json");
if legacy_path.is_file() {
return Ok(Some(legacy_path));
}
@ -48,7 +52,8 @@ impl Config {
fn xdg_config_file() -> Result<PathBuf> {
let xdg_path = dirs::config_dir()
.ok_or(anyhow!("Cannot determine config directory"))?
.join("tree-sitter/config.json");
.join("tree-sitter")
.join("config.json");
Ok(xdg_path)
}
@ -79,7 +84,13 @@ impl Config {
///
/// (Note that this is typically only done by the `tree-sitter init-config` command.)
pub fn initial() -> Result<Config> {
let location = Self::xdg_config_file()?;
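// As with the lookup above, an explicit TREE_SITTER_DIR takes precedence over the XDG config path.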
let location = if let Ok(path) = env::var("TREE_SITTER_DIR") {
let mut path = PathBuf::from(path);
path.push("config.json");
path
} else {
Self::xdg_config_file()?
};
let config = serde_json::json!({});
Ok(Config { location, config })
}

View file

@ -9,6 +9,7 @@ readme = "README.md"
keywords = ["incremental", "parsing"]
categories = ["command-line-utilities", "parsing"]
repository = "https://github.com/tree-sitter/tree-sitter"
rust-version.workspace = true
[features]
wasm = ["tree-sitter/wasm"]
@ -20,7 +21,8 @@ dirs = "3.0"
libloading = "0.7"
once_cell = "1.7"
regex = "1"
serde = { version = "1.0.130", features = ["derive"] }
# Due to https://github.com/serde-rs/serde/issues/2538
serde = { version = "1.0, < 1.0.172", features = ["derive"] }
which = "4.1.0"
[dependencies.serde_json]

View file

@ -1,6 +1,6 @@
# `tree-sitter-loader`
# Tree-sitter Loader
The `tree-sitter` command-line program will dynamically find and build grammars
at runtime, if you have cloned the grammars' repositories to your local
filesystem. This helper crate implements that logic, so that you can use it in
your own program analysis tools, as well.
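A minimal sketch of that use case (assuming a grammar repository already cloned at the given path; `Loader::new` and `languages_at_path` appear elsewhere in this commit):

```rust
use anyhow::Result;
use std::path::Path;
use tree_sitter_loader::Loader;

fn main() -> Result<()> {
    // Finds, compiles (or reuses a cached build of), and loads the grammar.
    let mut loader = Loader::new()?;
    let languages = loader.languages_at_path(Path::new("./tree-sitter-javascript"))?;
    println!("loaded {} language(s)", languages.len());
    Ok(())
}
```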

View file

@ -1 +1 @@
3.1.25
3.1.37

View file

@ -1,3 +1,5 @@
#![doc = include_str!("../README.md")]
use anyhow::{anyhow, Context, Error, Result};
use libloading::{Library, Symbol};
use once_cell::unsync::OnceCell;
@ -11,7 +13,7 @@ use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::Mutex;
use std::time::SystemTime;
use std::{fs, mem};
use std::{env, fs, mem};
use tree_sitter::{Language, QueryError, QueryErrorKind};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
@ -72,12 +74,12 @@ impl Config {
}
#[cfg(unix)]
const DYLIB_EXTENSION: &'static str = "so";
const DYLIB_EXTENSION: &str = "so";
#[cfg(windows)]
const DYLIB_EXTENSION: &'static str = "dll";
const BUILD_TARGET: &'static str = env!("BUILD_TARGET");
const BUILD_TARGET: &str = env!("BUILD_TARGET");
pub struct LanguageConfiguration<'a> {
pub scope: Option<String>,
@ -90,6 +92,7 @@ pub struct LanguageConfiguration<'a> {
pub injections_filenames: Option<Vec<String>>,
pub locals_filenames: Option<Vec<String>>,
pub tags_filenames: Option<Vec<String>>,
pub language_name: String,
language_id: usize,
highlight_config: OnceCell<Option<HighlightConfiguration>>,
tags_config: OnceCell<Option<TagsConfiguration>>,
@ -102,6 +105,7 @@ pub struct Loader {
languages_by_id: Vec<(PathBuf, OnceCell<Language>)>,
language_configurations: Vec<LanguageConfiguration<'static>>,
language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
language_configuration_in_current_path: Option<usize>,
highlight_names: Box<Mutex<Vec<String>>>,
use_all_highlight_names: bool,
debug_build: bool,
@ -115,9 +119,13 @@ unsafe impl Sync for Loader {}
impl Loader {
pub fn new() -> Result<Self> {
let parser_lib_path = dirs::cache_dir()
.ok_or(anyhow!("Cannot determine cache directory"))?
.join("tree-sitter/lib");
let parser_lib_path = match env::var("TREE_SITTER_LIBDIR") {
Ok(path) => PathBuf::from(path),
_ => dirs::cache_dir()
.ok_or(anyhow!("Cannot determine cache directory"))?
.join("tree-sitter")
.join("lib"),
};
Ok(Self::with_parser_lib_path(parser_lib_path))
}
@ -127,6 +135,7 @@ impl Loader {
languages_by_id: Vec::new(),
language_configurations: Vec::new(),
language_configuration_ids_by_file_type: HashMap::new(),
language_configuration_in_current_path: None,
highlight_names: Box::new(Mutex::new(Vec::new())),
use_all_highlight_names: true,
debug_build: false,
@ -136,7 +145,7 @@ impl Loader {
}
}
pub fn configure_highlights(&mut self, names: &Vec<String>) {
pub fn configure_highlights(&mut self, names: &[String]) {
self.use_all_highlight_names = false;
let mut highlights = self.highlight_names.lock().unwrap();
highlights.clear();
@ -152,8 +161,7 @@ impl Loader {
eprintln!("Warning: You have not configured any parser directories!");
eprintln!("Please run `tree-sitter init-config` and edit the resulting");
eprintln!("configuration file to indicate where we should look for");
eprintln!("language grammars.");
eprintln!("");
eprintln!("language grammars.\n");
}
for parser_container_dir in &config.parser_directories {
if let Ok(entries) = fs::read_dir(parser_container_dir) {
@ -163,6 +171,7 @@ impl Loader {
if parser_dir_name.starts_with("tree-sitter-") {
self.find_language_configurations_at_path(
&parser_container_dir.join(parser_dir_name),
false,
)
.ok();
}
@ -174,7 +183,7 @@ impl Loader {
}
pub fn languages_at_path(&mut self, path: &Path) -> Result<Vec<Language>> {
if let Ok(configurations) = self.find_language_configurations_at_path(path) {
if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
let mut language_ids = configurations
.iter()
.map(|c| c.language_id)
@ -365,7 +374,7 @@ impl Loader {
library_path.set_extension(DYLIB_EXTENSION);
}
let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)
let recompile = needs_recompile(&library_path, &parser_path, scanner_path.as_deref())
.with_context(|| "Failed to compare source and binary timestamps")?;
if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
@ -411,7 +420,7 @@ impl Loader {
header_path: &Path,
parser_path: &Path,
scanner_path: &Option<PathBuf>,
output_path: &PathBuf,
library_path: &PathBuf,
) -> Result<(), Error> {
let mut config = cc::Build::new();
config
@ -419,13 +428,15 @@ impl Loader {
.opt_level(2)
.cargo_metadata(false)
.target(BUILD_TARGET)
.host(BUILD_TARGET);
.host(BUILD_TARGET)
.flag_if_supported("-Werror=implicit-function-declaration");
let compiler = config.get_compiler();
let mut command = Command::new(compiler.path());
for (key, value) in compiler.env() {
command.env(key, value);
}
if cfg!(windows) {
if compiler.is_like_msvc() {
command.args(&["/nologo", "/LD", "/I"]).arg(header_path);
if self.debug_build {
command.arg("/Od");
@ -438,17 +449,20 @@ impl Loader {
}
command
.arg("/link")
.arg(format!("/out:{}", output_path.to_str().unwrap()));
.arg(format!("/out:{}", library_path.to_str().unwrap()));
} else {
command
.arg("-shared")
.arg("-fPIC")
.arg("-fno-exceptions")
.arg("-g")
.arg("-I")
.arg(header_path)
.arg("-o")
.arg(output_path);
.arg(&library_path);
if !cfg!(windows) {
command.arg("-fPIC");
}
if self.debug_build {
command.arg("-O0");
@ -469,6 +483,7 @@ impl Loader {
}
command.arg("-xc").arg(parser_path);
}
let output = command
.output()
.with_context(|| "Failed to execute C compiler")?;
@ -479,6 +494,37 @@ impl Loader {
String::from_utf8_lossy(&output.stderr)
));
}
#[cfg(any(target_os = "macos", target_os = "linux"))]
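// On macOS and Linux, run `nm` over the compiled library to flag globally visible scanner symbols that lack the `tree_sitter_` prefix.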
if scanner_path.is_some() {
let command = Command::new("nm")
.arg("-W")
.arg("-U")
.arg(&library_path)
.output();
if let Ok(output) = command {
if output.status.success() {
let mut found_non_static = false;
for line in String::from_utf8_lossy(&output.stdout).lines() {
if line.contains(" T ") && !line.contains("tree_sitter_") {
if let Some(function_name) =
line.split_whitespace().collect::<Vec<_>>().get(2)
{
if !found_non_static {
found_non_static = true;
eprintln!("Warning: Found non-static non-tree-sitter functions in external scannner");
}
eprintln!(" `{function_name}`");
}
}
}
if found_non_static {
eprintln!("Consider making these functions static, they can cause conflicts when another tree-sitter project uses the same function name");
}
}
}
}
Ok(())
}
@ -581,6 +627,7 @@ impl Loader {
pub fn highlight_config_for_injection_string<'a>(
&'a self,
string: &str,
apply_all_captures: bool,
) -> Option<&'a HighlightConfiguration> {
match self.language_configuration_for_injection_string(string) {
Err(e) => {
@ -591,38 +638,36 @@ impl Loader {
None
}
Ok(None) => None,
Ok(Some((language, configuration))) => match configuration.highlight_config(language) {
Err(e) => {
eprintln!(
"Failed to load property sheet for injection string '{}': {}",
string, e
);
None
Ok(Some((language, configuration))) => {
match configuration.highlight_config(language, apply_all_captures, None) {
Err(e) => {
eprintln!(
"Failed to load property sheet for injection string '{}': {}",
string, e
);
None
}
Ok(None) => None,
Ok(Some(config)) => Some(config),
}
Ok(None) => None,
Ok(Some(config)) => Some(config),
},
}
}
}
pub fn find_language_configurations_at_path<'a>(
&'a mut self,
pub fn find_language_configurations_at_path(
&mut self,
parser_path: &Path,
set_current_path_config: bool,
) -> Result<&[LanguageConfiguration]> {
#[derive(Deserialize)]
#[derive(Default, Deserialize)]
#[serde(untagged)]
enum PathsJSON {
#[default]
Empty,
Single(String),
Multiple(Vec<String>),
}
impl Default for PathsJSON {
fn default() -> Self {
PathsJSON::Empty
}
}
impl PathsJSON {
fn into_vec(self) -> Option<Vec<String>> {
match self {
@ -663,6 +708,11 @@ impl Loader {
tree_sitter: Vec<LanguageConfigurationJSON>,
}
#[derive(Deserialize)]
struct GrammarJSON {
name: String,
}
let initial_language_configuration_count = self.language_configurations.len();
if let Ok(package_json_contents) = fs::read_to_string(&parser_path.join("package.json")) {
@ -674,6 +724,13 @@ impl Loader {
// the package.json, but defaults to the directory containing the package.json.
let language_path = parser_path.join(config_json.path);
let grammar_path = language_path.join("src").join("grammar.json");
let mut grammar_file = fs::File::open(grammar_path)
.with_context(|| "Failed to read grammar.json")?;
let grammar_json: GrammarJSON =
serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
// Determine if a previous language configuration in this package.json file
// already uses the same language.
let mut language_id = None;
@ -693,6 +750,7 @@ impl Loader {
let configuration = LanguageConfiguration {
root_path: parser_path.to_path_buf(),
language_name: grammar_json.name.clone(),
scope: config_json.scope,
language_id,
file_types: config_json.file_types.unwrap_or(Vec::new()),
@ -705,19 +763,26 @@ impl Loader {
highlights_filenames: config_json.highlights.into_vec(),
highlight_config: OnceCell::new(),
tags_config: OnceCell::new(),
highlight_names: &*self.highlight_names,
highlight_names: &self.highlight_names,
use_all_highlight_names: self.use_all_highlight_names,
};
for file_type in &configuration.file_types {
self.language_configuration_ids_by_file_type
.entry(file_type.to_string())
.or_insert(Vec::new())
.or_default()
.push(self.language_configurations.len());
}
self.language_configurations
.push(unsafe { mem::transmute(configuration) });
if set_current_path_config
&& self.language_configuration_in_current_path.is_none()
{
self.language_configuration_in_current_path =
Some(self.language_configurations.len() - 1);
}
}
}
}
@ -725,8 +790,15 @@ impl Loader {
if self.language_configurations.len() == initial_language_configuration_count
&& parser_path.join("src").join("grammar.json").exists()
{
let grammar_path = parser_path.join("src").join("grammar.json");
let mut grammar_file =
fs::File::open(grammar_path).with_context(|| "Failed to read grammar.json")?;
let grammar_json: GrammarJSON =
serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
let configuration = LanguageConfiguration {
root_path: parser_path.to_owned(),
language_name: grammar_json.name,
language_id: self.languages_by_id.len(),
file_types: Vec::new(),
scope: None,
@ -739,7 +811,7 @@ impl Loader {
tags_filenames: None,
highlight_config: OnceCell::new(),
tags_config: OnceCell::new(),
highlight_names: &*self.highlight_names,
highlight_names: &self.highlight_names,
use_all_highlight_names: self.use_all_highlight_names,
};
self.language_configurations
@ -764,11 +836,11 @@ impl Loader {
if let Some(scope) = scope {
if let Some(config) = self
.language_configuration_for_scope(scope)
.with_context(|| format!("Failed to load language for scope '{}'", scope))?
.with_context(|| format!("Failed to load language for scope '{scope}'"))?
{
Ok(config.0)
} else {
return Err(anyhow!("Unknown scope '{}'", scope));
Err(anyhow!("Unknown scope '{scope}'"))
}
} else if let Some((lang, _)) = self
.language_configuration_for_file_name(path)
@ -780,8 +852,10 @@ impl Loader {
})?
{
Ok(lang)
} else if let Some(id) = self.language_configuration_in_current_path {
Ok(self.language_for_id(self.language_configurations[id].language_id)?)
} else if let Some(lang) = self
.languages_at_path(&current_dir)
.languages_at_path(current_dir)
.with_context(|| "Failed to load language in current directory")?
.first()
.cloned()
@ -803,25 +877,76 @@ impl Loader {
}
impl<'a> LanguageConfiguration<'a> {
pub fn highlight_config(&self, language: Language) -> Result<Option<&HighlightConfiguration>> {
pub fn highlight_config(
&self,
language: Language,
apply_all_captures: bool,
paths: Option<&[String]>,
) -> Result<Option<&HighlightConfiguration>> {
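// When explicit query paths are provided, split them by file name; otherwise fall back to the paths configured for the language.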
let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
Some(paths) => (
Some(
paths
.iter()
.filter(|p| p.ends_with("highlights.scm"))
.cloned()
.collect::<Vec<_>>(),
),
Some(
paths
.iter()
.filter(|p| p.ends_with("tags.scm"))
.cloned()
.collect::<Vec<_>>(),
),
Some(
paths
.iter()
.filter(|p| p.ends_with("locals.scm"))
.cloned()
.collect::<Vec<_>>(),
),
),
None => (None, None, None),
};
return self
.highlight_config
.get_or_try_init(|| {
let (highlights_query, highlight_ranges) =
self.read_queries(&self.highlights_filenames, "highlights.scm")?;
let (injections_query, injection_ranges) =
self.read_queries(&self.injections_filenames, "injections.scm")?;
let (locals_query, locals_ranges) =
self.read_queries(&self.locals_filenames, "locals.scm")?;
let (highlights_query, highlight_ranges) = self.read_queries(
if highlights_filenames.is_some() {
highlights_filenames.as_deref()
} else {
self.highlights_filenames.as_deref()
},
"highlights.scm",
)?;
let (injections_query, injection_ranges) = self.read_queries(
if injections_filenames.is_some() {
injections_filenames.as_deref()
} else {
self.injections_filenames.as_deref()
},
"injections.scm",
)?;
let (locals_query, locals_ranges) = self.read_queries(
if locals_filenames.is_some() {
locals_filenames.as_deref()
} else {
self.locals_filenames.as_deref()
},
"locals.scm",
)?;
if highlights_query.is_empty() {
Ok(None)
} else {
let mut result = HighlightConfiguration::new(
language,
&self.language_name,
&highlights_query,
&injections_query,
&locals_query,
apply_all_captures,
)
.map_err(|error| match error.kind {
QueryErrorKind::Language => Error::from(error),
@ -853,12 +978,12 @@ impl<'a> LanguageConfiguration<'a> {
let mut all_highlight_names = self.highlight_names.lock().unwrap();
if self.use_all_highlight_names {
for capture_name in result.query.capture_names() {
if !all_highlight_names.contains(capture_name) {
all_highlight_names.push(capture_name.clone());
if !all_highlight_names.iter().any(|x| x == capture_name) {
all_highlight_names.push(capture_name.to_string());
}
}
}
result.configure(&all_highlight_names.as_slice());
result.configure(all_highlight_names.as_slice());
Ok(Some(result))
}
})
@ -869,9 +994,9 @@ impl<'a> LanguageConfiguration<'a> {
self.tags_config
.get_or_try_init(|| {
let (tags_query, tags_ranges) =
self.read_queries(&self.tags_filenames, "tags.scm")?;
self.read_queries(self.tags_filenames.as_deref(), "tags.scm")?;
let (locals_query, locals_ranges) =
self.read_queries(&self.locals_filenames, "locals.scm")?;
self.read_queries(self.locals_filenames.as_deref(), "locals.scm")?;
if tags_query.is_empty() {
Ok(None)
} else {
@ -894,7 +1019,6 @@ impl<'a> LanguageConfiguration<'a> {
locals_query.len(),
)
}
.into()
} else {
error.into()
}
@ -904,9 +1028,9 @@ impl<'a> LanguageConfiguration<'a> {
.map(Option::as_ref)
}
fn include_path_in_query_error<'b>(
fn include_path_in_query_error(
mut error: QueryError,
ranges: &'b Vec<(String, Range<usize>)>,
ranges: &[(String, Range<usize>)],
source: &str,
start_offset: usize,
) -> Error {
@ -914,7 +1038,7 @@ impl<'a> LanguageConfiguration<'a> {
let (path, range) = ranges
.iter()
.find(|(_, range)| range.contains(&offset_within_section))
.unwrap();
.unwrap_or(ranges.last().unwrap());
error.offset = offset_within_section - range.start;
error.row = source[range.start..offset_within_section]
.chars()
@ -925,12 +1049,12 @@ impl<'a> LanguageConfiguration<'a> {
fn read_queries(
&self,
paths: &Option<Vec<String>>,
paths: Option<&[String]>,
default_path: &str,
) -> Result<(String, Vec<(String, Range<usize>)>)> {
let mut query = String::new();
let mut path_ranges = Vec::new();
if let Some(paths) = paths.as_ref() {
if let Some(paths) = paths {
for path in paths {
let abs_path = self.root_path.join(path);
let prev_query_len = query.len();
@ -955,7 +1079,7 @@ impl<'a> LanguageConfiguration<'a> {
fn needs_recompile(
lib_path: &Path,
parser_c_path: &Path,
scanner_path: &Option<PathBuf>,
scanner_path: Option<&Path>,
) -> Result<bool> {
if !lib_path.exists() {
return Ok(true);

cli/npm/.gitignore vendored (1 line changed)
View file

@ -3,3 +3,4 @@ tree-sitter.exe
*.gz
*.tgz
LICENSE
README.md

cli/npm/dsl.d.ts vendored (62 lines changed)
View file

@ -1,19 +1,19 @@
type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string};
type BlankRule = {type: 'BLANK'};
type ChoiceRule = {type: 'CHOICE'; members: Rule[]};
type FieldRule = {type: 'FIELD'; name: string; content: Rule};
type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule};
type PatternRule = {type: 'PATTERN'; value: string};
type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number};
type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number};
type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number};
type PrecRule = {type: 'PREC'; content: Rule; value: number};
type Repeat1Rule = {type: 'REPEAT1'; content: Rule};
type RepeatRule = {type: 'REPEAT'; content: Rule};
type SeqRule = {type: 'SEQ'; members: Rule[]};
type StringRule = {type: 'STRING'; value: string};
type SymbolRule<Name extends string> = {type: 'SYMBOL'; name: Name};
type TokenRule = {type: 'TOKEN'; content: Rule};
type AliasRule = { type: 'ALIAS'; named: boolean; content: Rule; value: string };
type BlankRule = { type: 'BLANK' };
type ChoiceRule = { type: 'CHOICE'; members: Rule[] };
type FieldRule = { type: 'FIELD'; name: string; content: Rule };
type ImmediateTokenRule = { type: 'IMMEDIATE_TOKEN'; content: Rule };
type PatternRule = { type: 'PATTERN'; value: string };
type PrecDynamicRule = { type: 'PREC_DYNAMIC'; content: Rule; value: number };
type PrecLeftRule = { type: 'PREC_LEFT'; content: Rule; value: number };
type PrecRightRule = { type: 'PREC_RIGHT'; content: Rule; value: number };
type PrecRule = { type: 'PREC'; content: Rule; value: number };
type Repeat1Rule = { type: 'REPEAT1'; content: Rule };
type RepeatRule = { type: 'REPEAT'; content: Rule };
type SeqRule = { type: 'SEQ'; members: Rule[] };
type StringRule = { type: 'STRING'; value: string };
type SymbolRule<Name extends string> = { type: 'SYMBOL'; name: Name };
type TokenRule = { type: 'TOKEN'; content: Rule };
type Rule =
| AliasRule
@ -42,14 +42,15 @@ type GrammarSymbols<RuleName extends string> = {
type RuleBuilder<RuleName extends string> = (
$: GrammarSymbols<RuleName>,
previous: Rule,
) => RuleOrLiteral;
type RuleBuilders<
RuleName extends string,
BaseGrammarRuleName extends string
> = {
[name in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>;
};
[name in RuleName]: RuleBuilder<RuleName | BaseGrammarRuleName>;
};
interface Grammar<
RuleName extends string,
@ -68,11 +69,17 @@ interface Grammar<
rules: Rules;
/**
* An array of arrays of precedence names. Each inner array represents
* a *descending* ordering. Names listed earlier in one of these arrays
* have higher precedence than any names listed later in the same array.
* An array of arrays of precedence names or rules. Each inner array represents
* a *descending* ordering. Names/rules listed earlier in one of these arrays
* have higher precedence than any names/rules listed later in the same array.
*
* Listing a rule directly is shorthand for naming a precedence level and
* then calling prec() with that name; it is purely a convenience.
*/
precedences?: () => String[][],
precedences?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
previous: Rule[][],
) => RuleOrLiteral[][],
/**
* An array of arrays of rule names. Each inner array represents a set of
@ -86,6 +93,7 @@ interface Grammar<
*/
conflicts?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
previous: Rule[][],
) => RuleOrLiteral[][];
/**
@ -102,7 +110,7 @@ interface Grammar<
externals?: (
$: Record<string, SymbolRule<string>>,
previous: Rule[],
) => SymbolRule<string>[];
) => RuleOrLiteral[];
/**
* An array of tokens that may appear anywhere in the language. This
@ -126,6 +134,7 @@ interface Grammar<
*/
inline?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
previous: Rule[],
) => RuleOrLiteral[];
/**
@ -134,10 +143,11 @@ interface Grammar<
*
* @param $ grammar rules
*
* @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
* @see https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
*/
supertypes?: (
$: GrammarSymbols<RuleName | BaseGrammarRuleName>,
previous: Rule[],
) => RuleOrLiteral[];
/**
@ -153,8 +163,8 @@ interface Grammar<
type GrammarSchema<RuleName extends string> = {
[K in keyof Grammar<RuleName>]: K extends 'rules'
? Record<RuleName, Rule>
: Grammar<RuleName>[K];
? Record<RuleName, Rule>
: Grammar<RuleName>[K];
};
/**

View file

@ -6,25 +6,54 @@ const http = require('http');
const https = require('https');
const packageJSON = require('./package.json');
// Determine the URL of the file.
const platformName = {
'darwin': 'macos',
'linux': 'linux',
'win32': 'windows'
}[process.platform];
let archName = {
'x64': 'x64',
'x86': 'x86',
'ia32': 'x86'
}[process.arch];
// ARM macs can run x64 binaries via Rosetta. Rely on that for now.
if (platformName === 'macos' && process.arch === 'arm64') {
archName = 'x64';
// See the results table in https://github.com/tree-sitter/tree-sitter/issues/2196
const matrix = {
platform: {
'darwin': {
name: 'macos',
arch: {
'arm64': { name: 'arm64' },
'x64': { name: 'x64' },
}
},
'linux': {
name: 'linux',
arch: {
'arm64': { name: 'arm64' },
'arm': { name: 'arm' },
'armv7l': { name: 'armv7l' },
'x64': { name: 'x64' },
'x86': { name: 'x86' },
'i586': { name: 'i586' },
'mips': { name: 'mips' },
'mips64': { name: 'mips64' },
'mipsel': { name: 'mipsel' },
'mips64el': { name: 'mips64el' },
'ppc': { name: 'powerpc' },
'ppc64': { name: 'powerpc64' },
'ppc64el': { name: 'powerpc64el' },
'riscv64gc': { name: 'riscv64gc' },
's390x': { name: 's390x' },
'sparc64': { name: 'sparc64' },
}
},
'win32': {
name: 'windows',
arch: {
'arm64': { name: 'arm64' },
'x64': { name: 'x64' },
'x86': { name: 'x86' },
'ia32': { name: 'x86' },
}
},
},
}
if (!platformName || !archName) {
// Determine the URL of the file.
const platform = matrix.platform[process.platform];
const arch = platform && platform.arch[process.arch];
if (!platform || !platform.name || !arch || !arch.name) {
console.error(
`Cannot install tree-sitter-cli for platform ${process.platform}, architecture ${process.arch}`
);
@ -32,7 +61,7 @@ if (!platformName || !archName) {
}
const releaseURL = `https://github.com/tree-sitter/tree-sitter/releases/download/v${packageJSON.version}`;
const assetName = `tree-sitter-${platformName}-${archName}.gz`;
const assetName = `tree-sitter-${platform.name}-${arch.name}.gz`;
const assetURL = `${releaseURL}/${assetName}`;
// Remove previously-downloaded files.

View file

@ -1,11 +1,11 @@
{
"name": "tree-sitter-cli",
"version": "0.20.7",
"version": "0.20.8",
"author": "Max Brunsfeld",
"license": "MIT",
"repository": {
"type": "git",
"url": "http://github.com/tree-sitter/tree-sitter.git"
"url": "https://github.com/tree-sitter/tree-sitter.git"
},
"description": "CLI for generating fast incremental parsers",
"keywords": [
@ -15,7 +15,8 @@
"main": "lib/api/index.js",
"scripts": {
"install": "node install.js",
"prepack": "cp ../../LICENSE ."
"prepack": "cp ../../LICENSE ../README.md .",
"postpack": "rm LICENSE README.md"
},
"bin": {
"tree-sitter": "cli.js"

View file

@ -57,6 +57,7 @@ struct ParseTableBuilder<'a> {
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
non_terminal_extra_states: Vec<(Symbol, usize)>,
actual_conflicts: HashSet<Vec<Symbol>>,
parse_table: ParseTable,
}
@ -132,6 +133,20 @@ impl<'a> ParseTableBuilder<'a> {
)?;
}
if !self.actual_conflicts.is_empty() {
println!("Warning: unnecessary conflicts");
for conflict in &self.actual_conflicts {
println!(
" {}",
conflict
.iter()
.map(|symbol| format!("`{}`", self.symbol_name(symbol)))
.collect::<Vec<_>>()
.join(", ")
);
}
}
Ok((self.parse_table, self.parse_state_info_by_id))
}
@ -582,6 +597,7 @@ impl<'a> ParseTableBuilder<'a> {
.expected_conflicts
.contains(&actual_conflict)
{
self.actual_conflicts.remove(&actual_conflict);
return Ok(());
}
@ -964,6 +980,7 @@ pub(crate) fn build_parse_table<'a>(
inlines: &'a InlinedProductionMap,
variable_info: &'a Vec<VariableInfo>,
) -> Result<(ParseTable, Vec<TokenSet>, Vec<ParseStateInfo<'a>>)> {
let actual_conflicts = syntax_grammar.expected_conflicts.iter().cloned().collect();
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
populate_following_tokens(
@ -979,6 +996,7 @@ pub(crate) fn build_parse_table<'a>(
item_set_builder,
variable_info,
non_terminal_extra_states: Vec::new(),
actual_conflicts,
state_ids_by_item_set: IndexMap::default(),
core_ids_by_core: HashMap::new(),
parse_state_info_by_id: Vec::new(),

View file

@ -390,12 +390,12 @@ mod tests {
Variable {
name: "token_0".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("[a-f]1|0x\\d"),
rule: Rule::pattern("[a-f]1|0x\\d", ""),
},
Variable {
name: "token_1".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("d*ef"),
rule: Rule::pattern("d*ef", ""),
},
],
})
@ -426,7 +426,7 @@ mod tests {
Variable {
name: "identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("\\w+"),
rule: Rule::pattern("\\w+", ""),
},
Variable {
name: "instanceof".to_string(),
@ -471,7 +471,7 @@ mod tests {
#[test]
fn test_token_conflicts_with_separators() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: vec![Rule::pattern("\\s")],
separators: vec![Rule::pattern("\\s", "")],
variables: vec![
Variable {
name: "x".to_string(),
@ -498,7 +498,7 @@ mod tests {
#[test]
fn test_token_conflicts_with_open_ended_tokens() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: vec![Rule::pattern("\\s")],
separators: vec![Rule::pattern("\\s", "")],
variables: vec![
Variable {
name: "x".to_string(),
@ -508,7 +508,7 @@ mod tests {
Variable {
name: "anything".to_string(),
kind: VariableType::Named,
rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*")),
rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*", "")),
},
],
})

View file

@ -181,7 +181,11 @@ function normalize(value) {
value
};
case RegExp:
return {
return value.flags ? {
type: 'PATTERN',
value: value.source,
flags: value.flags
} : {
type: 'PATTERN',
value: value.source
};

View file

@ -63,7 +63,7 @@
},
"supertypes": {
"description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
"description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
"type": "array",
"items": {
"description": "the name of a rule in `rules` or `extras`",

View file

@ -21,10 +21,10 @@ use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use semver::Version;
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::{env, fs};
lazy_static! {
static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
@ -44,25 +44,27 @@ pub fn generate_parser_in_directory(
abi_version: usize,
generate_bindings: bool,
report_symbol_name: Option<&str>,
js_runtime: Option<&str>,
) -> Result<()> {
let src_path = repo_path.join("src");
let header_path = src_path.join("tree_sitter");
// Read the grammar.json.
let grammar_json = match grammar_path {
Some(path) => load_grammar_file(path.as_ref(), js_runtime)?,
None => {
let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
load_grammar_file(&grammar_js_path, js_runtime)?
}
};
// Ensure that the output directories exist.
fs::create_dir_all(&src_path)?;
fs::create_dir_all(&header_path)?;
// Read the grammar.json.
let grammar_json;
match grammar_path {
Some(path) => {
grammar_json = load_grammar_file(path.as_ref())?;
}
None => {
let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
grammar_json = load_grammar_file(&grammar_js_path)?;
fs::write(&src_path.join("grammar.json"), &grammar_json)?;
}
if grammar_path.is_none() {
fs::write(&src_path.join("grammar.json"), &grammar_json)
.with_context(|| format!("Failed to write grammar.json to {:?}", src_path))?;
}
// Parse and preprocess the grammar.
@ -155,10 +157,18 @@ fn generate_parser_for_grammar_with_opts(
})
}
pub fn load_grammar_file(grammar_path: &Path) -> Result<String> {
pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
if grammar_path.is_dir() {
return Err(anyhow!(
"Path to a grammar file with `.js` or `.json` extension is required"
));
}
match grammar_path.extension().and_then(|e| e.to_str()) {
Some("js") => Ok(load_js_grammar_file(grammar_path)?),
Some("json") => Ok(fs::read_to_string(grammar_path)?),
Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime)
.with_context(|| "Failed to load grammar.js")?),
Some("json") => {
Ok(fs::read_to_string(grammar_path).with_context(|| "Failed to load grammar.json")?)
}
_ => Err(anyhow!(
"Unknown grammar file extension: {:?}",
grammar_path
@ -166,21 +176,24 @@ pub fn load_grammar_file(grammar_path: &Path) -> Result<String> {
}
}
fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
let grammar_path = fs::canonicalize(grammar_path)?;
let mut node_process = Command::new("node")
let js_runtime = js_runtime.unwrap_or("node");
let mut node_process = Command::new(js_runtime)
.env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.expect("Failed to run `node`");
.with_context(|| format!("Failed to run `{js_runtime}`"))?;
let mut node_stdin = node_process
.stdin
.take()
.expect("Failed to open stdin for node");
.with_context(|| "Failed to open stdin for node")?;
let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))
.expect("Could not parse this package's version as semver.");
.with_context(|| "Could not parse this package's version as semver.")?;
write!(
node_stdin,
"global.TREE_SITTER_CLI_VERSION_MAJOR = {};
@ -188,22 +201,22 @@ fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
global.TREE_SITTER_CLI_VERSION_PATCH = {};",
cli_version.major, cli_version.minor, cli_version.patch,
)
.expect("Failed to write tree-sitter version to node's stdin");
.with_context(|| "Failed to write tree-sitter version to node's stdin")?;
let javascript_code = include_bytes!("./dsl.js");
node_stdin
.write(javascript_code)
.expect("Failed to write grammar dsl to node's stdin");
.with_context(|| "Failed to write grammar dsl to node's stdin")?;
drop(node_stdin);
let output = node_process
.wait_with_output()
.expect("Failed to read output from node");
.with_context(|| "Failed to read output from node")?;
match output.status.code() {
None => panic!("Node process was killed"),
Some(0) => {}
Some(code) => return Err(anyhow!("Node process exited with status {}", code)),
}
let mut result = String::from_utf8(output.stdout).expect("Got invalid UTF8 from node");
let mut result =
String::from_utf8(output.stdout).with_context(|| "Got invalid UTF8 from node")?;
result.push('\n');
Ok(result)
}

View file

@ -1172,12 +1172,12 @@ mod tests {
Variable {
name: "identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("\\w+"),
rule: Rule::pattern("\\w+", ""),
},
Variable {
name: "foo_identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("[\\w-]+"),
rule: Rule::pattern("[\\w-]+", ""),
},
],
..Default::default()
@ -1275,8 +1275,8 @@ mod tests {
name: "script".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::field("a".to_string(), Rule::pattern("hi")),
Rule::field("b".to_string(), Rule::pattern("bye")),
Rule::field("a".to_string(), Rule::pattern("hi", "")),
Rule::field("b".to_string(), Rule::pattern("bye", "")),
]),
}],
..Default::default()

View file

@ -19,6 +19,7 @@ enum RuleJSON {
},
PATTERN {
value: String,
flags: Option<String>,
},
SYMBOL {
name: String,
@ -143,7 +144,21 @@ fn parse_rule(json: RuleJSON) -> Rule {
} => Rule::alias(parse_rule(*content), value, named),
RuleJSON::BLANK => Rule::Blank,
RuleJSON::STRING { value } => Rule::String(value),
RuleJSON::PATTERN { value } => Rule::Pattern(value),
RuleJSON::PATTERN { value, flags } => Rule::Pattern(
value,
flags.map_or(String::new(), |f| {
f.chars()
.filter(|c| match c {
// keep the supported case-insensitive flag
'i' => true,
// silently ignore the unicode flag
'u' => false,
c => {
eprintln!("Warning: unsupported flag {c}");
false
}
})
.collect()
}),
),
RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)),

View file

@ -139,10 +139,10 @@ pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<Lexi
impl NfaBuilder {
fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
match rule {
Rule::Pattern(s) => {
Rule::Pattern(s, f) => {
let s = preprocess_regex(s);
let ast = parse::Parser::new().parse(&s)?;
self.expand_regex(&ast, next_state_id)
self.expand_regex(&ast, next_state_id, f.contains('i'))
}
Rule::String(s) => {
for c in s.chars().rev() {
@ -210,12 +210,42 @@ impl NfaBuilder {
}
}
fn expand_regex(&mut self, ast: &Ast, mut next_state_id: u32) -> Result<bool> {
fn expand_regex(
&mut self,
ast: &Ast,
mut next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
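// Swap the case of an ASCII letter; other characters are returned unchanged.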
fn inverse_char(c: char) -> char {
match c {
'a'..='z' => (c as u8 - b'a' + b'A') as char,
'A'..='Z' => (c as u8 - b'A' + b'a') as char,
c => c,
}
}
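// Extend a character set with the opposite-case counterpart of each letter it contains.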
fn with_inverse_char(mut chars: CharacterSet) -> CharacterSet {
for char in chars.clone().chars() {
let inverted = inverse_char(char);
if char != inverted {
chars = chars.add_char(inverted);
}
}
chars
}
match ast {
Ast::Empty(_) => Ok(false),
Ast::Flags(_) => Err(anyhow!("Regex error: Flags are not supported")),
Ast::Literal(literal) => {
self.push_advance(CharacterSet::from_char(literal.c), next_state_id);
let mut char_set = CharacterSet::from_char(literal.c);
if case_insensitive {
let inverted = inverse_char(literal.c);
if literal.c != inverted {
char_set = char_set.add_char(inverted);
}
}
self.push_advance(char_set, next_state_id);
Ok(true)
}
Ast::Dot(_) => {
@ -229,6 +259,9 @@ impl NfaBuilder {
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
@ -237,6 +270,9 @@ impl NfaBuilder {
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
@ -245,48 +281,56 @@ impl NfaBuilder {
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
},
Ast::Repetition(repetition) => match repetition.op.kind {
RepetitionKind::ZeroOrOne => {
self.expand_zero_or_one(&repetition.ast, next_state_id)
self.expand_zero_or_one(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::OneOrMore => {
self.expand_one_or_more(&repetition.ast, next_state_id)
self.expand_one_or_more(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::ZeroOrMore => {
self.expand_zero_or_more(&repetition.ast, next_state_id)
self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::Range(RepetitionRange::Exactly(count)) => {
self.expand_count(&repetition.ast, count, next_state_id)
self.expand_count(&repetition.ast, count, next_state_id, case_insensitive)
}
RepetitionKind::Range(RepetitionRange::AtLeast(min)) => {
if self.expand_zero_or_more(&repetition.ast, next_state_id)? {
self.expand_count(&repetition.ast, min, next_state_id)
if self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)? {
self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)
} else {
Ok(false)
}
}
RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => {
let mut result = self.expand_count(&repetition.ast, min, next_state_id)?;
let mut result =
self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)?;
for _ in min..max {
if result {
next_state_id = self.nfa.last_state_id();
}
if self.expand_zero_or_one(&repetition.ast, next_state_id)? {
if self.expand_zero_or_one(
&repetition.ast,
next_state_id,
case_insensitive,
)? {
result = true;
}
}
Ok(result)
}
},
Ast::Group(group) => self.expand_regex(&group.ast, next_state_id),
Ast::Group(group) => self.expand_regex(&group.ast, next_state_id, case_insensitive),
Ast::Alternation(alternation) => {
let mut alternative_state_ids = Vec::new();
for ast in alternation.asts.iter() {
if self.expand_regex(&ast, next_state_id)? {
if self.expand_regex(&ast, next_state_id, case_insensitive)? {
alternative_state_ids.push(self.nfa.last_state_id());
} else {
alternative_state_ids.push(next_state_id);
@ -304,7 +348,7 @@ impl NfaBuilder {
Ast::Concat(concat) => {
let mut result = false;
for ast in concat.asts.iter().rev() {
if self.expand_regex(&ast, next_state_id)? {
if self.expand_regex(&ast, next_state_id, case_insensitive)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
@ -335,13 +379,18 @@ impl NfaBuilder {
}
}
fn expand_one_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
fn expand_one_or_more(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
self.nfa.states.push(NfaState::Accept {
variable_index: 0,
precedence: 0,
}); // Placeholder for split
let split_state_id = self.nfa.last_state_id();
if self.expand_regex(&ast, split_state_id)? {
if self.expand_regex(&ast, split_state_id, case_insensitive)? {
self.nfa.states[split_state_id as usize] =
NfaState::Split(self.nfa.last_state_id(), next_state_id);
Ok(true)
@ -351,8 +400,13 @@ impl NfaBuilder {
}
}
fn expand_zero_or_one(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
if self.expand_regex(ast, next_state_id)? {
fn expand_zero_or_one(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
self.push_split(next_state_id);
Ok(true)
} else {
@ -360,8 +414,13 @@ impl NfaBuilder {
}
}
fn expand_zero_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
if self.expand_one_or_more(&ast, next_state_id)? {
fn expand_zero_or_more(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
if self.expand_one_or_more(&ast, next_state_id, case_insensitive)? {
self.push_split(next_state_id);
Ok(true)
} else {
@ -369,10 +428,16 @@ impl NfaBuilder {
}
}
fn expand_count(&mut self, ast: &Ast, count: u32, mut next_state_id: u32) -> Result<bool> {
fn expand_count(
&mut self,
ast: &Ast,
count: u32,
mut next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
let mut result = false;
for _ in 0..count {
if self.expand_regex(ast, next_state_id)? {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
@ -475,7 +540,9 @@ impl NfaBuilder {
.add_char(' ')
.add_char('\t')
.add_char('\r')
.add_char('\n'),
.add_char('\n')
.add_char('\x0B')
.add_char('\x0C'),
ClassPerlKind::Word => CharacterSet::empty()
.add_char('_')
.add_range('A', 'Z')
@ -563,7 +630,7 @@ mod tests {
let table = [
// regex with sequences and alternatives
Row {
rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?")],
rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?", "")],
separators: vec![],
examples: vec![
("ade1", Some((0, "ade"))),
@ -574,13 +641,13 @@ mod tests {
},
// regex with repeats
Row {
rules: vec![Rule::pattern("a*")],
rules: vec![Rule::pattern("a*", "")],
separators: vec![],
examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))],
},
// regex with repeats in sequences
Row {
rules: vec![Rule::pattern("a((bc)+|(de)*)f")],
rules: vec![Rule::pattern("a((bc)+|(de)*)f", "")],
separators: vec![],
examples: vec![
("af1", Some((0, "af"))),
@ -591,13 +658,13 @@ mod tests {
},
// regex with character ranges
Row {
rules: vec![Rule::pattern("[a-fA-F0-9]+")],
rules: vec![Rule::pattern("[a-fA-F0-9]+", "")],
separators: vec![],
examples: vec![("A1ff0.", Some((0, "A1ff0")))],
},
// regex with perl character classes
Row {
rules: vec![Rule::pattern("\\w\\d\\s")],
rules: vec![Rule::pattern("\\w\\d\\s", "")],
separators: vec![],
examples: vec![("_0 ", Some((0, "_0 ")))],
},
@ -611,7 +678,7 @@ mod tests {
Row {
rules: vec![Rule::repeat(Rule::seq(vec![
Rule::string("{"),
Rule::pattern("[a-f]+"),
Rule::pattern("[a-f]+", ""),
Rule::string("}"),
]))],
separators: vec![],
@ -624,9 +691,9 @@ mod tests {
// longest match rule
Row {
rules: vec![
Rule::pattern("a|bc"),
Rule::pattern("aa"),
Rule::pattern("bcd"),
Rule::pattern("a|bc", ""),
Rule::pattern("aa", ""),
Rule::pattern("bcd", ""),
],
separators: vec![],
examples: vec![
@ -640,7 +707,7 @@ mod tests {
},
// regex with an alternative including the empty string
Row {
rules: vec![Rule::pattern("a(b|)+c")],
rules: vec![Rule::pattern("a(b|)+c", "")],
separators: vec![],
examples: vec![
("ac.", Some((0, "ac"))),
@ -650,8 +717,8 @@ mod tests {
},
// separators
Row {
rules: vec![Rule::pattern("[a-f]+")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
rules: vec![Rule::pattern("[a-f]+", "")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")],
examples: vec![
(" a", Some((0, "a"))),
(" \nb", Some((0, "b"))),
@ -662,11 +729,11 @@ mod tests {
// shorter tokens with higher precedence
Row {
rules: vec![
Rule::prec(Precedence::Integer(2), Rule::pattern("abc")),
Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e")),
Rule::pattern("[a-e]+"),
Rule::prec(Precedence::Integer(2), Rule::pattern("abc", "")),
Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e", "")),
Rule::pattern("[a-e]+", ""),
],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")],
examples: vec![
("abceef", Some((0, "abc"))),
("abdeef", Some((1, "abde"))),
@ -676,13 +743,13 @@ mod tests {
// immediate tokens with higher precedence
Row {
rules: vec![
Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+")),
Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+", "")),
Rule::immediate_token(Rule::prec(
Precedence::Integer(2),
Rule::pattern("[^ab]+"),
Rule::pattern("[^ab]+", ""),
)),
],
separators: vec![Rule::pattern("\\s")],
separators: vec![Rule::pattern("\\s", "")],
examples: vec![("cccb", Some((1, "ccc")))],
},
Row {
@ -704,7 +771,7 @@ mod tests {
// nested choices within sequences
Row {
rules: vec![Rule::seq(vec![
Rule::pattern("[0-9]+"),
Rule::pattern("[0-9]+", ""),
Rule::choice(vec![
Rule::Blank,
Rule::choice(vec![Rule::seq(vec![
@ -713,7 +780,7 @@ mod tests {
Rule::Blank,
Rule::choice(vec![Rule::string("+"), Rule::string("-")]),
]),
Rule::pattern("[0-9]+"),
Rule::pattern("[0-9]+", ""),
])]),
]),
])],
@ -730,7 +797,7 @@ mod tests {
},
// nested groups
Row {
rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#)])],
rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#, "")])],
separators: vec![],
examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))],
},
@ -738,11 +805,11 @@ mod tests {
Row {
rules: vec![
// Escaped forward slash (used in JS because '/' is the regex delimiter)
Rule::pattern(r#"\/"#),
Rule::pattern(r#"\/"#, ""),
// Escaped quotes
Rule::pattern(r#"\"\'"#),
Rule::pattern(r#"\"\'"#, ""),
// Quote preceded by a literal backslash
Rule::pattern(r#"[\\']+"#),
Rule::pattern(r#"[\\']+"#, ""),
],
separators: vec![],
examples: vec![
@ -754,8 +821,8 @@ mod tests {
// unicode property escapes
Row {
rules: vec![
Rule::pattern(r#"\p{L}+\P{L}+"#),
Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#),
Rule::pattern(r#"\p{L}+\P{L}+"#, ""),
Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#, ""),
],
separators: vec![],
examples: vec![
@ -765,17 +832,17 @@ mod tests {
},
// unicode property escapes in bracketed sets
Row {
rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#)],
rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#, "")],
separators: vec![],
examples: vec![("abΨ12٣٣, ok", Some((0, "abΨ12٣٣")))],
},
// unicode character escapes
Row {
rules: vec![
Rule::pattern(r#"\u{00dc}"#),
Rule::pattern(r#"\U{000000dd}"#),
Rule::pattern(r#"\u00de"#),
Rule::pattern(r#"\U000000df"#),
Rule::pattern(r#"\u{00dc}"#, ""),
Rule::pattern(r#"\U{000000dd}"#, ""),
Rule::pattern(r#"\u00de"#, ""),
Rule::pattern(r#"\U000000df"#, ""),
],
separators: vec![],
examples: vec![
@ -789,13 +856,13 @@ mod tests {
Row {
rules: vec![
// Un-escaped curly braces
Rule::pattern(r#"u{[0-9a-fA-F]+}"#),
Rule::pattern(r#"u{[0-9a-fA-F]+}"#, ""),
// Already-escaped curly braces
Rule::pattern(r#"\{[ab]{3}\}"#),
Rule::pattern(r#"\{[ab]{3}\}"#, ""),
// Unicode codepoints
Rule::pattern(r#"\u{1000A}"#),
Rule::pattern(r#"\u{1000A}"#, ""),
// Unicode codepoints (lowercase)
Rule::pattern(r#"\u{1000b}"#),
Rule::pattern(r#"\u{1000b}"#, ""),
],
separators: vec![],
examples: vec![
@ -807,7 +874,7 @@ mod tests {
},
// Emojis
Row {
rules: vec![Rule::pattern(r"\p{Emoji}+")],
rules: vec![Rule::pattern(r"\p{Emoji}+", "")],
separators: vec![],
examples: vec![
("🐎", Some((0, "🐎"))),
@ -820,7 +887,7 @@ mod tests {
},
// Intersection
Row {
rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+")],
rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+", "")],
separators: vec![],
examples: vec![
("456", Some((0, "456"))),
@ -833,7 +900,7 @@ mod tests {
},
// Difference
Row {
rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+")],
rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+", "")],
separators: vec![],
examples: vec![
("123", Some((0, "123"))),
@ -846,7 +913,7 @@ mod tests {
},
// Symmetric difference
Row {
rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+")],
rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+", "")],
separators: vec![],
examples: vec![
("123", Some((0, "123"))),
@ -867,7 +934,7 @@ mod tests {
// [6-7]: y y
// [3-9]--[5-7]: y y y y y
// final regex: y y y y y y
rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+")],
rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+", "")],
separators: vec![],
examples: vec![
("01", Some((0, "01"))),

View file

@ -31,7 +31,7 @@ pub(super) fn extract_default_aliases(
for variable in syntax_grammar.variables.iter() {
for production in variable.productions.iter() {
for step in production.steps.iter() {
let mut status = match step.symbol.kind {
let status = match step.symbol.kind {
SymbolType::External => &mut external_status_list[step.symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
@ -63,7 +63,7 @@ pub(super) fn extract_default_aliases(
}
for symbol in syntax_grammar.extra_symbols.iter() {
let mut status = match symbol.kind {
let status = match symbol.kind {
SymbolType::External => &mut external_status_list[symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index],
SymbolType::Terminal => &mut terminal_status_list[symbol.index],

View file

@ -49,7 +49,7 @@ pub(super) fn extract_tokens(
}) = variable.rule
{
if i > 0 && extractor.extracted_usage_counts[index] == 1 {
let mut lexical_variable = &mut lexical_variables[index];
let lexical_variable = &mut lexical_variables[index];
lexical_variable.kind = variable.kind;
lexical_variable.name = variable.name;
symbol_replacer.replacements.insert(i, index);
@ -209,7 +209,7 @@ impl TokenExtractor {
} else {
Rule::Metadata {
params: params.clone(),
rule: Box::new(self.extract_tokens_in_rule((&rule).clone())),
rule: Box::new(self.extract_tokens_in_rule(&rule)),
}
}
}
@ -320,7 +320,7 @@ mod test {
"rule_0",
Rule::repeat(Rule::seq(vec![
Rule::string("a"),
Rule::pattern("b"),
Rule::pattern("b", ""),
Rule::choice(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
@ -331,8 +331,8 @@ mod test {
]),
])),
),
Variable::named("rule_1", Rule::pattern("e")),
Variable::named("rule_2", Rule::pattern("b")),
Variable::named("rule_1", Rule::pattern("e", "")),
Variable::named("rule_2", Rule::pattern("b", "")),
Variable::named(
"rule_3",
Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]),
@ -378,12 +378,12 @@ mod test {
lexical_grammar.variables,
vec![
Variable::anonymous("a", Rule::string("a")),
Variable::auxiliary("rule_0_token1", Rule::pattern("b")),
Variable::auxiliary("rule_0_token1", Rule::pattern("b", "")),
Variable::auxiliary(
"rule_0_token2",
Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),]))
),
Variable::named("rule_1", Rule::pattern("e")),
Variable::named("rule_1", Rule::pattern("e", "")),
]
);
}
@ -411,7 +411,7 @@ mod test {
fn test_extracting_extra_symbols() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::string("x")),
Variable::named("comment", Rule::pattern("//.*")),
Variable::named("comment", Rule::pattern("//.*", "")),
]);
grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)];

View file

@ -203,6 +203,12 @@ pub(super) fn process_inlines(
lexical_grammar.variables[symbol.index].name,
))
}
SymbolType::NonTerminal if symbol.index == 0 => {
return Err(anyhow!(
"Rule `{}` cannot be inlined because it is the first rule",
grammar.variables[symbol.index].name,
))
}
_ => {}
}
}

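The new `process_inlines` arm above rejects inlining the grammar's first rule (symbol index 0), the rule that roots every parse tree. A toy model of the guard (hypothetical function name, error message taken from the diff):

```rust
fn check_inlinable(symbol_index: usize, rule_name: &str) -> Result<(), String> {
    if symbol_index == 0 {
        // Mirrors the anyhow! message added in the diff.
        return Err(format!(
            "Rule `{rule_name}` cannot be inlined because it is the first rule"
        ));
    }
    Ok(())
}

fn main() {
    assert!(check_inlinable(0, "source_file").is_err());
    assert!(check_inlinable(3, "expression").is_ok());
}
```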
View file

@ -129,6 +129,7 @@ impl Generator {
}
self.add_lex_modes_list();
self.add_parse_table();
if !self.syntax_grammar.external_tokens.is_empty() {
self.add_external_token_enum();
@ -136,7 +137,6 @@ impl Generator {
self.add_external_scanner_states_list();
}
self.add_parse_table();
self.add_parser_export();
self.buffer
@ -152,49 +152,51 @@ impl Generator {
self.symbol_ids[&Symbol::end()].clone(),
);
self.symbol_map = self
.parse_table
.symbols
.iter()
.map(|symbol| {
let mut mapping = symbol;
self.symbol_map = HashMap::new();
// There can be multiple symbols in the grammar that have the same name and kind,
// due to simple aliases. When that happens, ensure that they map to the same
// public-facing symbol. If one of the symbols is not aliased, choose that one
// to be the public-facing symbol. Otherwise, pick the symbol with the lowest
// numeric value.
if let Some(alias) = self.default_aliases.get(symbol) {
let kind = alias.kind();
for other_symbol in &self.parse_table.symbols {
if let Some(other_alias) = self.default_aliases.get(other_symbol) {
if other_symbol < mapping && other_alias == alias {
mapping = other_symbol;
for symbol in self.parse_table.symbols.iter() {
let mut mapping = symbol;
// There can be multiple symbols in the grammar that have the same name and kind,
// due to simple aliases. When that happens, ensure that they map to the same
// public-facing symbol. If one of the symbols is not aliased, choose that one
// to be the public-facing symbol. Otherwise, pick the symbol with the lowest
// numeric value.
if let Some(alias) = self.default_aliases.get(symbol) {
let kind = alias.kind();
for other_symbol in &self.parse_table.symbols {
if let Some(other_alias) = self.default_aliases.get(other_symbol) {
if other_symbol < mapping && other_alias == alias {
mapping = other_symbol;
}
} else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
mapping = other_symbol;
break;
}
}
}
// Two anonymous tokens with different flags but the same string value
// should be represented with the same symbol in the public API. Examples:
// * "<" and token(prec(1, "<"))
// * "(" and token.immediate("(")
else if symbol.is_terminal() {
let metadata = self.metadata_for_symbol(*symbol);
for other_symbol in &self.parse_table.symbols {
let other_metadata = self.metadata_for_symbol(*other_symbol);
if other_metadata == metadata {
if let Some(mapped) = self.symbol_map.get(other_symbol) {
if mapped == symbol {
break;
}
} else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
mapping = other_symbol;
break;
}
}
}
// Two anonymous tokens with different flags but the same string value
// should be represented with the same symbol in the public API. Examples:
// * "<" and token(prec(1, "<"))
// * "(" and token.immediate("(")
else if symbol.is_terminal() {
let metadata = self.metadata_for_symbol(*symbol);
for other_symbol in &self.parse_table.symbols {
let other_metadata = self.metadata_for_symbol(*other_symbol);
if other_metadata == metadata {
mapping = other_symbol;
break;
}
mapping = other_symbol;
break;
}
}
}
(*symbol, *mapping)
})
.collect();
self.symbol_map.insert(*symbol, *mapping);
}
for production_info in &self.parse_table.production_infos {
// Build a list of all field names
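The rewritten loop above builds `symbol_map` incrementally (a plain `for` loop with `insert`, replacing `map().collect()`), so each iteration can consult the mappings already made. A toy model of the collapsing rule it implements, with integer ids standing in for `Symbol`:

```rust
use std::collections::HashMap;

// Symbols that render with the same metadata all map to the first symbol
// carrying that metadata, so duplicates share one public-facing symbol.
fn build_symbol_map(symbols: &[(usize, &str)]) -> HashMap<usize, usize> {
    let mut map = HashMap::new();
    for &(id, name) in symbols {
        let mapping = symbols
            .iter()
            .find(|&&(_, other)| other == name)
            .map_or(id, |&(other_id, _)| other_id);
        map.insert(id, mapping);
    }
    map
}

fn main() {
    // Two tokens spell "<" (e.g. "<" and token(prec(1, "<"))):
    let map = build_symbol_map(&[(1, "<"), (2, "identifier"), (3, "<")]);
    assert_eq!(map[&3], 1); // the duplicate collapses onto symbol 1
    assert_eq!(map[&2], 2);
}
```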
@ -254,7 +256,7 @@ impl Generator {
}
fn add_includes(&mut self) {
add_line!(self, "#include <tree_sitter/parser.h>");
add_line!(self, "#include \"tree_sitter/parser.h\"");
add_line!(self, "");
}
@ -336,7 +338,7 @@ impl Generator {
}
fn add_symbol_enum(&mut self) {
add_line!(self, "enum {{");
add_line!(self, "enum ts_symbol_identifiers {{");
indent!(self);
self.symbol_order.insert(Symbol::end(), 0);
let mut i = 1;
@ -408,7 +410,7 @@ impl Generator {
}
fn add_field_name_enum(&mut self) {
add_line!(self, "enum {{");
add_line!(self, "enum ts_field_identifiers {{");
indent!(self);
for (i, field_name) in self.field_names.iter().enumerate() {
add_line!(self, "{} = {},", self.field_id(field_name), i + 1);
@ -764,7 +766,6 @@ impl Generator {
indent!(self);
add_line!(self, "START_LEXER();");
add_line!(self, "eof = lexer->eof(lexer);");
add_line!(self, "switch (state) {{");
indent!(self);
@ -879,14 +880,23 @@ impl Generator {
add!(self, " ||{}", line_break);
}
if range.end == range.start {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "lookahead == ");
self.add_character(range.start);
} else if range.end as u32 == range.start as u32 + 1 {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "lookahead == ");
self.add_character(range.start);
add!(self, " ||{}lookahead == ", line_break);
self.add_character(range.end);
} else {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "(");
self.add_character(range.start);
add!(self, " <= lookahead && lookahead <= ");
@ -1016,7 +1026,7 @@ impl Generator {
}
fn add_external_token_enum(&mut self) {
add_line!(self, "enum {{");
add_line!(self, "enum ts_external_scanner_symbol_identifiers {{");
indent!(self);
for i in 0..self.syntax_grammar.external_tokens.len() {
add_line!(
@ -1525,54 +1535,93 @@ impl Generator {
fn sanitize_identifier(&self, name: &str) -> String {
let mut result = String::with_capacity(name.len());
for c in name.chars() {
if ('a' <= c && c <= 'z')
|| ('A' <= c && c <= 'Z')
|| ('0' <= c && c <= '9')
|| c == '_'
{
if c.is_ascii_alphanumeric() || c == '_' {
result.push(c);
} else {
let replacement = match c {
'~' => "TILDE",
'`' => "BQUOTE",
'!' => "BANG",
'@' => "AT",
'#' => "POUND",
'$' => "DOLLAR",
'%' => "PERCENT",
'^' => "CARET",
'&' => "AMP",
'*' => "STAR",
'(' => "LPAREN",
')' => "RPAREN",
'-' => "DASH",
'+' => "PLUS",
'=' => "EQ",
'{' => "LBRACE",
'}' => "RBRACE",
'[' => "LBRACK",
']' => "RBRACK",
'\\' => "BSLASH",
'|' => "PIPE",
':' => "COLON",
';' => "SEMI",
'"' => "DQUOTE",
'\'' => "SQUOTE",
'<' => "LT",
'>' => "GT",
',' => "COMMA",
'.' => "DOT",
'?' => "QMARK",
'/' => "SLASH",
'\n' => "LF",
'\r' => "CR",
'\t' => "TAB",
_ => continue,
};
if !result.is_empty() && !result.ends_with("_") {
result.push('_');
'special_chars: {
let replacement = match c {
' ' if name.len() == 1 => "SPACE",
'~' => "TILDE",
'`' => "BQUOTE",
'!' => "BANG",
'@' => "AT",
'#' => "POUND",
'$' => "DOLLAR",
'%' => "PERCENT",
'^' => "CARET",
'&' => "AMP",
'*' => "STAR",
'(' => "LPAREN",
')' => "RPAREN",
'-' => "DASH",
'+' => "PLUS",
'=' => "EQ",
'{' => "LBRACE",
'}' => "RBRACE",
'[' => "LBRACK",
']' => "RBRACK",
'\\' => "BSLASH",
'|' => "PIPE",
':' => "COLON",
';' => "SEMI",
'"' => "DQUOTE",
'\'' => "SQUOTE",
'<' => "LT",
'>' => "GT",
',' => "COMMA",
'.' => "DOT",
'?' => "QMARK",
'/' => "SLASH",
'\n' => "LF",
'\r' => "CR",
'\t' => "TAB",
'\0' => "NULL",
'\u{0001}' => "SOH",
'\u{0002}' => "STX",
'\u{0003}' => "ETX",
'\u{0004}' => "EOT",
'\u{0005}' => "ENQ",
'\u{0006}' => "ACK",
'\u{0007}' => "BEL",
'\u{0008}' => "BS",
'\u{000b}' => "VTAB",
'\u{000c}' => "FF",
'\u{000e}' => "SO",
'\u{000f}' => "SI",
'\u{0010}' => "DLE",
'\u{0011}' => "DC1",
'\u{0012}' => "DC2",
'\u{0013}' => "DC3",
'\u{0014}' => "DC4",
'\u{0015}' => "NAK",
'\u{0016}' => "SYN",
'\u{0017}' => "ETB",
'\u{0018}' => "CAN",
'\u{0019}' => "EM",
'\u{001a}' => "SUB",
'\u{001b}' => "ESC",
'\u{001c}' => "FS",
'\u{001d}' => "GS",
'\u{001e}' => "RS",
'\u{001f}' => "US",
'\u{007F}' => "DEL",
'\u{FEFF}' => "BOM",
'\u{0080}'..='\u{FFFF}' => {
result.push_str(&format!("u{:04x}", c as u32));
break 'special_chars;
}
'\u{10000}'..='\u{10FFFF}' => {
result.push_str(&format!("U{:08x}", c as u32));
break 'special_chars;
}
'0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(),
' ' => break 'special_chars,
};
if !result.is_empty() && !result.ends_with("_") {
result.push('_');
}
result += replacement;
}
result += replacement;
}
}
result
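Beyond the extended punctuation table, `sanitize_identifier` now names every ASCII control character (NUL through US, plus DEL and the BOM) and hex-encodes anything non-ASCII, so every token yields a valid C identifier. The two hex arms in isolation (standalone sketch using the same format strings as above):

```rust
fn encode_codepoint(c: char) -> String {
    match c {
        // Basic Multilingual Plane characters become "uXXXX"
        '\u{0080}'..='\u{FFFF}' => format!("u{:04x}", c as u32),
        // Astral-plane characters become "UXXXXXXXX"
        '\u{10000}'..='\u{10FFFF}' => format!("U{:08x}", c as u32),
        _ => c.to_string(),
    }
}

fn main() {
    assert_eq!(encode_codepoint('Ü'), "u00dc");
    assert_eq!(encode_codepoint('🐎'), "U0001f40e");
}
```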
@ -1585,10 +1634,19 @@ impl Generator {
'\"' => result += "\\\"",
'?' => result += "\\?",
'\\' => result += "\\\\",
'\u{0007}' => result += "\\a",
'\u{0008}' => result += "\\b",
'\u{000b}' => result += "\\v",
'\u{000c}' => result += "\\f",
'\n' => result += "\\n",
'\r' => result += "\\r",
'\t' => result += "\\t",
'\0' => result += "\\0",
'\u{0001}'..='\u{001f}' => result += &format!("\\x{:02x}", c as u32),
'\u{007F}'..='\u{FFFF}' => result += &format!("\\u{:04x}", c as u32),
'\u{10000}'..='\u{10FFFF}' => {
result.push_str(&format!("\\U{:08x}", c as u32));
}
_ => result.push(c),
}
}

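The companion change to string-literal escaping: `\a`, `\b`, `\v`, `\f` get their named C escapes, remaining control characters become `\xNN`, and non-ASCII characters become `\uNNNN` or `\UNNNNNNNN`. A quick standalone check of the new arms (sketch, not the crate's function):

```rust
fn escape_for_c(c: char) -> String {
    match c {
        '\u{0007}' => "\\a".into(),
        '\u{0001}'..='\u{001f}' => format!("\\x{:02x}", c as u32),
        '\u{007F}'..='\u{FFFF}' => format!("\\u{:04x}", c as u32),
        '\u{10000}'..='\u{10FFFF}' => format!("\\U{:08x}", c as u32),
        _ => c.to_string(),
    }
}

fn main() {
    assert_eq!(escape_for_c('\u{0002}'), "\\x02");
    assert_eq!(escape_for_c('Ψ'), "\\u03a8");
    assert_eq!(escape_for_c('𝄞'), "\\U0001d11e");
}
```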
View file

@ -56,7 +56,7 @@ pub(crate) struct Symbol {
pub(crate) enum Rule {
Blank,
String(String),
Pattern(String),
Pattern(String, String),
NamedSymbol(String),
Symbol(Symbol),
Choice(Vec<Rule>),
@ -187,8 +187,8 @@ impl Rule {
Rule::String(value.to_string())
}
pub fn pattern(value: &'static str) -> Self {
Rule::Pattern(value.to_string())
pub fn pattern(value: &'static str, flags: &'static str) -> Self {
Rule::Pattern(value.to_string(), flags.to_string())
}
}

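This is the signature change that ripples through all of the test fixtures above: `Rule::Pattern` now carries the pattern's flag string next to its regex source (presumably so flags like `i` on a grammar's `/null/i` survive into generation), with `""` meaning no flags. The shape of the call sites, with the type mocked up here:

```rust
#[derive(Debug, Clone, PartialEq)]
enum Rule {
    Pattern(String, String), // (source, flags)
}

impl Rule {
    fn pattern(value: &str, flags: &str) -> Self {
        Rule::Pattern(value.to_string(), flags.to_string())
    }
}

fn main() {
    let plain = Rule::pattern(r"[0-9]+", "");
    let case_insensitive = Rule::pattern(r"null", "i"); // hypothetical flag use
    assert_ne!(plain, case_insensitive);
}
```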
View file

@ -1,4 +1,3 @@
use super::util;
use ansi_term::Color;
use anyhow::Result;
use lazy_static::lazy_static;
@ -281,7 +280,7 @@ fn style_to_css(style: ansi_term::Style) -> String {
fn write_color(buffer: &mut String, color: Color) {
if let Color::RGB(r, g, b) = &color {
write!(buffer, "color: #{:x?}{:x?}{:x?}", r, g, b).unwrap()
write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap()
} else {
write!(
buffer,
@ -349,7 +348,7 @@ pub fn ansi(
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
loader.highlight_config_for_injection_string(string)
loader.highlight_config_for_injection_string(string, config.apply_all_captures)
})?;
let mut style_stack = vec![theme.default_style().ansi];
@ -385,17 +384,17 @@ pub fn html(
config: &HighlightConfiguration,
quiet: bool,
print_time: bool,
cancellation_flag: Option<&AtomicUsize>,
) -> Result<()> {
use std::io::Write;
let stdout = io::stdout();
let mut stdout = stdout.lock();
let time = Instant::now();
let cancellation_flag = util::cancel_on_stdin();
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(config, source, Some(&cancellation_flag), |string| {
loader.highlight_config_for_injection_string(string)
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
loader.highlight_config_for_injection_string(string, config.apply_all_captures)
})?;
let mut renderer = HtmlRenderer::new();
@ -448,7 +447,7 @@ mod tests {
env::set_var("COLORTERM", "");
parse_style(&mut style, Value::String(DARK_CYAN.to_string()));
assert_eq!(style.ansi.foreground, Some(Color::Fixed(36)));
assert_eq!(style.css, Some("style=\'color: #0af87\'".to_string()));
assert_eq!(style.css, Some("style=\'color: #00af87\'".to_string()));
// junglegreen is not an ANSI color and is preserved when the terminal supports it
env::set_var("COLORTERM", "truecolor");

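The `write_color` fix above is worth spelling out: `{:x?}` debug-prints each channel with no zero padding, so `rgb(0, 175, 135)` serialized as the five-digit `#0af87`; that is also why the expected CSS in the test changes from `#0af87` to `#00af87`. In isolation:

```rust
fn main() {
    let (r, g, b) = (0u8, 175u8, 135u8); // the dark-cyan color from the test
    assert_eq!(format!("#{:x?}{:x?}{:x?}", r, g, b), "#0af87"); // old, broken
    assert_eq!(format!("#{r:02x}{g:02x}{b:02x}"), "#00af87"); // fixed
}
```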
View file

@ -1,3 +1,5 @@
#![doc = include_str!("../README.md")]
pub mod generate;
pub mod highlight;
pub mod logger;
@ -14,3 +16,7 @@ pub mod wasm;
#[cfg(test)]
mod tests;
// To run compile fail tests
#[cfg(doctest)]
mod tests;

View file

@ -1,12 +1,14 @@
use anyhow::{anyhow, Context, Result};
use anyhow::{anyhow, Context, Error, Result};
use clap::{App, AppSettings, Arg, SubCommand};
use glob::glob;
use std::path::Path;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::{env, fs, u64};
use tree_sitter::{Parser, WasmStore};
use tree_sitter::{ffi, Parser, Point, WasmStore};
use tree_sitter_cli::{
generate, highlight, logger, parse, playground, query, tags, test, test_highlight, test_tags,
util, wasm,
generate, highlight, logger,
parse::{self, ParseFileOptions, ParseOutput},
playground, query, tags, test, test_highlight, test_tags, util, wasm,
};
use tree_sitter_config::Config;
use tree_sitter_highlight::Highlighter;
@ -82,6 +84,9 @@ fn run() -> Result<()> {
let wasm_arg = Arg::with_name("wasm")
.long("wasm")
.help("compile parsers to wasm instead of native dynamic libraries");
let apply_all_captures_arg = Arg::with_name("apply-all-captures")
.help("Apply all captures to highlights")
.long("apply-all-captures");
let matches = App::new("tree-sitter")
.author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
@ -113,13 +118,33 @@ fn run() -> Result<()> {
)),
)
.arg(Arg::with_name("no-bindings").long("no-bindings"))
.arg(
Arg::with_name("build")
.long("build")
.short("b")
.help("Compile all defined languages in the current dir"),
)
.arg(&debug_build_arg)
.arg(
Arg::with_name("libdir")
.long("libdir")
.takes_value(true)
.value_name("path"),
)
.arg(
Arg::with_name("report-states-for-rule")
.long("report-states-for-rule")
.value_name("rule-name")
.takes_value(true),
)
.arg(Arg::with_name("no-minimize").long("no-minimize")),
.arg(
Arg::with_name("js-runtime")
.long("js-runtime")
.takes_value(true)
.value_name("executable")
.env("TREE_SITTER_JS_RUNTIME")
.help("Use a JavaScript runtime other than node"),
),
)
.subcommand(
SubCommand::with_name("parse")
@ -132,7 +157,8 @@ fn run() -> Result<()> {
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(&wasm_arg)
.arg(Arg::with_name("debug-xml").long("xml").short("x"))
.arg(Arg::with_name("output-dot").long("dot"))
.arg(Arg::with_name("output-xml").long("xml").short("x"))
.arg(
Arg::with_name("stat")
.help("Show parsing statistic")
@ -155,6 +181,12 @@ fn run() -> Result<()> {
.takes_value(true)
.multiple(true)
.number_of_values(1),
)
.arg(
Arg::with_name("encoding")
.help("The encoding of the input files")
.long("encoding")
.takes_value(true),
),
)
.subcommand(
@ -167,6 +199,8 @@ fn run() -> Result<()> {
.index(1)
.required(true),
)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(&paths_file_arg)
.arg(&paths_arg.clone().index(2))
.arg(
@ -175,6 +209,12 @@ fn run() -> Result<()> {
.long("byte-range")
.takes_value(true),
)
.arg(
Arg::with_name("row-range")
.help("The range of rows in which the query will be executed")
.long("row-range")
.takes_value(true),
)
.arg(&scope_arg)
.arg(Arg::with_name("captures").long("captures").short("c"))
.arg(Arg::with_name("test").long("test")),
@ -208,7 +248,8 @@ fn run() -> Result<()> {
.arg(&debug_arg)
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(&wasm_arg),
.arg(&wasm_arg)
.arg(&apply_all_captures_arg),
)
.subcommand(
SubCommand::with_name("highlight")
@ -219,11 +260,31 @@ fn run() -> Result<()> {
.long("html")
.short("H"),
)
.arg(
Arg::with_name("check")
.help("Check that highlighting captures conform strictly to standards")
.long("check"),
)
.arg(
Arg::with_name("captures-path")
.help("Path to a file with captures")
.long("captures-path")
.takes_value(true),
)
.arg(
Arg::with_name("query-paths")
.help("Paths to files with queries")
.long("query-paths")
.takes_value(true)
.multiple(true)
.number_of_values(1),
)
.arg(&scope_arg)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(&paths_file_arg)
.arg(&paths_arg),
.arg(&paths_arg)
.arg(&apply_all_captures_arg),
)
.subcommand(
SubCommand::with_name("build-wasm")
@ -279,6 +340,10 @@ fn run() -> Result<()> {
("generate", Some(matches)) => {
let grammar_path = matches.value_of("grammar-path");
let debug_build = matches.is_present("debug-build");
let build = matches.is_present("build");
let libdir = matches.value_of("libdir");
let js_runtime = matches.value_of("js-runtime");
let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| {
if matches.is_present("report-states") {
Some("")
@ -289,16 +354,18 @@ fn run() -> Result<()> {
if matches.is_present("log") {
logger::init();
}
let abi_version =
matches
.value_of("abi-version")
.map_or(DEFAULT_GENERATE_ABI_VERSION, |version| {
if version == "latest" {
tree_sitter::LANGUAGE_VERSION
} else {
version.parse().expect("invalid abi version flag")
}
});
let abi_version = matches.value_of("abi-version").map_or(
Ok::<_, Error>(DEFAULT_GENERATE_ABI_VERSION),
|version| {
Ok(if version == "latest" {
tree_sitter::LANGUAGE_VERSION
} else {
version
.parse()
.with_context(|| "invalid abi version flag")?
})
},
)?;
let generate_bindings = !matches.is_present("no-bindings");
generate::generate_parser_in_directory(
&current_dir,
@ -306,7 +373,15 @@ fn run() -> Result<()> {
abi_version,
generate_bindings,
report_symbol_name,
js_runtime,
)?;
if build {
if let Some(path) = libdir {
loader = loader::Loader::with_parser_lib_path(PathBuf::from(path));
}
loader.use_debug_build(debug_build);
loader.languages_at_path(&current_dir)?;
}
}
("test", Some(matches)) => {
@ -317,6 +392,12 @@ fn run() -> Result<()> {
let filter = matches.value_of("filter");
let wasm = matches.is_present("wasm");
let mut parser = Parser::new();
let apply_all_captures = matches.is_present("apply-all-captures");
if debug {
// For augmenting debug logging in external scanners
env::set_var("TREE_SITTER_DEBUG", "1");
}
loader.use_debug_build(debug_build);
@ -364,7 +445,12 @@ fn run() -> Result<()> {
if let Some(store) = store.take() {
highlighter.parser().set_wasm_store(store).unwrap();
}
test_highlight::test_highlights(&loader, &mut highlighter, &test_highlight_dir)?;
test_highlight::test_highlights(
&loader,
&mut highlighter,
&test_highlight_dir,
apply_all_captures,
)?;
store = highlighter.parser().take_wasm_store();
}
@ -382,14 +468,33 @@ fn run() -> Result<()> {
let debug = matches.is_present("debug");
let debug_graph = matches.is_present("debug-graph");
let debug_build = matches.is_present("debug-build");
let debug_xml = matches.is_present("debug-xml");
let quiet = matches.is_present("quiet");
let output = if matches.is_present("output-dot") {
ParseOutput::Dot
} else if matches.is_present("output-xml") {
ParseOutput::Xml
} else if matches.is_present("quiet") {
ParseOutput::Quiet
} else {
ParseOutput::Normal
};
let encoding =
matches
.values_of("encoding")
.map_or(Ok(None), |mut e| match e.next() {
Some("utf16") => Ok(Some(ffi::TSInputEncodingUTF16)),
Some("utf8") => Ok(Some(ffi::TSInputEncodingUTF8)),
Some(_) => Err(anyhow!("Invalid encoding. Expected one of: utf8, utf16")),
None => Ok(None),
})?;
let time = matches.is_present("time");
let wasm = matches.is_present("wasm");
let edits = matches
.values_of("edits")
.map_or(Vec::new(), |e| e.collect());
let cancellation_flag = util::cancel_on_stdin();
let cancellation_flag = util::cancel_on_signal();
let mut parser = Parser::new();
if debug {
@ -430,19 +535,21 @@ fn run() -> Result<()> {
.set_language(language)
.context("incompatible language")?;
let this_file_errored = parse::parse_file_at_path(
&mut parser,
let opts = ParseFileOptions {
language,
path,
&edits,
edits: &edits,
max_path_length,
quiet,
time,
output,
print_time: time,
timeout,
debug,
debug_graph,
debug_xml,
Some(&cancellation_flag),
)?;
cancellation_flag: Some(&cancellation_flag),
encoding,
};
let this_file_errored = parse::parse_file_at_path(&mut parser, opts)?;
if should_track_stats {
stats.total_parses += 1;
@ -465,6 +572,8 @@ fn run() -> Result<()> {
("query", Some(matches)) => {
let ordered_captures = matches.values_of("captures").is_some();
let quiet = matches.values_of("quiet").is_some();
let time = matches.values_of("time").is_some();
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let loader_config = config.get()?;
loader.find_all_languages(&loader_config)?;
@ -474,9 +583,17 @@ fn run() -> Result<()> {
matches.value_of("scope"),
)?;
let query_path = Path::new(matches.value_of("query-path").unwrap());
let range = matches.value_of("byte-range").map(|br| {
let r: Vec<&str> = br.split(":").collect();
r[0].parse().unwrap()..r[1].parse().unwrap()
let byte_range = matches.value_of("byte-range").and_then(|arg| {
let mut parts = arg.split(":");
let start = parts.next()?.parse().ok()?;
let end = parts.next().unwrap().parse().ok()?;
Some(start..end)
});
let point_range = matches.value_of("row-range").and_then(|arg| {
let mut parts = arg.split(":");
let start = parts.next()?.parse().ok()?;
let end = parts.next().unwrap().parse().ok()?;
Some(Point::new(start, 0)..Point::new(end, 0))
});
let should_test = matches.is_present("test");
query::query_files_at_paths(
@ -484,8 +601,11 @@ fn run() -> Result<()> {
paths,
query_path,
ordered_captures,
range,
byte_range,
point_range,
should_test,
quiet,
time,
)?;
}
@ -511,13 +631,15 @@ fn run() -> Result<()> {
let time = matches.is_present("time");
let quiet = matches.is_present("quiet");
let html_mode = quiet || matches.is_present("html");
let should_check = matches.is_present("check");
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let apply_all_captures = matches.is_present("apply-all-captures");
if html_mode && !quiet {
println!("{}", highlight::HTML_HEADER);
}
let cancellation_flag = util::cancel_on_stdin();
let cancellation_flag = util::cancel_on_signal();
let mut lang = None;
if let Some(scope) = matches.value_of("scope") {
@ -527,6 +649,15 @@ fn run() -> Result<()> {
}
}
let query_paths = matches.values_of("query-paths").map_or(None, |e| {
Some(
e.collect::<Vec<_>>()
.into_iter()
.map(|s| s.to_string())
.collect::<Vec<_>>(),
)
});
for path in paths {
let path = Path::new(&path);
let (language, language_config) = match lang {
@ -540,7 +671,45 @@ fn run() -> Result<()> {
},
};
if let Some(highlight_config) = language_config.highlight_config(language)? {
if let Some(highlight_config) = language_config.highlight_config(
language,
apply_all_captures,
query_paths.as_deref(),
)? {
if should_check {
let names = if let Some(path) = matches.value_of("captures-path") {
let path = Path::new(path);
let file = fs::read_to_string(path)?;
let capture_names = file
.lines()
.filter_map(|line| {
if line.trim().is_empty() || line.trim().starts_with(';') {
return None;
}
line.split(';').next().map(|s| s.trim().trim_matches('"'))
})
.collect::<HashSet<_>>();
highlight_config.nonconformant_capture_names(&capture_names)
} else {
highlight_config.nonconformant_capture_names(&HashSet::new())
};
if names.is_empty() {
eprintln!("All highlight captures conform to standards.");
} else {
eprintln!(
"Non-standard highlight {} detected:",
if names.len() > 1 {
"captures"
} else {
"capture"
}
);
for name in names {
eprintln!("* {}", name);
}
}
}
let source = fs::read(path)?;
if html_mode {
highlight::html(
@ -550,6 +719,7 @@ fn run() -> Result<()> {
highlight_config,
quiet,
time,
Some(&cancellation_flag),
)?;
} else {
highlight::ansi(
@ -582,7 +752,7 @@ fn run() -> Result<()> {
("playground", Some(matches)) => {
let open_in_browser = !matches.is_present("quiet");
playground::serve(&current_dir, open_in_browser);
playground::serve(&current_dir, open_in_browser)?;
}
("dump-languages", Some(_)) => {

View file

@ -5,7 +5,7 @@ use std::path::Path;
use std::sync::atomic::AtomicUsize;
use std::time::Instant;
use std::{fmt, fs, usize};
use tree_sitter::{InputEdit, LogType, Parser, Point, Tree};
use tree_sitter::{ffi, InputEdit, Language, LogType, Parser, Point, Tree};
#[derive(Debug)]
pub struct Edit {
@ -30,36 +30,47 @@ impl fmt::Display for Stats {
}
}
pub fn parse_file_at_path(
parser: &mut Parser,
path: &Path,
edits: &Vec<&str>,
max_path_length: usize,
quiet: bool,
print_time: bool,
timeout: u64,
debug: bool,
debug_graph: bool,
debug_xml: bool,
cancellation_flag: Option<&AtomicUsize>,
) -> Result<bool> {
#[derive(Copy, Clone)]
pub enum ParseOutput {
Normal,
Quiet,
Xml,
Dot,
}
pub struct ParseFileOptions<'a> {
pub language: Language,
pub path: &'a Path,
pub edits: &'a [&'a str],
pub max_path_length: usize,
pub output: ParseOutput,
pub print_time: bool,
pub timeout: u64,
pub debug: bool,
pub debug_graph: bool,
pub cancellation_flag: Option<&'a AtomicUsize>,
pub encoding: Option<u32>,
}
pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result<bool> {
let mut _log_session = None;
let mut source_code =
fs::read(path).with_context(|| format!("Error reading source file {:?}", path))?;
parser.set_language(opts.language)?;
let mut source_code = fs::read(opts.path)
.with_context(|| format!("Error reading source file {:?}", opts.path))?;
// If the `--cancel` flag was passed, then cancel the parse
// when the user types a newline.
unsafe { parser.set_cancellation_flag(cancellation_flag) };
unsafe { parser.set_cancellation_flag(opts.cancellation_flag) };
// Set a timeout based on the `--time` flag.
parser.set_timeout_micros(timeout);
parser.set_timeout_micros(opts.timeout);
// Render an HTML graph if `--debug-graph` was passed
if debug_graph {
if opts.debug_graph {
_log_session = Some(util::log_graphs(parser, "log.html")?);
}
// Log to stderr if `--debug` was passed
else if debug {
else if opts.debug {
parser.set_logger(Some(Box::new(|log_type, message| {
if log_type == LogType::Lex {
io::stderr().write(b" ").unwrap();
@ -69,22 +80,44 @@ pub fn parse_file_at_path(
}
let time = Instant::now();
let tree = parser.parse(&source_code, None);
#[inline(always)]
fn is_utf16_bom(bom_bytes: &[u8]) -> bool {
bom_bytes == [0xFF, 0xFE] || bom_bytes == [0xFE, 0xFF]
}
let tree = match opts.encoding {
Some(encoding) if encoding == ffi::TSInputEncodingUTF16 => {
let source_code_utf16 = source_code
.chunks_exact(2)
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
.collect::<Vec<_>>();
parser.parse_utf16(&source_code_utf16, None)
}
None if source_code.len() >= 2 && is_utf16_bom(&source_code[0..2]) => {
let source_code_utf16 = source_code
.chunks_exact(2)
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
.collect::<Vec<_>>();
parser.parse_utf16(&source_code_utf16, None)
}
_ => parser.parse(&source_code, None),
};
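```rust
// Note on the fallback branch above (sketch, not part of the diff): when no
// --encoding is given, a leading FF FE or FE FF byte pair is treated as a
// UTF-16 BOM and the buffer is reassembled into u16 code units for
// parse_utf16. Both BOMs are sniffed, though the diff decodes the pairs as
// little-endian either way.
fn is_utf16_bom(bom_bytes: &[u8]) -> bool {
    bom_bytes == [0xFF, 0xFE] || bom_bytes == [0xFE, 0xFF]
}

fn to_utf16_code_units(bytes: &[u8]) -> Vec<u16> {
    bytes
        .chunks_exact(2)
        .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
        .collect()
}

fn demo() {
    let source = [0xFF, 0xFE, b'h', 0x00, b'i', 0x00]; // "hi" as UTF-16LE with BOM
    assert!(is_utf16_bom(&source[0..2]));
    assert_eq!(to_utf16_code_units(&source[2..]), vec![u16::from(b'h'), u16::from(b'i')]);
}
```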
let stdout = io::stdout();
let mut stdout = stdout.lock();
if let Some(mut tree) = tree {
if debug_graph && !edits.is_empty() {
if opts.debug_graph && !opts.edits.is_empty() {
println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code));
}
for (i, edit) in edits.iter().enumerate() {
for (i, edit) in opts.edits.iter().enumerate() {
let edit = parse_edit_flag(&source_code, edit)?;
perform_edit(&mut tree, &mut source_code, &edit);
perform_edit(&mut tree, &mut source_code, &edit)?;
tree = parser.parse(&source_code, Some(&tree)).unwrap();
if debug_graph {
if opts.debug_graph {
println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code));
}
}
@ -93,7 +126,7 @@ pub fn parse_file_at_path(
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
let mut cursor = tree.walk();
if !quiet {
if matches!(opts.output, ParseOutput::Normal) {
let mut needs_newline = false;
let mut indent_level = 0;
let mut did_visit_children = false;
@ -149,7 +182,7 @@ pub fn parse_file_at_path(
println!("");
}
if debug_xml {
if matches!(opts.output, ParseOutput::Xml) {
let mut needs_newline = false;
let mut indent_level = 0;
let mut did_visit_children = false;
@ -204,6 +237,10 @@ pub fn parse_file_at_path(
println!("");
}
if matches!(opts.output, ParseOutput::Dot) {
util::print_tree_graph(&tree, "log.html").unwrap();
}
let mut first_error = None;
loop {
let node = cursor.node();
@ -221,13 +258,13 @@ pub fn parse_file_at_path(
}
}
if first_error.is_some() || print_time {
if first_error.is_some() || opts.print_time {
write!(
&mut stdout,
"{:width$}\t{} ms",
path.to_str().unwrap(),
opts.path.to_str().unwrap(),
duration_ms,
width = max_path_length
width = opts.max_path_length
)?;
if let Some(node) = first_error {
let start = node.start_position();
@ -256,29 +293,29 @@ pub fn parse_file_at_path(
}
return Ok(first_error.is_some());
} else if print_time {
} else if opts.print_time {
let duration = time.elapsed();
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
writeln!(
&mut stdout,
"{:width$}\t{} ms (timed out)",
path.to_str().unwrap(),
opts.path.to_str().unwrap(),
duration_ms,
width = max_path_length
width = opts.max_path_length
)?;
}
Ok(false)
}
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputEdit {
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result<InputEdit> {
let start_byte = edit.position;
let old_end_byte = edit.position + edit.deleted_length;
let new_end_byte = edit.position + edit.inserted_text.len();
let start_position = position_for_offset(input, start_byte);
let old_end_position = position_for_offset(input, old_end_byte);
let start_position = position_for_offset(input, start_byte)?;
let old_end_position = position_for_offset(input, old_end_byte)?;
input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned());
let new_end_position = position_for_offset(input, new_end_byte);
let new_end_position = position_for_offset(input, new_end_byte)?;
let edit = InputEdit {
start_byte,
old_end_byte,
@ -288,7 +325,7 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputE
new_end_position,
};
tree.edit(&edit);
edit
Ok(edit)
}
fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
@ -317,7 +354,7 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
let row = usize::from_str_radix(row, 10).map_err(|_| error())?;
let column = parts.next().ok_or_else(error)?;
let column = usize::from_str_radix(column, 10).map_err(|_| error())?;
offset_for_position(source_code, Point { row, column })
offset_for_position(source_code, Point { row, column })?
} else {
usize::from_str_radix(position, 10).map_err(|_| error())?
};
@ -332,31 +369,48 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
})
}
fn offset_for_position(input: &Vec<u8>, position: Point) -> usize {
let mut current_position = Point { row: 0, column: 0 };
for (i, c) in input.iter().enumerate() {
if *c as char == '\n' {
current_position.row += 1;
current_position.column = 0;
} else {
current_position.column += 1;
}
if current_position > position {
return i;
pub fn offset_for_position(input: &[u8], position: Point) -> Result<usize> {
let mut row = 0;
let mut offset = 0;
let mut iter = memchr::memchr_iter(b'\n', input);
loop {
if let Some(pos) = iter.next() {
if row < position.row {
row += 1;
offset = pos;
continue;
}
}
offset += 1;
break;
}
return input.len();
if position.row - row > 0 {
return Err(anyhow!("Failed to address a row: {}", position.row));
}
if let Some(pos) = iter.next() {
if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) {
return Err(anyhow!("Failed to address a column: {}", position.column));
};
} else if input.len() - offset < position.column {
return Err(anyhow!("Failed to address a column over the end"));
}
Ok(offset + position.column)
}
fn position_for_offset(input: &Vec<u8>, offset: usize) -> Point {
let mut result = Point { row: 0, column: 0 };
for c in &input[0..offset] {
if *c as char == '\n' {
result.row += 1;
result.column = 0;
} else {
result.column += 1;
}
pub fn position_for_offset(input: &[u8], offset: usize) -> Result<Point> {
if offset > input.len() {
return Err(anyhow!("Failed to address an offset: {offset}"));
}
result
let mut result = Point { row: 0, column: 0 };
let mut last = 0;
for pos in memchr::memchr_iter(b'\n', &input[..offset]) {
result.row += 1;
last = pos;
}
result.column = if result.row > 0 {
offset - last - 1
} else {
offset
};
Ok(result)
}

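The `memchr` rewrites above replace byte-by-byte scans: `position_for_offset` now counts the newlines before `offset` to get the row, and measures the column from the last newline found (or from the start of input on row zero). A quick check of that arithmetic (`memchr` is already a CLI dependency):

```rust
fn position_for_offset(input: &[u8], offset: usize) -> (usize, usize) {
    let mut row = 0;
    let mut last = 0;
    for pos in memchr::memchr_iter(b'\n', &input[..offset]) {
        row += 1;
        last = pos;
    }
    let column = if row > 0 { offset - last - 1 } else { offset };
    (row, column)
}

fn main() {
    let text = b"ab\ncd\nef";
    assert_eq!(position_for_offset(text, 0), (0, 0));
    assert_eq!(position_for_offset(text, 4), (1, 1)); // 'd'
    assert_eq!(position_for_offset(text, 7), (2, 1)); // 'f'
}
```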
View file

@ -3,8 +3,8 @@
<title>tree-sitter THE_LANGUAGE_NAME</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/codemirror/5.45.0/codemirror.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.18.0/clusterize.min.css">
<link rel="icon" type="image/png" href="http://tree-sitter.github.io/tree-sitter/assets/images/favicon-32x32.png" sizes="32x32" />
<link rel="icon" type="image/png" href="http://tree-sitter.github.io/tree-sitter/assets/images/favicon-16x16.png" sizes="16x16" />
<link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-32x32.png" sizes="32x32" />
<link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-16x16.png" sizes="16x16" />
</head>
<body>

View file

@ -1,4 +1,5 @@
use super::wasm;
use anyhow::{anyhow, Context, Result};
use std::{
borrow::Cow,
env, fs,
@ -7,12 +8,11 @@ use std::{
str::{self, FromStr as _},
};
use tiny_http::{Header, Response, Server};
use webbrowser;
macro_rules! optional_resource {
($name: tt, $path: tt) => {
#[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
fn $name(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
} else {
@ -21,7 +21,7 @@ macro_rules! optional_resource {
}
#[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
fn $name(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
} else {
@ -35,7 +35,7 @@ optional_resource!(get_playground_js, "docs/assets/js/playground.js");
optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js");
optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm");
fn get_main_html(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
fn get_main_html(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap())
} else {
@ -43,23 +43,10 @@ fn get_main_html(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
}
}
pub fn serve(grammar_path: &Path, open_in_browser: bool) {
let port = env::var("TREE_SITTER_PLAYGROUND_PORT")
.map(|v| v.parse::<u16>().expect("Invalid port specification"))
.unwrap_or_else(
|_| get_available_port().expect(
"Couldn't find an available port, try providing a port number via the TREE_SITTER_PLAYGROUND_PORT \
environment variable"
)
);
let addr = format!(
"{}:{}",
env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or("127.0.0.1".to_owned()),
port
);
pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> {
let server = get_server()?;
let (grammar_name, language_wasm) = wasm::load_language_wasm_file(&grammar_path).unwrap();
let server = Server::http(&addr).expect("Failed to start web server");
let url = format!("http://{}", addr);
let url = format!("http://{}", server.server_addr());
println!("Started playground on: {}", url);
if open_in_browser {
if let Err(_) = webbrowser::open(&url) {
@ -68,13 +55,13 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) {
}
let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok();
let main_html = str::from_utf8(&get_main_html(&tree_sitter_dir))
let main_html = str::from_utf8(&get_main_html(tree_sitter_dir.as_ref()))
.unwrap()
.replace("THE_LANGUAGE_NAME", &grammar_name)
.into_bytes();
let playground_js = get_playground_js(&tree_sitter_dir);
let lib_js = get_lib_js(&tree_sitter_dir);
let lib_wasm = get_lib_wasm(&tree_sitter_dir);
let playground_js = get_playground_js(tree_sitter_dir.as_ref());
let lib_js = get_lib_js(tree_sitter_dir.as_ref());
let lib_wasm = get_lib_wasm(tree_sitter_dir.as_ref());
let html_header = Header::from_str("Content-Type: text/html").unwrap();
let js_header = Header::from_str("Content-Type: application/javascript").unwrap();
@ -107,8 +94,12 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) {
}
_ => response(b"Not found", &html_header).with_status_code(404),
};
request.respond(res).expect("Failed to write HTTP response");
request
.respond(res)
.with_context(|| "Failed to write HTTP response")?;
}
Ok(())
}
fn redirect<'a>(url: &'a str) -> Response<&'a [u8]> {
@ -123,10 +114,30 @@ fn response<'a>(data: &'a [u8], header: &Header) -> Response<&'a [u8]> {
.with_header(header.clone())
}
fn get_available_port() -> Option<u16> {
(8000..12000).find(port_is_available)
fn get_server() -> Result<Server> {
let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or("127.0.0.1".to_owned());
let port = env::var("TREE_SITTER_PLAYGROUND_PORT")
.map(|v| {
v.parse::<u16>()
.with_context(|| "Invalid port specification")
})
.ok();
let listener = match port {
Some(port) => {
bind_to(&*addr, port?).with_context(|| "Failed to bind to the specified port")?
}
None => get_listener_on_available_port(&*addr)
.with_context(|| "Failed to find a free port to bind to it")?,
};
let server =
Server::from_listener(listener, None).map_err(|_| anyhow!("Failed to start web server"))?;
Ok(server)
}
fn port_is_available(port: &u16) -> bool {
TcpListener::bind(("127.0.0.1", *port)).is_ok()
fn get_listener_on_available_port(addr: &str) -> Option<TcpListener> {
(8000..12000).find_map(|port| bind_to(addr, port))
}
fn bind_to(addr: &str, port: u16) -> Option<TcpListener> {
TcpListener::bind(format!("{addr}:{port}")).ok()
}

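The playground server setup is now fallible end to end: an explicit `TREE_SITTER_PLAYGROUND_PORT` is honored (with its parse error surfaced instead of panicking), otherwise ports 8000..12000 are scanned for the first one that binds. The selection logic in isolation (sketch with the env handling elided):

```rust
use std::net::TcpListener;

fn bind_to(addr: &str, port: u16) -> Option<TcpListener> {
    TcpListener::bind(format!("{addr}:{port}")).ok()
}

fn get_listener(addr: &str, explicit_port: Option<u16>) -> Option<TcpListener> {
    match explicit_port {
        Some(port) => bind_to(addr, port), // fail fast on a taken explicit port
        None => (8000..12000).find_map(|port| bind_to(addr, port)),
    }
}

fn main() {
    let listener = get_listener("127.0.0.1", None).expect("no free port in 8000..12000");
    println!("bound to {}", listener.local_addr().unwrap());
}
```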
View file

@ -5,16 +5,20 @@ use std::{
io::{self, Write},
ops::Range,
path::Path,
time::Instant,
};
use tree_sitter::{Language, Parser, Query, QueryCursor};
use tree_sitter::{Language, Parser, Point, Query, QueryCursor};
pub fn query_files_at_paths(
language: Language,
paths: Vec<String>,
query_path: &Path,
ordered_captures: bool,
range: Option<Range<usize>>,
byte_range: Option<Range<usize>>,
point_range: Option<Range<Point>>,
should_test: bool,
quiet: bool,
print_time: bool,
) -> Result<()> {
let stdout = io::stdout();
let mut stdout = stdout.lock();
@ -24,9 +28,12 @@ pub fn query_files_at_paths(
let query = Query::new(language, &query_source).with_context(|| "Query compilation failed")?;
let mut query_cursor = QueryCursor::new();
if let Some(range) = range {
if let Some(range) = byte_range {
query_cursor.set_byte_range(range);
}
if let Some(range) = point_range {
query_cursor.set_point_range(range);
}
let mut parser = Parser::new();
parser.set_language(language)?;
@ -40,22 +47,25 @@ pub fn query_files_at_paths(
fs::read(&path).with_context(|| format!("Error reading source file {:?}", path))?;
let tree = parser.parse(&source_code, None).unwrap();
let start = Instant::now();
if ordered_captures {
for (mat, capture_index) in
query_cursor.captures(&query, tree.root_node(), source_code.as_slice())
{
let capture = mat.captures[capture_index];
let capture_name = &query.capture_names()[capture.index as usize];
writeln!(
&mut stdout,
" pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`",
mat.pattern_index,
capture.index,
capture_name,
capture.node.start_position(),
capture.node.end_position(),
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
if !quiet {
writeln!(
&mut stdout,
" pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`",
mat.pattern_index,
capture.index,
capture_name,
capture.node.start_position(),
capture.node.end_position(),
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
}
results.push(query_testing::CaptureInfo {
name: capture_name.to_string(),
start: capture.node.start_position(),
@ -64,27 +74,31 @@ pub fn query_files_at_paths(
}
} else {
for m in query_cursor.matches(&query, tree.root_node(), source_code.as_slice()) {
writeln!(&mut stdout, " pattern: {}", m.pattern_index)?;
if !quiet {
writeln!(&mut stdout, " pattern: {}", m.pattern_index)?;
}
for capture in m.captures {
let start = capture.node.start_position();
let end = capture.node.end_position();
let capture_name = &query.capture_names()[capture.index as usize];
if end.row == start.row {
writeln!(
&mut stdout,
" capture: {} - {}, start: {}, end: {}, text: `{}`",
capture.index,
capture_name,
start,
end,
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
} else {
writeln!(
&mut stdout,
" capture: {}, start: {}, end: {}",
capture_name, start, end,
)?;
if !quiet {
if end.row == start.row {
writeln!(
&mut stdout,
" capture: {} - {}, start: {}, end: {}, text: `{}`",
capture.index,
capture_name,
start,
end,
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
} else {
writeln!(
&mut stdout,
" capture: {}, start: {}, end: {}",
capture_name, start, end,
)?;
}
}
results.push(query_testing::CaptureInfo {
name: capture_name.to_string(),
@ -103,6 +117,9 @@ pub fn query_files_at_paths(
if should_test {
query_testing::assert_expected_captures(results, path, &mut parser, language)?
}
if print_time {
writeln!(&mut stdout, "{:?}", start.elapsed())?;
}
}
Ok(())

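For reference, the two cursor restrictions wired up above come straight from the `tree-sitter` 0.20 bindings: `set_byte_range` takes a byte `Range<usize>`, and the new `--row-range` flag maps to `set_point_range` with zero columns. Minimal usage:

```rust
use tree_sitter::{Point, QueryCursor};

fn main() {
    let mut cursor = QueryCursor::new();
    cursor.set_byte_range(10..200); // --byte-range 10:200
    cursor.set_point_range(Point::new(2, 0)..Point::new(5, 0)); // --row-range 2:5
}
```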
View file

@ -18,9 +18,20 @@ pub struct CaptureInfo {
#[derive(Debug, PartialEq, Eq)]
pub struct Assertion {
pub position: Point,
pub negative: bool,
pub expected_capture_name: String,
}
impl Assertion {
pub fn new(row: usize, col: usize, negative: bool, expected_capture_name: String) -> Self {
Self {
position: Point::new(row, col),
negative,
expected_capture_name,
}
}
}
/// Parse the given source code, finding all of the comments that contain
/// highlighting assertions. Return a vector of (position, expected highlight name)
/// pairs.
@ -54,6 +65,7 @@ pub fn parse_position_comments(
// to its own column.
let mut has_left_caret = false;
let mut has_arrow = false;
let mut negative = false;
let mut arrow_end = 0;
for (i, c) in text.char_indices() {
arrow_end = i + 1;
@ -69,6 +81,19 @@ pub fn parse_position_comments(
has_left_caret = c == '<';
}
// find any ! after arrows but before capture name
if has_arrow {
for (i, c) in text[arrow_end..].char_indices() {
if c == '!' {
negative = true;
arrow_end += i + 1;
break;
} else if !c.is_whitespace() {
break;
}
}
}
// If the comment node contains an arrow and a highlight name, record the
// highlight name and the position.
if let (true, Some(mat)) =
@ -76,7 +101,8 @@ pub fn parse_position_comments(
{
assertion_ranges.push((node.start_position(), node.end_position()));
result.push(Assertion {
position: position,
position,
negative,
expected_capture_name: mat.as_str().to_string(),
});
}

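The `negative` field added above enables `!`-prefixed assertions in highlight and tag test comments: a `!` between the arrow and the capture name asserts that the position is *not* captured with that name. The pass/fail rule used later in `iterate_assertions` and `test_tag` reduces to one comparison:

```rust
// `(actual == expected) == !negative`, exactly as written in the diff.
fn assertion_passes(actual: &str, expected: &str, negative: bool) -> bool {
    (actual == expected) == !negative
}

fn main() {
    assert!(assertion_passes("keyword", "keyword", false)); // ^ keyword
    assert!(assertion_passes("variable", "function", true)); // ^ !function
    assert!(!assertion_passes("function", "function", true)); // negative fails on a match
}
```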
View file

@ -23,7 +23,7 @@ pub fn generate_tags(
}
let mut context = TagsContext::new();
let cancellation_flag = util::cancel_on_stdin();
let cancellation_flag = util::cancel_on_signal();
let stdout = io::stdout();
let mut stdout = stdout.lock();

View file

@ -16,11 +16,11 @@ use walkdir::WalkDir;
lazy_static! {
static ref HEADER_REGEX: ByteRegex =
ByteRegexBuilder::new(r"^===+(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>([^=\r\n][^\r\n]*\r?\n)+)===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n")
ByteRegexBuilder::new(r"^(?P<equals>(?:=+){3,})(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>([^=\r\n][^\r\n]*\r?\n)+)===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n")
.multi_line(true)
.build()
.unwrap();
static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n")
static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^(?P<hyphens>(?:-+){3,})(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n")
.multi_line(true)
.build()
.unwrap();
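Both corpus regexes now capture their delimiter runs (`equals`, `hyphens`) so delimiter widths can survive a `--update` round trip. A simplified variant of the header pattern (single-line names only, assumed here for brevity), showing the captured width:

```rust
use regex::RegexBuilder;

fn main() {
    let header = RegexBuilder::new(r"^(?P<equals>=+)(?P<name>[^=\r\n][^\r\n]*)?\r?\n")
        .multi_line(true)
        .build()
        .unwrap();
    let corpus = "===============\nThe first test\n===============\n";
    let caps = header.captures(corpus).unwrap();
    assert_eq!(caps.name("equals").unwrap().as_str().len(), 15);
}
```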
@ -40,6 +40,8 @@ pub enum TestEntry {
name: String,
input: Vec<u8>,
output: String,
header_delim_len: usize,
divider_delim_len: usize,
has_fields: bool,
},
}
@ -177,13 +179,15 @@ fn run_tests(
mut indent_level: i32,
failures: &mut Vec<(String, String, String)>,
update: bool,
corrected_entries: &mut Vec<(String, String, String)>,
corrected_entries: &mut Vec<(String, String, String, usize, usize)>,
) -> Result<()> {
match test_entry {
TestEntry::Example {
name,
input,
output,
header_delim_len,
divider_delim_len,
has_fields,
} => {
if let Some(filter) = filter {
@ -191,7 +195,13 @@ fn run_tests(
if update {
let input = String::from_utf8(input).unwrap();
let output = format_sexp(&output);
corrected_entries.push((name, input, output));
corrected_entries.push((
name,
input,
output,
header_delim_len,
divider_delim_len,
));
}
return Ok(());
}
@ -201,21 +211,31 @@ fn run_tests(
if !has_fields {
actual = strip_sexp_fields(actual);
}
for _ in 0..indent_level {
print!(" ");
}
print!("{}", " ".repeat(indent_level as usize));
if actual == output {
println!("{}", Colour::Green.paint(&name));
if update {
let input = String::from_utf8(input).unwrap();
let output = format_sexp(&output);
corrected_entries.push((name, input, output));
corrected_entries.push((
name,
input,
output,
header_delim_len,
divider_delim_len,
));
}
} else {
if update {
let input = String::from_utf8(input).unwrap();
let output = format_sexp(&actual);
corrected_entries.push((name.clone(), input, output));
corrected_entries.push((
name.clone(),
input,
output,
header_delim_len,
divider_delim_len,
));
println!("{}", Colour::Blue.paint(&name));
} else {
println!("{}", Colour::Red.paint(&name));
@ -229,9 +249,7 @@ fn run_tests(
file_path,
} => {
if indent_level > 0 {
for _ in 0..indent_level {
print!(" ");
}
print!("{}", " ".repeat(indent_level as usize));
println!("{}:", name);
}
@ -312,27 +330,32 @@ fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String {
formatted
}
fn write_tests(file_path: &Path, corrected_entries: &Vec<(String, String, String)>) -> Result<()> {
fn write_tests(
file_path: &Path,
corrected_entries: &Vec<(String, String, String, usize, usize)>,
) -> Result<()> {
let mut buffer = fs::File::create(file_path)?;
write_tests_to_buffer(&mut buffer, corrected_entries)
}
fn write_tests_to_buffer(
buffer: &mut impl Write,
corrected_entries: &Vec<(String, String, String)>,
corrected_entries: &Vec<(String, String, String, usize, usize)>,
) -> Result<()> {
for (i, (name, input, output)) in corrected_entries.iter().enumerate() {
for (i, (name, input, output, header_delim_len, divider_delim_len)) in
corrected_entries.iter().enumerate()
{
if i > 0 {
write!(buffer, "\n")?;
}
write!(
buffer,
"{}\n{}\n{}\n{}\n{}\n\n{}\n",
"=".repeat(80),
"=".repeat(*header_delim_len),
name,
"=".repeat(80),
"=".repeat(*header_delim_len),
input,
"-".repeat(80),
"-".repeat(*divider_delim_len),
output.trim()
)?;
}
@ -351,9 +374,18 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
let entry = entry?;
let hidden = entry.file_name().to_str().unwrap_or("").starts_with(".");
if !hidden {
children.push(parse_tests(&entry.path())?);
children.push(entry.path());
}
}
children.sort_by(|a, b| {
a.file_name()
.unwrap_or_default()
.cmp(&b.file_name().unwrap_or_default())
});
let children = children
.iter()
.map(|path| parse_tests(path))
.collect::<io::Result<Vec<TestEntry>>>()?;
Ok(TestEntry::Group {
name,
children,
@ -387,6 +419,7 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
// Ignore any matches whose suffix does not match the first header
// suffix in the file.
let header_matches = HEADER_REGEX.captures_iter(&bytes).filter_map(|c| {
let header_delim_len = c.name("equals").map(|n| n.as_bytes().len()).unwrap_or(80);
let suffix1 = c
.name("suffix1")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
@ -398,13 +431,17 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
let test_name = c
.name("test_name")
.map(|c| String::from_utf8_lossy(c.as_bytes()).trim_end().to_string());
Some((header_range, test_name))
let res = Some((header_delim_len, header_range, test_name));
res
} else {
None
}
});
for (header_range, test_name) in header_matches.chain(Some((bytes.len()..bytes.len(), None))) {
let mut prev_header_len = 80;
for (header_delim_len, header_range, test_name) in
header_matches.chain(Some((80, bytes.len()..bytes.len(), None)))
{
// Find the longest line of dashes following each test description. That line
// separates the input from the expected output. Ignore any matches whose suffix
// does not match the first suffix in the file.
@ -412,19 +449,25 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
let divider_range = DIVIDER_REGEX
.captures_iter(&bytes[prev_header_end..header_range.start])
.filter_map(|m| {
let divider_delim_len =
m.name("hyphens").map(|m| m.as_bytes().len()).unwrap_or(80);
let suffix = m
.name("suffix")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
if suffix == first_suffix {
let range = m.get(0).unwrap().range();
Some((prev_header_end + range.start)..(prev_header_end + range.end))
let res = Some((
divider_delim_len,
(prev_header_end + range.start)..(prev_header_end + range.end),
));
res
} else {
None
}
})
.max_by_key(|range| range.len());
.max_by_key(|(_, range)| range.len());
if let Some(divider_range) = divider_range {
if let Some((divider_delim_len, divider_range)) = divider_range {
if let Ok(output) = str::from_utf8(&bytes[divider_range.end..header_range.start]) {
let mut input = bytes[prev_header_end..divider_range.start].to_vec();
@ -449,12 +492,15 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
name: prev_name,
input,
output,
header_delim_len: prev_header_len,
divider_delim_len,
has_fields,
});
}
}
}
prev_name = test_name.unwrap_or(String::new());
prev_header_len = header_delim_len;
prev_header_end = header_range.end;
}
TestEntry::Group {
@ -505,12 +551,16 @@ d
name: "The first test".to_string(),
input: "\na b c\n".as_bytes().to_vec(),
output: "(a (b c))".to_string(),
header_delim_len: 15,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "The second test".to_string(),
input: "d".as_bytes().to_vec(),
output: "(d)".to_string(),
header_delim_len: 16,
divider_delim_len: 3,
has_fields: false,
},
],
@ -559,12 +609,16 @@ abc
name: "Code with dashes".to_string(),
input: "abc\n---\ndefg\n----\nhijkl".as_bytes().to_vec(),
output: "(a (b))".to_string(),
header_delim_len: 18,
divider_delim_len: 7,
has_fields: false,
},
TestEntry::Example {
name: "Code ending with dashes".to_string(),
input: "abc\n-----------".as_bytes().to_vec(),
output: "(c (d))".to_string(),
header_delim_len: 25,
divider_delim_len: 19,
has_fields: false,
},
],
@ -608,11 +662,15 @@ abc
"title 1".to_string(),
"input 1".to_string(),
"output 1".to_string(),
80,
80,
),
(
"title 2".to_string(),
"input 2".to_string(),
"output 2".to_string(),
80,
80,
),
];
write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap();
@ -689,18 +747,24 @@ code
name: "sexp with comment".to_string(),
input: "code".as_bytes().to_vec(),
output: "(a (b))".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "sexp with comment between".to_string(),
input: "code".as_bytes().to_vec(),
output: "(a (b))".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "sexp with ';'".to_string(),
input: "code".as_bytes().to_vec(),
output: "(MISSING \";\")".to_string(),
header_delim_len: 25,
divider_delim_len: 3,
has_fields: false,
}
],
@ -773,18 +837,24 @@ NOT A TEST HEADER
name: "First test".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "Second test".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "Test name with = symbol".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
header_delim_len: 25,
divider_delim_len: 3,
has_fields: false,
}
],
@ -828,12 +898,16 @@ code with ----
name: "name\nwith\nnewlines".to_string(),
input: b"a".to_vec(),
output: "(b)".to_string(),
header_delim_len: 15,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "name with === signs".to_string(),
input: b"code with ----".to_vec(),
output: "(d)".to_string(),
header_delim_len: 20,
divider_delim_len: 3,
has_fields: false,
}
]

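Net effect of threading `header_delim_len` and `divider_delim_len` through the parser and writer: `tree-sitter test --update` re-emits each case with the same delimiter widths it was parsed with, instead of normalizing everything to 80 columns (80 remains the fallback for entries with no recorded width). The writer's per-entry format, in isolation:

```rust
fn render_entry(name: &str, input: &str, output: &str, header: usize, divider: usize) -> String {
    // Same format string as write_tests_to_buffer above.
    format!(
        "{}\n{}\n{}\n{}\n{}\n\n{}\n",
        "=".repeat(header),
        name,
        "=".repeat(header),
        input,
        "-".repeat(divider),
        output.trim()
    )
}

fn main() {
    let entry = render_entry("The first test", "a b c", "(a (b c))", 15, 3);
    assert!(entry.starts_with("===============\nThe first test\n===============\n"));
}
```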
View file

@ -42,41 +42,74 @@ pub fn test_highlights(
loader: &Loader,
highlighter: &mut Highlighter,
directory: &Path,
apply_all_captures: bool,
) -> Result<()> {
println!("syntax highlighting:");
test_highlights_indented(loader, highlighter, directory, apply_all_captures, 2)
}
fn test_highlights_indented(
loader: &Loader,
highlighter: &mut Highlighter,
directory: &Path,
apply_all_captures: bool,
indent_level: usize,
) -> Result<()> {
let mut failed = false;
println!("syntax highlighting:");
for highlight_test_file in fs::read_dir(directory)? {
let highlight_test_file = highlight_test_file?;
let test_file_path = highlight_test_file.path();
let test_file_name = highlight_test_file.file_name();
let (language, language_config) = loader
.language_configuration_for_file_name(&test_file_path)?
.ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?;
let highlight_config = language_config
.highlight_config(language)?
.ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?;
match test_highlight(
&loader,
highlighter,
highlight_config,
fs::read(&test_file_path)?.as_slice(),
) {
Ok(assertion_count) => {
println!(
" ✓ {} ({} assertions)",
Colour::Green.paint(test_file_name.to_string_lossy().as_ref()),
assertion_count
);
}
Err(e) => {
println!(
" ✗ {}",
Colour::Red.paint(test_file_name.to_string_lossy().as_ref())
);
println!(" {}", e);
print!(
"{indent:indent_level$}",
indent = "",
indent_level = indent_level * 2
);
if test_file_path.is_dir() && !test_file_path.read_dir()?.next().is_none() {
println!("{}:", test_file_name.into_string().unwrap());
if let Err(_) = test_highlights_indented(
loader,
highlighter,
&test_file_path,
apply_all_captures,
indent_level + 1,
) {
failed = true;
}
} else {
let (language, language_config) = loader
.language_configuration_for_file_name(&test_file_path)?
.ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?;
let highlight_config = language_config
.highlight_config(language, apply_all_captures, None)?
.ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?;
match test_highlight(
&loader,
highlighter,
highlight_config,
fs::read(&test_file_path)?.as_slice(),
) {
Ok(assertion_count) => {
println!(
"✓ {} ({} assertions)",
Colour::Green.paint(test_file_name.to_string_lossy().as_ref()),
assertion_count
);
}
Err(e) => {
println!(
"✗ {}",
Colour::Red.paint(test_file_name.to_string_lossy().as_ref())
);
println!(
"{indent:indent_level$} {e}",
indent = "",
indent_level = indent_level * 2
);
failed = true;
}
}
}
}
@ -94,9 +127,10 @@ pub fn iterate_assertions(
// Iterate through all of the highlighting assertions, checking each one against the
// actual highlights.
let mut i = 0;
let mut actual_highlights = Vec::<&String>::new();
let mut actual_highlights = Vec::new();
for Assertion {
position,
negative,
expected_capture_name: expected_highlight,
} in assertions
{
@ -120,12 +154,13 @@ pub fn iterate_assertions(
break 'highlight_loop;
}
// If the highlight matches the assertion, this test passes. Otherwise,
// If the highlight matches the assertion, or if the highlight doesn't
// match the assertion but it's negative, this test passes. Otherwise,
// add this highlight to the list of actual highlights that span the
// assertion's position, in order to generate an error message in the event
// of a failure.
let highlight_name = &highlight_names[(highlight.2).0];
if *highlight_name == *expected_highlight {
if (*highlight_name == *expected_highlight) == !negative {
passed = true;
break 'highlight_loop;
} else {
@ -165,68 +200,7 @@ pub fn test_highlight(
let assertions =
parse_position_comments(highlighter.parser(), highlight_config.language, source)?;
iterate_assertions(&assertions, &highlights, &highlight_names)?;
// Iterate through all of the highlighting assertions, checking each one against the
// actual highlights.
let mut i = 0;
let mut actual_highlights = Vec::<&String>::new();
for Assertion {
position,
expected_capture_name: expected_highlight,
} in &assertions
{
let mut passed = false;
actual_highlights.clear();
'highlight_loop: loop {
// The assertions are ordered by position, so skip past all of the highlights that
// end at or before this assertion's position.
if let Some(highlight) = highlights.get(i) {
if highlight.1 <= *position {
i += 1;
continue;
}
// Iterate through all of the highlights that start at or before this assertion's,
// position, looking for one that matches the assertion.
let mut j = i;
while let (false, Some(highlight)) = (passed, highlights.get(j)) {
if highlight.0 > *position {
break 'highlight_loop;
}
// If the highlight matches the assertion, this test passes. Otherwise,
// add this highlight to the list of actual highlights that span the
// assertion's position, in order to generate an error message in the event
// of a failure.
let highlight_name = &highlight_names[(highlight.2).0];
if *highlight_name == *expected_highlight {
passed = true;
break 'highlight_loop;
} else {
actual_highlights.push(highlight_name);
}
j += 1;
}
} else {
break;
}
}
if !passed {
return Err(Failure {
row: position.row,
column: position.column,
expected_highlight: expected_highlight.clone(),
actual_highlights: actual_highlights.into_iter().cloned().collect(),
}
.into());
}
}
Ok(assertions.len())
iterate_assertions(&assertions, &highlights, &highlight_names)
}
pub fn get_highlight_positions(
@ -244,7 +218,7 @@ pub fn get_highlight_positions(
let source = String::from_utf8_lossy(source);
let mut char_indices = source.char_indices();
for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| {
loader.highlight_config_for_injection_string(string)
loader.highlight_config_for_injection_string(string, highlight_config.apply_all_captures)
})? {
match event? {
HighlightEvent::HighlightStart(h) => highlight_stack.push(h),

View file

@ -95,6 +95,7 @@ pub fn test_tag(
let mut actual_tags = Vec::<&String>::new();
for Assertion {
position,
negative,
expected_capture_name: expected_tag,
} in &assertions
{
@ -116,7 +117,7 @@ pub fn test_tag(
}
let tag_name = &tag.2;
if *tag_name == *expected_tag {
if (*tag_name == *expected_tag) == !negative {
passed = true;
break 'tag_loop;
} else {
@ -124,6 +125,9 @@ pub fn test_tag(
}
j += 1;
if tag == tags.last().unwrap() {
break 'tag_loop;
}
}
} else {
break;

View file

@ -0,0 +1,279 @@
use super::helpers::fixtures::get_language;
use std::future::Future;
use std::pin::{pin, Pin};
use std::ptr;
use std::task::{self, Context, Poll, RawWaker, RawWakerVTable, Waker};
use tree_sitter::Parser;
#[test]
fn test_node_in_fut() {
let (ret, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let fut_val_fn = || async {
// eprintln!("fut_val_fn: {}", root.child(0).unwrap().kind());
yield_now().await;
root.child(0).unwrap().kind()
};
yield_now().await;
let fut_ref_fn = || async {
// eprintln!("fut_ref_fn: {}", root_ref.child(0).unwrap().kind());
yield_now().await;
root_ref.child(0).unwrap().kind()
};
let f1 = fut_val_fn().await;
let f2 = fut_ref_fn().await;
assert_eq!(f1, f2);
let fut_val = async {
// eprintln!("fut_val: {}", root.child(0).unwrap().kind());
yield_now().await;
root.child(0).unwrap().kind()
};
let fut_ref = async {
// eprintln!("fut_ref: {}", root_ref.child(0).unwrap().kind());
yield_now().await;
root_ref.child(0).unwrap().kind()
};
let f1 = fut_val.await;
let f2 = fut_ref.await;
assert_eq!(f1, f2);
f1
})
.join();
// eprintln!("pended: {pended:?}");
assert_eq!(ret, "comment");
assert_eq!(pended, 5);
}
#[test]
fn test_node_and_cursor_ref_in_fut() {
let (_, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = async {
yield_now().await;
root.to_sexp();
};
yield_now().await;
let fut_ref = async {
yield_now().await;
root_ref.to_sexp();
cursor_ref.goto_first_child();
};
fut_val.await;
fut_ref.await;
cursor_ref.goto_first_child();
})
.join();
assert_eq!(pended, 3);
}
#[test]
fn test_node_and_cursor_ref_in_fut_with_fut_fabrics() {
let (_, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = || async {
yield_now().await;
root.to_sexp();
};
yield_now().await;
let fut_ref = || async move {
yield_now().await;
root_ref.to_sexp();
cursor_ref.goto_first_child();
};
fut_val().await;
fut_val().await;
fut_ref().await;
})
.join();
assert_eq!(pended, 4);
}
#[test]
fn test_node_and_cursor_ref_in_fut_with_inner_spawns() {
let (ret, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = || {
let tree = tree.clone();
async move {
let root = tree.root_node();
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
yield_now().await;
root.to_sexp();
cursor_ref.goto_first_child();
}
};
yield_now().await;
let fut_ref = || {
let tree = tree.clone();
async move {
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
yield_now().await;
root_ref.to_sexp();
cursor_ref.goto_first_child();
}
};
let (_, p1) = tokio_like_spawn(fut_val()).await.unwrap();
let (_, p2) = tokio_like_spawn(fut_ref()).await.unwrap();
cursor_ref.goto_first_child();
fut_val().await;
fut_val().await;
fut_ref().await;
cursor_ref.goto_first_child();
p1 + p2
})
.join();
assert_eq!(pended, 4);
assert_eq!(ret, 2);
}
fn tokio_like_spawn<T>(future: T) -> JoinHandle<(T::Output, usize)>
where
T: Future + Send + 'static,
T::Output: Send + 'static,
{
// No runtime, just noop waker
let waker = noop_waker();
let mut cx = task::Context::from_waker(&waker);
let mut pending = 0;
let mut future = pin!(future);
let ret = loop {
match future.as_mut().poll(&mut cx) {
Poll::Pending => pending += 1,
Poll::Ready(r) => {
// eprintln!("ready, pended: {pending}");
break r;
}
}
};
JoinHandle::new((ret, pending))
}
async fn yield_now() {
struct SimpleYieldNow {
yielded: bool,
}
impl Future for SimpleYieldNow {
type Output = ();
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
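// Eagerly request a wake-up so an executor would poll this future again after `Poll::Pending`.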
cx.waker().clone().wake();
if self.yielded {
return Poll::Ready(());
}
self.yielded = true;
Poll::Pending
}
}
SimpleYieldNow { yielded: false }.await
}
pub fn noop_waker() -> Waker {
const VTABLE: RawWakerVTable = RawWakerVTable::new(
// Cloning just returns a new no-op raw waker
|_| RAW,
// `wake` does nothing
|_| {},
// `wake_by_ref` does nothing
|_| {},
// Dropping does nothing as we don't allocate anything
|_| {},
);
const RAW: RawWaker = RawWaker::new(ptr::null(), &VTABLE);
unsafe { Waker::from_raw(RAW) }
}
struct JoinHandle<T> {
data: Option<T>,
}
impl<T> JoinHandle<T> {
fn new(data: T) -> Self {
Self { data: Some(data) }
}
fn join(&mut self) -> T {
self.data.take().unwrap()
}
}
impl<T: Unpin> Future for JoinHandle<T> {
type Output = std::result::Result<T, ()>;
fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Self::Output> {
let data = self.get_mut().data.take().unwrap();
Poll::Ready(Ok(data))
}
}
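// A minimal usage sketch of the harness above (a hypothetical test, not part of the suite):
// `yield_now` returns `Poll::Pending` exactly once, so the harness reports a single pend.
#[test]
fn test_harness_drives_future_to_completion() {
    let (value, pended) = tokio_like_spawn(async {
        yield_now().await;
        42
    })
    .join();
    assert_eq!(value, 42);
    assert_eq!(pended, 1);
}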

View file

@ -1,7 +1,8 @@
use super::helpers::{
allocations,
edits::{get_random_edit, invert_edit},
fixtures::{fixtures_dir, get_language, get_test_language},
fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
new_seed,
random::Rand,
scope_sequence::ScopeSequence,
EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_ENABLED, LOG_GRAPH_ENABLED,
@ -13,70 +14,81 @@ use crate::{
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
util,
};
use std::fs;
use std::{collections::HashMap, env, fs};
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
use tree_sitter_proc_macro::test_with_seed;
#[test]
fn test_bash_corpus() {
test_language_corpus("bash");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_bash(seed: usize) {
test_language_corpus(
"bash",
seed,
Some(&[
// Fragile tests where edit customization changes
// lead to significant parse tree structure changes.
"bash - corpus - commands - Nested Heredocs",
"bash - corpus - commands - Quoted Heredocs",
"bash - corpus - commands - Heredocs with weird characters",
]),
);
}
#[test]
fn test_c_corpus() {
test_language_corpus("c");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_c(seed: usize) {
test_language_corpus("c", seed, None);
}
#[test]
fn test_cpp_corpus() {
test_language_corpus("cpp");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_cpp(seed: usize) {
test_language_corpus("cpp", seed, None);
}
#[test]
fn test_embedded_template_corpus() {
test_language_corpus("embedded-template");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_embedded_template(seed: usize) {
test_language_corpus("embedded-template", seed, None);
}
#[test]
fn test_go_corpus() {
test_language_corpus("go");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_go(seed: usize) {
test_language_corpus("go", seed, None);
}
#[test]
fn test_html_corpus() {
test_language_corpus("html");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_html(seed: usize) {
test_language_corpus("html", seed, None);
}
#[test]
fn test_javascript_corpus() {
test_language_corpus("javascript");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_javascript(seed: usize) {
test_language_corpus("javascript", seed, None);
}
#[test]
fn test_json_corpus() {
test_language_corpus("json");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_json(seed: usize) {
test_language_corpus("json", seed, None);
}
#[test]
fn test_php_corpus() {
test_language_corpus("php");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_php(seed: usize) {
test_language_corpus("php", seed, None);
}
#[test]
fn test_python_corpus() {
test_language_corpus("python");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_python(seed: usize) {
test_language_corpus("python", seed, None);
}
#[test]
fn test_ruby_corpus() {
test_language_corpus("ruby");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_ruby(seed: usize) {
test_language_corpus("ruby", seed, None);
}
#[test]
fn test_rust_corpus() {
test_language_corpus("rust");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_rust(seed: usize) {
test_language_corpus("rust", seed, None);
}
fn test_language_corpus(language_name: &str) {
fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option<&[&str]>) {
let grammars_dir = fixtures_dir().join("grammars");
let error_corpus_dir = fixtures_dir().join("error_corpus");
let template_corpus_dir = fixtures_dir().join("template_corpus");
@ -98,10 +110,30 @@ fn test_language_corpus(language_name: &str) {
t
}));
let mut skipped = skipped.map(|x| HashMap::<&str, usize>::from_iter(x.iter().map(|x| (*x, 0))));
let language = get_language(language_name);
let mut failure_count = 0;
for test in tests {
println!(" {} example - {}", language_name, test.name);
let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
if log_seed {
println!(" start seed: {}", start_seed);
}
println!();
for (test_index, test) in tests.iter().enumerate() {
let test_name = format!("{language_name} - {}", test.name);
if let Some(skipped) = skipped.as_mut() {
if let Some(counter) = skipped.get_mut(test_name.as_str()) {
println!(" {test_index}. {test_name} - SKIPPED");
*counter += 1;
continue;
}
}
println!(" {test_index}. {test_name}");
let passed = allocations::record(|| {
let mut log_session = None;
@ -116,10 +148,7 @@ fn test_language_corpus(language_name: &str) {
}
if actual_output != test.output {
println!(
"Incorrect initial parse for {} - {}",
language_name, test.name,
);
println!("Incorrect initial parse for {test_name}");
print_diff_key();
print_diff(&actual_output, &test.output);
println!("");
@ -140,7 +169,7 @@ fn test_language_corpus(language_name: &str) {
drop(parser);
for trial in 0..*ITERATION_COUNT {
let seed = *START_SEED + trial;
let seed = start_seed + trial;
let passed = allocations::record(|| {
let mut rand = Rand::new(seed);
let mut log_session = None;
@ -158,10 +187,21 @@ fn test_language_corpus(language_name: &str) {
for _ in 0..1 + rand.unsigned(*EDIT_COUNT) {
let edit = get_random_edit(&mut rand, &input);
undo_stack.push(invert_edit(&input, &edit));
perform_edit(&mut tree, &mut input, &edit);
perform_edit(&mut tree, &mut input, &edit).unwrap();
}
// println!(" seed: {}", seed);
if log_seed {
println!(" {test_index}.{trial:<2} seed: {}", seed);
}
if dump_edits {
fs::write(
SCRATCH_BASE_DIR
.join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
&input,
)
.unwrap();
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
@ -173,16 +213,13 @@ fn test_language_corpus(language_name: &str) {
// Check that the new tree is consistent.
check_consistent_sizes(&tree2, &input);
if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
println!(
"\nUnexpected scope change in seed {}\n{}\n\n",
seed, message
);
println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
return false;
}
// Undo all of the edits and re-parse again.
while let Some(edit) = undo_stack.pop() {
perform_edit(&mut tree2, &mut input, &edit);
perform_edit(&mut tree2, &mut input, &edit).unwrap();
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
@ -198,10 +235,7 @@ fn test_language_corpus(language_name: &str) {
}
if actual_output != test.output {
println!(
"Incorrect parse for {} - {} - seed {}",
language_name, test.name, seed
);
println!("Incorrect parse for {test_name} - seed {seed}");
print_diff_key();
print_diff(&actual_output, &test.output);
println!("");
@ -211,7 +245,7 @@ fn test_language_corpus(language_name: &str) {
// Check that the edited tree is consistent.
check_consistent_sizes(&tree3, &input);
if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
eprintln!("Unexpected scope change in seed {}\n{}\n\n", seed, message);
println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
return false;
}
@ -228,6 +262,18 @@ fn test_language_corpus(language_name: &str) {
if failure_count > 0 {
panic!("{} {} corpus tests failed", failure_count, language_name);
}
if let Some(skipped) = skipped.as_mut() {
skipped.retain(|_, v| *v == 0);
if !skipped.is_empty() {
println!("Non matchable skip definitions:");
for k in skipped.keys() {
println!(" {k}");
}
panic!("Non matchable skip definitions needs to be removed");
}
}
}
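// A usage sketch of the environment knobs read above (values illustrative):
//   TREE_SITTER_SEED=1234     fix the start seed to reproduce a failing trial
//   TREE_SITTER_LOG_SEED=1    print the seed used for each trial
//   TREE_SITTER_DUMP_EDITS=1  dump every edited input under target/scratch/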
#[test]
@ -255,7 +301,7 @@ fn test_feature_corpus_files() {
grammar_path = test_path.join("grammar.json");
}
let error_message_path = test_path.join("expected_error.txt");
let grammar_json = generate::load_grammar_file(&grammar_path).unwrap();
let grammar_json = generate::load_grammar_file(&grammar_path, None).unwrap();
let generate_result = generate::generate_parser_for_grammar(&grammar_json);
if error_message_path.exists() {
@ -424,7 +470,12 @@ fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&s
let mut ranges = Vec::new();
let mut ix = 0;
while ix < input.len() {
let Some(mut start_ix) = input[ix..].windows(2).position(|win| win == start.as_bytes()) else { break };
let Some(mut start_ix) = input[ix..]
.windows(2)
.position(|win| win == start.as_bytes())
else {
break;
};
start_ix += ix + start.len();
let end_ix = input[start_ix..]
.windows(2)
@ -492,6 +543,7 @@ fn flatten_tests(test: TestEntry) -> Vec<FlattenedTest> {
input,
output,
has_fields,
..
} => {
if !prefix.is_empty() {
name.insert_str(0, " - ");

View file

@ -0,0 +1,42 @@
// Tests in this mod need to be executed with the UBSAN library enabled:
// ```
// UBSAN_OPTIONS="halt_on_error=1" \
// CFLAGS="-fsanitize=undefined" \
// RUSTFLAGS="-lubsan" \
// cargo test --target $(rustc -vV | sed -nr 's/^host: //p') -- --test-threads 1
// ```
use super::helpers::query_helpers::assert_query_matches;
use crate::tests::helpers::fixtures::get_language;
use indoc::indoc;
use tree_sitter::Query;
#[test]
fn issue_2162_out_of_bound() {
let language = get_language("java");
assert!(Query::new(language, "(package_declaration _ (_) @name _)").is_ok());
}
#[test]
fn issue_2107_first_child_group_anchor_had_no_effect() {
let language = get_language("c");
let source_code = indoc! {r#"
void fun(int a, char b, int c) { };
"#};
let query = indoc! {r#"
(parameter_list
.
(
(parameter_declaration) @constant
(#match? @constant "^int")
)
)
"#};
let query = Query::new(language, query).unwrap();
assert_query_matches(
language,
&query,
source_code,
&[(0, vec![("constant", "int a")])],
);
}

View file

@ -2,7 +2,7 @@ use std::{
collections::HashMap,
os::raw::c_void,
sync::{
atomic::{AtomicBool, AtomicU64, Ordering::SeqCst},
atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
Mutex,
},
};
@ -25,8 +25,8 @@ unsafe impl Sync for Allocation {}
#[derive(Default)]
struct AllocationRecorder {
enabled: AtomicBool,
allocation_count: AtomicU64,
outstanding_allocations: Mutex<HashMap<Allocation, u64>>,
allocation_count: AtomicUsize,
outstanding_allocations: Mutex<HashMap<Allocation, usize>>,
}
thread_local! {
@ -83,6 +83,9 @@ fn record_alloc(ptr: *mut c_void) {
}
fn record_dealloc(ptr: *mut c_void) {
if ptr.is_null() {
panic!("Zero pointer deallocation!");
}
RECORDER.with(|recorder| {
if recorder.enabled.load(SeqCst) {
recorder
@ -107,9 +110,13 @@ unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void
}
unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
record_dealloc(ptr);
let result = realloc(ptr, size);
record_alloc(result);
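// A realloc with a null ptr acts like malloc; a dealloc is only recorded when the block actually moved.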
if ptr.is_null() {
record_alloc(result);
} else if ptr != result {
record_dealloc(ptr);
record_alloc(result);
}
result
}

View file

@ -1,11 +1,46 @@
lazy_static! {
static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned();
static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars");
static ref SCRATCH_DIR: PathBuf = {
pub static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned();
pub static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
pub static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
pub static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars");
pub static ref SCRATCH_BASE_DIR: PathBuf = {
let result = ROOT_DIR.join("target").join("scratch");
fs::create_dir_all(&result).unwrap();
result
};
pub static ref SCRATCH_DIR: PathBuf = {
// https://doc.rust-lang.org/reference/conditional-compilation.html
let vendor = if cfg!(target_vendor = "apple") {
"apple"
} else if cfg!(target_vendor = "fortanix") {
"fortanix"
} else if cfg!(target_vendor = "pc") {
"pc"
} else {
"unknown"
};
let env = if cfg!(target_env = "gnu") {
"gnu"
} else if cfg!(target_env = "msvc") {
"msvc"
} else if cfg!(target_env = "musl") {
"musl"
} else if cfg!(target_env = "sgx") {
"sgx"
} else {
"unknown"
};
let endian = if cfg!(target_endian = "little") {
"little"
} else if cfg!(target_endian = "big") {
"big"
} else {
"unknown"
};
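// e.g. "x86_64-linux-unknown-gnu-little" (illustrative; depends on the host target)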
let machine = format!("{}-{}-{}-{}-{}", std::env::consts::ARCH, std::env::consts::OS, vendor, env, endian);
let result = SCRATCH_BASE_DIR.join(machine);
fs::create_dir_all(&result).unwrap();
result
};
}

View file

@ -1,6 +1,6 @@
use lazy_static::lazy_static;
use std::fs;
use std::path::{Path, PathBuf};
use std::{env, fs};
use tree_sitter::Language;
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_loader::Loader;
@ -9,7 +9,13 @@ use tree_sitter_tags::TagsConfiguration;
include!("./dirs.rs");
lazy_static! {
static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.join("lib"));
static ref TEST_LOADER: Loader = {
let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() {
loader.use_debug_build(true);
}
loader
};
}
pub fn test_loader<'a>() -> &'a Loader {
@ -46,9 +52,11 @@ pub fn get_highlight_config(
let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new());
let mut result = HighlightConfiguration::new(
language,
language_name,
&highlights_query,
&injections_query,
&locals_query,
false,
)
.unwrap();
result.configure(&highlight_names);
@ -63,11 +71,7 @@ pub fn get_tags_config(language_name: &str) -> TagsConfiguration {
TagsConfiguration::new(language, &tags_query, &locals_query).unwrap()
}
pub fn get_test_language(
name: &str,
parser_code: &str,
scanner_src_path: Option<&Path>,
) -> Language {
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
let src_dir = SCRATCH_DIR.join("src").join(name);
fs::create_dir_all(&src_dir).unwrap();
@ -76,11 +80,16 @@ pub fn get_test_language(
fs::write(&parser_path, parser_code).unwrap();
}
if let Some(scanner_src_path) = scanner_src_path {
let scanner_code = fs::read_to_string(&scanner_src_path).unwrap();
let scanner_path = src_dir.join("scanner.c");
if !fs::read_to_string(&scanner_path).map_or(false, |content| content == scanner_code) {
fs::write(&scanner_path, scanner_code).unwrap();
if let Some(path) = path {
let scanner_path = path.join("scanner.c");
if scanner_path.exists() {
let scanner_code = fs::read_to_string(&scanner_path).unwrap();
let scanner_copy_path = src_dir.join("scanner.c");
if !fs::read_to_string(&scanner_copy_path)
.map_or(false, |content| content == scanner_code)
{
fs::write(&scanner_copy_path, scanner_code).unwrap();
}
}
}

View file

@ -6,7 +6,8 @@ pub(super) mod random;
pub(super) mod scope_sequence;
use lazy_static::lazy_static;
use std::{env, time, usize};
use rand::Rng;
use std::env;
lazy_static! {
pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok();
@ -16,11 +17,7 @@ lazy_static! {
}
lazy_static! {
pub static ref START_SEED: usize =
int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| time::SystemTime::now()
.duration_since(time::UNIX_EPOCH)
.unwrap()
.as_secs() as usize,);
pub static ref START_SEED: usize = new_seed();
pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3);
pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10);
}
@ -28,3 +25,10 @@ lazy_static! {
fn int_env_var(name: &'static str) -> Option<usize> {
env::var(name).ok().and_then(|e| e.parse().ok())
}
pub(crate) fn new_seed() -> usize {
int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
let mut rng = rand::thread_rng();
rng.gen::<usize>()
})
}

View file

@ -1,6 +1,8 @@
use rand::prelude::Rng;
use std::{cmp::Ordering, fmt::Write, ops::Range};
use tree_sitter::{Node, Point, Tree, TreeCursor};
use tree_sitter::{
Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, Tree, TreeCursor,
};
#[derive(Debug)]
pub struct Pattern {
@ -304,3 +306,56 @@ fn compare_depth_first(a: Node, b: Node) -> Ordering {
let b = b.byte_range();
a.start.cmp(&b.start).then_with(|| b.end.cmp(&a.end))
}
pub fn assert_query_matches(
language: Language,
query: &Query,
source: &str,
expected: &[(usize, Vec<(&str, &str)>)],
) {
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), source.as_bytes());
pretty_assertions::assert_eq!(collect_matches(matches, &query, source), expected);
pretty_assertions::assert_eq!(cursor.did_exceed_match_limit(), false);
}
pub fn collect_matches<'a>(
matches: impl Iterator<Item = QueryMatch<'a, 'a>>,
query: &'a Query,
source: &'a str,
) -> Vec<(usize, Vec<(&'a str, &'a str)>)> {
matches
.map(|m| {
(
m.pattern_index,
format_captures(m.captures.iter().cloned(), query, source),
)
})
.collect()
}
pub fn collect_captures<'a>(
captures: impl Iterator<Item = (QueryMatch<'a, 'a>, usize)>,
query: &'a Query,
source: &'a str,
) -> Vec<(&'a str, &'a str)> {
format_captures(captures.map(|(m, i)| m.captures[i]), query, source)
}
fn format_captures<'a>(
captures: impl Iterator<Item = QueryCapture<'a>>,
query: &'a Query,
source: &'a str,
) -> Vec<(&'a str, &'a str)> {
captures
.map(|capture| {
(
query.capture_names()[capture.index as usize],
capture.node.utf8_text(source.as_bytes()).unwrap(),
)
})
.collect()
}

View file

@ -24,6 +24,7 @@ lazy_static! {
get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES);
static ref HIGHLIGHT_NAMES: Vec<String> = [
"attribute",
"boolean",
"carriage-return",
"comment",
"constant",
@ -61,7 +62,7 @@ lazy_static! {
fn test_highlighting_javascript() {
let source = "const a = function(b) { return b + c; }";
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
&to_token_vector(source, &JS_HIGHLIGHT).unwrap(),
&[vec![
("const", vec!["keyword"]),
(" ", vec![]),
@ -71,14 +72,14 @@ fn test_highlighting_javascript() {
(" ", vec![]),
("function", vec!["keyword"]),
("(", vec!["punctuation.bracket"]),
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"]),
(" ", vec![]),
("return", vec!["keyword"]),
(" ", vec![]),
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(" ", vec![]),
("+", vec!["operator"]),
(" ", vec![]),
@ -92,7 +93,7 @@ fn test_highlighting_javascript() {
#[test]
fn test_highlighting_injected_html_in_javascript() {
let source = vec!["const s = html `<div>${a < b}</div>`;"].join("\n");
let source = ["const s = html `<div>${a < b}</div>`;"].join("\n");
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
@ -156,7 +157,7 @@ fn test_highlighting_injected_javascript_in_html_mini() {
#[test]
fn test_highlighting_injected_javascript_in_html() {
let source = vec![
let source = [
"<body>",
" <script>",
" const x = new Thing();",
@ -211,7 +212,7 @@ fn test_highlighting_injected_javascript_in_html() {
#[test]
fn test_highlighting_multiline_nodes_to_html() {
let source = vec![
let source = [
"const SOMETHING = `",
" one ${",
" two()",
@ -235,7 +236,7 @@ fn test_highlighting_multiline_nodes_to_html() {
#[test]
fn test_highlighting_with_local_variable_tracking() {
let source = vec![
let source = [
"module.exports = function a(b) {",
" const module = c;",
" console.log(module, b);",
@ -257,7 +258,7 @@ fn test_highlighting_with_local_variable_tracking() {
(" ", vec![]),
("a", vec!["function"]),
("(", vec!["punctuation.bracket"]),
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"])
@ -284,7 +285,7 @@ fn test_highlighting_with_local_variable_tracking() {
(",", vec!["punctuation.delimiter"]),
(" ", vec![]),
// A parameter, because `b` was defined as a parameter above.
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
],
@ -295,7 +296,7 @@ fn test_highlighting_with_local_variable_tracking() {
#[test]
fn test_highlighting_empty_lines() {
let source = vec![
let source = [
"class A {",
"",
" b(c) {",
@ -313,7 +314,7 @@ fn test_highlighting_empty_lines() {
&[
"<span class=keyword>class</span> <span class=constructor>A</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable.parameter>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
" <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=function>d</span><span class=punctuation.bracket>(</span><span class=variable>e</span><span class=punctuation.bracket>)</span>\n".to_string(),
"\n".to_string(),
@ -329,7 +330,7 @@ fn test_highlighting_carriage_returns() {
let source = "a = \"a\rb\"\r\nb\r";
assert_eq!(
&to_html(&source, &JS_HIGHLIGHT).unwrap(),
&to_html(source, &JS_HIGHLIGHT).unwrap(),
&[
"<span class=variable>a</span> <span class=operator>=</span> <span class=string>&quot;a<span class=carriage-return></span>b&quot;</span>\n",
"<span class=variable>b</span>\n",
@ -339,7 +340,7 @@ fn test_highlighting_carriage_returns() {
#[test]
fn test_highlighting_ejs_with_html_and_javascript() {
let source = vec!["<div><% foo() %></div><script> bar() </script>"].join("\n");
let source = ["<div><% foo() %></div><script> bar() </script>"].join("\n");
assert_eq!(
&to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(),
@ -376,7 +377,7 @@ fn test_highlighting_ejs_with_html_and_javascript() {
fn test_highlighting_javascript_with_jsdoc() {
// Regression test: the middle comment has no highlights. This should not prevent
// later injections from highlighting properly.
let source = vec!["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n");
let source = ["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n");
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
@ -404,7 +405,7 @@ fn test_highlighting_javascript_with_jsdoc() {
#[test]
fn test_highlighting_with_content_children_included() {
let source = vec!["assert!(", " a.b.c() < D::e::<F>()", ");"].join("\n");
let source = ["assert!(", " a.b.c() < D::e::<F>()", ");"].join("\n");
assert_eq!(
&to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(),
@ -482,7 +483,7 @@ fn test_highlighting_cancellation() {
#[test]
fn test_highlighting_via_c_api() {
let highlights = vec![
let highlights = [
"class=tag\0",
"class=function\0",
"class=string\0",
@ -496,68 +497,82 @@ fn test_highlighting_via_c_api() {
.iter()
.map(|h| h.as_bytes().as_ptr() as *const c_char)
.collect::<Vec<_>>();
let highlighter = c::ts_highlighter_new(
&highlight_names[0] as *const *const c_char,
&highlight_attrs[0] as *const *const c_char,
highlights.len() as u32,
);
let highlighter = unsafe {
c::ts_highlighter_new(
&highlight_names[0] as *const *const c_char,
&highlight_attrs[0] as *const *const c_char,
highlights.len() as u32,
)
};
let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");
let js_scope = c_string("source.js");
let js_injection_regex = c_string("^javascript");
let language = get_language("javascript");
let lang_name = c_string("javascript");
let queries = get_language_queries_path("javascript");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap();
c::ts_highlighter_add_language(
highlighter,
js_scope.as_ptr(),
js_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
locals_query.as_ptr() as *const c_char,
highlights_query.len() as u32,
injections_query.len() as u32,
locals_query.len() as u32,
);
unsafe {
c::ts_highlighter_add_language(
highlighter,
lang_name.as_ptr(),
js_scope.as_ptr(),
js_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
locals_query.as_ptr() as *const c_char,
highlights_query.len() as u32,
injections_query.len() as u32,
locals_query.len() as u32,
false,
);
}
let html_scope = c_string("text.html.basic");
let html_injection_regex = c_string("^html");
let language = get_language("html");
let lang_name = c_string("html");
let queries = get_language_queries_path("html");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
c::ts_highlighter_add_language(
highlighter,
html_scope.as_ptr(),
html_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
ptr::null(),
highlights_query.len() as u32,
injections_query.len() as u32,
0,
);
unsafe {
c::ts_highlighter_add_language(
highlighter,
lang_name.as_ptr(),
html_scope.as_ptr(),
html_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
ptr::null(),
highlights_query.len() as u32,
injections_query.len() as u32,
0,
false,
);
}
let buffer = c::ts_highlight_buffer_new();
c::ts_highlighter_highlight(
highlighter,
html_scope.as_ptr(),
source_code.as_ptr(),
source_code.as_bytes().len() as u32,
buffer,
ptr::null_mut(),
);
unsafe {
c::ts_highlighter_highlight(
highlighter,
html_scope.as_ptr(),
source_code.as_ptr(),
source_code.as_bytes().len() as u32,
buffer,
ptr::null_mut(),
);
}
let output_bytes = c::ts_highlight_buffer_content(buffer);
let output_line_offsets = c::ts_highlight_buffer_line_offsets(buffer);
let output_len = c::ts_highlight_buffer_len(buffer);
let output_line_count = c::ts_highlight_buffer_line_count(buffer);
let output_bytes = unsafe { c::ts_highlight_buffer_content(buffer) };
let output_line_offsets = unsafe { c::ts_highlight_buffer_line_offsets(buffer) };
let output_len = unsafe { c::ts_highlight_buffer_len(buffer) };
let output_line_count = unsafe { c::ts_highlight_buffer_line_count(buffer) };
let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) };
let output_line_offsets =
@ -583,8 +598,69 @@ fn test_highlighting_via_c_api() {
]
);
c::ts_highlighter_delete(highlighter);
c::ts_highlight_buffer_delete(buffer);
unsafe {
c::ts_highlighter_delete(highlighter);
c::ts_highlight_buffer_delete(buffer);
}
}
#[test]
fn test_highlighting_with_all_captures_applied() {
let source = "fn main(a: u32, b: u32) -> { let c = a + b; }";
let language = get_language("rust");
let highlights_query = indoc::indoc! {"
[
\"fn\"
\"let\"
] @keyword
(identifier) @variable
(function_item name: (identifier) @function)
(parameter pattern: (identifier) @variable.parameter)
(primitive_type) @type.builtin
\"=\" @operator
[ \"->\" \":\" \";\" ] @punctuation.delimiter
[ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket
"};
let mut rust_highlight_reverse =
HighlightConfiguration::new(language, "rust", highlights_query, "", "", true).unwrap();
rust_highlight_reverse.configure(&HIGHLIGHT_NAMES);
assert_eq!(
&to_token_vector(source, &rust_highlight_reverse).unwrap(),
&[[
("fn", vec!["keyword"]),
(" ", vec![]),
("main", vec!["function"]),
("(", vec!["punctuation.bracket"]),
("a", vec!["variable.parameter"]),
(":", vec!["punctuation.delimiter"]),
(" ", vec![]),
("u32", vec!["type.builtin"]),
(", ", vec![]),
("b", vec!["variable.parameter"]),
(":", vec!["punctuation.delimiter"]),
(" ", vec![]),
("u32", vec!["type.builtin"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("->", vec!["punctuation.delimiter"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"]),
(" ", vec![]),
("let", vec!["keyword"]),
(" ", vec![]),
("c", vec!["variable"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("a", vec!["variable"]),
(" + ", vec![]),
("b", vec!["variable"]),
(";", vec!["punctuation.delimiter"]),
(" ", vec![]),
("}", vec!["punctuation.bracket"])
]],
);
}
#[test]
@ -667,20 +743,20 @@ fn to_token_vector<'a>(
}
HighlightEvent::Source { start, end } => {
let s = str::from_utf8(&src[start..end]).unwrap();
for (i, l) in s.split("\n").enumerate() {
for (i, l) in s.split('\n').enumerate() {
let l = l.trim_end_matches('\r');
if i > 0 {
lines.push(line);
line = Vec::new();
}
if l.len() > 0 {
if !l.is_empty() {
line.push((l, highlights.clone()));
}
}
}
}
}
if line.len() > 0 {
if !line.is_empty() {
lines.push(line);
}
Ok(lines)

View file

@ -0,0 +1,95 @@
use super::helpers::fixtures::get_language;
use tree_sitter::Parser;
#[test]
fn test_lookahead_iterator() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
assert_eq!(
next_state,
language.next_state(cursor.node().parse_state(), cursor.node().grammar_id())
);
assert!((next_state as usize) < language.parse_state_count());
assert!(cursor.goto_next_sibling()); // type_identifier
assert_eq!(next_state, cursor.node().parse_state());
assert_eq!(cursor.node().grammar_name(), "identifier");
assert_ne!(cursor.node().grammar_id(), cursor.node().kind_id());
let expected_symbols = ["identifier", "block_comment", "line_comment"];
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
assert_eq!(lookahead.language(), language);
assert!(lookahead.iter_names().eq(expected_symbols));
lookahead.reset_state(next_state);
assert!(lookahead.iter_names().eq(expected_symbols));
lookahead.reset(language, next_state);
assert!(lookahead
.map(|s| language.node_kind_for_id(s).unwrap())
.eq(expected_symbols));
}
#[test]
fn test_lookahead_iterator_modifiable_only_by_mut() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
let _ = lookahead.next();
let mut names = lookahead.iter_names();
let _ = names.next();
}
/// It isn't allowed to use a lookahead iterator via a shared ref:
/// error[E0596]: cannot borrow `lookahead` as mutable, as it is not declared as mutable
/// ```compile_fail
/// use tree_sitter::{Parser, Language};
/// let mut parser = Parser::new();
/// let language = unsafe { Language::from_raw(std::ptr::null()) };
/// let tree = parser.parse("", None).unwrap();
/// let mut cursor = tree.walk();
/// let next_state = cursor.node().next_parse_state();
/// let lookahead = language.lookahead_iterator(next_state).unwrap();
/// let _ = lookahead.next();
/// ```
/// It isn't allowed to use a lookahead names iterator via a shared ref:
/// error[E0596]: cannot borrow `names` as mutable, as it is not declared as mutable
/// ```compile_fail
/// use tree_sitter::{Parser, Language};
/// let mut parser = Parser::new();
/// let language = unsafe { Language::from_raw(std::ptr::null()) };
/// let tree = parser.parse("", None).unwrap();
/// let mut cursor = tree.walk();
/// let next_state = cursor.node().next_parse_state();
/// if let Some(mut lookahead) = language.lookahead_iterator(next_state) {
/// let _ = lookahead.next();
/// let names = lookahead.iter_names();
/// let _ = names.next();
/// }
/// ```
fn _dummy() {}

View file

@ -1,11 +1,16 @@
mod async_context_test;
mod corpus_test;
mod github_issue_test;
mod helpers;
mod highlight_test;
mod language_test;
mod node_test;
mod parser_hang_test;
mod parser_test;
mod pathological_test;
mod query_test;
mod tags_test;
mod test_highlight_test;
mod test_tags_test;
mod text_provider_test;
mod tree_test;

View file

@ -252,12 +252,14 @@ fn test_node_parent_of_child_by_field_name() {
fn test_node_field_name_for_child() {
let mut parser = Parser::new();
parser.set_language(get_language("c")).unwrap();
let tree = parser.parse("x + y;", None).unwrap();
let tree = parser.parse("int w = x + y;", None).unwrap();
let translation_unit_node = tree.root_node();
let binary_expression_node = translation_unit_node
.named_child(0)
let declaration_node = translation_unit_node.named_child(0).unwrap();
let binary_expression_node = declaration_node
.child_by_field_name("declarator")
.unwrap()
.named_child(0)
.child_by_field_name("value")
.unwrap();
assert_eq!(binary_expression_node.field_name_for_child(0), Some("left"));
@ -385,10 +387,52 @@ fn test_node_named_child_with_aliases_and_extras() {
assert_eq!(root.named_child(4).unwrap().kind(), "C");
}
#[test]
fn test_node_descendant_count() {
let tree = parse_json_example();
let value_node = tree.root_node();
let all_nodes = get_all_nodes(&tree);
assert_eq!(value_node.descendant_count(), all_nodes.len());
let mut cursor = value_node.walk();
for (i, node) in all_nodes.iter().enumerate() {
cursor.goto_descendant(i);
assert_eq!(cursor.node(), *node, "index {i}");
}
for (i, node) in all_nodes.iter().enumerate().rev() {
cursor.goto_descendant(i);
assert_eq!(cursor.node(), *node, "rev index {i}");
}
}
#[test]
fn test_descendant_count_single_node_tree() {
let mut parser = Parser::new();
parser
.set_language(get_language("embedded-template"))
.unwrap();
let tree = parser.parse("hello", None).unwrap();
let nodes = get_all_nodes(&tree);
assert_eq!(nodes.len(), 2);
assert_eq!(tree.root_node().descendant_count(), 2);
let mut cursor = tree.root_node().walk();
cursor.goto_descendant(0);
assert_eq!(cursor.depth(), 0);
assert_eq!(cursor.node(), nodes[0]);
cursor.goto_descendant(1);
assert_eq!(cursor.depth(), 1);
assert_eq!(cursor.node(), nodes[1]);
}
#[test]
fn test_node_descendant_for_range() {
let tree = parse_json_example();
let array_node = tree.root_node().child(0).unwrap();
let array_node = tree.root_node();
// Leaf node exactly matches the given bounds - byte query
let colon_index = JSON_EXAMPLE.find(":").unwrap();
@ -508,7 +552,7 @@ fn test_node_edit() {
let edit = get_random_edit(&mut rand, &mut code);
let mut tree2 = tree.clone();
let edit = perform_edit(&mut tree2, &mut code, &edit);
let edit = perform_edit(&mut tree2, &mut code, &edit).unwrap();
for node in nodes_before.iter_mut() {
node.edit(&edit);
}
@ -841,15 +885,17 @@ fn get_all_nodes(tree: &Tree) -> Vec<Node> {
let mut visited_children = false;
let mut cursor = tree.walk();
loop {
result.push(cursor.node());
if !visited_children && cursor.goto_first_child() {
continue;
} else if cursor.goto_next_sibling() {
visited_children = false;
} else if cursor.goto_parent() {
visited_children = true;
if !visited_children {
result.push(cursor.node());
if !cursor.goto_first_child() {
visited_children = true;
}
} else {
break;
if cursor.goto_next_sibling() {
visited_children = false;
} else if !cursor.goto_parent() {
break;
}
}
}
return result;

View file

@ -0,0 +1,104 @@
// For some reason `Command::spawn` doesn't work in the CI env for many exotic arches.
#![cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
use crate::{
generate::{generate_parser_for_grammar, load_grammar_file},
tests::helpers::fixtures::{fixtures_dir, get_test_language},
};
use std::{
env::VarError,
process::{Command, Stdio},
};
use tree_sitter::Parser;
// The `sanitizing` cfg is required to avoid running tests under a specific sanitizer
// because they don't work well with subprocesses _(it's an assumption)_.
//
// Below are two alternative examples of how to disable tests for some arches
// if excluding the whole mod from compilation doesn't work well.
//
// XXX: Also, it may make sense to keep such tests ignored by default
// to avoid surprises, and enable them on CI by passing an extra option explicitly:
//
// > cargo test -- --include-ignored
//
// #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
// #[cfg_attr(not(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing))), ignore)]
//
#[test]
fn test_grammar_that_should_hang_and_not_segfault() {
let parent_sleep_millis = 1000;
let test_name = "test_grammar_that_should_hang_and_not_segfault";
let test_var = "CARGO_HANG_TEST";
eprintln!(" {test_name}");
let tests_exec_path = std::env::args()
.nth(0)
.expect("Failed get get tests executable path");
match std::env::var(test_var) {
Ok(v) if v == test_name => {
eprintln!(" child process id {}", std::process::id());
hang_test();
}
Err(VarError::NotPresent) => {
eprintln!(" parent process id {}", std::process::id());
if true {
let mut command = Command::new(tests_exec_path);
command.arg(test_name).env(test_var, test_name);
if std::env::args().any(|x| x == "--nocapture") {
command.arg("--nocapture");
} else {
command.stdout(Stdio::null()).stderr(Stdio::null());
}
match command.spawn() {
Ok(mut child) => {
std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis));
match child.try_wait() {
Ok(Some(status)) if status.success() => {
panic!("Child wasn't hang and exited successfully")
}
Ok(Some(status)) => panic!(
"Child wasn't hang and exited with status code: {:?}",
status.code()
),
_ => (),
}
if let Err(e) = child.kill() {
eprintln!(
"Failed to kill hang test sub process id: {}, error: {e}",
child.id()
);
}
}
Err(e) => panic!("{e}"),
}
}
}
Err(e) => panic!("Env var error: {e}"),
_ => unreachable!(),
}
fn hang_test() {
let test_grammar_dir = fixtures_dir()
.join("test_grammars")
.join("get_col_should_hang_not_crash");
let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap();
let (parser_name, parser_code) =
generate_parser_for_grammar(grammar_json.as_str()).unwrap();
let language =
get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path()));
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let code_that_should_hang = "\nHello";
parser.parse(code_that_should_hang, None).unwrap();
}
}

View file

@ -15,6 +15,7 @@ use std::{
thread, time,
};
use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
use tree_sitter_proc_macro::retry;
#[test]
fn test_parsing_simple_string() {
@ -149,7 +150,7 @@ fn test_parsing_with_custom_utf8_input() {
)
);
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert!(!root.has_error());
assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
@ -188,7 +189,7 @@ fn test_parsing_with_custom_utf16_input() {
"(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))"
);
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert!(!root.has_error());
assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
@ -277,7 +278,10 @@ fn test_parsing_invalid_chars_at_eof() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
let tree = parser.parse(b"\xdf", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(ERROR (UNEXPECTED INVALID))");
assert_eq!(
tree.root_node().to_sexp(),
"(document (ERROR (UNEXPECTED INVALID)))"
);
}
#[test]
@ -340,7 +344,8 @@ fn test_parsing_after_editing_beginning_of_code() {
deleted_length: 0,
inserted_text: b" || 5".to_vec(),
},
);
)
.unwrap();
let mut recorder = ReadRecorder::new(&code);
let tree = parser
@ -387,7 +392,8 @@ fn test_parsing_after_editing_end_of_code() {
deleted_length: 0,
inserted_text: b".d".to_vec(),
},
);
)
.unwrap();
let mut recorder = ReadRecorder::new(&code);
let tree = parser
@ -466,7 +472,8 @@ h + i
deleted_length: 0,
inserted_text: b"1234".to_vec(),
},
);
)
.unwrap();
assert_eq!(
code,
@ -511,7 +518,7 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() {
let tree = parser.parse(&source, None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string)))))"
"(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string (string_start) (string_content) (string_end))))))"
);
// Delete a suffix of the source code, starting in the middle of the string
@ -530,12 +537,12 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() {
let undo = invert_edit(&source, &edit);
let mut tree2 = tree.clone();
perform_edit(&mut tree2, &mut source, &edit);
perform_edit(&mut tree2, &mut source, &edit).unwrap();
tree2 = parser.parse(&source, Some(&tree2)).unwrap();
assert!(tree2.root_node().has_error());
let mut tree3 = tree2.clone();
perform_edit(&mut tree3, &mut source, &undo);
perform_edit(&mut tree3, &mut source, &undo).unwrap();
tree3 = parser.parse(&source, Some(&tree3)).unwrap();
assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp(),);
}
@ -644,6 +651,7 @@ fn test_parsing_cancelled_by_another_thread() {
// Timeouts
#[test]
#[retry(10)]
fn test_parsing_with_a_timeout() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
@ -662,8 +670,12 @@ fn test_parsing_with_a_timeout() {
None,
);
assert!(tree.is_none());
#[cfg(not(target_arch = "sparc64"))]
assert!(start_time.elapsed().as_micros() < 2000);
#[cfg(target_arch = "sparc64")]
assert!(start_time.elapsed().as_micros() < 8000);
// Continue parsing, but pause after 1 ms of processing.
parser.set_timeout_micros(5000);
let start_time = time::Instant::now();
@ -701,6 +713,7 @@ fn test_parsing_with_a_timeout() {
}
#[test]
#[retry(10)]
fn test_parsing_with_a_timeout_and_a_reset() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
@ -756,6 +769,7 @@ fn test_parsing_with_a_timeout_and_a_reset() {
}
#[test]
#[retry(10)]
fn test_parsing_with_a_timeout_and_implicit_reset() {
allocations::record(|| {
let mut parser = Parser::new();
@ -789,6 +803,7 @@ fn test_parsing_with_a_timeout_and_implicit_reset() {
}
#[test]
#[retry(10)]
fn test_parsing_with_timeout_and_no_completion() {
allocations::record(|| {
let mut parser = Parser::new();
@ -828,7 +843,7 @@ fn test_parsing_with_one_included_range() {
concat!(
"(program (expression_statement (call_expression ",
"function: (member_expression object: (identifier) property: (property_identifier)) ",
"arguments: (arguments (string)))))",
"arguments: (arguments (string (string_fragment))))))",
)
);
assert_eq!(
@ -1177,7 +1192,7 @@ fn test_parsing_with_a_newly_included_range() {
.set_included_ranges(&[simple_range(range1_start, range1_end)])
.unwrap();
let tree = parser
.parse_with(&mut chunked_input(&source_code, 3), None)
.parse_with(&mut chunked_input(source_code, 3), None)
.unwrap();
assert_eq!(
tree.root_node().to_sexp(),
@ -1196,7 +1211,7 @@ fn test_parsing_with_a_newly_included_range() {
])
.unwrap();
let tree2 = parser
.parse_with(&mut chunked_input(&source_code, 3), Some(&tree))
.parse_with(&mut chunked_input(source_code, 3), Some(&tree))
.unwrap();
assert_eq!(
tree2.root_node().to_sexp(),
@ -1220,7 +1235,7 @@ fn test_parsing_with_a_newly_included_range() {
simple_range(range3_start, range3_end),
])
.unwrap();
let tree3 = parser.parse(&source_code, Some(&tree)).unwrap();
let tree3 = parser.parse(source_code, Some(&tree)).unwrap();
assert_eq!(
tree3.root_node().to_sexp(),
concat!(
@ -1297,6 +1312,85 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
assert_eq!(root.child(3).unwrap().start_byte(), 4);
}
#[test]
fn test_grammars_that_can_hang_on_eof() {
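// These grammars have token patterns that can match the null byte, which the lexer
// also uses as an EOF sentinel; parsing a lone '"' must terminate instead of hanging
// (an assumption based on the test's name and the patterns below).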
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_single_null_char_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
)
.unwrap();
let mut parser = Parser::new();
parser
.set_language(get_test_language(&parser_name, &parser_code, None))
.unwrap();
parser.parse("\"", None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_null_char_with_next_char_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00-\\x01]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
)
.unwrap();
parser
.set_language(get_test_language(&parser_name, &parser_code, None))
.unwrap();
parser.parse("\"", None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_null_char_with_range_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00-\\x7F]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
)
.unwrap();
parser
.set_language(get_test_language(&parser_name, &parser_code, None))
.unwrap();
parser.parse("\"", None).unwrap();
}
fn simple_range(start: usize, end: usize) -> Range {
Range {
start_byte: start,

View file

@ -0,0 +1,15 @@
[package]
name = "tree-sitter-tests-proc-macro"
version = "0.0.0"
edition = "2021"
publish = false
rust-version.workspace = true
[lib]
proc-macro = true
[dependencies]
proc-macro2 = "1.0.63"
quote = "1"
rand = "0.8.5"
syn = { version = "1", features = ["full"] }

View file

@ -0,0 +1,137 @@
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::quote;
use syn::{
parse::{Parse, ParseStream},
parse_macro_input, Error, Expr, Ident, ItemFn, LitInt, Token,
};
#[proc_macro_attribute]
pub fn retry(args: TokenStream, input: TokenStream) -> TokenStream {
let count = parse_macro_input!(args as LitInt);
let input = parse_macro_input!(input as ItemFn);
let attrs = input.attrs.clone();
let name = input.sig.ident.clone();
TokenStream::from(quote! {
#(#attrs),*
fn #name() {
#input
for i in 0..=#count {
let result = std::panic::catch_unwind(|| {
#name();
});
if result.is_ok() {
return;
}
if i == #count {
std::panic::resume_unwind(result.unwrap_err());
}
}
}
})
}
#[proc_macro_attribute]
pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream {
struct Args {
retry: LitInt,
seed: Expr,
seed_fn: Option<Ident>,
}
impl Parse for Args {
fn parse(input: ParseStream) -> syn::Result<Self> {
let mut retry = None;
let mut seed = None;
let mut seed_fn = None;
while !input.is_empty() {
let name = input.parse::<Ident>()?;
match name.to_string().as_str() {
"retry" => {
input.parse::<Token![=]>()?;
retry.replace(input.parse()?);
}
"seed" => {
input.parse::<Token![=]>()?;
seed.replace(input.parse()?);
}
"seed_fn" => {
input.parse::<Token![=]>()?;
seed_fn.replace(input.parse()?);
}
x => {
return Err(Error::new(
name.span(),
format!("Unsupported parameter `{x}`"),
))
}
}
if !input.is_empty() {
input.parse::<Token![,]>()?;
}
}
if retry.is_none() {
retry.replace(LitInt::new("0", Span::mixed_site()));
}
Ok(Args {
retry: retry.expect("`retry` parameter is required"),
seed: seed.expect("`seed` parameter is required"),
seed_fn,
})
}
}
let Args {
retry,
seed,
seed_fn,
} = parse_macro_input!(args as Args);
let seed_fn = seed_fn.iter();
let func = parse_macro_input!(input as ItemFn);
let attrs = func.attrs.clone();
let name = func.sig.ident.clone();
// dbg!(quote::ToTokens::into_token_stream(&func));
TokenStream::from(quote! {
#[test]
#(#attrs),*
fn #name() {
#func
let mut seed = #seed;
for i in 0..=#retry {
let result = std::panic::catch_unwind(|| {
#name(seed);
});
if result.is_ok() {
return;
}
if i == #retry {
std::panic::resume_unwind(result.unwrap_err());
}
#(
seed = #seed_fn();
)*
if i < #retry {
println!("\nRetry {}/{} with a new seed {}", i + 1, #retry, seed);
}
}
}
})
}
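// A rough sketch (assumed, not verbatim) of the expansion for
// `#[test_with_seed(retry=2, seed=0, seed_fn=new_seed)]` on `fn my_test(seed: usize)`:
// the seeded function is nested inside a plain `#[test]` wrapper of the same name,
// which retries with a fresh seed each time the inner call panics.
//
// #[test]
// fn my_test() {
//     fn my_test(seed: usize) { /* original body */ }
//     let mut seed = 0;
//     for i in 0..=2 {
//         if std::panic::catch_unwind(|| my_test(seed)).is_ok() { return; }
//         if i == 2 { /* resume_unwind with the last panic payload */ }
//         seed = new_seed();
//         println!("\nRetry {}/{} with a new seed {}", i + 1, 2, seed);
//     }
// }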

File diff suppressed because it is too large

View file

@ -9,7 +9,7 @@ use std::{
use tree_sitter::Point;
use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext};
const PYTHON_TAG_QUERY: &'static str = r#"
const PYTHON_TAG_QUERY: &str = r#"
(
(function_definition
name: (identifier) @name
@ -39,7 +39,7 @@ const PYTHON_TAG_QUERY: &'static str = r#"
attribute: (identifier) @name)) @reference.call
"#;
const JS_TAG_QUERY: &'static str = r#"
const JS_TAG_QUERY: &str = r#"
(
(comment)* @doc .
(class_declaration
@ -68,7 +68,7 @@ const JS_TAG_QUERY: &'static str = r#"
function: (identifier) @name) @reference.call
"#;
const RUBY_TAG_QUERY: &'static str = r#"
const RUBY_TAG_QUERY: &str = r#"
(method
name: (_) @name) @definition.method
@ -359,25 +359,29 @@ fn test_tags_via_c_api() {
);
let c_scope_name = CString::new(scope_name).unwrap();
let result = c::ts_tagger_add_language(
tagger,
c_scope_name.as_ptr(),
language,
JS_TAG_QUERY.as_ptr(),
ptr::null(),
JS_TAG_QUERY.len() as u32,
0,
);
let result = unsafe {
c::ts_tagger_add_language(
tagger,
c_scope_name.as_ptr(),
language,
JS_TAG_QUERY.as_ptr(),
ptr::null(),
JS_TAG_QUERY.len() as u32,
0,
)
};
assert_eq!(result, c::TSTagsError::Ok);
let result = c::ts_tagger_tag(
tagger,
c_scope_name.as_ptr(),
source_code.as_ptr(),
source_code.len() as u32,
buffer,
ptr::null(),
);
let result = unsafe {
c::ts_tagger_tag(
tagger,
c_scope_name.as_ptr(),
source_code.as_ptr(),
source_code.len() as u32,
buffer,
ptr::null(),
)
};
assert_eq!(result, c::TSTagsError::Ok);
let tags = unsafe {
slice::from_raw_parts(
@@ -419,8 +423,10 @@ fn test_tags_via_c_api() {
]
);
c::ts_tags_buffer_delete(buffer);
c::ts_tagger_delete(tagger);
unsafe {
c::ts_tags_buffer_delete(buffer);
c::ts_tagger_delete(tagger);
}
});
}

View file

@@ -12,7 +12,7 @@ fn test_highlight_test_with_basic_test() {
Some("injections.scm"),
&[
"function".to_string(),
"variable.parameter".to_string(),
"variable".to_string(),
"keyword".to_string(),
],
);
@@ -22,7 +22,8 @@ fn test_highlight_test_with_basic_test() {
" // ^ function",
" // ^ keyword",
" return d + e;",
" // ^ variable.parameter",
" // ^ variable",
" // ^ !variable",
"};",
]
.join("\n");
@@ -32,18 +33,10 @@ fn test_highlight_test_with_basic_test() {
assert_eq!(
assertions,
&[
Assertion {
position: Point::new(1, 5),
expected_capture_name: "function".to_string()
},
Assertion {
position: Point::new(1, 11),
expected_capture_name: "keyword".to_string()
},
Assertion {
position: Point::new(4, 9),
expected_capture_name: "variable.parameter".to_string()
},
Assertion::new(1, 5, false, String::from("function")),
Assertion::new(1, 11, false, String::from("keyword")),
Assertion::new(4, 9, false, String::from("variable")),
Assertion::new(4, 11, true, String::from("variable")),
]
);
@@ -60,6 +53,7 @@ fn test_highlight_test_with_basic_test() {
(Point::new(1, 19), Point::new(1, 20), Highlight(1)), // "d"
(Point::new(4, 2), Point::new(4, 8), Highlight(2)), // "return"
(Point::new(4, 9), Point::new(4, 10), Highlight(1)), // "d"
(Point::new(4, 13), Point::new(4, 14), Highlight(1)), // "e"
]
);
}

View file

@@ -16,6 +16,7 @@ fn test_tags_test_with_basic_test() {
" # ^ reference.call",
" return d(e)",
" # ^ reference.call",
" # ^ !variable.parameter",
"",
]
.join("\n");
@@ -26,18 +27,10 @@ fn test_tags_test_with_basic_test() {
assert_eq!(
assertions,
&[
Assertion {
position: Point::new(1, 4),
expected_capture_name: "definition.function".to_string(),
},
Assertion {
position: Point::new(3, 9),
expected_capture_name: "reference.call".to_string(),
},
Assertion {
position: Point::new(5, 11),
expected_capture_name: "reference.call".to_string(),
},
Assertion::new(1, 4, false, String::from("definition.function")),
Assertion::new(3, 9, false, String::from("reference.call")),
Assertion::new(5, 11, false, String::from("reference.call")),
Assertion::new(5, 13, true, String::from("variable.parameter")),
]
);
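Both updated test files exercise the same new negated-assertion form: a `!`-prefixed capture name in a test comment (e.g. `^ !variable.parameter`) asserts that the position must not carry that capture, and the boolean third argument of `Assertion::new` records the negation. A sketch of the constructor these calls imply, assuming the struct gained a field for the negation flag (the name `negative` is a guess):

    impl Assertion {
        // (row, column, negated?, expected capture name)
        fn new(row: usize, col: usize, negative: bool, expected_capture_name: String) -> Self {
            Self {
                position: Point::new(row, col),
                negative,
                expected_capture_name,
            }
        }
    }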

View file

@@ -0,0 +1,173 @@
use std::{iter, sync::Arc};
use crate::tests::helpers::fixtures::get_language;
use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree};
fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) {
let language = get_language("c");
let mut parser = Parser::new();
parser.set_language(language).unwrap();
(parser.parse(text, None).unwrap(), language)
}
fn parse_text_with<T, F>(callback: &mut F) -> (Tree, Language)
where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let language = get_language("c");
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse_with(callback, None).unwrap();
// eprintln!("{}", tree.clone().root_node().to_sexp());
assert_eq!("comment", tree.clone().root_node().child(0).unwrap().kind());
(tree, language)
}
fn tree_query<I: AsRef<[u8]>>(tree: &Tree, text: impl TextProvider<I>, language: Language) {
let query = Query::new(language, "((comment) @c (#eq? @c \"// comment\"))").unwrap();
let mut cursor = QueryCursor::new();
let mut captures = cursor.captures(&query, tree.root_node(), text);
let (match_, idx) = captures.next().unwrap();
let capture = match_.captures[idx];
assert_eq!(capture.index as usize, idx);
assert_eq!("comment", capture.node.kind());
}
fn check_parsing<I: AsRef<[u8]>>(
parser_text: impl AsRef<[u8]>,
text_provider: impl TextProvider<I>,
) {
let (tree, language) = parse_text(parser_text);
tree_query(&tree, text_provider, language);
}
fn check_parsing_callback<T, F, I: AsRef<[u8]>>(
parser_callback: &mut F,
text_provider: impl TextProvider<I>,
) where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let (tree, language) = parse_text_with(parser_callback);
tree_query(&tree, text_provider, language);
}
#[test]
fn test_text_provider_for_str_slice() {
let text: &str = "// comment";
check_parsing(text, text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
}
#[test]
fn test_text_provider_for_string() {
let text: String = "// comment".to_owned();
check_parsing(text.clone(), text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<[u8]>>::as_ref(&text), text.as_bytes());
}
#[test]
fn test_text_provider_for_box_of_str_slice() {
let text: Box<str> = "// comment".to_owned().into_boxed_str();
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<str>>::as_ref(&text), text.as_bytes());
check_parsing(text.as_ref(), text.as_ref().as_bytes());
check_parsing(text.as_ref(), text.as_bytes());
}
#[test]
fn test_text_provider_for_box_of_bytes_slice() {
let text: Box<[u8]> = "// comment".to_owned().into_boxed_str().into_boxed_bytes();
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.as_ref(), &*text);
check_parsing(&*text, &*text);
}
#[test]
fn test_text_provider_for_vec_of_bytes() {
let text: Vec<u8> = "// comment".to_owned().into_bytes();
check_parsing(&*text, &*text);
}
#[test]
fn test_text_provider_for_arc_of_bytes_slice() {
let text: Vec<u8> = "// comment".to_owned().into_bytes();
let text: Arc<[u8]> = Arc::from(text);
check_parsing(&*text, &*text);
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.clone(), text.as_ref());
}
#[test]
fn test_text_provider_callback_with_str_slice() {
let text: &str = "// comment";
check_parsing(text, |_node: Node<'_>| iter::once(text));
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| iter::once(text),
);
}
#[test]
fn test_text_provider_callback_with_owned_string_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: String = text.to_owned();
iter::once(slice)
},
);
}
#[test]
fn test_text_provider_callback_with_owned_bytes_vec_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: Vec<u8> = text.to_owned().into_bytes();
iter::once(slice)
},
);
}
#[test]
fn test_text_provider_callback_with_owned_arc_of_bytes_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: Arc<[u8]> = text.to_owned().into_bytes().into();
iter::once(slice)
},
);
}
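Given the trait shape these tests exercise (`TextProvider<I>` generic over `I: AsRef<[u8]>`, with an associated iterator type), a hand-written chunked provider might look like the following sketch. The `Chunks` type and the extra test are hypothetical, and rely only on items this file already imports:

    // A two-chunk text storage, to illustrate a custom provider.
    struct Chunks<'a>(&'a [u8], &'a [u8]);

    impl<'a> TextProvider<&'a [u8]> for &'a Chunks<'a> {
        type I = std::array::IntoIter<&'a [u8], 2>;

        fn text(&mut self, _node: Node) -> Self::I {
            // A real provider should yield only the bytes covering
            // `_node.byte_range()`; this sketch assumes the queried node
            // spans the whole two-chunk document, as in the tests above.
            [self.0, self.1].into_iter()
        }
    }

    #[test]
    fn test_text_provider_for_custom_chunks() {
        let chunks = Chunks(b"// comm", b"ent");
        check_parsing(b"// comment", &chunks);
    }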

View file

@@ -306,7 +306,7 @@ fn test_tree_cursor() {
.parse(
"
struct Stuff {
a: A;
a: A,
b: Option<B>,
}
",
@@ -331,6 +331,88 @@ fn test_tree_cursor() {
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "field_declaration_list");
assert_eq!(cursor.node().is_named(), true);
assert!(cursor.goto_last_child());
assert_eq!(cursor.node().kind(), "}");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 4, column: 16 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), ",");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 3, column: 32 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "field_declaration");
assert_eq!(cursor.node().is_named(), true);
assert_eq!(cursor.node().start_position(), Point { row: 3, column: 20 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), ",");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 2, column: 24 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "field_declaration");
assert_eq!(cursor.node().is_named(), true);
assert_eq!(cursor.node().start_position(), Point { row: 2, column: 20 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "{");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 1, column: 29 });
let mut copy = tree.walk();
copy.reset_to(cursor);
assert_eq!(copy.node().kind(), "{");
assert_eq!(copy.node().is_named(), false);
assert!(copy.goto_parent());
assert_eq!(copy.node().kind(), "field_declaration_list");
assert_eq!(copy.node().is_named(), true);
assert!(copy.goto_parent());
assert_eq!(copy.node().kind(), "struct_item");
}
#[test]
fn test_tree_cursor_previous_sibling() {
let mut parser = Parser::new();
parser.set_language(get_language("rust")).unwrap();
let text = "
// Hi there
// This is fun!
// Another one!
";
let tree = parser.parse(text, None).unwrap();
let mut cursor = tree.walk();
assert_eq!(cursor.node().kind(), "source_file");
assert!(cursor.goto_last_child());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// Another one!"
);
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// This is fun!"
);
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// Hi there"
);
assert!(!cursor.goto_previous_sibling());
}
#[test]
@@ -620,7 +702,7 @@ fn get_changed_ranges(
source_code: &mut Vec<u8>,
edit: Edit,
) -> Vec<Range> {
perform_edit(tree, source_code, &edit);
perform_edit(tree, source_code, &edit).unwrap();
let new_tree = parser.parse(&source_code, Some(tree)).unwrap();
let result = tree.changed_ranges(&new_tree).collect();
*tree = new_tree;

View file

@@ -1,9 +1,7 @@
use anyhow::Result;
use std::io;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread;
use tree_sitter::Parser;
use tree_sitter::{Parser, Tree};
#[cfg(unix)]
use anyhow::{anyhow, Context};
@@ -13,55 +11,86 @@ use std::path::PathBuf;
use std::process::{Child, ChildStdin, Command, Stdio};
#[cfg(unix)]
const HTML_HEADER: &[u8] = b"<!DOCTYPE html>\n<style>svg { width: 100%; }</style>\n\n";
const HTML_HEADER: &[u8] = b"
<!DOCTYPE html>
pub fn cancel_on_stdin() -> Arc<AtomicUsize> {
<style>
svg { width: 100%; }
</style>
";
pub fn cancel_on_signal() -> Arc<AtomicUsize> {
let result = Arc::new(AtomicUsize::new(0));
if atty::is(atty::Stream::Stdin) {
thread::spawn({
let flag = result.clone();
move || {
let mut line = String::new();
io::stdin().read_line(&mut line).unwrap();
flag.store(1, Ordering::Relaxed);
}
});
}
ctrlc::set_handler({
let flag = result.clone();
move || {
flag.store(1, Ordering::Relaxed);
}
})
.expect("Error setting Ctrl-C handler");
result
}
#[cfg(windows)]
pub struct LogSession();
pub struct LogSession;
#[cfg(unix)]
pub struct LogSession(PathBuf, Option<Child>, Option<ChildStdin>);
pub struct LogSession {
path: PathBuf,
dot_process: Option<Child>,
dot_process_stdin: Option<ChildStdin>,
}
#[cfg(windows)]
pub fn print_tree_graph(_tree: &Tree, _path: &str) -> Result<()> {
Ok(())
}
#[cfg(windows)]
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result<LogSession> {
Ok(LogSession())
Ok(LogSession)
}
#[cfg(unix)]
pub fn print_tree_graph(tree: &Tree, path: &str) -> Result<()> {
let session = LogSession::new(path)?;
tree.print_dot_graph(session.dot_process_stdin.as_ref().unwrap());
Ok(())
}
#[cfg(unix)]
pub fn log_graphs(parser: &mut Parser, path: &str) -> Result<LogSession> {
use std::io::Write;
let session = LogSession::new(path)?;
parser.print_dot_graphs(session.dot_process_stdin.as_ref().unwrap());
Ok(session)
}
let mut dot_file = std::fs::File::create(path)?;
dot_file.write(HTML_HEADER)?;
let mut dot_process = Command::new("dot")
.arg("-Tsvg")
.stdin(Stdio::piped())
.stdout(dot_file)
.spawn()
.with_context(|| "Failed to run the `dot` command. Check that graphviz is installed.")?;
let dot_stdin = dot_process
.stdin
.take()
.ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?;
parser.print_dot_graphs(&dot_stdin);
Ok(LogSession(
PathBuf::from(path),
Some(dot_process),
Some(dot_stdin),
))
#[cfg(unix)]
impl LogSession {
fn new(path: &str) -> Result<Self> {
use std::io::Write;
let mut dot_file = std::fs::File::create(path)?;
dot_file.write(HTML_HEADER)?;
let mut dot_process = Command::new("dot")
.arg("-Tsvg")
.stdin(Stdio::piped())
.stdout(dot_file)
.spawn()
.with_context(|| {
"Failed to run the `dot` command. Check that graphviz is installed."
})?;
let dot_stdin = dot_process
.stdin
.take()
.ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?;
Ok(Self {
path: PathBuf::from(path),
dot_process: Some(dot_process),
dot_process_stdin: Some(dot_stdin),
})
}
}
#[cfg(unix)]
@@ -69,13 +98,13 @@ impl Drop for LogSession {
fn drop(&mut self) {
use std::fs;
drop(self.2.take().unwrap());
let output = self.1.take().unwrap().wait_with_output().unwrap();
drop(self.dot_process_stdin.take().unwrap());
let output = self.dot_process.take().unwrap().wait_with_output().unwrap();
if output.status.success() {
if cfg!(target_os = "macos")
&& fs::metadata(&self.0).unwrap().len() > HTML_HEADER.len() as u64
&& fs::metadata(&self.path).unwrap().len() > HTML_HEADER.len() as u64
{
Command::new("open").arg(&self.0).output().unwrap();
Command::new("open").arg(&self.path).output().unwrap();
}
} else {
eprintln!(

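A minimal sketch of how the refactored helpers might be driven from elsewhere in the CLI; the module path, file names, and source text are illustrative:

    use anyhow::Result;
    use tree_sitter::Parser;

    // Hypothetical caller; assumes a language was already set on `parser`.
    fn parse_with_graph_log(parser: &mut Parser, source: &str) -> Result<()> {
        // While the session is alive, the parser streams dot graphs into
        // the spawned `dot -Tsvg` process, which renders into the log file.
        let _session = crate::util::log_graphs(parser, "parse-log.html")?;
        let _tree = parser.parse(source, None);
        Ok(())
        // Dropping `_session` closes dot's stdin, waits for the process,
        // and on macOS opens the log if it grew beyond the HTML header.
    }

The new `print_tree_graph` follows the same pattern for a single tree: it creates a one-shot session around `Tree::print_dot_graph` and lets the `Drop` implementation finish the render.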
View file

@@ -1,5 +1,6 @@
use super::generate::parse_grammar::GrammarJSON;
use anyhow::{anyhow, Context, Result};
use path_slash::PathExt as _;
use std::{
ffi::{OsStr, OsString},
fs,
@@ -60,7 +61,7 @@ pub fn compile_language_to_wasm(
volume_string = OsString::from(parent);
volume_string.push(":/src:Z");
command.arg("--workdir");
command.arg(&Path::new("/src").join(filename));
command.arg(Path::new("/src").join(filename).to_slash_lossy().as_ref());
} else {
volume_string = OsString::from(language_dir);
volume_string.push(":/src:Z");
@@ -84,6 +85,11 @@ pub fn compile_language_to_wasm(
// Run `emcc` in a container using the `emscripten-slim` image
command.args(&[EMSCRIPTEN_TAG, "emcc"]);
} else {
if force_docker {
return Err(anyhow!(
"You must have docker on your PATH to run this command with --docker"
));
}
return Err(anyhow!(
"You must have either emcc or docker on your PATH to run this command"
));
@@ -116,14 +122,18 @@ pub fn compile_language_to_wasm(
let scanner_cpp_path = src.join("scanner.cpp");
if language_dir.join(&scanner_cc_path).exists() {
command.arg("-xc++").arg(&scanner_cc_path);
command
.arg("-xc++")
.arg(scanner_cc_path.to_slash_lossy().as_ref());
} else if language_dir.join(&scanner_cpp_path).exists() {
command.arg("-xc++").arg(&scanner_cpp_path);
command
.arg("-xc++")
.arg(scanner_cpp_path.to_slash_lossy().as_ref());
} else if language_dir.join(&scanner_c_path).exists() {
command.arg(&scanner_c_path);
command.arg(scanner_c_path.to_slash_lossy().as_ref());
}
command.arg(&parser_c_path);
command.arg(parser_c_path.to_slash_lossy().as_ref());
let output = command
.output()