Merge branch 'master' into wasm-language

This commit is contained in:
Max Brunsfeld 2023-10-27 11:57:04 +01:00
commit f4e2f68f14
161 changed files with 10293 additions and 4253 deletions

View file

@ -9,6 +9,7 @@ readme = "README.md"
keywords = ["incremental", "parsing"]
categories = ["command-line-utilities", "parsing"]
repository = "https://github.com/tree-sitter/tree-sitter"
rust-version.workspace = true
[features]
wasm = ["tree-sitter/wasm"]
@ -20,7 +21,8 @@ dirs = "3.0"
libloading = "0.7"
once_cell = "1.7"
regex = "1"
serde = { version = "1.0.130", features = ["derive"] }
# Due to https://github.com/serde-rs/serde/issues/2538
serde = { version = "1.0, < 1.0.172", features = ["derive"] }
which = "4.1.0"
[dependencies.serde_json]

View file

@ -1,6 +1,6 @@
# `tree-sitter-loader`
# Tree-sitter Loader
The `tree-sitter` command-line program will dynamically find and build grammars
at runtime, if you have cloned the grammars' repositories to your local
filesystem. This helper crate implements that logic, so that you can use it in
filesystem. This helper crate implements that logic, so that you can use it in
your own program analysis tools, as well.

View file

@ -1 +1 @@
3.1.25
3.1.37

View file

@ -1,3 +1,5 @@
#![doc = include_str!("../README.md")]
use anyhow::{anyhow, Context, Error, Result};
use libloading::{Library, Symbol};
use once_cell::unsync::OnceCell;
@ -11,7 +13,7 @@ use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::Mutex;
use std::time::SystemTime;
use std::{fs, mem};
use std::{env, fs, mem};
use tree_sitter::{Language, QueryError, QueryErrorKind};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
@ -72,12 +74,12 @@ impl Config {
}
#[cfg(unix)]
const DYLIB_EXTENSION: &'static str = "so";
const DYLIB_EXTENSION: &str = "so";
#[cfg(windows)]
const DYLIB_EXTENSION: &'static str = "dll";
const BUILD_TARGET: &'static str = env!("BUILD_TARGET");
const BUILD_TARGET: &str = env!("BUILD_TARGET");
pub struct LanguageConfiguration<'a> {
pub scope: Option<String>,
@ -90,6 +92,7 @@ pub struct LanguageConfiguration<'a> {
pub injections_filenames: Option<Vec<String>>,
pub locals_filenames: Option<Vec<String>>,
pub tags_filenames: Option<Vec<String>>,
pub language_name: String,
language_id: usize,
highlight_config: OnceCell<Option<HighlightConfiguration>>,
tags_config: OnceCell<Option<TagsConfiguration>>,
@ -102,6 +105,7 @@ pub struct Loader {
languages_by_id: Vec<(PathBuf, OnceCell<Language>)>,
language_configurations: Vec<LanguageConfiguration<'static>>,
language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
language_configuration_in_current_path: Option<usize>,
highlight_names: Box<Mutex<Vec<String>>>,
use_all_highlight_names: bool,
debug_build: bool,
@ -115,9 +119,13 @@ unsafe impl Sync for Loader {}
impl Loader {
pub fn new() -> Result<Self> {
let parser_lib_path = dirs::cache_dir()
.ok_or(anyhow!("Cannot determine cache directory"))?
.join("tree-sitter/lib");
let parser_lib_path = match env::var("TREE_SITTER_LIBDIR") {
Ok(path) => PathBuf::from(path),
_ => dirs::cache_dir()
.ok_or(anyhow!("Cannot determine cache directory"))?
.join("tree-sitter")
.join("lib"),
};
Ok(Self::with_parser_lib_path(parser_lib_path))
}
@ -127,6 +135,7 @@ impl Loader {
languages_by_id: Vec::new(),
language_configurations: Vec::new(),
language_configuration_ids_by_file_type: HashMap::new(),
language_configuration_in_current_path: None,
highlight_names: Box::new(Mutex::new(Vec::new())),
use_all_highlight_names: true,
debug_build: false,
@ -136,7 +145,7 @@ impl Loader {
}
}
pub fn configure_highlights(&mut self, names: &Vec<String>) {
pub fn configure_highlights(&mut self, names: &[String]) {
self.use_all_highlight_names = false;
let mut highlights = self.highlight_names.lock().unwrap();
highlights.clear();
@ -152,8 +161,7 @@ impl Loader {
eprintln!("Warning: You have not configured any parser directories!");
eprintln!("Please run `tree-sitter init-config` and edit the resulting");
eprintln!("configuration file to indicate where we should look for");
eprintln!("language grammars.");
eprintln!("");
eprintln!("language grammars.\n");
}
for parser_container_dir in &config.parser_directories {
if let Ok(entries) = fs::read_dir(parser_container_dir) {
@ -163,6 +171,7 @@ impl Loader {
if parser_dir_name.starts_with("tree-sitter-") {
self.find_language_configurations_at_path(
&parser_container_dir.join(parser_dir_name),
false,
)
.ok();
}
@ -174,7 +183,7 @@ impl Loader {
}
pub fn languages_at_path(&mut self, path: &Path) -> Result<Vec<Language>> {
if let Ok(configurations) = self.find_language_configurations_at_path(path) {
if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
let mut language_ids = configurations
.iter()
.map(|c| c.language_id)
@ -365,7 +374,7 @@ impl Loader {
library_path.set_extension(DYLIB_EXTENSION);
}
let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)
let recompile = needs_recompile(&library_path, &parser_path, scanner_path.as_deref())
.with_context(|| "Failed to compare source and binary timestamps")?;
if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
@ -411,7 +420,7 @@ impl Loader {
header_path: &Path,
parser_path: &Path,
scanner_path: &Option<PathBuf>,
output_path: &PathBuf,
library_path: &PathBuf,
) -> Result<(), Error> {
let mut config = cc::Build::new();
config
@ -419,13 +428,15 @@ impl Loader {
.opt_level(2)
.cargo_metadata(false)
.target(BUILD_TARGET)
.host(BUILD_TARGET);
.host(BUILD_TARGET)
.flag_if_supported("-Werror=implicit-function-declaration");
let compiler = config.get_compiler();
let mut command = Command::new(compiler.path());
for (key, value) in compiler.env() {
command.env(key, value);
}
if cfg!(windows) {
if compiler.is_like_msvc() {
command.args(&["/nologo", "/LD", "/I"]).arg(header_path);
if self.debug_build {
command.arg("/Od");
@ -438,17 +449,20 @@ impl Loader {
}
command
.arg("/link")
.arg(format!("/out:{}", output_path.to_str().unwrap()));
.arg(format!("/out:{}", library_path.to_str().unwrap()));
} else {
command
.arg("-shared")
.arg("-fPIC")
.arg("-fno-exceptions")
.arg("-g")
.arg("-I")
.arg(header_path)
.arg("-o")
.arg(output_path);
.arg(&library_path);
if !cfg!(windows) {
command.arg("-fPIC");
}
if self.debug_build {
command.arg("-O0");
@ -469,6 +483,7 @@ impl Loader {
}
command.arg("-xc").arg(parser_path);
}
let output = command
.output()
.with_context(|| "Failed to execute C compiler")?;
@ -479,6 +494,37 @@ impl Loader {
String::from_utf8_lossy(&output.stderr)
));
}
#[cfg(any(target_os = "macos", target_os = "linux"))]
if scanner_path.is_some() {
let command = Command::new("nm")
.arg("-W")
.arg("-U")
.arg(&library_path)
.output();
if let Ok(output) = command {
if output.status.success() {
let mut found_non_static = false;
for line in String::from_utf8_lossy(&output.stdout).lines() {
if line.contains(" T ") && !line.contains("tree_sitter_") {
if let Some(function_name) =
line.split_whitespace().collect::<Vec<_>>().get(2)
{
if !found_non_static {
found_non_static = true;
eprintln!("Warning: Found non-static non-tree-sitter functions in external scannner");
}
eprintln!(" `{function_name}`");
}
}
}
if found_non_static {
eprintln!("Consider making these functions static, they can cause conflicts when another tree-sitter project uses the same function name");
}
}
}
}
Ok(())
}
@ -581,6 +627,7 @@ impl Loader {
pub fn highlight_config_for_injection_string<'a>(
&'a self,
string: &str,
apply_all_captures: bool,
) -> Option<&'a HighlightConfiguration> {
match self.language_configuration_for_injection_string(string) {
Err(e) => {
@ -591,38 +638,36 @@ impl Loader {
None
}
Ok(None) => None,
Ok(Some((language, configuration))) => match configuration.highlight_config(language) {
Err(e) => {
eprintln!(
"Failed to load property sheet for injection string '{}': {}",
string, e
);
None
Ok(Some((language, configuration))) => {
match configuration.highlight_config(language, apply_all_captures, None) {
Err(e) => {
eprintln!(
"Failed to load property sheet for injection string '{}': {}",
string, e
);
None
}
Ok(None) => None,
Ok(Some(config)) => Some(config),
}
Ok(None) => None,
Ok(Some(config)) => Some(config),
},
}
}
}
pub fn find_language_configurations_at_path<'a>(
&'a mut self,
pub fn find_language_configurations_at_path(
&mut self,
parser_path: &Path,
set_current_path_config: bool,
) -> Result<&[LanguageConfiguration]> {
#[derive(Deserialize)]
#[derive(Default, Deserialize)]
#[serde(untagged)]
enum PathsJSON {
#[default]
Empty,
Single(String),
Multiple(Vec<String>),
}
impl Default for PathsJSON {
fn default() -> Self {
PathsJSON::Empty
}
}
impl PathsJSON {
fn into_vec(self) -> Option<Vec<String>> {
match self {
@ -663,6 +708,11 @@ impl Loader {
tree_sitter: Vec<LanguageConfigurationJSON>,
}
#[derive(Deserialize)]
struct GrammarJSON {
name: String,
}
let initial_language_configuration_count = self.language_configurations.len();
if let Ok(package_json_contents) = fs::read_to_string(&parser_path.join("package.json")) {
@ -674,6 +724,13 @@ impl Loader {
// the package.json, but defaults to the directory containing the package.json.
let language_path = parser_path.join(config_json.path);
let grammar_path = language_path.join("src").join("grammar.json");
let mut grammar_file = fs::File::open(grammar_path)
.with_context(|| "Failed to read grammar.json")?;
let grammar_json: GrammarJSON =
serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
// Determine if a previous language configuration in this package.json file
// already uses the same language.
let mut language_id = None;
@ -693,6 +750,7 @@ impl Loader {
let configuration = LanguageConfiguration {
root_path: parser_path.to_path_buf(),
language_name: grammar_json.name.clone(),
scope: config_json.scope,
language_id,
file_types: config_json.file_types.unwrap_or(Vec::new()),
@ -705,19 +763,26 @@ impl Loader {
highlights_filenames: config_json.highlights.into_vec(),
highlight_config: OnceCell::new(),
tags_config: OnceCell::new(),
highlight_names: &*self.highlight_names,
highlight_names: &self.highlight_names,
use_all_highlight_names: self.use_all_highlight_names,
};
for file_type in &configuration.file_types {
self.language_configuration_ids_by_file_type
.entry(file_type.to_string())
.or_insert(Vec::new())
.or_default()
.push(self.language_configurations.len());
}
self.language_configurations
.push(unsafe { mem::transmute(configuration) });
if set_current_path_config
&& self.language_configuration_in_current_path.is_none()
{
self.language_configuration_in_current_path =
Some(self.language_configurations.len() - 1);
}
}
}
}
@ -725,8 +790,15 @@ impl Loader {
if self.language_configurations.len() == initial_language_configuration_count
&& parser_path.join("src").join("grammar.json").exists()
{
let grammar_path = parser_path.join("src").join("grammar.json");
let mut grammar_file =
fs::File::open(grammar_path).with_context(|| "Failed to read grammar.json")?;
let grammar_json: GrammarJSON =
serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
let configuration = LanguageConfiguration {
root_path: parser_path.to_owned(),
language_name: grammar_json.name,
language_id: self.languages_by_id.len(),
file_types: Vec::new(),
scope: None,
@ -739,7 +811,7 @@ impl Loader {
tags_filenames: None,
highlight_config: OnceCell::new(),
tags_config: OnceCell::new(),
highlight_names: &*self.highlight_names,
highlight_names: &self.highlight_names,
use_all_highlight_names: self.use_all_highlight_names,
};
self.language_configurations
@ -764,11 +836,11 @@ impl Loader {
if let Some(scope) = scope {
if let Some(config) = self
.language_configuration_for_scope(scope)
.with_context(|| format!("Failed to load language for scope '{}'", scope))?
.with_context(|| format!("Failed to load language for scope '{scope}'"))?
{
Ok(config.0)
} else {
return Err(anyhow!("Unknown scope '{}'", scope));
Err(anyhow!("Unknown scope '{scope}'"))
}
} else if let Some((lang, _)) = self
.language_configuration_for_file_name(path)
@ -780,8 +852,10 @@ impl Loader {
})?
{
Ok(lang)
} else if let Some(id) = self.language_configuration_in_current_path {
Ok(self.language_for_id(self.language_configurations[id].language_id)?)
} else if let Some(lang) = self
.languages_at_path(&current_dir)
.languages_at_path(current_dir)
.with_context(|| "Failed to load language in current directory")?
.first()
.cloned()
@ -803,25 +877,76 @@ impl Loader {
}
impl<'a> LanguageConfiguration<'a> {
pub fn highlight_config(&self, language: Language) -> Result<Option<&HighlightConfiguration>> {
pub fn highlight_config(
&self,
language: Language,
apply_all_captures: bool,
paths: Option<&[String]>,
) -> Result<Option<&HighlightConfiguration>> {
let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
Some(paths) => (
Some(
paths
.iter()
.filter(|p| p.ends_with("highlights.scm"))
.cloned()
.collect::<Vec<_>>(),
),
Some(
paths
.iter()
.filter(|p| p.ends_with("tags.scm"))
.cloned()
.collect::<Vec<_>>(),
),
Some(
paths
.iter()
.filter(|p| p.ends_with("locals.scm"))
.cloned()
.collect::<Vec<_>>(),
),
),
None => (None, None, None),
};
return self
.highlight_config
.get_or_try_init(|| {
let (highlights_query, highlight_ranges) =
self.read_queries(&self.highlights_filenames, "highlights.scm")?;
let (injections_query, injection_ranges) =
self.read_queries(&self.injections_filenames, "injections.scm")?;
let (locals_query, locals_ranges) =
self.read_queries(&self.locals_filenames, "locals.scm")?;
let (highlights_query, highlight_ranges) = self.read_queries(
if highlights_filenames.is_some() {
highlights_filenames.as_deref()
} else {
self.highlights_filenames.as_deref()
},
"highlights.scm",
)?;
let (injections_query, injection_ranges) = self.read_queries(
if injections_filenames.is_some() {
injections_filenames.as_deref()
} else {
self.injections_filenames.as_deref()
},
"injections.scm",
)?;
let (locals_query, locals_ranges) = self.read_queries(
if locals_filenames.is_some() {
locals_filenames.as_deref()
} else {
self.locals_filenames.as_deref()
},
"locals.scm",
)?;
if highlights_query.is_empty() {
Ok(None)
} else {
let mut result = HighlightConfiguration::new(
language,
&self.language_name,
&highlights_query,
&injections_query,
&locals_query,
apply_all_captures,
)
.map_err(|error| match error.kind {
QueryErrorKind::Language => Error::from(error),
@ -853,12 +978,12 @@ impl<'a> LanguageConfiguration<'a> {
let mut all_highlight_names = self.highlight_names.lock().unwrap();
if self.use_all_highlight_names {
for capture_name in result.query.capture_names() {
if !all_highlight_names.contains(capture_name) {
all_highlight_names.push(capture_name.clone());
if !all_highlight_names.iter().any(|x| x == capture_name) {
all_highlight_names.push(capture_name.to_string());
}
}
}
result.configure(&all_highlight_names.as_slice());
result.configure(all_highlight_names.as_slice());
Ok(Some(result))
}
})
@ -869,9 +994,9 @@ impl<'a> LanguageConfiguration<'a> {
self.tags_config
.get_or_try_init(|| {
let (tags_query, tags_ranges) =
self.read_queries(&self.tags_filenames, "tags.scm")?;
self.read_queries(self.tags_filenames.as_deref(), "tags.scm")?;
let (locals_query, locals_ranges) =
self.read_queries(&self.locals_filenames, "locals.scm")?;
self.read_queries(self.locals_filenames.as_deref(), "locals.scm")?;
if tags_query.is_empty() {
Ok(None)
} else {
@ -894,7 +1019,6 @@ impl<'a> LanguageConfiguration<'a> {
locals_query.len(),
)
}
.into()
} else {
error.into()
}
@ -904,9 +1028,9 @@ impl<'a> LanguageConfiguration<'a> {
.map(Option::as_ref)
}
fn include_path_in_query_error<'b>(
fn include_path_in_query_error(
mut error: QueryError,
ranges: &'b Vec<(String, Range<usize>)>,
ranges: &[(String, Range<usize>)],
source: &str,
start_offset: usize,
) -> Error {
@ -914,7 +1038,7 @@ impl<'a> LanguageConfiguration<'a> {
let (path, range) = ranges
.iter()
.find(|(_, range)| range.contains(&offset_within_section))
.unwrap();
.unwrap_or(ranges.last().unwrap());
error.offset = offset_within_section - range.start;
error.row = source[range.start..offset_within_section]
.chars()
@ -925,12 +1049,12 @@ impl<'a> LanguageConfiguration<'a> {
fn read_queries(
&self,
paths: &Option<Vec<String>>,
paths: Option<&[String]>,
default_path: &str,
) -> Result<(String, Vec<(String, Range<usize>)>)> {
let mut query = String::new();
let mut path_ranges = Vec::new();
if let Some(paths) = paths.as_ref() {
if let Some(paths) = paths {
for path in paths {
let abs_path = self.root_path.join(path);
let prev_query_len = query.len();
@ -955,7 +1079,7 @@ impl<'a> LanguageConfiguration<'a> {
fn needs_recompile(
lib_path: &Path,
parser_c_path: &Path,
scanner_path: &Option<PathBuf>,
scanner_path: Option<&Path>,
) -> Result<bool> {
if !lib_path.exists() {
return Ok(true);