cli: Handle multi-parser repos, content-regex property
Prompted by tree-sitter/tree-sitter-typescript#68
This commit is contained in:
parent
7005d8b9d9
commit
93f7de03e2
3 changed files with 198 additions and 97 deletions
|
|
@ -20,27 +20,23 @@ const DYLIB_EXTENSION: &'static str = "dll";
|
|||
|
||||
const BUILD_TARGET: &'static str = env!("BUILD_TARGET");
|
||||
|
||||
struct LanguageRepo {
|
||||
path: PathBuf,
|
||||
language: OnceCell<Language>,
|
||||
configurations: Vec<LanguageConfiguration>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct LanguageConfiguration {
|
||||
scope: Option<String>,
|
||||
_content_regex: Option<Regex>,
|
||||
_first_line_regex: Option<Regex>,
|
||||
injection_regex: Option<Regex>,
|
||||
file_types: Vec<String>,
|
||||
highlight_property_sheet_path: Option<PathBuf>,
|
||||
pub scope: Option<String>,
|
||||
pub content_regex: Option<Regex>,
|
||||
pub _first_line_regex: Option<Regex>,
|
||||
pub injection_regex: Option<Regex>,
|
||||
pub file_types: Vec<String>,
|
||||
pub highlight_property_sheet_path: Option<PathBuf>,
|
||||
language_id: usize,
|
||||
highlight_property_sheet: OnceCell<Option<PropertySheet<Properties>>>,
|
||||
}
|
||||
|
||||
pub struct Loader {
|
||||
parser_lib_path: PathBuf,
|
||||
language_repos: Vec<LanguageRepo>,
|
||||
language_configuration_ids_by_file_type: HashMap<String, Vec<(usize, usize)>>,
|
||||
languages_by_id: Vec<(PathBuf, OnceCell<Language>)>,
|
||||
language_configurations: Vec<LanguageConfiguration>,
|
||||
language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
|
||||
}
|
||||
|
||||
unsafe impl Send for Loader {}
|
||||
|
|
@ -50,7 +46,8 @@ impl Loader {
|
|||
pub fn new(parser_lib_path: PathBuf) -> Self {
|
||||
Loader {
|
||||
parser_lib_path,
|
||||
language_repos: Vec::new(),
|
||||
languages_by_id: Vec::new(),
|
||||
language_configurations: Vec::new(),
|
||||
language_configuration_ids_by_file_type: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
|
@ -62,8 +59,10 @@ impl Loader {
|
|||
let entry = entry?;
|
||||
if let Some(parser_dir_name) = entry.file_name().to_str() {
|
||||
if parser_dir_name.starts_with("tree-sitter-") {
|
||||
self.find_language_at_path(&parser_container_dir.join(parser_dir_name))
|
||||
.ok();
|
||||
self.find_language_configurations_at_path(
|
||||
&parser_container_dir.join(parser_dir_name),
|
||||
)
|
||||
.ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -72,24 +71,38 @@ impl Loader {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub fn language_at_path(&mut self, path: &Path) -> Result<Option<Language>> {
|
||||
if let Ok(id) = self.find_language_at_path(path) {
|
||||
Ok(Some(self.language_for_id(id)?.0))
|
||||
pub fn languages_at_path(&mut self, path: &Path) -> Result<Vec<Language>> {
|
||||
if let Ok(configurations) = self.find_language_configurations_at_path(path) {
|
||||
let mut language_ids = configurations
|
||||
.iter()
|
||||
.map(|c| c.language_id)
|
||||
.collect::<Vec<_>>();
|
||||
language_ids.sort();
|
||||
language_ids.dedup();
|
||||
language_ids
|
||||
.into_iter()
|
||||
.map(|id| self.language_for_id(id))
|
||||
.collect::<Result<Vec<_>>>()
|
||||
} else {
|
||||
Ok(None)
|
||||
Ok(Vec::new())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
|
||||
self.language_configurations
|
||||
.iter()
|
||||
.map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn language_configuration_for_scope(
|
||||
&self,
|
||||
scope: &str,
|
||||
) -> Result<Option<(Language, &LanguageConfiguration)>> {
|
||||
for (i, repo) in self.language_repos.iter().enumerate() {
|
||||
for configuration in &repo.configurations {
|
||||
if configuration.scope.as_ref().map_or(false, |s| s == scope) {
|
||||
let (language, _) = self.language_for_id(i)?;
|
||||
return Ok(Some((language, &configuration)));
|
||||
}
|
||||
for configuration in &self.language_configurations {
|
||||
if configuration.scope.as_ref().map_or(false, |s| s == scope) {
|
||||
let language = self.language_for_id(configuration.language_id)?;
|
||||
return Ok(Some((language, configuration)));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
|
|
@ -99,7 +112,9 @@ impl Loader {
|
|||
&self,
|
||||
path: &Path,
|
||||
) -> Result<Option<(Language, &LanguageConfiguration)>> {
|
||||
let ids = path
|
||||
// Find all the language configurations that match this file name
|
||||
// or a suffix of the file name.
|
||||
let configuration_ids = path
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
|
||||
|
|
@ -110,13 +125,57 @@ impl Loader {
|
|||
self.language_configuration_ids_by_file_type.get(extension)
|
||||
})
|
||||
});
|
||||
if let Some(ids) = ids {
|
||||
// TODO use `content-regex` to pick one
|
||||
for (repo_id, configuration_id) in ids.iter().cloned() {
|
||||
let (language, configurations) = self.language_for_id(repo_id)?;
|
||||
return Ok(Some((language, &configurations[configuration_id])));
|
||||
|
||||
if let Some(configuration_ids) = configuration_ids {
|
||||
if !configuration_ids.is_empty() {
|
||||
let configuration;
|
||||
|
||||
// If there is only one language configuration, then use it.
|
||||
if configuration_ids.len() == 1 {
|
||||
configuration = &self.language_configurations[configuration_ids[0]];
|
||||
}
|
||||
|
||||
// If multiple language configurations match, then determine which
|
||||
// one to use by applying the configurations' content regexes.
|
||||
else {
|
||||
let file_contents = fs::read_to_string(path)?;
|
||||
let mut best_score = -2isize;
|
||||
let mut best_configuration_id = None;
|
||||
for configuration_id in configuration_ids {
|
||||
let config = &self.language_configurations[*configuration_id];
|
||||
|
||||
// If the language configuration has a content regex, assign
|
||||
// a score based on the length of the first match.
|
||||
let score;
|
||||
if let Some(content_regex) = &config.content_regex {
|
||||
if let Some(mat) = content_regex.find(&file_contents) {
|
||||
score = (mat.end() - mat.start()) as isize;
|
||||
}
|
||||
|
||||
// If the content regex does not match, then *penalize* this
|
||||
// language configuration, so that language configurations
|
||||
// without content regexes are preferred over those with
|
||||
// non-matching content regexes.
|
||||
else {
|
||||
score = -1;
|
||||
}
|
||||
} else {
|
||||
score = 0;
|
||||
}
|
||||
if score > best_score {
|
||||
best_configuration_id = Some(*configuration_id);
|
||||
best_score = score;
|
||||
}
|
||||
}
|
||||
|
||||
configuration = &self.language_configurations[best_configuration_id.unwrap()];
|
||||
}
|
||||
|
||||
let language = self.language_for_id(configuration.language_id)?;
|
||||
return Ok(Some((language, configuration)));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
|
|
@ -126,34 +185,35 @@ impl Loader {
|
|||
) -> Result<Option<(Language, &LanguageConfiguration)>> {
|
||||
let mut best_match_length = 0;
|
||||
let mut best_match_position = None;
|
||||
for (i, repo) in self.language_repos.iter().enumerate() {
|
||||
for (j, configuration) in repo.configurations.iter().enumerate() {
|
||||
if let Some(injection_regex) = &configuration.injection_regex {
|
||||
if let Some(mat) = injection_regex.find(string) {
|
||||
let length = mat.end() - mat.start();
|
||||
if length > best_match_length {
|
||||
best_match_position = Some((i, j));
|
||||
best_match_length = length;
|
||||
}
|
||||
for (i, configuration) in self.language_configurations.iter().enumerate() {
|
||||
if let Some(injection_regex) = &configuration.injection_regex {
|
||||
if let Some(mat) = injection_regex.find(string) {
|
||||
let length = mat.end() - mat.start();
|
||||
if length > best_match_length {
|
||||
best_match_position = Some(i);
|
||||
best_match_length = length;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some((i, j)) = best_match_position {
|
||||
let (language, configurations) = self.language_for_id(i)?;
|
||||
Ok(Some((language, &configurations[j])))
|
||||
|
||||
if let Some(i) = best_match_position {
|
||||
let configuration = &self.language_configurations[i];
|
||||
let language = self.language_for_id(configuration.language_id)?;
|
||||
Ok(Some((language, configuration)))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
fn language_for_id(&self, id: usize) -> Result<(Language, &Vec<LanguageConfiguration>)> {
|
||||
let repo = &self.language_repos[id];
|
||||
let language = repo.language.get_or_try_init(|| {
|
||||
let src_path = repo.path.join("src");
|
||||
self.load_language_at_path(&src_path, &src_path)
|
||||
})?;
|
||||
Ok((*language, &self.language_repos[id].configurations))
|
||||
fn language_for_id(&self, id: usize) -> Result<Language> {
|
||||
let (path, language) = &self.languages_by_id[id];
|
||||
language
|
||||
.get_or_try_init(|| {
|
||||
let src_path = path.join("src");
|
||||
self.load_language_at_path(&src_path, &src_path)
|
||||
})
|
||||
.map(|l| *l)
|
||||
}
|
||||
|
||||
pub fn load_language_at_path(&self, src_path: &Path, header_path: &Path) -> Result<Language> {
|
||||
|
|
@ -278,9 +338,14 @@ impl Loader {
|
|||
Ok(language)
|
||||
}
|
||||
|
||||
fn find_language_at_path<'a>(&'a mut self, parser_path: &Path) -> Result<usize> {
|
||||
fn find_language_configurations_at_path<'a>(
|
||||
&'a mut self,
|
||||
parser_path: &Path,
|
||||
) -> Result<&[LanguageConfiguration]> {
|
||||
#[derive(Deserialize)]
|
||||
struct LanguageConfigurationJSON {
|
||||
#[serde(default)]
|
||||
path: PathBuf,
|
||||
scope: Option<String>,
|
||||
#[serde(rename = "file-types")]
|
||||
file_types: Option<Vec<String>>,
|
||||
|
|
@ -295,57 +360,75 @@ impl Loader {
|
|||
|
||||
#[derive(Deserialize)]
|
||||
struct PackageJSON {
|
||||
#[serde(default)]
|
||||
#[serde(rename = "tree-sitter")]
|
||||
tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
|
||||
tree_sitter: Vec<LanguageConfigurationJSON>,
|
||||
}
|
||||
|
||||
let mut configurations = vec![LanguageConfiguration::default()];
|
||||
let initial_language_configuration_count = self.language_configurations.len();
|
||||
|
||||
if let Ok(package_json_contents) = fs::read_to_string(&parser_path.join("package.json")) {
|
||||
let package_json = serde_json::from_str::<PackageJSON>(&package_json_contents);
|
||||
if let Ok(package_json) = package_json {
|
||||
configurations = package_json
|
||||
.tree_sitter
|
||||
.map_or(Vec::new(), |configurations| {
|
||||
configurations
|
||||
.into_iter()
|
||||
.map(|conf| LanguageConfiguration {
|
||||
scope: conf.scope,
|
||||
file_types: conf.file_types.unwrap_or(Vec::new()),
|
||||
_content_regex: conf.content_regex.and_then(|r| {
|
||||
RegexBuilder::new(&r).multi_line(true).build().ok()
|
||||
}),
|
||||
_first_line_regex: conf.first_line_regex.and_then(|r| {
|
||||
RegexBuilder::new(&r).multi_line(true).build().ok()
|
||||
}),
|
||||
injection_regex: conf.injection_regex.and_then(|r| {
|
||||
RegexBuilder::new(&r).multi_line(true).build().ok()
|
||||
}),
|
||||
highlight_property_sheet_path: conf
|
||||
.highlights
|
||||
.map(|h| parser_path.join(h)),
|
||||
highlight_property_sheet: OnceCell::new(),
|
||||
})
|
||||
.collect()
|
||||
if package_json.tree_sitter.is_empty() {
|
||||
return Ok(&[]);
|
||||
}
|
||||
|
||||
let language_count = self.languages_by_id.len();
|
||||
for config_json in package_json.tree_sitter {
|
||||
// Determine the path to the parser directory. This can be specified in
|
||||
// the package.json, but defaults to the directory containing the package.json.
|
||||
let language_path = parser_path.join(config_json.path);
|
||||
|
||||
// Determine if a previous language configuration in this package.json file
|
||||
// already uses the same language.
|
||||
let mut language_id = None;
|
||||
for (id, (path, _)) in
|
||||
self.languages_by_id.iter().enumerate().skip(language_count)
|
||||
{
|
||||
if language_path == *path {
|
||||
language_id = Some(id);
|
||||
}
|
||||
}
|
||||
|
||||
// If not, add a new language path to the list.
|
||||
let language_id = language_id.unwrap_or_else(|| {
|
||||
self.languages_by_id.push((language_path, OnceCell::new()));
|
||||
self.languages_by_id.len() - 1
|
||||
});
|
||||
|
||||
for (i, configuration) in configurations.iter().enumerate() {
|
||||
let configuration = LanguageConfiguration {
|
||||
scope: config_json.scope,
|
||||
language_id,
|
||||
file_types: config_json.file_types.unwrap_or(Vec::new()),
|
||||
content_regex: config_json
|
||||
.content_regex
|
||||
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
|
||||
_first_line_regex: config_json
|
||||
.first_line_regex
|
||||
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
|
||||
injection_regex: config_json
|
||||
.injection_regex
|
||||
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
|
||||
highlight_property_sheet_path: config_json
|
||||
.highlights
|
||||
.map(|h| parser_path.join(h)),
|
||||
highlight_property_sheet: OnceCell::new(),
|
||||
};
|
||||
|
||||
for file_type in &configuration.file_types {
|
||||
self.language_configuration_ids_by_file_type
|
||||
.entry(file_type.to_string())
|
||||
.or_insert(Vec::new())
|
||||
.push((self.language_repos.len(), i));
|
||||
.push(self.language_configurations.len());
|
||||
}
|
||||
|
||||
self.language_configurations.push(configuration);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.language_repos.push(LanguageRepo {
|
||||
path: parser_path.to_owned(),
|
||||
language: OnceCell::new(),
|
||||
configurations,
|
||||
});
|
||||
|
||||
Ok(self.language_repos.len() - 1)
|
||||
Ok(&self.language_configurations[initial_language_configuration_count..])
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
use clap::{App, AppSettings, Arg, SubCommand};
|
||||
use error::Error;
|
||||
use std::{env, fs, u64};
|
||||
use std::path::Path;
|
||||
use std::process::exit;
|
||||
use std::{env, fs, u64};
|
||||
use tree_sitter_cli::{
|
||||
config, error, generate, highlight, loader, logger, parse, test, wasm, web_ui,
|
||||
};
|
||||
|
|
@ -102,6 +102,10 @@ fn run() -> error::Result<()> {
|
|||
.subcommand(
|
||||
SubCommand::with_name("web-ui").about("Test a parser interactively in the browser"),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("dump-languages")
|
||||
.about("Print info about all known language parsers"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let home_dir = dirs::home_dir().expect("Failed to read home directory");
|
||||
|
|
@ -124,8 +128,8 @@ fn run() -> error::Result<()> {
|
|||
let debug_graph = matches.is_present("debug-graph");
|
||||
let filter = matches.value_of("filter");
|
||||
let corpus_path = current_dir.join("corpus");
|
||||
if let Some(language) = loader.language_at_path(&current_dir)? {
|
||||
test::run_tests_at_path(language, &corpus_path, debug, debug_graph, filter)?;
|
||||
if let Some(language) = loader.languages_at_path(&current_dir)?.first() {
|
||||
test::run_tests_at_path(*language, &corpus_path, debug, debug_graph, filter)?;
|
||||
} else {
|
||||
eprintln!("No language found");
|
||||
}
|
||||
|
|
@ -173,12 +177,13 @@ fn run() -> error::Result<()> {
|
|||
}))?
|
||||
{
|
||||
lang
|
||||
} else if let Some(lang) =
|
||||
loader
|
||||
.language_at_path(&current_dir)
|
||||
.map_err(Error::wrap(|| {
|
||||
"Failed to load language in current directory"
|
||||
}))?
|
||||
} else if let Some(lang) = loader
|
||||
.languages_at_path(&current_dir)
|
||||
.map_err(Error::wrap(|| {
|
||||
"Failed to load language in current directory"
|
||||
}))?
|
||||
.first()
|
||||
.cloned()
|
||||
{
|
||||
lang
|
||||
} else {
|
||||
|
|
@ -251,6 +256,19 @@ fn run() -> error::Result<()> {
|
|||
wasm::compile_language_to_wasm(&grammar_path, matches.is_present("docker"))?;
|
||||
} else if matches.subcommand_matches("web-ui").is_some() {
|
||||
web_ui::serve(&current_dir);
|
||||
} else if matches.subcommand_matches("dump-languages").is_some() {
|
||||
loader.find_all_languages(&config.parser_directories)?;
|
||||
for (configuration, language_path) in loader.get_all_language_configurations() {
|
||||
println!(
|
||||
"scope: {}\nparser: {:?}\nproperties: {:?}\nfile_types: {:?}\ncontent_regex: {:?}\ninjection_regex: {:?}\n",
|
||||
configuration.scope.as_ref().unwrap_or(&String::new()),
|
||||
language_path,
|
||||
configuration.highlight_property_sheet_path,
|
||||
configuration.file_types,
|
||||
configuration.content_regex,
|
||||
configuration.injection_regex,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ use std::{fmt, ptr, slice, str, u16};
|
|||
pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION;
|
||||
pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h");
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||
#[repr(transparent)]
|
||||
pub struct Language(*const ffi::TSLanguage);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue