perf(loader): improve language lookup speed

(cherry picked from commit 72f114fa12)
This commit is contained in:
Amaan Qureshi 2024-10-12 00:57:51 -04:00
parent 4cf96126d4
commit aac741dfd1
5 changed files with 63 additions and 66 deletions

1
Cargo.lock generated
View file

@ -1654,6 +1654,7 @@ dependencies = [
"dirs",
"fs4",
"indoc",
"lazy_static",
"libloading",
"once_cell",
"path-slash",

View file

@ -24,6 +24,7 @@ cc.workspace = true
dirs.workspace = true
fs4.workspace = true
indoc.workspace = true
lazy_static.workspace = true
libloading.workspace = true
once_cell.workspace = true
path-slash.workspace = true

View file

@ -21,6 +21,7 @@ use anyhow::Error;
use anyhow::{anyhow, Context, Result};
use fs4::fs_std::FileExt;
use indoc::indoc;
use lazy_static::lazy_static;
use libloading::{Library, Symbol};
use once_cell::unsync::OnceCell;
use path_slash::PathBufExt as _;
@ -38,6 +39,10 @@ use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
use url::Url;
lazy_static! {
/// Matches a `"name": "<value>"` JSON member (optional whitespace after the
/// colon) and captures `<value>` in group 1. The capture is non-greedy, so it
/// stops at the first following double quote.
///
/// `grammar_json_name` runs this against the first three lines of a
/// `grammar.json` file to pull out the grammar name.
static ref GRAMMAR_NAME_REGEX: Regex = Regex::new(r#""name":\s*"(.*?)""#).unwrap();
}
pub const EMSCRIPTEN_TAG: &str = concat!("docker.io/emscripten/emsdk:", env!("EMSCRIPTEN_VERSION"));
#[derive(Default, Deserialize, Serialize)]
@ -142,11 +147,7 @@ pub struct TreeSitterJSON {
impl TreeSitterJSON {
/// Loads `tree-sitter.json` from the directory `path` and deserializes it.
///
/// Returns `None` when the file cannot be read or when its contents do not
/// deserialize into `Self`; the underlying error is discarded.
pub fn from_file(path: &Path) -> Option<Self> {
if let Ok(file) = fs::File::open(path.join("tree-sitter.json")) {
Some(serde_json::from_reader(file).ok()?)
} else {
None
}
serde_json::from_str(&fs::read_to_string(path.join("tree-sitter.json")).ok()?).ok()
}
pub fn has_multiple_language_configs(&self) -> bool {
@ -192,7 +193,6 @@ pub struct Metadata {
pub authors: Option<Vec<Author>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub links: Option<Links>,
// #[serde(skip_serializing_if = "Option::is_none")]
#[serde(skip)]
pub namespace: Option<String>,
}
@ -635,27 +635,7 @@ impl Loader {
/// Loads the language whose `grammar.json` lives directly inside
/// `config.src_path`.
///
/// The grammar name is read from that `grammar.json`, stored in
/// `config.name`, and the rest of the work is delegated to
/// `load_language_at_path_with_name`.
///
/// # Errors
///
/// Returns an error if `grammar.json` cannot be read, if the grammar name
/// cannot be extracted from it, or if `load_language_at_path_with_name` fails.
pub fn load_language_at_path(&self, mut config: CompileConfig) -> Result<Language> {
let grammar_path = config.src_path.join("grammar.json");
#[derive(Deserialize)]
struct GrammarJSON {
name: String,
}
let mut grammar_file = fs::File::open(&grammar_path).with_context(|| {
format!(
"Failed to read grammar.json file at the following path:\n{:?}",
&grammar_path
)
})?;
let grammar_json: GrammarJSON = serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| {
format!(
"Failed to parse grammar.json file at the following path:\n{:?}",
&grammar_path
)
})?;
config.name = grammar_json.name;
config.name = Self::grammar_json_name(&grammar_path)?;
self.load_language_at_path_with_name(config)
}
@ -1132,11 +1112,6 @@ impl Loader {
parser_path: &Path,
set_current_path_config: bool,
) -> Result<&[LanguageConfiguration]> {
#[derive(Deserialize)]
struct GrammarJSON {
name: String,
}
let initial_language_configuration_count = self.language_configurations.len();
if let Some(config) = TreeSitterJSON::from_file(parser_path) {
@ -1147,13 +1122,6 @@ impl Loader {
// package.json.
let language_path = parser_path.join(grammar.path);
let grammar_path = language_path.join("src").join("grammar.json");
let mut grammar_file =
fs::File::open(grammar_path).with_context(|| "Failed to read grammar.json")?;
let grammar_json: GrammarJSON =
serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
// Determine if a previous language configuration in this package.json file
// already uses the same language.
let mut language_id = None;
@ -1191,7 +1159,7 @@ impl Loader {
let configuration = LanguageConfiguration {
root_path: parser_path.to_path_buf(),
language_name: grammar_json.name,
language_name: grammar.name,
scope: Some(grammar.scope),
language_id,
file_types: grammar.file_types.unwrap_or_default(),
@ -1239,18 +1207,17 @@ impl Loader {
}
}
// If we didn't find any language configurations in the tree-sitter.json file,
// but there is a grammar.json file, then use the grammar file to form a simple
// language configuration.
if self.language_configurations.len() == initial_language_configuration_count
&& parser_path.join("src").join("grammar.json").exists()
{
let grammar_path = parser_path.join("src").join("grammar.json");
let mut grammar_file =
fs::File::open(grammar_path).with_context(|| "Failed to read grammar.json")?;
let grammar_json: GrammarJSON =
serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
let language_name = Self::grammar_json_name(&grammar_path)?;
let configuration = LanguageConfiguration {
root_path: parser_path.to_owned(),
language_name: grammar_json.name,
language_name,
language_id: self.languages_by_id.len(),
file_types: Vec::new(),
scope: None,
@ -1286,6 +1253,36 @@ impl Loader {
pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok())
}
/// Extracts the grammar name from the `grammar.json` file at `grammar_path`.
///
/// Only the first three lines of the file are read; they are joined with
/// `\n` and searched with `GRAMMAR_NAME_REGEX`, whose first capture group is
/// returned as the name. The file is not parsed as JSON.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, if any of the first three
/// lines cannot be read, or if the regex finds no name in those lines.
fn grammar_json_name(grammar_path: &Path) -> Result<String> {
    let reader = BufReader::new(fs::File::open(grammar_path).with_context(|| {
        format!("Failed to open grammar.json at {}", grammar_path.display())
    })?);

    // Gather at most three lines, bailing out on the first read failure.
    let mut header_lines = Vec::with_capacity(3);
    for line in reader.lines().take(3) {
        header_lines.push(line.with_context(|| {
            format!(
                "Failed to read the first three lines of grammar.json at {}",
                grammar_path.display()
            )
        })?);
    }
    let header = header_lines.join("\n");

    match GRAMMAR_NAME_REGEX
        .captures(&header)
        .and_then(|caps| caps.get(1))
    {
        Some(found) => Ok(found.as_str().to_string()),
        None => Err(anyhow!(
            "Failed to parse the language name from grammar.json at {}",
            grammar_path.display()
        )),
    }
}
pub fn select_language(
&mut self,
path: &Path,

View file

@ -1,6 +1,5 @@
use std::{
fs::{self, File},
io::BufReader,
fs,
path::{Path, PathBuf},
str::{self, FromStr},
};
@ -211,9 +210,9 @@ pub fn migrate_package_json(repo_path: &Path) -> Result<bool> {
root_path.join("tree-sitter.json"),
);
let old_config = serde_json::from_reader::<_, PackageJSON>(
File::open(&package_json_path)
.with_context(|| format!("Failed to open package.json in {}", root_path.display()))?,
let old_config = serde_json::from_str::<PackageJSON>(
&fs::read_to_string(&package_json_path)
.with_context(|| format!("Failed to read package.json in {}", root_path.display()))?,
)?;
if old_config.tree_sitter.is_none() {
@ -339,9 +338,9 @@ pub fn migrate_package_json(repo_path: &Path) -> Result<bool> {
)?;
// Remove the `tree-sitter` field in-place
let mut package_json = serde_json::from_reader::<_, Map<String, Value>>(
File::open(&package_json_path)
.with_context(|| format!("Failed to open package.json in {}", root_path.display()))?,
let mut package_json = serde_json::from_str::<Map<String, Value>>(
&fs::read_to_string(&package_json_path)
.with_context(|| format!("Failed to read package.json in {}", root_path.display()))?,
)
.unwrap();
package_json.remove("tree-sitter");
@ -388,9 +387,9 @@ pub fn generate_grammar_files(
},
)?;
let tree_sitter_config = serde_json::from_reader::<_, TreeSitterJSON>(
File::open(tree_sitter_config.as_path())
.with_context(|| "Failed to open tree-sitter.json")?,
let tree_sitter_config = serde_json::from_str::<TreeSitterJSON>(
&fs::read_to_string(tree_sitter_config.as_path())
.with_context(|| "Failed to read tree-sitter.json")?,
)?;
let authors = tree_sitter_config.metadata.authors.as_ref();
@ -671,15 +670,14 @@ pub fn get_root_path(path: &Path) -> Result<PathBuf> {
let json = pathbuf
.exists()
.then(|| {
let file = File::open(pathbuf.as_path())
.with_context(|| format!("Failed to open {filename}"))?;
let reader = BufReader::new(file);
let contents = fs::read_to_string(pathbuf.as_path())
.with_context(|| format!("Failed to read {filename}"))?;
if is_package_json {
serde_json::from_reader::<_, Map<String, Value>>(reader)
serde_json::from_str::<Map<String, Value>>(&contents)
.context(format!("Failed to parse {filename}"))
.map(|v| v.contains_key("tree-sitter"))
} else {
serde_json::from_reader::<_, TreeSitterJSON>(reader)
serde_json::from_str::<TreeSitterJSON>(&contents)
.context(format!("Failed to parse {filename}"))
.map(|_| true)
}

View file

@ -653,11 +653,11 @@ impl Init {
(opts.name.clone(), Some(opts))
} else {
let json = serde_json::from_reader::<_, TreeSitterJSON>(
fs::File::open(current_dir.join("tree-sitter.json"))
.with_context(|| "Failed to open tree-sitter.json")?,
let mut json = serde_json::from_str::<TreeSitterJSON>(
&fs::read_to_string(current_dir.join("tree-sitter.json"))
.with_context(|| "Failed to read tree-sitter.json")?,
)?;
(json.grammars[0].name.clone(), None)
(json.grammars.swap_remove(0).name, None)
};
generate_grammar_files(current_dir, &language_name, self.update, json_config_opts)?;