From aac741dfd1e0825d17749a9aec756d2d6ac60e6a Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Sat, 12 Oct 2024 00:57:51 -0400 Subject: [PATCH] perf(loader): improve language lookup speed (cherry picked from commit 72f114fa126bb43472549f9a1e6f2b4d7b6bfb75) --- Cargo.lock | 1 + cli/loader/Cargo.toml | 1 + cli/loader/src/lib.rs | 89 +++++++++++++++++++++---------------------- cli/src/init.rs | 30 +++++++-------- cli/src/main.rs | 8 ++-- 5 files changed, 63 insertions(+), 66 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0b8fd339..3d52abb9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1654,6 +1654,7 @@ dependencies = [ "dirs", "fs4", "indoc", + "lazy_static", "libloading", "once_cell", "path-slash", diff --git a/cli/loader/Cargo.toml b/cli/loader/Cargo.toml index ec2b35d1..65025127 100644 --- a/cli/loader/Cargo.toml +++ b/cli/loader/Cargo.toml @@ -24,6 +24,7 @@ cc.workspace = true dirs.workspace = true fs4.workspace = true indoc.workspace = true +lazy_static.workspace = true libloading.workspace = true once_cell.workspace = true path-slash.workspace = true diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs index f07f5eac..1eb094c3 100644 --- a/cli/loader/src/lib.rs +++ b/cli/loader/src/lib.rs @@ -21,6 +21,7 @@ use anyhow::Error; use anyhow::{anyhow, Context, Result}; use fs4::fs_std::FileExt; use indoc::indoc; +use lazy_static::lazy_static; use libloading::{Library, Symbol}; use once_cell::unsync::OnceCell; use path_slash::PathBufExt as _; @@ -38,6 +39,10 @@ use tree_sitter_highlight::HighlightConfiguration; use tree_sitter_tags::{Error as TagsError, TagsConfiguration}; use url::Url; +lazy_static! { + static ref GRAMMAR_NAME_REGEX: Regex = Regex::new(r#""name":\s*"(.*?)""#).unwrap(); +} + pub const EMSCRIPTEN_TAG: &str = concat!("docker.io/emscripten/emsdk:", env!("EMSCRIPTEN_VERSION")); #[derive(Default, Deserialize, Serialize)] @@ -142,11 +147,7 @@ pub struct TreeSitterJSON { impl TreeSitterJSON { pub fn from_file(path: &Path) -> Option { - if let Ok(file) = fs::File::open(path.join("tree-sitter.json")) { - Some(serde_json::from_reader(file).ok()?) - } else { - None - } + serde_json::from_str(&fs::read_to_string(path.join("tree-sitter.json")).ok()?).ok() } pub fn has_multiple_language_configs(&self) -> bool { @@ -192,7 +193,6 @@ pub struct Metadata { pub authors: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub links: Option, - // #[serde(skip_serializing_if = "Option::is_none")] #[serde(skip)] pub namespace: Option, } @@ -635,27 +635,7 @@ impl Loader { pub fn load_language_at_path(&self, mut config: CompileConfig) -> Result { let grammar_path = config.src_path.join("grammar.json"); - - #[derive(Deserialize)] - struct GrammarJSON { - name: String, - } - let mut grammar_file = fs::File::open(&grammar_path).with_context(|| { - format!( - "Failed to read grammar.json file at the following path:\n{:?}", - &grammar_path - ) - })?; - let grammar_json: GrammarJSON = serde_json::from_reader(BufReader::new(&mut grammar_file)) - .with_context(|| { - format!( - "Failed to parse grammar.json file at the following path:\n{:?}", - &grammar_path - ) - })?; - - config.name = grammar_json.name; - + config.name = Self::grammar_json_name(&grammar_path)?; self.load_language_at_path_with_name(config) } @@ -1132,11 +1112,6 @@ impl Loader { parser_path: &Path, set_current_path_config: bool, ) -> Result<&[LanguageConfiguration]> { - #[derive(Deserialize)] - struct GrammarJSON { - name: String, - } - let initial_language_configuration_count = self.language_configurations.len(); if let Some(config) = TreeSitterJSON::from_file(parser_path) { @@ -1147,13 +1122,6 @@ impl Loader { // package.json. let language_path = parser_path.join(grammar.path); - let grammar_path = language_path.join("src").join("grammar.json"); - let mut grammar_file = - fs::File::open(grammar_path).with_context(|| "Failed to read grammar.json")?; - let grammar_json: GrammarJSON = - serde_json::from_reader(BufReader::new(&mut grammar_file)) - .with_context(|| "Failed to parse grammar.json")?; - // Determine if a previous language configuration in this package.json file // already uses the same language. let mut language_id = None; @@ -1191,7 +1159,7 @@ impl Loader { let configuration = LanguageConfiguration { root_path: parser_path.to_path_buf(), - language_name: grammar_json.name, + language_name: grammar.name, scope: Some(grammar.scope), language_id, file_types: grammar.file_types.unwrap_or_default(), @@ -1239,18 +1207,17 @@ impl Loader { } } + // If we didn't find any language configurations in the tree-sitter.json file, + // but there is a grammar.json file, then use the grammar file to form a simple + // language configuration. if self.language_configurations.len() == initial_language_configuration_count && parser_path.join("src").join("grammar.json").exists() { let grammar_path = parser_path.join("src").join("grammar.json"); - let mut grammar_file = - fs::File::open(grammar_path).with_context(|| "Failed to read grammar.json")?; - let grammar_json: GrammarJSON = - serde_json::from_reader(BufReader::new(&mut grammar_file)) - .with_context(|| "Failed to parse grammar.json")?; + let language_name = Self::grammar_json_name(&grammar_path)?; let configuration = LanguageConfiguration { root_path: parser_path.to_owned(), - language_name: grammar_json.name, + language_name, language_id: self.languages_by_id.len(), file_types: Vec::new(), scope: None, @@ -1286,6 +1253,36 @@ impl Loader { pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok()) } + fn grammar_json_name(grammar_path: &Path) -> Result { + let file = fs::File::open(grammar_path).with_context(|| { + format!("Failed to open grammar.json at {}", grammar_path.display()) + })?; + + let first_three_lines = BufReader::new(file) + .lines() + .take(3) + .collect::, _>>() + .with_context(|| { + format!( + "Failed to read the first three lines of grammar.json at {}", + grammar_path.display() + ) + })? + .join("\n"); + + let name = GRAMMAR_NAME_REGEX + .captures(&first_three_lines) + .and_then(|c| c.get(1)) + .ok_or_else(|| { + anyhow!( + "Failed to parse the language name from grammar.json at {}", + grammar_path.display() + ) + })?; + + Ok(name.as_str().to_string()) + } + pub fn select_language( &mut self, path: &Path, diff --git a/cli/src/init.rs b/cli/src/init.rs index 6294f2e1..724583d3 100644 --- a/cli/src/init.rs +++ b/cli/src/init.rs @@ -1,6 +1,5 @@ use std::{ - fs::{self, File}, - io::BufReader, + fs, path::{Path, PathBuf}, str::{self, FromStr}, }; @@ -211,9 +210,9 @@ pub fn migrate_package_json(repo_path: &Path) -> Result { root_path.join("tree-sitter.json"), ); - let old_config = serde_json::from_reader::<_, PackageJSON>( - File::open(&package_json_path) - .with_context(|| format!("Failed to open package.json in {}", root_path.display()))?, + let old_config = serde_json::from_str::( + &fs::read_to_string(&package_json_path) + .with_context(|| format!("Failed to read package.json in {}", root_path.display()))?, )?; if old_config.tree_sitter.is_none() { @@ -339,9 +338,9 @@ pub fn migrate_package_json(repo_path: &Path) -> Result { )?; // Remove the `tree-sitter` field in-place - let mut package_json = serde_json::from_reader::<_, Map>( - File::open(&package_json_path) - .with_context(|| format!("Failed to open package.json in {}", root_path.display()))?, + let mut package_json = serde_json::from_str::>( + &fs::read_to_string(&package_json_path) + .with_context(|| format!("Failed to read package.json in {}", root_path.display()))?, ) .unwrap(); package_json.remove("tree-sitter"); @@ -388,9 +387,9 @@ pub fn generate_grammar_files( }, )?; - let tree_sitter_config = serde_json::from_reader::<_, TreeSitterJSON>( - File::open(tree_sitter_config.as_path()) - .with_context(|| "Failed to open tree-sitter.json")?, + let tree_sitter_config = serde_json::from_str::( + &fs::read_to_string(tree_sitter_config.as_path()) + .with_context(|| "Failed to read tree-sitter.json")?, )?; let authors = tree_sitter_config.metadata.authors.as_ref(); @@ -671,15 +670,14 @@ pub fn get_root_path(path: &Path) -> Result { let json = pathbuf .exists() .then(|| { - let file = File::open(pathbuf.as_path()) - .with_context(|| format!("Failed to open {filename}"))?; - let reader = BufReader::new(file); + let contents = fs::read_to_string(pathbuf.as_path()) + .with_context(|| format!("Failed to read {filename}"))?; if is_package_json { - serde_json::from_reader::<_, Map>(reader) + serde_json::from_str::>(&contents) .context(format!("Failed to parse {filename}")) .map(|v| v.contains_key("tree-sitter")) } else { - serde_json::from_reader::<_, TreeSitterJSON>(reader) + serde_json::from_str::(&contents) .context(format!("Failed to parse {filename}")) .map(|_| true) } diff --git a/cli/src/main.rs b/cli/src/main.rs index 1758fada..4926f256 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -653,11 +653,11 @@ impl Init { (opts.name.clone(), Some(opts)) } else { - let json = serde_json::from_reader::<_, TreeSitterJSON>( - fs::File::open(current_dir.join("tree-sitter.json")) - .with_context(|| "Failed to open tree-sitter.json")?, + let mut json = serde_json::from_str::( + &fs::read_to_string(current_dir.join("tree-sitter.json")) + .with_context(|| "Failed to read tree-sitter.json")?, )?; - (json.grammars[0].name.clone(), None) + (json.grammars.swap_remove(0).name, None) }; generate_grammar_files(current_dir, &language_name, self.update, json_config_opts)?;