From 867433afd7726c331a451e4450f36359260a25f0 Mon Sep 17 00:00:00 2001
From: WillLillis
Date: Tue, 24 Dec 2024 19:16:19 -0500
Subject: [PATCH] feat(rust): use `thiserror` for `generate` crate

Co-authored-by: Amaan Qureshi
---
 Cargo.lock                                    |   1 +
 cli/generate/Cargo.toml                       |   1 +
 .../src/build_tables/build_parse_table.rs     | 392 ++++++++++++------
 cli/generate/src/build_tables/mod.rs          |   5 +-
 cli/generate/src/lib.rs                       | 179 ++++++--
 cli/generate/src/node_types.rs                |  21 +-
 cli/generate/src/parse_grammar.rs             |  50 ++-
 .../src/prepare_grammar/expand_tokens.rs      | 115 ++++-
 .../src/prepare_grammar/extract_tokens.rs     |  73 ++--
 .../src/prepare_grammar/flatten_grammar.rs    |  54 ++-
 .../src/prepare_grammar/intern_symbols.rs     |  40 +-
 cli/generate/src/prepare_grammar/mod.rs       |  87 +++-
 .../src/prepare_grammar/process_inlines.rs    |  39 +-
 cli/generate/src/rules.rs                     |  15 +-
 cli/src/init.rs                               |  21 +-
 cli/src/main.rs                               |  55 ++-
 cli/src/parse.rs                              |  44 +-
 17 files changed, 821 insertions(+), 371 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index e054afb2..c52f7a5a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1870,6 +1870,7 @@ dependencies = [
  "serde",
  "serde_json",
  "smallbitvec",
+ "thiserror 2.0.9",
  "tree-sitter",
  "url",
 ]

diff --git a/cli/generate/Cargo.toml b/cli/generate/Cargo.toml
index 8f374ae1..e1170891 100644
--- a/cli/generate/Cargo.toml
+++ b/cli/generate/Cargo.toml
@@ -29,6 +29,7 @@ semver.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 smallbitvec.workspace = true
+thiserror.workspace = true
 url.workspace = true
 tree-sitter.workspace = true

diff --git a/cli/generate/src/build_tables/build_parse_table.rs b/cli/generate/src/build_tables/build_parse_table.rs
index 5e56b9fa..403fa57f 100644
--- a/cli/generate/src/build_tables/build_parse_table.rs
+++ b/cli/generate/src/build_tables/build_parse_table.rs
@@ -1,13 +1,13 @@
 use std::{
     cmp::Ordering,
-    collections::{BTreeMap, HashMap, HashSet, VecDeque},
-    fmt::Write,
+    collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque},
     hash::BuildHasherDefault,
 };
 
-use anyhow::{anyhow, Result};
 use indexmap::{map::Entry, IndexMap};
 use rustc_hash::FxHasher;
+use serde::Serialize;
+use thiserror::Error;
 
 use super::{
     item::{ParseItem, ParseItemSet, ParseItemSetCore, ParseItemSetEntry},
@@ -64,6 +64,176 @@ struct ParseTableBuilder<'a> {
     parse_table: ParseTable,
 }
 
+pub type BuildTableResult<T> = Result<T, ParseTableBuilderError>;
+
+#[derive(Debug, Error, Serialize)]
+pub enum ParseTableBuilderError {
+    #[error("Unresolved conflict for symbol sequence:\n\n{0}")]
+    Conflict(#[from] ConflictError),
+    #[error("Extra rules must have unambiguous endings. Conflicting rules: {0}")]
+    AmbiguousExtra(#[from] AmbiguousExtraError),
+}
+
+#[derive(Default, Debug, Serialize)]
+pub struct ConflictError {
+    pub symbol_sequence: Vec<String>,
+    pub conflicting_lookahead: String,
+    pub possible_interpretations: Vec<Interpretation>,
+    pub possible_resolutions: Vec<Resolution>,
+}
+
+#[derive(Default, Debug, Serialize)]
+pub struct Interpretation {
+    pub preceding_symbols: Vec<String>,
+    pub variable_name: String,
+    pub production_step_symbols: Vec<String>,
+    pub step_index: u32,
+    pub done: bool,
+    pub conflicting_lookahead: String,
+    pub precedence: Option<String>,
+    pub associativity: Option<String>,
+}
+
+#[derive(Debug, Serialize)]
+pub enum Resolution {
+    Precedence { symbols: Vec<String> },
+    Associativity { symbols: Vec<String> },
+    AddConflict { symbols: Vec<String> },
+}
+
+#[derive(Debug, Serialize)]
+pub struct AmbiguousExtraError {
+    pub parent_symbols: Vec<String>,
+}
+
+impl std::fmt::Display for ConflictError {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        for symbol in &self.symbol_sequence {
+            write!(f, " {symbol}")?;
+        }
+        writeln!(f, " • {} …\n", self.conflicting_lookahead)?;
+
+        writeln!(f, "Possible interpretations:\n")?;
+        let mut interpretations = self
+            .possible_interpretations
+            .iter()
+            .map(|i| {
+                let line = i.to_string();
+                let prec_line = if let (Some(precedence), Some(associativity)) =
+                    (&i.precedence, &i.associativity)
+                {
+                    Some(format!(
+                        "(precedence: {precedence}, associativity: {associativity})",
+                    ))
+                } else {
+                    i.precedence
+                        .as_ref()
+                        .map(|precedence| format!("(precedence: {precedence})"))
+                };
+
+                (line, prec_line)
+            })
+            .collect::<Vec<_>>();
+        let max_interpretation_length = interpretations
+            .iter()
+            .map(|i| i.0.chars().count())
+            .max()
+            .unwrap();
+        interpretations.sort_unstable();
+        for (i, (line, prec_suffix)) in interpretations.into_iter().enumerate() {
+            write!(f, " {}:", i + 1).unwrap();
+            write!(f, "{line}")?;
+            if let Some(prec_suffix) = prec_suffix {
+                write!(
+                    f,
+                    "{:1$}",
+                    "",
+                    max_interpretation_length.saturating_sub(line.chars().count()) + 2
+                )?;
+                write!(f, "{prec_suffix}")?;
+            }
+            writeln!(f)?;
+        }
+
+        writeln!(f, "\nPossible resolutions:\n")?;
+        for (i, resolution) in self.possible_resolutions.iter().enumerate() {
+            writeln!(f, " {}: {resolution}", i + 1)?;
+        }
+        Ok(())
+    }
+}
+
+impl std::fmt::Display for Interpretation {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        for symbol in &self.preceding_symbols {
+            write!(f, " {symbol}")?;
+        }
+        write!(f, " ({}", self.variable_name)?;
+        for (i, symbol) in self.production_step_symbols.iter().enumerate() {
+            if i == self.step_index as usize {
+                write!(f, " •")?;
+            }
+            write!(f, " {symbol}")?;
+        }
+        write!(f, ")")?;
+        if self.done {
+            write!(f, " • {} …", self.conflicting_lookahead)?;
+        }
+        Ok(())
+    }
+}
+
+impl std::fmt::Display for Resolution {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            Self::Precedence { symbols } => {
+                write!(f, "Specify a higher precedence in ")?;
+                for (i, symbol) in symbols.iter().enumerate() {
+                    if i > 0 {
+                        write!(f, " and ")?;
+                    }
+                    write!(f, "`{symbol}`")?;
+                }
+                write!(f, " than in the other rules.")?;
+            }
+            Self::Associativity { symbols } => {
+                write!(f, "Specify a left or right associativity in ")?;
+                for (i, symbol) in symbols.iter().enumerate() {
+                    if i > 0 {
+                        write!(f, ", ")?;
+                    }
+                    write!(f, "`{symbol}`")?;
+                }
+            }
+            Self::AddConflict { symbols } => {
+                write!(f, "Add a conflict for these rules: ")?;
+                for (i, symbol) in symbols.iter().enumerate() {
+                    if i > 0 {
+                        write!(f, ", ")?;
+                    }
+                    write!(f, "`{symbol}`")?;
+                }
+            }
+        }
+        Ok(())
+    }
+}
+
+impl std::fmt::Display for AmbiguousExtraError {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        for (i, symbol) in self.parent_symbols.iter().enumerate() {
+            if i > 0 {
+                write!(f, ", ")?;
+            }
+            write!(f, "{symbol}")?;
+        }
+        Ok(())
+    }
+}
+
+impl std::error::Error for ConflictError {}
+impl std::error::Error for AmbiguousExtraError {}
+
 impl<'a> ParseTableBuilder<'a> {
     fn new(
         syntax_grammar: &'a SyntaxGrammar,
@@ -92,7 +262,7 @@ impl<'a> ParseTableBuilder<'a> {
         }
     }
 
-    fn build(mut self) -> Result<(ParseTable, Vec<ParseStateInfo<'a>>)> {
+    fn build(mut self) -> BuildTableResult<(ParseTable, Vec<ParseStateInfo<'a>>)> {
         // Ensure that the empty alias sequence has index 0.
         self.parse_table
             .production_infos
@@ -222,7 +392,7 @@ impl<'a> ParseTableBuilder<'a> {
         mut preceding_auxiliary_symbols: AuxiliarySymbolSequence,
         state_id: ParseStateId,
         item_set: &ParseItemSet<'a>,
-    ) -> Result<()> {
+    ) -> BuildTableResult<()> {
         let mut terminal_successors = BTreeMap::new();
         let mut non_terminal_successors = BTreeMap::new();
         let mut lookaheads_with_conflicts = TokenSet::new();
@@ -426,15 +596,18 @@ impl<'a> ParseTableBuilder<'a> {
                     }
                 })
                 .collect::<Vec<_>>();
-            let mut message =
-                "Extra rules must have unambiguous endings. Conflicting rules: ".to_string();
-            for (i, variable_index) in parent_symbols.iter().enumerate() {
-                if i > 0 {
-                    message += ", ";
-                }
-                message += &self.syntax_grammar.variables[*variable_index as usize].name;
-            }
-            return Err(anyhow!(message));
+            let parent_symbol_names = parent_symbols
+                .iter()
+                .map(|&variable_index| {
+                    self.syntax_grammar.variables[variable_index as usize]
+                        .name
+                        .clone()
+                })
+                .collect::<Vec<_>>();
+
+            Err(AmbiguousExtraError {
+                parent_symbols: parent_symbol_names,
+            })?;
             }
         }
         // Add actions for the start tokens of each non-terminal extra rule.
@@ -507,7 +680,7 @@ impl<'a> ParseTableBuilder<'a> {
         preceding_auxiliary_symbols: &[AuxiliarySymbolInfo],
         conflicting_lookahead: Symbol,
         reduction_info: &ReductionInfo,
-    ) -> Result<()> {
+    ) -> BuildTableResult<()> {
         let entry = self.parse_table.states[state_id]
             .terminal_entries
             .get_mut(&conflicting_lookahead)
@@ -521,7 +694,7 @@ impl<'a> ParseTableBuilder<'a> {
         // precedence, and there can still be SHIFT/REDUCE conflicts.
         let mut considered_associativity = false;
         let mut shift_precedence = Vec::<(&Precedence, Symbol)>::new();
-        let mut conflicting_items = HashSet::new();
+        let mut conflicting_items = BTreeSet::new();
         for ParseItemSetEntry {
             item, lookaheads, ..
         } in &item_set.entries
@@ -662,93 +835,55 @@ impl<'a> ParseTableBuilder<'a> {
             return Ok(());
         }
 
-        let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string();
+        let mut conflict_error = ConflictError::default();
         for symbol in preceding_symbols {
-            write!(&mut msg, " {}", self.symbol_name(symbol)).unwrap();
+            conflict_error
+                .symbol_sequence
+                .push(self.symbol_name(symbol).to_string());
         }
+        conflict_error.conflicting_lookahead = self.symbol_name(&conflicting_lookahead).to_string();
 
-        writeln!(
-            &mut msg,
-            " • {} …\n",
-            self.symbol_name(&conflicting_lookahead)
-        )
-        .unwrap();
-        writeln!(&mut msg, "Possible interpretations:\n").unwrap();
-
-        let mut interpretations = conflicting_items
+        let interpretations = conflicting_items
             .iter()
             .map(|item| {
-                let mut line = String::new();
-                for preceding_symbol in preceding_symbols
+                let preceding_symbols = preceding_symbols
                     .iter()
                     .take(preceding_symbols.len() - item.step_index as usize)
-                {
-                    write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap();
-                }
+                    .map(|symbol| self.symbol_name(symbol).to_string())
+                    .collect::<Vec<_>>();
 
-                write!(
-                    &mut line,
-                    " ({}",
-                    &self.syntax_grammar.variables[item.variable_index as usize].name
-                )
-                .unwrap();
+                let variable_name = self.syntax_grammar.variables[item.variable_index as usize]
+                    .name
+                    .clone();
 
-                for (j, step) in item.production.steps.iter().enumerate() {
-                    if j as u32 == item.step_index {
-                        write!(&mut line, " •").unwrap();
-                    }
-                    write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap();
-                }
+                let production_step_symbols = item
+                    .production
+                    .steps
+                    .iter()
+                    .map(|step| self.symbol_name(&step.symbol).to_string())
+                    .collect::<Vec<_>>();
 
-                write!(&mut line, ")").unwrap();
-
-                if item.is_done() {
-                    write!(
-                        &mut line,
-                        " • {} …",
-                        self.symbol_name(&conflicting_lookahead)
-                    )
-                    .unwrap();
-                }
-
-                let precedence = item.precedence();
-                let associativity = item.associativity();
-
-                let prec_line = if let Some(associativity) = associativity {
-                    Some(format!(
-                        "(precedence: {precedence}, associativity: {associativity:?})",
-                    ))
-                } else if !precedence.is_none() {
-                    Some(format!("(precedence: {precedence})"))
-                } else {
-                    None
+                let precedence = match item.precedence() {
+                    Precedence::None => None,
+                    _ => Some(item.precedence().to_string()),
                 };
 
-                (line, prec_line)
+                let associativity = item.associativity().map(|assoc| format!("{assoc:?}"));
+
+                Interpretation {
+                    preceding_symbols,
+                    variable_name,
+                    production_step_symbols,
+                    step_index: item.step_index,
+                    done: item.is_done(),
+                    conflicting_lookahead: self.symbol_name(&conflicting_lookahead).to_string(),
+                    precedence,
+                    associativity,
+                }
             })
             .collect::<Vec<_>>();
+        conflict_error.possible_interpretations = interpretations;
 
-        let max_interpretation_length = interpretations
-            .iter()
-            .map(|i| i.0.chars().count())
-            .max()
-            .unwrap();
-        interpretations.sort_unstable();
-        for (i, (line, prec_suffix)) in interpretations.into_iter().enumerate() {
-            write!(&mut msg, " {}:", i + 1).unwrap();
-            msg += &line;
-            if let Some(prec_suffix) = prec_suffix {
-                for _ in line.chars().count()..max_interpretation_length {
-                    msg.push(' ');
-                }
-                msg += " ";
-                msg += &prec_suffix;
-            }
-            msg.push('\n');
-        }
-
-        let mut resolution_count = 0;
-        writeln!(&mut msg, "\nPossible resolutions:\n").unwrap();
         let mut shift_items = Vec::new();
         let mut reduce_items = Vec::new();
         for item in conflicting_items {
@@ -761,76 +896,57 @@ impl<'a> ParseTableBuilder<'a> {
         shift_items.sort_unstable();
         reduce_items.sort_unstable();
 
-        let list_rule_names = |mut msg: &mut String, items: &[&ParseItem]| {
+        let get_rule_names = |items: &[&ParseItem]| -> Vec<String> {
             let mut last_rule_id = None;
+            let mut result = Vec::new();
             for item in items {
                 if last_rule_id == Some(item.variable_index) {
                     continue;
                 }
-
-                if last_rule_id.is_some() {
-                    write!(&mut msg, " and").unwrap();
-                }
-
                 last_rule_id = Some(item.variable_index);
-                write!(
-                    msg,
-                    " `{}`",
-                    self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
-                )
-                .unwrap();
+                result.push(self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)));
             }
+
+            result
         };
 
         if actual_conflict.len() > 1 {
             if !shift_items.is_empty() {
-                resolution_count += 1;
-                write!(
-                    &mut msg,
-                    " {resolution_count}: Specify a higher precedence in",
-                )
-                .unwrap();
-                list_rule_names(&mut msg, &shift_items);
-                writeln!(&mut msg, " than in the other rules.").unwrap();
+                let names = get_rule_names(&shift_items);
+                conflict_error
+                    .possible_resolutions
+                    .push(Resolution::Precedence { symbols: names });
             }
 
             for item in &reduce_items {
-                resolution_count += 1;
-                writeln!(
-                    &mut msg,
-                    " {resolution_count}: Specify a higher precedence in `{}` than in the other rules.",
-                    self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
-                )
-                .unwrap();
+                let name = self.symbol_name(&Symbol::non_terminal(item.variable_index as usize));
+                conflict_error
+                    .possible_resolutions
+                    .push(Resolution::Precedence {
+                        symbols: vec![name],
+                    });
             }
         }
 
         if considered_associativity {
-            resolution_count += 1;
-            write!(
-                &mut msg,
-                " {resolution_count}: Specify a left or right associativity in",
-            )
-            .unwrap();
-            list_rule_names(&mut msg, &reduce_items);
-            writeln!(&mut msg).unwrap();
+            let names = get_rule_names(&reduce_items);
+            conflict_error
+                .possible_resolutions
+                .push(Resolution::Associativity { symbols: names });
        }
 
-        resolution_count += 1;
-        write!(
-            &mut msg,
-            " {resolution_count}: Add a conflict for these rules: ",
-        )
-        .unwrap();
-        for (i, symbol) in actual_conflict.iter().enumerate() {
-            if i > 0 {
-                write!(&mut msg, ", ").unwrap();
-            }
-            write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap();
-        }
-        writeln!(&mut msg).unwrap();
+        conflict_error
+            .possible_resolutions
+            .push(Resolution::AddConflict {
+                symbols: actual_conflict
+                    .iter()
+                    .map(|s| self.symbol_name(s))
+                    .collect(),
+            });
 
-        Err(anyhow!(msg))
+        self.actual_conflicts.insert(actual_conflict);
+
+        Err(conflict_error)?
     }
 
     fn compare_precedence(
@@ -999,7 +1115,7 @@ pub fn build_parse_table<'a>(
     lexical_grammar: &'a LexicalGrammar,
     item_set_builder: ParseItemSetBuilder<'a>,
     variable_info: &'a [VariableInfo],
-) -> Result<(ParseTable, Vec<ParseStateInfo<'a>>)> {
+) -> BuildTableResult<(ParseTable, Vec<ParseStateInfo<'a>>)> {
     ParseTableBuilder::new(
         syntax_grammar,
         lexical_grammar,

diff --git a/cli/generate/src/build_tables/mod.rs b/cli/generate/src/build_tables/mod.rs
index 6aad09af..aa9ef4e9 100644
--- a/cli/generate/src/build_tables/mod.rs
+++ b/cli/generate/src/build_tables/mod.rs
@@ -8,8 +8,9 @@ mod token_conflicts;
 
 use std::collections::{BTreeSet, HashMap};
 
-use anyhow::Result;
 pub use build_lex_table::LARGE_CHARACTER_RANGE_COUNT;
+use build_parse_table::BuildTableResult;
+pub use build_parse_table::ParseTableBuilderError;
 use log::info;
 
 use self::{
@@ -42,7 +43,7 @@ pub fn build_tables(
     variable_info: &[VariableInfo],
     inlines: &InlinedProductionMap,
     report_symbol_name: Option<&str>,
-) -> Result<Tables> {
+) -> BuildTableResult<Tables> {
     let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
     let following_tokens =
         get_following_tokens(syntax_grammar, lexical_grammar, inlines, &item_set_builder);

diff --git a/cli/generate/src/lib.rs b/cli/generate/src/lib.rs
index 6f8f6ee7..89d68a5b 100644
--- a/cli/generate/src/lib.rs
+++ b/cli/generate/src/lib.rs
@@ -5,12 +5,15 @@ use std::{
     process::{Command, Stdio},
 };
 
-use anyhow::{anyhow, Context, Result};
+use anyhow::Result;
 use build_tables::build_tables;
 use grammars::InputGrammar;
 use lazy_static::lazy_static;
+pub use node_types::VariableInfoError;
 use parse_grammar::parse_grammar;
+pub use parse_grammar::ParseGrammarError;
 use prepare_grammar::prepare_grammar;
+pub use prepare_grammar::PrepareGrammarError;
 use regex::{Regex, RegexBuilder};
 use render::render_c_code;
 use semver::Version;
@@ -27,6 +30,10 @@ mod render;
 mod rules;
 mod tables;
 
+pub use build_tables::ParseTableBuilderError;
+use serde::Serialize;
+use thiserror::Error;
+
 lazy_static! {
     static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
         .multi_line(true)
@@ -42,6 +49,88 @@ struct GeneratedParser {
 
 pub const ALLOC_HEADER: &str = include_str!("templates/alloc.h");
 pub const ARRAY_HEADER: &str = include_str!("templates/array.h");
 
+pub type GenerateResult<T> = Result<T, GenerateError>;
+
+#[derive(Debug, Error, Serialize)]
+pub enum GenerateError {
+    #[error("Error with specified path -- {0}")]
+    GrammarPath(String),
+    #[error("{0}")]
+    IO(String),
+    #[error(transparent)]
+    LoadGrammarFile(#[from] LoadGrammarError),
+    #[error(transparent)]
+    ParseGrammar(#[from] ParseGrammarError),
+    #[error(transparent)]
+    Prepare(#[from] PrepareGrammarError),
+    #[error(transparent)]
+    VariableInfo(#[from] VariableInfoError),
+    #[error(transparent)]
+    BuildTables(#[from] ParseTableBuilderError),
+}
+
+impl From<std::io::Error> for GenerateError {
+    fn from(value: std::io::Error) -> Self {
+        Self::IO(value.to_string())
+    }
+}
+
+pub type LoadGrammarFileResult<T> = Result<T, LoadGrammarError>;
+
+#[derive(Debug, Error, Serialize)]
+pub enum LoadGrammarError {
+    #[error("Path to a grammar file with `.js` or `.json` extension is required")]
+    InvalidPath,
+    #[error("Failed to load grammar.js -- {0}")]
+    LoadJSGrammarFile(#[from] JSError),
+    #[error("Failed to load grammar.json -- {0}")]
+    IO(String),
+    #[error("Unknown grammar file extension: {0:?}")]
+    FileExtension(PathBuf),
+}
+
+impl From<std::io::Error> for LoadGrammarError {
+    fn from(value: std::io::Error) -> Self {
+        Self::IO(value.to_string())
+    }
+}
+
+pub type JSResult<T> = Result<T, JSError>;
+
+#[derive(Debug, Error, Serialize)]
+pub enum JSError {
+    #[error("Failed to run `{runtime}` -- {error}")]
+    JSRuntimeSpawn { runtime: String, error: String },
+    #[error("Got invalid UTF8 from `{runtime}` -- {error}")]
+    JSRuntimeUtf8 { runtime: String, error: String },
+    #[error("`{runtime}` process exited with status {code}")]
+    JSRuntimeExit { runtime: String, code: i32 },
+    #[error("{0}")]
+    IO(String),
+    #[error("Could not parse this package's version as semver -- {0}")]
+    Semver(String),
+    #[error("Failed to serialize grammar JSON -- {0}")]
+    Serialization(String),
+}
+
+impl From<std::io::Error> for JSError {
+    fn from(value: std::io::Error) -> Self {
+        Self::IO(value.to_string())
+    }
+}
+
+impl From<serde_json::Error> for JSError {
+    fn from(value: serde_json::Error) -> Self {
+        Self::Serialization(value.to_string())
+    }
+}
+
+impl From<semver::Error> for JSError {
+    fn from(value: semver::Error) -> Self {
+        Self::Semver(value.to_string())
+    }
+}
+
 pub fn generate_parser_in_directory(
     repo_path: &Path,
     out_path: Option<&str>,
@@ -49,7 +138,7 @@ pub fn generate_parser_in_directory(
     abi_version: usize,
     report_symbol_name: Option<&str>,
     js_runtime: Option<&str>,
-) -> Result<()> {
+) -> GenerateResult<()> {
     let mut repo_path = repo_path.to_owned();
     let mut grammar_path = grammar_path;
 
@@ -58,7 +147,7 @@ pub fn generate_parser_in_directory(
         let path = PathBuf::from(path);
         if !path
             .try_exists()
-            .with_context(|| "Some error with specified path")?
+            .map_err(|e| GenerateError::GrammarPath(e.to_string()))?
         {
             fs::create_dir_all(&path)?;
             grammar_path = None;
@@ -79,8 +168,11 @@ pub fn generate_parser_in_directory(
     fs::create_dir_all(&header_path)?;
 
     if grammar_path.file_name().unwrap() != "grammar.json" {
-        fs::write(src_path.join("grammar.json"), &grammar_json)
-            .with_context(|| format!("Failed to write grammar.json to {src_path:?}"))?;
+        fs::write(src_path.join("grammar.json"), &grammar_json).map_err(|e| {
+            GenerateError::IO(format!(
+                "Failed to write grammar.json to {src_path:?} -- {e}"
+            ))
+        })?;
     }
 
     // Parse and preprocess the grammar.
@@ -101,7 +193,7 @@ pub fn generate_parser_in_directory(
     Ok(())
 }
 
-pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> {
+pub fn generate_parser_for_grammar(grammar_json: &str) -> GenerateResult<(String, String)> {
     let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
     let input_grammar = parse_grammar(&grammar_json)?;
     let parser =
@@ -113,7 +205,7 @@ fn generate_parser_for_grammar_with_opts(
     input_grammar: &InputGrammar,
     abi_version: usize,
     report_symbol_name: Option<&str>,
-) -> Result<GeneratedParser> {
+) -> GenerateResult<GeneratedParser> {
     let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
         prepare_grammar(input_grammar)?;
     let variable_info =
@@ -149,23 +241,21 @@ fn generate_parser_for_grammar_with_opts(
     })
 }
 
-pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
+pub fn load_grammar_file(
+    grammar_path: &Path,
+    js_runtime: Option<&str>,
+) -> LoadGrammarFileResult<String> {
     if grammar_path.is_dir() {
-        return Err(anyhow!(
-            "Path to a grammar file with `.js` or `.json` extension is required"
-        ));
+        Err(LoadGrammarError::InvalidPath)?;
     }
     match grammar_path.extension().and_then(|e| e.to_str()) {
-        Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime)
-            .with_context(|| "Failed to load grammar.js")?),
-        Some("json") => {
-            Ok(fs::read_to_string(grammar_path).with_context(|| "Failed to load grammar.json")?)
-        }
-        _ => Err(anyhow!("Unknown grammar file extension: {grammar_path:?}",)),
+        Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime)?),
+        Some("json") => Ok(fs::read_to_string(grammar_path)?),
+        _ => Err(LoadGrammarError::FileExtension(grammar_path.to_owned()))?,
     }
 }
 
-fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
+fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> JSResult<String> {
     let grammar_path = fs::canonicalize(grammar_path)?;
 
     #[cfg(windows)]
@@ -194,14 +284,17 @@ fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
         .stdin(Stdio::piped())
         .stdout(Stdio::piped())
         .spawn()
-        .with_context(|| format!("Failed to run `{js_runtime}`"))?;
+        .map_err(|e| JSError::JSRuntimeSpawn {
+            runtime: js_runtime.to_string(),
+            error: e.to_string(),
+        })?;
 
     let mut js_stdin = js_process
         .stdin
         .take()
-        .with_context(|| format!("Failed to open stdin for {js_runtime}"))?;
-    let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))
-        .with_context(|| "Could not parse this package's version as semver.")?;
+        .ok_or_else(|| JSError::IO(format!("Failed to open stdin for `{js_runtime}`")))?;
+
+    let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))?;
     write!(
         js_stdin,
         "globalThis.TREE_SITTER_CLI_VERSION_MAJOR = {};
@@ -209,20 +302,28 @@ fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
         globalThis.TREE_SITTER_CLI_VERSION_MINOR = {};
         globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};",
         cli_version.major, cli_version.minor, cli_version.patch,
     )
-    .with_context(|| format!("Failed to write tree-sitter version to {js_runtime}'s stdin"))?;
-    js_stdin
-        .write(include_bytes!("./dsl.js"))
-        .with_context(|| format!("Failed to write grammar dsl to {js_runtime}'s stdin"))?;
+    .map_err(|e| {
+        JSError::IO(format!(
+            "Failed to write tree-sitter version to `{js_runtime}`'s stdin -- {e}"
+        ))
+    })?;
+    js_stdin.write(include_bytes!("./dsl.js")).map_err(|e| {
+        JSError::IO(format!(
+            "Failed to write grammar dsl to `{js_runtime}`'s stdin -- {e}"
+        ))
+    })?;
     drop(js_stdin);
 
     let output = js_process
         .wait_with_output()
-        .with_context(|| format!("Failed to read output from {js_runtime}"))?;
+        .map_err(|e| JSError::IO(format!("Failed to read output from `{js_runtime}` -- {e}")))?;
     match output.status.code() {
-        None => panic!("{js_runtime} process was killed"),
+        None => panic!("`{js_runtime}` process was killed"),
         Some(0) => {
-            let stdout = String::from_utf8(output.stdout)
-                .with_context(|| format!("Got invalid UTF8 from {js_runtime}"))?;
+            let stdout = String::from_utf8(output.stdout).map_err(|e| JSError::JSRuntimeUtf8 {
+                runtime: js_runtime.to_string(),
+                error: e.to_string(),
+            })?;
 
             let mut grammar_json = &stdout[..];
 
@@ -237,18 +338,18 @@ fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
                 stdout.flush()?;
             }
 
-            Ok(serde_json::to_string_pretty(
-                &serde_json::from_str::<serde_json::Value>(grammar_json)
-                    .with_context(|| "Failed to parse grammar JSON")?,
-            )
-            .with_context(|| "Failed to serialize grammar JSON")?
-                + "\n")
+            Ok(serde_json::to_string_pretty(&serde_json::from_str::<
+                serde_json::Value,
+            >(grammar_json)?)?)
         }
-        Some(code) => Err(anyhow!("{js_runtime} process exited with status {code}")),
+        Some(code) => Err(JSError::JSRuntimeExit {
+            runtime: js_runtime.to_string(),
+            code,
+        }),
     }
 }
 
-pub fn write_file(path: &Path, body: impl AsRef<[u8]>) -> Result<()> {
+pub fn write_file(path: &Path, body: impl AsRef<[u8]>) -> GenerateResult<()> {
     fs::write(path, body)
-        .with_context(|| format!("Failed to write {:?}", path.file_name().unwrap()))
+        .map_err(|e| GenerateError::IO(format!("Failed to write {:?} -- {e}", path.file_name())))
 }

diff --git a/cli/generate/src/node_types.rs b/cli/generate/src/node_types.rs
index 03341661..3f261a35 100644
--- a/cli/generate/src/node_types.rs
+++ b/cli/generate/src/node_types.rs
@@ -3,8 +3,9 @@ use std::{
     collections::{BTreeMap, HashMap, HashSet},
 };
 
-use anyhow::{anyhow, Result};
+use anyhow::Result;
 use serde::Serialize;
+use thiserror::Error;
 
 use super::{
     grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
@@ -132,6 +133,14 @@ impl ChildQuantity {
     }
 }
 
+pub type VariableInfoResult<T> = Result<T, VariableInfoError>;
+
+#[derive(Debug, Error, Serialize)]
+pub enum VariableInfoError {
+    #[error("Grammar error: Supertype symbols must always have a single visible child, but `{0}` can have multiple")]
+    InvalidSupertype(String),
+}
+
 /// Compute a summary of the public-facing structure of each variable in the
 /// grammar. Each variable in the grammar corresponds to a distinct public-facing
 /// node type.
@@ -157,7 +166,7 @@ pub fn get_variable_info(
     syntax_grammar: &SyntaxGrammar,
     lexical_grammar: &LexicalGrammar,
     default_aliases: &AliasMap,
-) -> Result<Vec<VariableInfo>> {
+) -> VariableInfoResult<Vec<VariableInfo>> {
     let child_type_is_visible = |t: &ChildType| {
         variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous
     };
@@ -338,13 +347,7 @@ pub fn get_variable_info(
     for supertype_symbol in &syntax_grammar.supertype_symbols {
         if result[supertype_symbol.index].has_multi_step_production {
             let variable = &syntax_grammar.variables[supertype_symbol.index];
-            return Err(anyhow!(
-                concat!(
-                    "Grammar error: Supertype symbols must always ",
-                    "have a single visible child, but `{}` can have multiple"
-                ),
-                variable.name
-            ));
+            Err(VariableInfoError::InvalidSupertype(variable.name.clone()))?;
         }
     }
 
diff --git a/cli/generate/src/parse_grammar.rs b/cli/generate/src/parse_grammar.rs
index 25aaf9f5..cc3d7de3 100644
--- a/cli/generate/src/parse_grammar.rs
+++ b/cli/generate/src/parse_grammar.rs
@@ -1,8 +1,9 @@
 use std::collections::HashSet;
 
-use anyhow::{anyhow, bail, Result};
-use serde::Deserialize;
+use anyhow::Result;
+use serde::{Deserialize, Serialize};
 use serde_json::{Map, Value};
+use thiserror::Error;
 
 use super::{
     grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType},
@@ -104,6 +105,26 @@ pub struct GrammarJSON {
     reserved: Map<String, Value>,
 }
 
+pub type ParseGrammarResult<T> = Result<T, ParseGrammarError>;
+
+#[derive(Debug, Error, Serialize)]
+pub enum ParseGrammarError {
+    #[error("{0}")]
+    Serialization(String),
+    #[error("Rules in the `extras` array must not contain empty strings")]
+    InvalidExtra,
+    #[error("Invalid rule in precedences array. Only strings and symbols are allowed")]
+    Unexpected,
+    #[error("Reserved word sets must be arrays")]
+    InvalidReservedWordSet,
+}
+
+impl From<serde_json::Error> for ParseGrammarError {
+    fn from(value: serde_json::Error) -> Self {
+        Self::Serialization(value.to_string())
+    }
+}
+
 fn rule_is_referenced(rule: &Rule, target: &str) -> bool {
     match rule {
         Rule::NamedSymbol(name) => name == target,
@@ -153,24 +174,22 @@ fn variable_is_used(
     result
 }
 
-pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
+pub(crate) fn parse_grammar(input: &str) -> ParseGrammarResult<InputGrammar> {
     let mut grammar_json = serde_json::from_str::<GrammarJSON>(input)?;
 
     let mut extra_symbols = grammar_json
         .extras
         .into_iter()
-        .try_fold(Vec::new(), |mut acc, item| {
+        .try_fold(Vec::<Rule>::new(), |mut acc, item| {
             let rule = parse_rule(item);
             if let Rule::String(ref value) = rule {
                 if value.is_empty() {
-                    return Err(anyhow!(
-                        "Rules in the `extras` array must not contain empty strings"
-                    ));
+                    Err(ParseGrammarError::InvalidExtra)?;
                 }
             }
             acc.push(rule);
-            Ok(acc)
+            ParseGrammarResult::Ok(acc)
         })?;
 
     let mut external_tokens = grammar_json
@@ -186,11 +205,7 @@ pub(crate) fn parse_grammar(input: &str) -> ParseGrammarResult<InputGrammar> {
                 ordering.push(match entry {
                     RuleJSON::STRING { value } => PrecedenceEntry::Name(value),
                     RuleJSON::SYMBOL { name } => PrecedenceEntry::Symbol(name),
-                    _ => {
-                        return Err(anyhow!(
-                            "Invalid rule in precedences array. Only strings and symbols are allowed"
-                        ))
-                    }
+                    _ => Err(ParseGrammarError::Unexpected)?,
                 });
             }
             precedence_orderings.push(ordering);
@@ -202,7 +217,7 @@ pub(crate) fn parse_grammar(input: &str) -> ParseGrammarResult<InputGrammar> {
         .rules
         .into_iter()
         .map(|(n, r)| Ok((n, parse_rule(serde_json::from_value(r)?))))
-        .collect::<Result<Vec<_>>>()?;
+        .collect::<ParseGrammarResult<Vec<_>>>()?;
 
     let mut in_progress = HashSet::new();
 
@@ -243,19 +258,18 @@ pub(crate) fn parse_grammar(input: &str) -> ParseGrammarResult<InputGrammar> {
             let mut reserved_words = Vec::new();
             let Value::Array(rule_values) = rule_values else {
-                bail!("reserved word sets must be arrays");
+                Err(ParseGrammarError::InvalidReservedWordSet)?
             };
             for value in rule_values {
-                let rule_json: RuleJSON = serde_json::from_value(value)?;
-                reserved_words.push(parse_rule(rule_json));
+                reserved_words.push(parse_rule(serde_json::from_value(value)?));
             }
             Ok(ReservedWordContext {
                 name,
                 reserved_words,
             })
         })
-        .collect::<Result<Vec<_>>>()?;
+        .collect::<ParseGrammarResult<Vec<_>>>()?;
 
     Ok(InputGrammar {
         name: grammar_json.name,

diff --git a/cli/generate/src/prepare_grammar/expand_tokens.rs b/cli/generate/src/prepare_grammar/expand_tokens.rs
index 84d05981..ed4774d4 100644
--- a/cli/generate/src/prepare_grammar/expand_tokens.rs
+++ b/cli/generate/src/prepare_grammar/expand_tokens.rs
@@ -1,9 +1,10 @@
-use anyhow::{anyhow, Context, Result};
-use indoc::indoc;
+use anyhow::Result;
 use regex_syntax::{
     hir::{Class, Hir, HirKind},
     ParserBuilder,
 };
+use serde::Serialize;
+use thiserror::Error;
 
 use super::ExtractedLexicalGrammar;
 use crate::{
@@ -18,6 +19,40 @@ struct NfaBuilder {
     precedence_stack: Vec<i32>,
 }
 
+pub type ExpandTokensResult<T> = Result<T, ExpandTokensError>;
+
+#[derive(Debug, Error, Serialize)]
+pub enum ExpandTokensError {
+    #[error(
+        "The rule `{0}` matches the empty string.
+Tree-sitter does not support syntactic rules that match the empty string
+unless they are used only as the grammar's start rule.
+    "
+    )]
+    EmptyString(String),
+    #[error(transparent)]
+    Processing(ExpandTokensProcessingError),
+    #[error(transparent)]
+    ExpandRule(ExpandRuleError),
+}
+
+#[derive(Debug, Error, Serialize)]
+pub struct ExpandTokensProcessingError {
+    rule: String,
+    error: ExpandRuleError,
+}
+
+impl std::fmt::Display for ExpandTokensProcessingError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        writeln!(
+            f,
+            "Error processing rule {}: Grammar error: Unexpected rule {:?}",
+            self.rule, self.error
+        )?;
+        Ok(())
+    }
+}
+
 fn get_implicit_precedence(rule: &Rule) -> i32 {
     match rule {
         Rule::String(_) => 2,
@@ -41,7 +76,7 @@ const fn get_completion_precedence(rule: &Rule) -> i32 {
     0
 }
 
-pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
+pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> ExpandTokensResult<LexicalGrammar> {
     let mut builder = NfaBuilder {
         nfa: Nfa::new(),
         is_sep: true,
@@ -58,14 +93,7 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
     let mut variables = Vec::new();
     for (i, variable) in grammar.variables.into_iter().enumerate() {
         if variable.rule.is_blank() {
-            return Err(anyhow!(
-                indoc! {"
-                    The rule `{}` matches the empty string.
-
-                    Tree-sitter does not support syntactic rules that match the empty string
-                    unless they are used only as the grammar's start rule.
-                "},
-                variable.name
-            ));
+            Err(ExpandTokensError::EmptyString(variable.name.clone()))?;
         }
 
@@ -77,9 +112,15 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
         let last_state_id = builder.nfa.last_state_id();
-        builder
-            .expand_rule(&variable.rule, last_state_id)
-            .with_context(|| format!("Error processing rule {}", variable.name))?;
+        builder
+            .expand_rule(&variable.rule, last_state_id)
+            .map_err(|e| {
+                ExpandTokensError::Processing(ExpandTokensProcessingError {
+                    rule: variable.name.clone(),
+                    error: e,
+                })
+            })?;
 
@@ -105,8 +139,31 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
 
+pub type ExpandRuleResult<T> = Result<T, ExpandRuleError>;
+
+#[derive(Debug, Error, Serialize)]
+pub enum ExpandRuleError {
+    #[error("Grammar error: Unexpected rule {0:?}")]
+    UnexpectedRule(Rule),
+    #[error("{0}")]
+    Parse(String),
+    #[error(transparent)]
+    ExpandRegex(ExpandRegexError),
+}
+
+pub type ExpandRegexResult<T> = Result<T, ExpandRegexError>;
+
+#[derive(Debug, Error, Serialize)]
+pub enum ExpandRegexError {
+    #[error("{0}")]
+    Utf8(String),
+    #[error("Regex error: Assertions are not supported")]
+    Assertion,
+}
+
 impl NfaBuilder {
-    fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
+    fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> ExpandRuleResult<bool> {
         match rule {
             Rule::Pattern(s, f) => {
                 // With unicode enabled, `\w`, `\s` and `\d` expand to character sets that are much
@@ -124,8 +181,11 @@ impl NfaBuilder {
                     .unicode(true)
                     .utf8(false)
                     .build();
-                let hir = parser.parse(&s)?;
+                let hir = parser
+                    .parse(&s)
+                    .map_err(|e| ExpandRuleError::Parse(e.to_string()))?;
                 self.expand_regex(&hir, next_state_id)
+                    .map_err(ExpandRuleError::ExpandRegex)
             }
             Rule::String(s) => {
                 for c in s.chars().rev() {
@@ -189,15 +249,19 @@ impl NfaBuilder {
                 result
             }
             Rule::Blank => Ok(false),
-            _ => Err(anyhow!("Grammar error: Unexpected rule {rule:?}")),
+            _ => Err(ExpandRuleError::UnexpectedRule(rule.clone()))?,
         }
     }
 
-    fn expand_regex(&mut self, hir: &Hir, mut next_state_id: u32) -> Result<bool> {
+    fn expand_regex(&mut self, hir: &Hir, mut next_state_id: u32) -> ExpandRegexResult<bool> {
         match hir.kind() {
             HirKind::Empty => Ok(false),
             HirKind::Literal(literal) => {
-                for character in std::str::from_utf8(&literal.0)?.chars().rev() {
+                for character in std::str::from_utf8(&literal.0)
+                    .map_err(|e| ExpandRegexError::Utf8(e.to_string()))?
+                    .chars()
+                    .rev()
+                {
                     let char_set = CharacterSet::from_char(character);
                     self.push_advance(char_set, next_state_id);
                     next_state_id = self.nfa.last_state_id();
@@ -234,7 +298,7 @@ impl NfaBuilder {
                     Ok(true)
                 }
             },
-            HirKind::Look(_) => Err(anyhow!("Regex error: Assertions are not supported")),
+            HirKind::Look(_) => Err(ExpandRegexError::Assertion)?,
             HirKind::Repetition(repetition) => match (repetition.min, repetition.max) {
                 (0, Some(1)) => self.expand_zero_or_one(&repetition.sub, next_state_id),
                 (1, None) => self.expand_one_or_more(&repetition.sub, next_state_id),
@@ -293,7 +357,7 @@ impl NfaBuilder {
         }
     }
 
-    fn expand_one_or_more(&mut self, hir: &Hir, next_state_id: u32) -> Result<bool> {
+    fn expand_one_or_more(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
         self.nfa.states.push(NfaState::Accept {
             variable_index: 0,
             precedence: 0,
@@ -309,7 +373,7 @@ impl NfaBuilder {
         }
     }
 
-    fn expand_zero_or_one(&mut self, hir: &Hir, next_state_id: u32) -> Result<bool> {
+    fn expand_zero_or_one(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
         if self.expand_regex(hir, next_state_id)? {
             self.push_split(next_state_id);
             Ok(true)
@@ -318,7 +382,7 @@ impl NfaBuilder {
         }
     }
 
-    fn expand_zero_or_more(&mut self, hir: &Hir, next_state_id: u32) -> Result<bool> {
+    fn expand_zero_or_more(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
         if self.expand_one_or_more(hir, next_state_id)? {
             self.push_split(next_state_id);
             Ok(true)
@@ -327,7 +391,12 @@ impl NfaBuilder {
         }
     }
 
-    fn expand_count(&mut self, hir: &Hir, count: u32, mut next_state_id: u32) -> Result<bool> {
+    fn expand_count(
+        &mut self,
+        hir: &Hir,
+        count: u32,
+        mut next_state_id: u32,
+    ) -> ExpandRegexResult<bool> {
         let mut result = false;
         for _ in 0..count {
             if self.expand_regex(hir, next_state_id)? {

diff --git a/cli/generate/src/prepare_grammar/extract_tokens.rs b/cli/generate/src/prepare_grammar/extract_tokens.rs
index fc4e22ae..6a0ebc0e 100644
--- a/cli/generate/src/prepare_grammar/extract_tokens.rs
+++ b/cli/generate/src/prepare_grammar/extract_tokens.rs
@@ -1,6 +1,8 @@
 use std::collections::HashMap;
 
-use anyhow::{anyhow, Result};
+use anyhow::Result;
+use serde::Serialize;
+use thiserror::Error;
 
 use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
 use crate::{
@@ -8,9 +10,31 @@ use crate::{
     rules::{MetadataParams, Rule, Symbol, SymbolType},
 };
 
+pub type ExtractTokensResult<T> = Result<T, ExtractTokensError>;
+
+#[derive(Debug, Error, Serialize)]
+pub enum ExtractTokensError {
+    #[error(
+        "The rule `{0}` contains an empty string.
+
+Tree-sitter does not support syntactic rules that contain an empty string
+unless they are used only as the grammar's start rule.
+"
+    )]
+    EmptyString(String),
+    #[error("Rule '{0}' cannot be used as both an external token and a non-terminal rule")]
+    ExternalTokenNonTerminal(String),
+    #[error("Non-symbol rules cannot be used as external tokens")]
+    NonSymbolExternalToken,
+    #[error("Non-terminal symbol '{0}' cannot be used as the word token")]
+    NonTerminalWordToken(String),
+    #[error("Reserved words must be tokens")]
+    NonTokenReservedWord,
+}
+
 pub(super) fn extract_tokens(
     mut grammar: InternedGrammar,
-) -> Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
+) -> ExtractTokensResult<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
     let mut extractor = TokenExtractor {
         current_variable_name: String::new(),
         current_variable_token_count: 0,
@@ -110,10 +134,9 @@ pub(super) fn extract_tokens(
         let rule = symbol_replacer.replace_symbols_in_rule(&external_token.rule);
         if let Rule::Symbol(symbol) = rule {
             if symbol.is_non_terminal() {
-                return Err(anyhow!(
-                    "Rule '{}' cannot be used as both an external token and a non-terminal rule",
-                    &variables[symbol.index].name,
-                ));
+                Err(ExtractTokensError::ExternalTokenNonTerminal(
+                    variables[symbol.index].name.clone(),
+                ))?;
             }
 
             if symbol.is_external() {
@@ -130,9 +153,7 @@ pub(super) fn extract_tokens(
                 });
             }
         } else {
-            return Err(anyhow!(
-                "Non-symbol rules cannot be used as external tokens"
-            ));
+            Err(ExtractTokensError::NonSymbolExternalToken)?;
         }
     }
 
@@ -140,10 +161,9 @@ pub(super) fn extract_tokens(
     if let Some(token) = grammar.word_token {
         let token = symbol_replacer.replace_symbol(token);
         if token.is_non_terminal() {
-            return Err(anyhow!(
-                "Non-terminal symbol '{}' cannot be used as the word token",
-                &variables[token.index].name
-            ));
+            Err(ExtractTokensError::NonTerminalWordToken(
+                variables[token.index].name.clone(),
+            ))?;
         }
         word_token = Some(token);
     }
@@ -160,7 +180,7 @@ pub(super) fn extract_tokens(
             {
                 reserved_words.push(Symbol::terminal(index));
             } else {
-                return Err(anyhow!("Reserved words must be tokens"));
+                Err(ExtractTokensError::NonTokenReservedWord)?;
             }
         }
         reserved_word_contexts.push(ReservedWordContext {
@@ -205,7 +225,7 @@ impl TokenExtractor {
         &mut self,
         is_first: bool,
         variable: &mut Variable,
-    ) -> Result<()> {
+    ) -> ExtractTokensResult<()> {
         self.current_variable_name.clear();
         self.current_variable_name.push_str(&variable.name);
         self.current_variable_token_count = 0;
@@ -214,7 +234,7 @@ impl TokenExtractor {
         Ok(())
     }
 
-    fn extract_tokens_in_rule(&mut self, input: &Rule) -> Result<Rule> {
+    fn extract_tokens_in_rule(&mut self, input: &Rule) -> ExtractTokensResult<Rule> {
         match input {
             Rule::String(name) => Ok(self.extract_token(input, Some(name))?.into()),
             Rule::Pattern(..) => Ok(self.extract_token(input, None)?.into()),
@@ -249,13 +269,13 @@ impl TokenExtractor {
                 elements
                     .iter()
                     .map(|e| self.extract_tokens_in_rule(e))
-                    .collect::<Result<Vec<_>>>()?,
+                    .collect::<ExtractTokensResult<Vec<_>>>()?,
             )),
             Rule::Choice(elements) => Ok(Rule::Choice(
                 elements
                     .iter()
                    .map(|e| self.extract_tokens_in_rule(e))
-                    .collect::<Result<Vec<_>>>()?,
+                    .collect::<ExtractTokensResult<Vec<_>>>()?,
             )),
             Rule::Reserved { rule, context_name } => Ok(Rule::Reserved {
                 rule: Box::new(self.extract_tokens_in_rule(rule)?),
@@ -265,7 +285,11 @@ impl TokenExtractor {
         }
     }
 
-    fn extract_token(&mut self, rule: &Rule, string_value: Option<&String>) -> Result<Symbol> {
+    fn extract_token(
+        &mut self,
+        rule: &Rule,
+        string_value: Option<&String>,
+    ) -> ExtractTokensResult<Symbol> {
         for (i, variable) in self.extracted_variables.iter_mut().enumerate() {
             if variable.rule == *rule {
                 self.extracted_usage_counts[i] += 1;
@@ -276,14 +300,9 @@ impl TokenExtractor {
         let index = self.extracted_variables.len();
         let variable = if let Some(string_value) = string_value {
             if string_value.is_empty() && !self.is_first_rule {
-                return Err(anyhow!(
-                    "The rule `{}` contains an empty string.
-
-Tree-sitter does not support syntactic rules that contain an empty string
-unless they are used only as the grammar's start rule.
-",
-                    self.current_variable_name
-                ));
+                Err(ExtractTokensError::EmptyString(
+                    self.current_variable_name.clone(),
+                ))?;
             }
             Variable {
                 name: string_value.clone(),

diff --git a/cli/generate/src/prepare_grammar/flatten_grammar.rs b/cli/generate/src/prepare_grammar/flatten_grammar.rs
index 759d8add..b8033d5f 100644
--- a/cli/generate/src/prepare_grammar/flatten_grammar.rs
+++ b/cli/generate/src/prepare_grammar/flatten_grammar.rs
@@ -1,7 +1,8 @@
 use std::collections::HashMap;
 
-use anyhow::{anyhow, Result};
-use indoc::indoc;
+use anyhow::Result;
+use serde::Serialize;
+use thiserror::Error;
 
 use super::ExtractedSyntaxGrammar;
 use crate::{
@@ -11,6 +12,24 @@ use crate::{
     rules::{Alias, Associativity, Precedence, Rule, Symbol, TokenSet},
 };
 
+pub type FlattenGrammarResult<T> = Result<T, FlattenGrammarError>;
+
+#[derive(Debug, Error, Serialize)]
+pub enum FlattenGrammarError {
+    #[error("No such reserved word set: {0}")]
+    NoReservedWordSet(String),
+    #[error(
+        "The rule `{0}` matches the empty string.
+
+Tree-sitter does not support syntactic rules that match the empty string
+unless they are used only as the grammar's start rule.
+"
+    )]
+    EmptyString(String),
+    #[error("Rule `{0}` cannot be inlined because it contains a reference to itself")]
+    RecursiveInline(String),
+}
+
 struct RuleFlattener {
     production: Production,
     reserved_word_set_ids: HashMap<String, ReservedWordSetId>,
@@ -37,7 +56,7 @@ impl RuleFlattener {
         }
     }
 
-    fn flatten_variable(&mut self, variable: Variable) -> Result<SyntaxVariable> {
+    fn flatten_variable(&mut self, variable: Variable) -> FlattenGrammarResult<SyntaxVariable> {
         let mut productions = Vec::new();
         for rule in extract_choices(variable.rule) {
             let production = self.flatten_rule(rule)?;
@@ -52,7 +71,7 @@ impl RuleFlattener {
         })
     }
 
-    fn flatten_rule(&mut self, rule: Rule) -> Result<Production> {
+    fn flatten_rule(&mut self, rule: Rule) -> FlattenGrammarResult<Production> {
         self.production = Production::default();
         self.alias_stack.clear();
         self.reserved_word_stack.clear();
@@ -63,7 +82,7 @@ impl RuleFlattener {
         Ok(self.production.clone())
     }
 
-    fn apply(&mut self, rule: Rule, at_end: bool) -> Result<bool> {
+    fn apply(&mut self, rule: Rule, at_end: bool) -> FlattenGrammarResult<bool> {
         match rule {
             Rule::Seq(members) => {
                 let mut result = false;
@@ -138,7 +157,9 @@ impl RuleFlattener {
                     self.reserved_word_set_ids
                         .get(&context_name)
                         .copied()
-                        .ok_or_else(|| anyhow!("no such reserved word set: {context_name}"))?,
+                        .ok_or_else(|| {
+                            FlattenGrammarError::NoReservedWordSet(context_name.clone())
+                        })?,
                 );
                 let did_push = self.apply(*rule, at_end)?;
                 self.reserved_word_stack.pop();
@@ -224,7 +245,9 @@ fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
     false
 }
 
-pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
+pub(super) fn flatten_grammar(
    grammar: ExtractedSyntaxGrammar,
+) -> FlattenGrammarResult<SyntaxGrammar> {
     let mut reserved_word_set_ids_by_name = HashMap::new();
     for (ix, set) in grammar.reserved_word_sets.iter().enumerate() {
         reserved_word_set_ids_by_name.insert(set.name.clone(), ReservedWordSetId(ix));
@@ -235,31 +258,20 @@ pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
         .collect::<FlattenGrammarResult<Vec<_>>>()?;
 
     for (i, variable) in variables.iter().enumerate() {
         let symbol = Symbol::non_terminal(i);
         for production in &variable.productions {
             if production.steps.is_empty() && symbol_is_used(&variables, symbol) {
-                return Err(anyhow!(
-                    indoc! {"
-                        The rule `{}` matches the empty string.
-
-                        Tree-sitter does not support syntactic rules that match the empty string
-                        unless they are used only as the grammar's start rule.
-                    "},
-                    variable.name
-                ));
+                Err(FlattenGrammarError::EmptyString(variable.name.clone()))?;
             }
 
             if grammar.variables_to_inline.contains(&symbol)
                 && production.steps.iter().any(|step| step.symbol == symbol)
             {
-                return Err(anyhow!(
-                    "Rule `{}` cannot be inlined because it contains a reference to itself.",
-                    variable.name,
-                ));
+                Err(FlattenGrammarError::RecursiveInline(variable.name.clone()))?;
             }
         }
     }

diff --git a/cli/generate/src/prepare_grammar/intern_symbols.rs b/cli/generate/src/prepare_grammar/intern_symbols.rs
index ee9967b8..6301e462 100644
--- a/cli/generate/src/prepare_grammar/intern_symbols.rs
+++ b/cli/generate/src/prepare_grammar/intern_symbols.rs
@@ -1,4 +1,6 @@
-use anyhow::{anyhow, Result};
+use anyhow::Result;
+use serde::Serialize;
+use thiserror::Error;
 
 use super::InternedGrammar;
 use crate::{
@@ -6,11 +8,27 @@ use crate::{
     rules::{Rule, Symbol},
 };
 
-pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> {
+pub type InternSymbolsResult<T> = Result<T, InternSymbolsError>;
+
+#[derive(Debug, Error, Serialize)]
+pub enum InternSymbolsError {
+    #[error("A grammar's start rule must be visible.")]
+    HiddenStartRule,
+    #[error("Undefined symbol `{0}`")]
+    Undefined(String),
+    #[error("Undefined symbol `{0}` in grammar's supertypes array")]
+    UndefinedSupertype(String),
+    #[error("Undefined symbol `{0}` in grammar's conflicts array")]
+    UndefinedConflict(String),
+    #[error("Undefined symbol `{0}` as grammar's word token")]
+    UndefinedWordToken(String),
+}
+
+pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<InternedGrammar> {
     let interner = Interner { grammar };
 
     if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
-        return Err(anyhow!("A grammar's start rule must be visible."));
+        Err(InternSymbolsError::HiddenStartRule)?;
     }
 
     let mut variables = Vec::with_capacity(grammar.variables.len());
@@ -41,7 +59,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<InternedGrammar> {
     let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
     for supertype_symbol_name in &grammar.supertype_symbols {
         supertype_symbols.push(interner.intern_name(supertype_symbol_name).ok_or_else(|| {
-            anyhow!("Undefined symbol `{supertype_symbol_name}` in grammar's supertypes array")
+            InternSymbolsError::UndefinedSupertype(supertype_symbol_name.clone())
         })?);
     }
 
@@ -61,9 +79,11 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<InternedGrammar> {
     for conflict in &grammar.expected_conflicts {
         let mut interned_conflict = Vec::with_capacity(conflict.len());
         for name in conflict {
-            interned_conflict.push(interner.intern_name(name).ok_or_else(|| {
-                anyhow!("Undefined symbol `{name}` in grammar's conflicts array")
-            })?);
+            interned_conflict.push(
+                interner
+                    .intern_name(name)
+                    .ok_or_else(|| InternSymbolsError::UndefinedConflict(name.clone()))?,
+            );
         }
         expected_conflicts.push(interned_conflict);
     }
@@ -80,7 +100,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<InternedGrammar> {
         word_token = Some(
             interner
                 .intern_name(name)
-                .ok_or_else(|| anyhow!("Undefined symbol `{name}` as grammar's word token"))?,
+                .ok_or_else(|| InternSymbolsError::UndefinedWordToken(name.clone()))?,
         );
     }
 
@@ -108,7 +128,7 @@ struct Interner<'a> {
 }
 
 impl Interner<'_> {
-    fn intern_rule(&self, rule: &Rule, name: Option<&str>) -> Result<Rule> {
+    fn intern_rule(&self, rule: &Rule, name: Option<&str>) -> InternSymbolsResult<Rule> {
         match rule {
             Rule::Choice(elements) => {
                 self.check_single(elements, name);
@@ -136,7 +156,7 @@ impl Interner<'_> {
                 context_name: context_name.clone(),
             }),
             Rule::NamedSymbol(name) => self.intern_name(name).map_or_else(
-                || Err(anyhow!("Undefined symbol `{name}`")),
+                || Err(InternSymbolsError::Undefined(name.clone())),
                 |symbol| Ok(Rule::Symbol(symbol)),
             ),
             _ => Ok(rule.clone()),

diff --git a/cli/generate/src/prepare_grammar/mod.rs b/cli/generate/src/prepare_grammar/mod.rs
index ffd6aa6f..c8b92337 100644
--- a/cli/generate/src/prepare_grammar/mod.rs
+++ b/cli/generate/src/prepare_grammar/mod.rs
@@ -12,7 +12,14 @@ use std::{
     mem,
 };
 
-use anyhow::{anyhow, Result};
+use anyhow::Result;
+pub use expand_tokens::ExpandTokensError;
+pub use extract_tokens::ExtractTokensError;
+pub use flatten_grammar::FlattenGrammarError;
+pub use intern_symbols::InternSymbolsError;
+pub use process_inlines::ProcessInlinesError;
+use serde::Serialize;
+use thiserror::Error;
 
 pub use self::expand_tokens::expand_tokens;
 use self::{
@@ -67,11 +74,67 @@ impl Default for IntermediateGrammar<T, U> {
     }
 }
 
+pub type PrepareGrammarResult<T> = Result<T, PrepareGrammarError>;
+
+#[derive(Debug, Error, Serialize)]
+#[error(transparent)]
+pub enum PrepareGrammarError {
+    ValidatePrecedences(#[from] ValidatePrecedenceError),
+    InternSymbols(#[from] InternSymbolsError),
+    ExtractTokens(#[from] ExtractTokensError),
+    FlattenGrammar(#[from] FlattenGrammarError),
+    ExpandTokens(#[from] ExpandTokensError),
+    ProcessInlines(#[from] ProcessInlinesError),
+}
+
+pub type ValidatePrecedenceResult<T> = Result<T, ValidatePrecedenceError>;
+
+#[derive(Debug, Error, Serialize)]
+#[error(transparent)]
+pub enum ValidatePrecedenceError {
+    Undeclared(#[from] UndeclaredPrecedenceError),
+    Ordering(#[from] ConflictingPrecedenceOrderingError),
+}
+
+#[derive(Debug, Error, Serialize)]
+pub struct UndeclaredPrecedenceError {
+    pub precedence: String,
+    pub rule: String,
+}
+
+impl std::fmt::Display for UndeclaredPrecedenceError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "Undeclared precedence '{}' in rule '{}'",
+            self.precedence, self.rule
+        )?;
+        Ok(())
+    }
+}
+
+#[derive(Debug, Error, Serialize)]
+pub struct ConflictingPrecedenceOrderingError {
+    pub precedence_1: String,
+    pub precedence_2: String,
+}
+
+impl std::fmt::Display for ConflictingPrecedenceOrderingError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "Conflicting orderings for precedences {} and {}",
+            self.precedence_1, self.precedence_2
+        )?;
+        Ok(())
+    }
+}
+
 /// Transform an input grammar into separate components that are ready
 /// for parse table construction.
 pub fn prepare_grammar(
     input_grammar: &InputGrammar,
-) -> Result<(
+) -> PrepareGrammarResult<(
     SyntaxGrammar,
     LexicalGrammar,
     InlinedProductionMap,
@@ -92,10 +155,14 @@ pub fn prepare_grammar(
 /// Check that all of the named precedences used in the grammar are declared
 /// within the `precedences` lists, and also that there are no conflicting
 /// precedence orderings declared in those lists.
-fn validate_precedences(grammar: &InputGrammar) -> Result<()> {
+fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()> {
     // Check that no rule contains a named precedence that is not present in
     // any of the `precedences` lists.
-    fn validate(rule_name: &str, rule: &Rule, names: &HashSet<&String>) -> Result<()> {
+    fn validate(
+        rule_name: &str,
+        rule: &Rule,
+        names: &HashSet<&String>,
+    ) -> ValidatePrecedenceResult<()> {
         match rule {
             Rule::Repeat(rule) => validate(rule_name, rule, names),
             Rule::Seq(elements) | Rule::Choice(elements) => elements
@@ -104,7 +171,10 @@ fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()> {
             Rule::Metadata { rule, params } => {
                 if let Precedence::Name(n) = &params.precedence {
                     if !names.contains(n) {
-                        return Err(anyhow!("Undeclared precedence '{n}' in rule '{rule_name}'"));
+                        Err(UndeclaredPrecedenceError {
+                            precedence: n.to_string(),
+                            rule: rule_name.to_string(),
+                        })?;
                     }
                 }
                 validate(rule_name, rule, names)?;
@@ -134,9 +204,10 @@ fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()> {
                 }
                 hash_map::Entry::Occupied(e) => {
                     if e.get() != &ordering {
-                        return Err(anyhow!(
-                            "Conflicting orderings for precedences {entry1} and {entry2}",
-                        ));
+                        Err(ConflictingPrecedenceOrderingError {
+                            precedence_1: entry1.to_string(),
+                            precedence_2: entry2.to_string(),
+                        })?;
                     }
                 }
             }

diff --git a/cli/generate/src/prepare_grammar/process_inlines.rs b/cli/generate/src/prepare_grammar/process_inlines.rs
index f2acffb6..085e6732 100644
--- a/cli/generate/src/prepare_grammar/process_inlines.rs
+++ b/cli/generate/src/prepare_grammar/process_inlines.rs
@@ -1,6 +1,8 @@
 use std::collections::HashMap;
 
-use anyhow::{anyhow, Result};
+use anyhow::Result;
+use serde::Serialize;
+use thiserror::Error;
 
 use crate::{
     grammars::{InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
@@ -187,29 +189,38 @@ impl InlinedProductionMapBuilder {
     }
 }
 
+pub type ProcessInlinesResult<T> = Result<T, ProcessInlinesError>;
+
+#[derive(Debug, Error, Serialize)]
+pub enum ProcessInlinesError {
+    #[error("External token `{0}` cannot be inlined")]
+    ExternalToken(String),
+    #[error("Token `{0}` cannot be inlined")]
+    Token(String),
+    #[error("Rule `{0}` cannot be inlined because it is the first rule")]
+    FirstRule(String),
+}
+
 pub(super) fn process_inlines(
     grammar: &SyntaxGrammar,
     lexical_grammar: &LexicalGrammar,
-) -> Result<InlinedProductionMap> {
+) -> ProcessInlinesResult<InlinedProductionMap> {
     for symbol in &grammar.variables_to_inline {
         match symbol.kind {
             SymbolType::External => {
-                return Err(anyhow!(
-                    "External token `{}` cannot be inlined",
-                    grammar.external_tokens[symbol.index].name
-                ))
+                Err(ProcessInlinesError::ExternalToken(
+                    grammar.external_tokens[symbol.index].name.clone(),
+                ))?;
             }
             SymbolType::Terminal => {
-                return Err(anyhow!(
-                    "Token `{}` cannot be inlined",
-                    lexical_grammar.variables[symbol.index].name,
-                ))
+                Err(ProcessInlinesError::Token(
+                    lexical_grammar.variables[symbol.index].name.clone(),
+                ))?;
             }
             SymbolType::NonTerminal if symbol.index == 0 => {
-                return Err(anyhow!(
-                    "Rule `{}` cannot be inlined because it is the first rule",
-                    grammar.variables[symbol.index].name,
-                ))
+                Err(ProcessInlinesError::FirstRule(
+                    grammar.variables[symbol.index].name.clone(),
+                ))?;
             }
             _ => {}
         }

diff --git a/cli/generate/src/rules.rs b/cli/generate/src/rules.rs
index e2de62a3..aa7d46ab 100644
--- a/cli/generate/src/rules.rs
+++ b/cli/generate/src/rules.rs
@@ -1,10 +1,11 @@
 use std::{collections::HashMap, fmt};
 
+use serde::Serialize;
 use smallbitvec::SmallBitVec;
 
 use super::grammars::VariableType;
 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
 pub enum SymbolType {
     External,
     End,
@@ -13,19 +14,19 @@ pub enum SymbolType {
     Terminal,
     NonTerminal,
 }
 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
 pub enum Associativity {
     Left,
     Right,
 }
 
-#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
 pub struct Alias {
     pub value: String,
     pub is_named: bool,
 }
 
-#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default, Serialize)]
 pub enum Precedence {
     #[default]
     None,
@@ -35,7 +36,7 @@ pub enum Precedence {
 
 pub type AliasMap = HashMap<Symbol, Alias>;
 
-#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
+#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize)]
 pub struct MetadataParams {
     pub precedence: Precedence,
     pub dynamic_precedence: i32,
@@ -46,13 +47,13 @@ pub struct MetadataParams {
     pub field_name: Option<String>,
 }
 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
 pub struct Symbol {
     pub kind: SymbolType,
     pub index: usize,
 }
 
-#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)]
 pub enum Rule {
     Blank,
     String(String),

diff --git a/cli/src/init.rs b/cli/src/init.rs
index 4d5f26af..fd12a1ed 100644
--- a/cli/src/init.rs
+++ b/cli/src/init.rs
@@ -381,7 +381,8 @@ pub fn generate_grammar_files(
             let Some(opts) = opts else { unreachable!() };
             let tree_sitter_json = opts.clone().to_tree_sitter_json();
-            write_file(path, serde_json::to_string_pretty(&tree_sitter_json)?)
+            write_file(path, serde_json::to_string_pretty(&tree_sitter_json)?)?;
+            Ok(())
         },
         |path| {
             // updating the config, if needed
@@ -523,10 +524,9 @@ pub fn generate_grammar_files(
         |path| {
             let contents = fs::read_to_string(path)?;
             if contents.contains("fs.exists(") {
-                write_file(path, contents.replace("fs.exists(", "fs.existsSync("))
-            } else {
-                Ok(())
+                write_file(path, contents.replace("fs.exists(", "fs.existsSync("))?;
             }
+            Ok(())
         },
     )?;
 
@@ -566,10 +566,9 @@ pub fn generate_grammar_files(
             let contents = fs::read_to_string(path)?;
             let old = "add_custom_target(test";
             if contents.contains(old) {
-                write_file(path, contents.replace(old, "add_custom_target(ts-test"))
-            } else {
-                Ok(())
+                write_file(path, contents.replace(old, "add_custom_target(ts-test"))?;
             }
+            Ok(())
         },
     )?;
 
@@ -671,10 +670,9 @@ pub fn generate_grammar_files(
                         "egg_info": EggInfo,
                     "#},
                 );
-                write_file(path, contents)
-            } else {
-                Ok(())
+                write_file(path, contents)?;
             }
+            Ok(())
         },
     )?;
 
@@ -951,7 +949,8 @@ fn generate_file(
         }
     }
 
-    write_file(path, replacement)
+    write_file(path, replacement)?;
+    Ok(())
 }
 
 fn create_dir(path: &Path) -> Result<()> {

diff --git a/cli/src/main.rs b/cli/src/main.rs
index 8b6cea86..1534e070 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -22,7 +22,7 @@ use tree_sitter_cli::{
     init::{generate_grammar_files, get_root_path, migrate_package_json, JsonConfigOpts},
     input::{get_input, get_tmp_source_file, CliInput},
     logger,
-    parse::{self, ParseFileOptions, ParseOutput, ParseResult, ParseTheme},
+    parse::{self, ParseFileOptions, ParseOutput, ParseTheme},
     playground, query,
     tags::{self, TagsOptions},
     test::{self, TestOptions, TestStats},
@@ -121,6 +121,9 @@ struct Generate {
     /// Produce a report of the states for the given rule, use `-` to report every rule
     #[arg(long)]
     pub report_states_for_rule: Option<String>,
+    /// Report conflicts in a JSON format
+    #[arg(long)]
+    pub json: bool,
     /// The name or path of the JavaScript runtime to use for generating parsers
     #[arg(
         long,
@@ -215,7 +218,7 @@ struct Parse {
     pub open_log: bool,
     /// Output parsing results in a JSON format
     #[arg(long, short = 'j')]
-    pub output_json_summary: bool,
+    pub json: bool,
     /// The path to an alternative config.json file
     #[arg(long)]
     pub config_path: Option<PathBuf>,
@@ -729,14 +732,22 @@ impl Generate {
                 version.parse().expect("invalid abi version flag")
             }
         });
-        tree_sitter_generate::generate_parser_in_directory(
+        if let Err(err) = tree_sitter_generate::generate_parser_in_directory(
             current_dir,
             self.output.as_deref(),
             self.grammar_path.as_deref(),
             abi_version,
            self.report_states_for_rule.as_deref(),
             self.js_runtime.as_deref(),
-        )?;
+        ) {
+            if self.json {
+                eprintln!("{}", serde_json::to_string_pretty(&err)?);
+                // Exit early to prevent errors from being printed a second time in the caller
+                std::process::exit(1);
+            } else {
+                return Err(err.into());
+            }
+        }
         if self.build {
             if let Some(path) = self.libdir {
                 loader = loader::Loader::with_parser_lib_path(PathBuf::from(path));
@@ -815,7 +826,7 @@ impl Parse {
             ParseOutput::Xml
         } else if self.output_cst {
             ParseOutput::Cst
-        } else if self.quiet || self.output_json_summary {
+        } else if self.quiet || self.json {
             ParseOutput::Quiet
         } else {
             ParseOutput::Normal
@@ -862,9 +873,9 @@ impl Parse {
         loader.find_all_languages(&loader_config)?;
 
         let should_track_stats = self.stat;
-        let mut stats = parse::ParseStats::new();
+        let mut stats = parse::ParseStats::default();
 
-        let options = ParseFileOptions {
+        let mut options = ParseFileOptions {
             edits: &edits
                 .iter()
                 .map(std::string::String::as_str)
@@ -872,6 +883,7 @@ impl Parse {
             output,
             print_time: time,
             timeout,
+            stats: &mut stats,
             debug: self.debug,
             debug_graph: self.debug_graph,
             cancellation_flag: Some(&cancellation_flag),
@@ -881,14 +893,15 @@ impl Parse {
             parse_theme: &parse_theme,
         };
 
-        let mut update_stats = |parse_result: ParseResult| {
+        let mut update_stats = |stats: &mut parse::ParseStats| {
+            let parse_result = stats.parse_summaries.last().unwrap();
             if should_track_stats {
-                stats.total_parses += 1;
+                stats.cumulative_stats.total_parses += 1;
                 if parse_result.successful {
                     stats.cumulative_stats.successful_parses += 1;
                 }
-                if let Some(duration) = parse_result.duration {
-                    stats.cumulative_stats.total_bytes += parse_result.bytes;
+                if let (Some(duration), Some(bytes)) = (parse_result.duration, parse_result.bytes) {
+                    stats.cumulative_stats.total_bytes += bytes;
                     stats.cumulative_stats.total_duration += duration;
                 }
             }
@@ -915,15 +928,15 @@ impl Parse {
                     let language =
                         loader.select_language(path, current_dir, self.scope.as_deref())?;
 
-                    let parse_result = parse::parse_file_at_path(
+                    parse::parse_file_at_path(
                         &mut parser,
                         &language,
                         path,
                         &path.display().to_string(),
                         max_path_length,
-                        &options,
+                        &mut options,
                     )?;
-                    update_stats(parse_result);
+                    update_stats(options.stats);
                 }
             }
 
@@ -941,15 +954,15 @@ impl Parse {
                     .map(|(l, _)| l.clone())
                     .ok_or_else(|| anyhow!("No language found"))?;
 
-                let parse_result = parse::parse_file_at_path(
+                parse::parse_file_at_path(
                     &mut parser,
                     &language,
                     &path,
                     &name,
                     name.chars().count(),
-                    &options,
+                    &mut options,
                 )?;
-                update_stats(parse_result);
+                update_stats(&mut stats);
 
                 fs::remove_file(path)?;
             }
@@ -961,15 +974,15 @@ impl Parse {
                 let name = "stdin";
                 let language = loader.select_language(&path, current_dir, None)?;
 
-                let parse_result = parse::parse_file_at_path(
+                parse::parse_file_at_path(
                     &mut parser,
                     &language,
                     &path,
                     name,
                     name.chars().count(),
-                    &options,
+                    &mut options,
                 )?;
-                update_stats(parse_result);
+                update_stats(&mut stats);
                 fs::remove_file(path)?;
             }
         }
 
@@ -977,7 +990,7 @@ impl Parse {
         if should_track_stats {
             println!("\n{}", stats.cumulative_stats);
         }
-        if self.output_json_summary {
+        if self.json {
             println!("{}", serde_json::to_string_pretty(&stats)?);
         }
 
diff --git a/cli/src/parse.rs b/cli/src/parse.rs
index ba55ab08..67540cce 100644
--- a/cli/src/parse.rs
+++ b/cli/src/parse.rs
@@ -204,10 +204,11 @@ pub struct ParseSummary {
     pub start: Option<Point>,
     pub end: Option<Point>,
     pub duration: Option<Duration>,
-    pub bytes: Option<u64>,
+    pub bytes: Option<usize>,
 }
 
 impl ParseSummary {
+    #[must_use]
     pub fn new(path: &Path) -> Self {
         Self {
             file: path.to_path_buf(),
@@ -217,21 +218,12 @@ impl ParseSummary {
         }
     }
 }
 
-#[derive(Serialize, Debug)]
+#[derive(Serialize, Debug, Default)]
 pub struct ParseStats {
     pub parse_summaries: Vec<ParseSummary>,
     pub cumulative_stats: Stats,
 }
 
-impl ParseStats {
-    pub fn new() -> Self {
-        Self {
-            parse_summaries: Vec::new(),
-            cumulative_stats: Stats::default(),
-        }
-    }
-}
-
 pub struct ParseFileOptions<'a> {
     pub edits: &'a [&'a str],
     pub output: ParseOutput,
@@ -260,8 +252,8 @@ pub fn parse_file_at_path(
     path: &Path,
     name: &str,
     max_path_length: usize,
-    opts: &ParseFileOptions,
-) -> Result<ParseResult> {
+    opts: &mut ParseFileOptions,
+) -> Result<()> {
     let mut _log_session = None;
     parser.set_language(language)?;
     let mut source_code = fs::read(path).with_context(|| format!("Error reading {name:?}"))?;
@@ -398,10 +390,6 @@ pub fn parse_file_at_path(
 
     parser.stop_printing_dot_graphs();
 
-    let current_summary = opts.stats.parse_summaries.last_mut().unwrap();
-    current_summary.start = Some(tree.root_node().start_position().into());
-    current_summary.end = Some(tree.root_node().end_position().into());
-
     let parse_duration_ms = parse_duration.as_micros() as f64 / 1e3;
     let edit_duration_ms = edit_duration.as_micros() as f64 / 1e3;
     let mut cursor = tree.walk();
@@ -656,11 +644,16 @@ pub fn parse_file_at_path(
             writeln!(&mut stdout)?;
         }
 
-        return Ok(ParseResult {
-            successful: first_error.is_none(),
-            bytes: source_code.len(),
+        opts.stats.parse_summaries.push(ParseSummary {
+            file: path.to_path_buf(),
+            successful: true,
+            start: Some(tree.root_node().start_position().into()),
+            end: Some(tree.root_node().end_position().into()),
             duration: Some(parse_duration),
-        });
+            bytes: Some(source_code.len()),
+        });
+
+        return Ok(());
     }
 
     parser.stop_printing_dot_graphs();
@@ -675,11 +668,16 @@ pub fn parse_file_at_path(
         )?;
     }
 
-    Ok(ParseResult {
+    opts.stats.parse_summaries.push(ParseSummary {
+        file: path.to_path_buf(),
         successful: false,
-        bytes: source_code.len(),
+        start: None,
+        end: None,
         duration: None,
-    })
+        bytes: Some(source_code.len()),
+    });
+
+    Ok(())
 }
 
 const fn escape_invisible(c: char) -> Option<&'static str> {
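
Usage sketch (illustrative only, not part of the patch): because `GenerateError` and the error types nested inside it now derive `Serialize` and get their `Display` impls from `thiserror`, a caller outside the CLI can surface generation failures either as plain text or as machine-readable JSON, mirroring what the new `--json` flag does in cli/src/main.rs. A minimal sketch, assuming the crate is linked as `tree_sitter_generate` and `serde_json` is available; the helper function name is hypothetical:

    // Hypothetical helper, not part of the patch.
    fn generate_or_report(grammar_json: &str, json_errors: bool) -> Option<(String, String)> {
        match tree_sitter_generate::generate_parser_for_grammar(grammar_json) {
            // On success this returns the grammar name and the generated C source.
            Ok(name_and_code) => Some(name_and_code),
            Err(err) => {
                if json_errors {
                    // `GenerateError` derives `Serialize`, so conflicts and other
                    // failures can be emitted as structured JSON.
                    eprintln!("{}", serde_json::to_string_pretty(&err).unwrap());
                } else {
                    // `thiserror` supplies the human-readable `Display` used here.
                    eprintln!("{err}");
                }
                None
            }
        }
    }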