feat(rust): use thiserror for generate crate

Co-authored-by: Amaan Qureshi <amaanq12@gmail.com>
WillLillis 2024-12-24 19:16:19 -05:00 committed by Amaan Qureshi
parent 5a825a0930
commit 867433afd7
17 changed files with 821 additions and 371 deletions
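
For context, a minimal, self-contained sketch (not part of this commit) of the error-handling pattern the diff below adopts across the generate crate: each module defines a thiserror enum whose #[error(...)] attributes carry the messages that anyhow! calls used to format inline, #[from] variants let `?` convert between error types, and a per-module Result alias (e.g. GenerateResult, FlattenGrammarResult) names the error type once. The DemoError/DemoResult names and the load_rule function here are illustrative only.

use std::{fs, io, path::Path};
use thiserror::Error;

// Per-module Result alias, mirroring the aliases introduced in this commit.
pub type DemoResult<T> = Result<T, DemoError>;

#[derive(Debug, Error)]
pub enum DemoError {
    // #[error(...)] supplies the Display impl that anyhow! messages used to provide.
    #[error("Rule `{0}` matches the empty string")]
    EmptyRule(String),
    // #[from] generates From<io::Error>, so `?` converts I/O errors automatically.
    #[error("I/O error -- {0}")]
    Io(#[from] io::Error),
}

fn load_rule(path: &Path) -> DemoResult<String> {
    // `?` converts an io::Error into DemoError::Io via the generated From impl.
    let text = fs::read_to_string(path)?;
    if text.trim().is_empty() {
        // `Err(...)?` is the early-return idiom used throughout this diff:
        // it returns the error, converting through From when the types differ.
        Err(DemoError::EmptyRule(path.display().to_string()))?;
    }
    Ok(text)
}

The diff below applies this same shape to GenerateError, LoadGrammarError, JSError, ParseGrammarError, PrepareGrammarError, ParseTableBuilderError, and the rest.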

Cargo.lock (generated)

@ -1870,6 +1870,7 @@ dependencies = [
"serde",
"serde_json",
"smallbitvec",
"thiserror 2.0.9",
"tree-sitter",
"url",
]


@ -29,6 +29,7 @@ semver.workspace = true
serde.workspace = true
serde_json.workspace = true
smallbitvec.workspace = true
thiserror.workspace = true
url.workspace = true
tree-sitter.workspace = true


@ -1,13 +1,13 @@
use std::{
cmp::Ordering,
collections::{BTreeMap, HashMap, HashSet, VecDeque},
fmt::Write,
collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque},
hash::BuildHasherDefault,
};
use anyhow::{anyhow, Result};
use indexmap::{map::Entry, IndexMap};
use rustc_hash::FxHasher;
use serde::Serialize;
use thiserror::Error;
use super::{
item::{ParseItem, ParseItemSet, ParseItemSetCore, ParseItemSetEntry},
@ -64,6 +64,176 @@ struct ParseTableBuilder<'a> {
parse_table: ParseTable,
}
pub type BuildTableResult<T> = Result<T, ParseTableBuilderError>;
#[derive(Debug, Error, Serialize)]
pub enum ParseTableBuilderError {
#[error("Unresolved conflict for symbol sequence:\n\n{0}")]
Conflict(#[from] ConflictError),
#[error("Extra rules must have unambiguous endings. Conflicting rules: {0}")]
AmbiguousExtra(#[from] AmbiguousExtraError),
}
#[derive(Default, Debug, Serialize)]
pub struct ConflictError {
pub symbol_sequence: Vec<String>,
pub conflicting_lookahead: String,
pub possible_interpretations: Vec<Interpretation>,
pub possible_resolutions: Vec<Resolution>,
}
#[derive(Default, Debug, Serialize)]
pub struct Interpretation {
pub preceding_symbols: Vec<String>,
pub variable_name: String,
pub production_step_symbols: Vec<String>,
pub step_index: u32,
pub done: bool,
pub conflicting_lookahead: String,
pub precedence: Option<String>,
pub associativity: Option<String>,
}
#[derive(Debug, Serialize)]
pub enum Resolution {
Precedence { symbols: Vec<String> },
Associativity { symbols: Vec<String> },
AddConflict { symbols: Vec<String> },
}
#[derive(Debug, Serialize)]
pub struct AmbiguousExtraError {
pub parent_symbols: Vec<String>,
}
impl std::fmt::Display for ConflictError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
for symbol in &self.symbol_sequence {
write!(f, " {symbol}")?;
}
writeln!(f, " • {} …\n", self.conflicting_lookahead)?;
writeln!(f, "Possible interpretations:\n")?;
let mut interpretations = self
.possible_interpretations
.iter()
.map(|i| {
let line = i.to_string();
let prec_line = if let (Some(precedence), Some(associativity)) =
(&i.precedence, &i.associativity)
{
Some(format!(
"(precedence: {precedence}, associativity: {associativity})",
))
} else {
i.precedence
.as_ref()
.map(|precedence| format!("(precedence: {precedence})"))
};
(line, prec_line)
})
.collect::<Vec<_>>();
let max_interpretation_length = interpretations
.iter()
.map(|i| i.0.chars().count())
.max()
.unwrap();
interpretations.sort_unstable();
for (i, (line, prec_suffix)) in interpretations.into_iter().enumerate() {
write!(f, " {}:", i + 1).unwrap();
write!(f, "{line}")?;
if let Some(prec_suffix) = prec_suffix {
write!(
f,
"{:1$}",
"",
max_interpretation_length.saturating_sub(line.chars().count()) + 2
)?;
write!(f, "{prec_suffix}")?;
}
writeln!(f)?;
}
writeln!(f, "\nPossible resolutions:\n")?;
for (i, resolution) in self.possible_resolutions.iter().enumerate() {
writeln!(f, " {}: {resolution}", i + 1)?;
}
Ok(())
}
}
impl std::fmt::Display for Interpretation {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
for symbol in &self.preceding_symbols {
write!(f, " {symbol}")?;
}
write!(f, " ({}", self.variable_name)?;
for (i, symbol) in self.production_step_symbols.iter().enumerate() {
if i == self.step_index as usize {
write!(f, "")?;
}
write!(f, " {symbol}")?;
}
write!(f, ")")?;
if self.done {
write!(f, " • {} …", self.conflicting_lookahead)?;
}
Ok(())
}
}
impl std::fmt::Display for Resolution {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Precedence { symbols } => {
write!(f, "Specify a higher precedence in ")?;
for (i, symbol) in symbols.iter().enumerate() {
if i > 0 {
write!(f, " and ")?;
}
write!(f, "`{symbol}`")?;
}
write!(f, " than in the other rules.")?;
}
Self::Associativity { symbols } => {
write!(f, "Specify a left or right associativity in ")?;
for (i, symbol) in symbols.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "`{symbol}`")?;
}
}
Self::AddConflict { symbols } => {
write!(f, "Add a conflict for these rules: ")?;
for (i, symbol) in symbols.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "`{symbol}`")?;
}
}
}
Ok(())
}
}
impl std::fmt::Display for AmbiguousExtraError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
for (i, symbol) in self.parent_symbols.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "{symbol}")?;
}
Ok(())
}
}
impl std::error::Error for ConflictError {}
impl std::error::Error for AmbiguousExtraError {}
impl<'a> ParseTableBuilder<'a> {
fn new(
syntax_grammar: &'a SyntaxGrammar,
@ -92,7 +262,7 @@ impl<'a> ParseTableBuilder<'a> {
}
}
fn build(mut self) -> Result<(ParseTable, Vec<ParseStateInfo<'a>>)> {
fn build(mut self) -> BuildTableResult<(ParseTable, Vec<ParseStateInfo<'a>>)> {
// Ensure that the empty alias sequence has index 0.
self.parse_table
.production_infos
@ -222,7 +392,7 @@ impl<'a> ParseTableBuilder<'a> {
mut preceding_auxiliary_symbols: AuxiliarySymbolSequence,
state_id: ParseStateId,
item_set: &ParseItemSet<'a>,
) -> Result<()> {
) -> BuildTableResult<()> {
let mut terminal_successors = BTreeMap::new();
let mut non_terminal_successors = BTreeMap::new();
let mut lookaheads_with_conflicts = TokenSet::new();
@ -426,15 +596,18 @@ impl<'a> ParseTableBuilder<'a> {
}
})
.collect::<HashSet<_>>();
let mut message =
"Extra rules must have unambiguous endings. Conflicting rules: ".to_string();
for (i, variable_index) in parent_symbols.iter().enumerate() {
if i > 0 {
message += ", ";
}
message += &self.syntax_grammar.variables[*variable_index as usize].name;
}
return Err(anyhow!(message));
let parent_symbol_names = parent_symbols
.iter()
.map(|&variable_index| {
self.syntax_grammar.variables[variable_index as usize]
.name
.clone()
})
.collect::<Vec<_>>();
Err(AmbiguousExtraError {
parent_symbols: parent_symbol_names,
})?;
}
}
// Add actions for the start tokens of each non-terminal extra rule.
@ -507,7 +680,7 @@ impl<'a> ParseTableBuilder<'a> {
preceding_auxiliary_symbols: &[AuxiliarySymbolInfo],
conflicting_lookahead: Symbol,
reduction_info: &ReductionInfo,
) -> Result<()> {
) -> BuildTableResult<()> {
let entry = self.parse_table.states[state_id]
.terminal_entries
.get_mut(&conflicting_lookahead)
@ -521,7 +694,7 @@ impl<'a> ParseTableBuilder<'a> {
// precedence, and there can still be SHIFT/REDUCE conflicts.
let mut considered_associativity = false;
let mut shift_precedence = Vec::<(&Precedence, Symbol)>::new();
let mut conflicting_items = HashSet::new();
let mut conflicting_items = BTreeSet::new();
for ParseItemSetEntry {
item, lookaheads, ..
} in &item_set.entries
@ -662,93 +835,55 @@ impl<'a> ParseTableBuilder<'a> {
return Ok(());
}
let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string();
let mut conflict_error = ConflictError::default();
for symbol in preceding_symbols {
write!(&mut msg, " {}", self.symbol_name(symbol)).unwrap();
conflict_error
.symbol_sequence
.push(self.symbol_name(symbol).to_string());
}
conflict_error.conflicting_lookahead = self.symbol_name(&conflicting_lookahead).to_string();
writeln!(
&mut msg,
" • {} …\n",
self.symbol_name(&conflicting_lookahead)
)
.unwrap();
writeln!(&mut msg, "Possible interpretations:\n").unwrap();
let mut interpretations = conflicting_items
let interpretations = conflicting_items
.iter()
.map(|item| {
let mut line = String::new();
for preceding_symbol in preceding_symbols
let preceding_symbols = preceding_symbols
.iter()
.take(preceding_symbols.len() - item.step_index as usize)
{
write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap();
}
.map(|symbol| self.symbol_name(symbol).to_string())
.collect::<Vec<_>>();
write!(
&mut line,
" ({}",
&self.syntax_grammar.variables[item.variable_index as usize].name
)
.unwrap();
let variable_name = self.syntax_grammar.variables[item.variable_index as usize]
.name
.clone();
for (j, step) in item.production.steps.iter().enumerate() {
if j as u32 == item.step_index {
write!(&mut line, "").unwrap();
}
write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap();
}
let production_step_symbols = item
.production
.steps
.iter()
.map(|step| self.symbol_name(&step.symbol).to_string())
.collect::<Vec<_>>();
write!(&mut line, ")").unwrap();
if item.is_done() {
write!(
&mut line,
" • {} …",
self.symbol_name(&conflicting_lookahead)
)
.unwrap();
}
let precedence = item.precedence();
let associativity = item.associativity();
let prec_line = if let Some(associativity) = associativity {
Some(format!(
"(precedence: {precedence}, associativity: {associativity:?})",
))
} else if !precedence.is_none() {
Some(format!("(precedence: {precedence})"))
} else {
None
let precedence = match item.precedence() {
Precedence::None => None,
_ => Some(item.precedence().to_string()),
};
(line, prec_line)
let associativity = item.associativity().map(|assoc| format!("{assoc:?}"));
Interpretation {
preceding_symbols,
variable_name,
production_step_symbols,
step_index: item.step_index,
done: item.is_done(),
conflicting_lookahead: self.symbol_name(&conflicting_lookahead).to_string(),
precedence,
associativity,
}
})
.collect::<Vec<_>>();
conflict_error.possible_interpretations = interpretations;
let max_interpretation_length = interpretations
.iter()
.map(|i| i.0.chars().count())
.max()
.unwrap();
interpretations.sort_unstable();
for (i, (line, prec_suffix)) in interpretations.into_iter().enumerate() {
write!(&mut msg, " {}:", i + 1).unwrap();
msg += &line;
if let Some(prec_suffix) = prec_suffix {
for _ in line.chars().count()..max_interpretation_length {
msg.push(' ');
}
msg += " ";
msg += &prec_suffix;
}
msg.push('\n');
}
let mut resolution_count = 0;
writeln!(&mut msg, "\nPossible resolutions:\n").unwrap();
let mut shift_items = Vec::new();
let mut reduce_items = Vec::new();
for item in conflicting_items {
@ -761,76 +896,57 @@ impl<'a> ParseTableBuilder<'a> {
shift_items.sort_unstable();
reduce_items.sort_unstable();
let list_rule_names = |mut msg: &mut String, items: &[&ParseItem]| {
let get_rule_names = |items: &[&ParseItem]| -> Vec<String> {
let mut last_rule_id = None;
let mut result = Vec::new();
for item in items {
if last_rule_id == Some(item.variable_index) {
continue;
}
if last_rule_id.is_some() {
write!(&mut msg, " and").unwrap();
}
last_rule_id = Some(item.variable_index);
write!(
msg,
" `{}`",
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
result.push(self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)));
}
result
};
if actual_conflict.len() > 1 {
if !shift_items.is_empty() {
resolution_count += 1;
write!(
&mut msg,
" {resolution_count}: Specify a higher precedence in",
)
.unwrap();
list_rule_names(&mut msg, &shift_items);
writeln!(&mut msg, " than in the other rules.").unwrap();
let names = get_rule_names(&shift_items);
conflict_error
.possible_resolutions
.push(Resolution::Precedence { symbols: names });
}
for item in &reduce_items {
resolution_count += 1;
writeln!(
&mut msg,
" {resolution_count}: Specify a higher precedence in `{}` than in the other rules.",
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
let name = self.symbol_name(&Symbol::non_terminal(item.variable_index as usize));
conflict_error
.possible_resolutions
.push(Resolution::Precedence {
symbols: vec![name],
});
}
}
if considered_associativity {
resolution_count += 1;
write!(
&mut msg,
" {resolution_count}: Specify a left or right associativity in",
)
.unwrap();
list_rule_names(&mut msg, &reduce_items);
writeln!(&mut msg).unwrap();
let names = get_rule_names(&reduce_items);
conflict_error
.possible_resolutions
.push(Resolution::Associativity { symbols: names });
}
resolution_count += 1;
write!(
&mut msg,
" {resolution_count}: Add a conflict for these rules: ",
)
.unwrap();
for (i, symbol) in actual_conflict.iter().enumerate() {
if i > 0 {
write!(&mut msg, ", ").unwrap();
}
write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap();
}
writeln!(&mut msg).unwrap();
conflict_error
.possible_resolutions
.push(Resolution::AddConflict {
symbols: actual_conflict
.iter()
.map(|s| self.symbol_name(s))
.collect(),
});
Err(anyhow!(msg))
self.actual_conflicts.insert(actual_conflict);
Err(conflict_error)?
}
fn compare_precedence(
@ -999,7 +1115,7 @@ pub fn build_parse_table<'a>(
lexical_grammar: &'a LexicalGrammar,
item_set_builder: ParseItemSetBuilder<'a>,
variable_info: &'a [VariableInfo],
) -> Result<(ParseTable, Vec<ParseStateInfo<'a>>)> {
) -> BuildTableResult<(ParseTable, Vec<ParseStateInfo<'a>>)> {
ParseTableBuilder::new(
syntax_grammar,
lexical_grammar,


@ -8,8 +8,9 @@ mod token_conflicts;
use std::collections::{BTreeSet, HashMap};
use anyhow::Result;
pub use build_lex_table::LARGE_CHARACTER_RANGE_COUNT;
use build_parse_table::BuildTableResult;
pub use build_parse_table::ParseTableBuilderError;
use log::info;
use self::{
@ -42,7 +43,7 @@ pub fn build_tables(
variable_info: &[VariableInfo],
inlines: &InlinedProductionMap,
report_symbol_name: Option<&str>,
) -> Result<Tables> {
) -> BuildTableResult<Tables> {
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
let following_tokens =
get_following_tokens(syntax_grammar, lexical_grammar, inlines, &item_set_builder);


@ -5,12 +5,15 @@ use std::{
process::{Command, Stdio},
};
use anyhow::{anyhow, Context, Result};
use anyhow::Result;
use build_tables::build_tables;
use grammars::InputGrammar;
use lazy_static::lazy_static;
pub use node_types::VariableInfoError;
use parse_grammar::parse_grammar;
pub use parse_grammar::ParseGrammarError;
use prepare_grammar::prepare_grammar;
pub use prepare_grammar::PrepareGrammarError;
use regex::{Regex, RegexBuilder};
use render::render_c_code;
use semver::Version;
@ -27,6 +30,10 @@ mod render;
mod rules;
mod tables;
pub use build_tables::ParseTableBuilderError;
use serde::Serialize;
use thiserror::Error;
lazy_static! {
static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
.multi_line(true)
@ -42,6 +49,88 @@ struct GeneratedParser {
pub const ALLOC_HEADER: &str = include_str!("templates/alloc.h");
pub const ARRAY_HEADER: &str = include_str!("templates/array.h");
pub type GenerateResult<T> = Result<T, GenerateError>;
#[derive(Debug, Error, Serialize)]
pub enum GenerateError {
#[error("Error with specified path -- {0}")]
GrammarPath(String),
#[error("{0}")]
IO(String),
#[error(transparent)]
LoadGrammarFile(#[from] LoadGrammarError),
#[error(transparent)]
ParseGrammar(#[from] ParseGrammarError),
#[error(transparent)]
Prepare(#[from] PrepareGrammarError),
#[error(transparent)]
VariableInfo(#[from] VariableInfoError),
#[error(transparent)]
BuildTables(#[from] ParseTableBuilderError),
}
impl From<std::io::Error> for GenerateError {
fn from(value: std::io::Error) -> Self {
Self::IO(value.to_string())
}
}
pub type LoadGrammarFileResult<T> = Result<T, LoadGrammarError>;
#[derive(Debug, Error, Serialize)]
pub enum LoadGrammarError {
#[error("Path to a grammar file with `.js` or `.json` extension is required")]
InvalidPath,
#[error("Failed to load grammar.js -- {0}")]
LoadJSGrammarFile(#[from] JSError),
#[error("Failed to load grammar.json -- {0}")]
IO(String),
#[error("Unknown grammar file extension: {0:?}")]
FileExtension(PathBuf),
}
impl From<std::io::Error> for LoadGrammarError {
fn from(value: std::io::Error) -> Self {
Self::IO(value.to_string())
}
}
pub type JSResult<T> = Result<T, JSError>;
#[derive(Debug, Error, Serialize)]
pub enum JSError {
#[error("Failed to run `{runtime}` -- {error}")]
JSRuntimeSpawn { runtime: String, error: String },
#[error("Got invalid UTF8 from `{runtime}` -- {error}")]
JSRuntimeUtf8 { runtime: String, error: String },
#[error("`{runtime}` process exited with status {code}")]
JSRuntimeExit { runtime: String, code: i32 },
#[error("{0}")]
IO(String),
#[error("Could not parse this package's version as semver -- {0}")]
Semver(String),
#[error("Failed to serialze grammar JSON -- {0}")]
Serialzation(String),
}
impl From<std::io::Error> for JSError {
fn from(value: std::io::Error) -> Self {
Self::IO(value.to_string())
}
}
impl From<serde_json::Error> for JSError {
fn from(value: serde_json::Error) -> Self {
Self::Serialization(value.to_string())
}
}
impl From<semver::Error> for JSError {
fn from(value: semver::Error) -> Self {
Self::Semver(value.to_string())
}
}
pub fn generate_parser_in_directory(
repo_path: &Path,
out_path: Option<&str>,
@ -49,7 +138,7 @@ pub fn generate_parser_in_directory(
abi_version: usize,
report_symbol_name: Option<&str>,
js_runtime: Option<&str>,
) -> Result<()> {
) -> GenerateResult<()> {
let mut repo_path = repo_path.to_owned();
let mut grammar_path = grammar_path;
@ -58,7 +147,7 @@ pub fn generate_parser_in_directory(
let path = PathBuf::from(path);
if !path
.try_exists()
.with_context(|| "Some error with specified path")?
.map_err(|e| GenerateError::GrammarPath(e.to_string()))?
{
fs::create_dir_all(&path)?;
grammar_path = None;
@ -79,8 +168,11 @@ pub fn generate_parser_in_directory(
fs::create_dir_all(&header_path)?;
if grammar_path.file_name().unwrap() != "grammar.json" {
fs::write(src_path.join("grammar.json"), &grammar_json)
.with_context(|| format!("Failed to write grammar.json to {src_path:?}"))?;
fs::write(src_path.join("grammar.json"), &grammar_json).map_err(|e| {
GenerateError::IO(format!(
"Failed to write grammar.json to {src_path:?} -- {e}"
))
})?;
}
// Parse and preprocess the grammar.
@ -101,7 +193,7 @@ pub fn generate_parser_in_directory(
Ok(())
}
pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> {
pub fn generate_parser_for_grammar(grammar_json: &str) -> GenerateResult<(String, String)> {
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
let input_grammar = parse_grammar(&grammar_json)?;
let parser =
@ -113,7 +205,7 @@ fn generate_parser_for_grammar_with_opts(
input_grammar: &InputGrammar,
abi_version: usize,
report_symbol_name: Option<&str>,
) -> Result<GeneratedParser> {
) -> GenerateResult<GeneratedParser> {
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
prepare_grammar(input_grammar)?;
let variable_info =
@ -149,23 +241,21 @@ fn generate_parser_for_grammar_with_opts(
})
}
pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
pub fn load_grammar_file(
grammar_path: &Path,
js_runtime: Option<&str>,
) -> LoadGrammarFileResult<String> {
if grammar_path.is_dir() {
return Err(anyhow!(
"Path to a grammar file with `.js` or `.json` extension is required"
));
Err(LoadGrammarError::InvalidPath)?;
}
match grammar_path.extension().and_then(|e| e.to_str()) {
Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime)
.with_context(|| "Failed to load grammar.js")?),
Some("json") => {
Ok(fs::read_to_string(grammar_path).with_context(|| "Failed to load grammar.json")?)
}
_ => Err(anyhow!("Unknown grammar file extension: {grammar_path:?}",)),
Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime)?),
Some("json") => Ok(fs::read_to_string(grammar_path)?),
_ => Err(LoadGrammarError::FileExtension(grammar_path.to_owned()))?,
}
}
fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> JSResult<String> {
let grammar_path = fs::canonicalize(grammar_path)?;
#[cfg(windows)]
@ -194,14 +284,17 @@ fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.with_context(|| format!("Failed to run `{js_runtime}`"))?;
.map_err(|e| JSError::JSRuntimeSpawn {
runtime: js_runtime.to_string(),
error: e.to_string(),
})?;
let mut js_stdin = js_process
.stdin
.take()
.with_context(|| format!("Failed to open stdin for {js_runtime}"))?;
let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))
.with_context(|| "Could not parse this package's version as semver.")?;
.ok_or_else(|| JSError::IO(format!("Failed to open stdin for `{js_runtime}`")))?;
let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))?;
write!(
js_stdin,
"globalThis.TREE_SITTER_CLI_VERSION_MAJOR = {};
@ -209,20 +302,28 @@ fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result
globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};",
cli_version.major, cli_version.minor, cli_version.patch,
)
.with_context(|| format!("Failed to write tree-sitter version to {js_runtime}'s stdin"))?;
js_stdin
.write(include_bytes!("./dsl.js"))
.with_context(|| format!("Failed to write grammar dsl to {js_runtime}'s stdin"))?;
.map_err(|e| {
JSError::IO(format!(
"Failed to write tree-sitter version to `{js_runtime}`'s stdin -- {e}"
))
})?;
js_stdin.write(include_bytes!("./dsl.js")).map_err(|e| {
JSError::IO(format!(
"Failed to write grammar dsl to `{js_runtime}`'s stdin -- {e}"
))
})?;
drop(js_stdin);
let output = js_process
.wait_with_output()
.with_context(|| format!("Failed to read output from {js_runtime}"))?;
.map_err(|e| JSError::IO(format!("Failed to read output from `{js_runtime}` -- {e}")))?;
match output.status.code() {
None => panic!("{js_runtime} process was killed"),
None => panic!("`{js_runtime}` process was killed"),
Some(0) => {
let stdout = String::from_utf8(output.stdout)
.with_context(|| format!("Got invalid UTF8 from {js_runtime}"))?;
let stdout = String::from_utf8(output.stdout).map_err(|e| JSError::JSRuntimeUtf8 {
runtime: js_runtime.to_string(),
error: e.to_string(),
})?;
let mut grammar_json = &stdout[..];
@ -237,18 +338,18 @@ fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result
stdout.flush()?;
}
Ok(serde_json::to_string_pretty(
&serde_json::from_str::<serde_json::Value>(grammar_json)
.with_context(|| "Failed to parse grammar JSON")?,
)
.with_context(|| "Failed to serialize grammar JSON")?
+ "\n")
Ok(serde_json::to_string_pretty(&serde_json::from_str::<
serde_json::Value,
>(grammar_json)?)?)
}
Some(code) => Err(anyhow!("{js_runtime} process exited with status {code}")),
Some(code) => Err(JSError::JSRuntimeExit {
runtime: js_runtime.to_string(),
code,
}),
}
}
pub fn write_file(path: &Path, body: impl AsRef<[u8]>) -> Result<()> {
pub fn write_file(path: &Path, body: impl AsRef<[u8]>) -> GenerateResult<()> {
fs::write(path, body)
.with_context(|| format!("Failed to write {:?}", path.file_name().unwrap()))
.map_err(|e| GenerateError::IO(format!("Failed to write {:?} -- {e}", path.file_name())))
}


@ -3,8 +3,9 @@ use std::{
collections::{BTreeMap, HashMap, HashSet},
};
use anyhow::{anyhow, Result};
use anyhow::Result;
use serde::Serialize;
use thiserror::Error;
use super::{
grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
@ -132,6 +133,14 @@ impl ChildQuantity {
}
}
pub type VariableInfoResult<T> = Result<T, VariableInfoError>;
#[derive(Debug, Error, Serialize)]
pub enum VariableInfoError {
#[error("Grammar error: Supertype symbols must always have a single visible child, but `{0}` can have multiple")]
InvalidSupertype(String),
}
/// Compute a summary of the public-facing structure of each variable in the
/// grammar. Each variable in the grammar corresponds to a distinct public-facing
/// node type.
@ -157,7 +166,7 @@ pub fn get_variable_info(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
default_aliases: &AliasMap,
) -> Result<Vec<VariableInfo>> {
) -> VariableInfoResult<Vec<VariableInfo>> {
let child_type_is_visible = |t: &ChildType| {
variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous
};
@ -338,13 +347,7 @@ pub fn get_variable_info(
for supertype_symbol in &syntax_grammar.supertype_symbols {
if result[supertype_symbol.index].has_multi_step_production {
let variable = &syntax_grammar.variables[supertype_symbol.index];
return Err(anyhow!(
concat!(
"Grammar error: Supertype symbols must always ",
"have a single visible child, but `{}` can have multiple"
),
variable.name
));
Err(VariableInfoError::InvalidSupertype(variable.name.clone()))?;
}
}


@ -1,8 +1,9 @@
use std::collections::HashSet;
use anyhow::{anyhow, bail, Result};
use serde::Deserialize;
use anyhow::Result;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use thiserror::Error;
use super::{
grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType},
@ -104,6 +105,26 @@ pub struct GrammarJSON {
reserved: Map<String, Value>,
}
pub type ParseGrammarResult<T> = Result<T, ParseGrammarError>;
#[derive(Debug, Error, Serialize)]
pub enum ParseGrammarError {
#[error("{0}")]
Serialization(String),
#[error("Rules in the `extras` array must not contain empty strings")]
InvalidExtra,
#[error("Invalid rule in precedences array. Only strings and symbols are allowed")]
Unexpected,
#[error("Reserved word sets must be arrays")]
InvalidReservedWordSet,
}
impl From<serde_json::Error> for ParseGrammarError {
fn from(value: serde_json::Error) -> Self {
Self::Serialization(value.to_string())
}
}
fn rule_is_referenced(rule: &Rule, target: &str) -> bool {
match rule {
Rule::NamedSymbol(name) => name == target,
@ -153,24 +174,22 @@ fn variable_is_used(
result
}
pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
pub(crate) fn parse_grammar(input: &str) -> ParseGrammarResult<InputGrammar> {
let mut grammar_json = serde_json::from_str::<GrammarJSON>(input)?;
let mut extra_symbols =
grammar_json
.extras
.into_iter()
.try_fold(Vec::new(), |mut acc, item| {
.try_fold(Vec::<Rule>::new(), |mut acc, item| {
let rule = parse_rule(item);
if let Rule::String(ref value) = rule {
if value.is_empty() {
return Err(anyhow!(
"Rules in the `extras` array must not contain empty strings"
));
Err(ParseGrammarError::InvalidExtra)?;
}
}
acc.push(rule);
Ok(acc)
ParseGrammarResult::Ok(acc)
})?;
let mut external_tokens = grammar_json
@ -186,11 +205,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
ordering.push(match entry {
RuleJSON::STRING { value } => PrecedenceEntry::Name(value),
RuleJSON::SYMBOL { name } => PrecedenceEntry::Symbol(name),
_ => {
return Err(anyhow!(
"Invalid rule in precedences array. Only strings and symbols are allowed"
))
}
_ => Err(ParseGrammarError::Unexpected)?,
});
}
precedence_orderings.push(ordering);
@ -202,7 +217,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
.rules
.into_iter()
.map(|(n, r)| Ok((n, parse_rule(serde_json::from_value(r)?))))
.collect::<Result<Vec<_>>>()?;
.collect::<ParseGrammarResult<Vec<_>>>()?;
let mut in_progress = HashSet::new();
@ -243,19 +258,18 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
let mut reserved_words = Vec::new();
let Value::Array(rule_values) = rule_values else {
bail!("reserved word sets must be arrays");
Err(ParseGrammarError::InvalidReservedWordSet)?
};
for value in rule_values {
let rule_json: RuleJSON = serde_json::from_value(value)?;
reserved_words.push(parse_rule(rule_json));
reserved_words.push(parse_rule(serde_json::from_value(value)?));
}
Ok(ReservedWordContext {
name,
reserved_words,
})
})
.collect::<Result<Vec<_>>>()?;
.collect::<ParseGrammarResult<Vec<_>>>()?;
Ok(InputGrammar {
name: grammar_json.name,


@ -1,9 +1,10 @@
use anyhow::{anyhow, Context, Result};
use indoc::indoc;
use anyhow::Result;
use regex_syntax::{
hir::{Class, Hir, HirKind},
ParserBuilder,
};
use serde::Serialize;
use thiserror::Error;
use super::ExtractedLexicalGrammar;
use crate::{
@ -18,6 +19,40 @@ struct NfaBuilder {
precedence_stack: Vec<i32>,
}
pub type ExpandTokensResult<T> = Result<T, ExpandTokensError>;
#[derive(Debug, Error, Serialize)]
pub enum ExpandTokensError {
#[error(
"The rule `{0}` matches the empty string.
Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
"
)]
EmptyString(String),
#[error(transparent)]
Processing(ExpandTokensProcessingError),
#[error(transparent)]
ExpandRule(ExpandRuleError),
}
#[derive(Debug, Error, Serialize)]
pub struct ExpandTokensProcessingError {
rule: String,
error: ExpandRuleError,
}
impl std::fmt::Display for ExpandTokensProcessingError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
writeln!(
f,
"Error processing rule {}: Grammar error: Unexpected rule {:?}",
self.rule, self.error
)?;
Ok(())
}
}
fn get_implicit_precedence(rule: &Rule) -> i32 {
match rule {
Rule::String(_) => 2,
@ -41,7 +76,7 @@ const fn get_completion_precedence(rule: &Rule) -> i32 {
0
}
pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> ExpandTokensResult<LexicalGrammar> {
let mut builder = NfaBuilder {
nfa: Nfa::new(),
is_sep: true,
@ -58,14 +93,7 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGram
let mut variables = Vec::new();
for (i, variable) in grammar.variables.into_iter().enumerate() {
if variable.rule.is_empty() {
return Err(anyhow!(
indoc! {"
The rule `{}` matches the empty string.
Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
"},
variable.name
));
Err(ExpandTokensError::EmptyString(variable.name.clone()))?;
}
let is_immediate_token = match &variable.rule {
@ -81,12 +109,19 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGram
let last_state_id = builder.nfa.last_state_id();
builder
.expand_rule(&variable.rule, last_state_id)
.with_context(|| format!("Error processing rule {}", variable.name))?;
.map_err(|e| {
ExpandTokensError::Processing(ExpandTokensProcessingError {
rule: variable.name.clone(),
error: e,
})
})?;
if !is_immediate_token {
builder.is_sep = true;
let last_state_id = builder.nfa.last_state_id();
builder.expand_rule(&separator_rule, last_state_id)?;
builder
.expand_rule(&separator_rule, last_state_id)
.map_err(ExpandTokensError::ExpandRule)?;
}
variables.push(LexicalVariable {
@ -103,8 +138,30 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGram
})
}
pub type ExpandRuleResult<T> = Result<T, ExpandRuleError>;
#[derive(Debug, Error, Serialize)]
pub enum ExpandRuleError {
#[error("Grammar error: Unexpected rule {0:?}")]
UnexpectedRule(Rule),
#[error("{0}")]
Parse(String),
#[error(transparent)]
ExpandRegex(ExpandRegexError),
}
pub type ExpandRegexResult<T> = Result<T, ExpandRegexError>;
#[derive(Debug, Error, Serialize)]
pub enum ExpandRegexError {
#[error("{0}")]
Utf8(String),
#[error("Regex error: Assertions are not supported")]
Assertion,
}
impl NfaBuilder {
fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> ExpandRuleResult<bool> {
match rule {
Rule::Pattern(s, f) => {
// With unicode enabled, `\w`, `\s` and `\d` expand to character sets that are much
@ -124,8 +181,11 @@ impl NfaBuilder {
.unicode(true)
.utf8(false)
.build();
let hir = parser.parse(&s)?;
let hir = parser
.parse(&s)
.map_err(|e| ExpandRuleError::Parse(e.to_string()))?;
self.expand_regex(&hir, next_state_id)
.map_err(ExpandRuleError::ExpandRegex)
}
Rule::String(s) => {
for c in s.chars().rev() {
@ -189,15 +249,19 @@ impl NfaBuilder {
result
}
Rule::Blank => Ok(false),
_ => Err(anyhow!("Grammar error: Unexpected rule {rule:?}")),
_ => Err(ExpandRuleError::UnexpectedRule(rule.clone()))?,
}
}
fn expand_regex(&mut self, hir: &Hir, mut next_state_id: u32) -> Result<bool> {
fn expand_regex(&mut self, hir: &Hir, mut next_state_id: u32) -> ExpandRegexResult<bool> {
match hir.kind() {
HirKind::Empty => Ok(false),
HirKind::Literal(literal) => {
for character in std::str::from_utf8(&literal.0)?.chars().rev() {
for character in std::str::from_utf8(&literal.0)
.map_err(|e| ExpandRegexError::Utf8(e.to_string()))?
.chars()
.rev()
{
let char_set = CharacterSet::from_char(character);
self.push_advance(char_set, next_state_id);
next_state_id = self.nfa.last_state_id();
@ -234,7 +298,7 @@ impl NfaBuilder {
Ok(true)
}
},
HirKind::Look(_) => Err(anyhow!("Regex error: Assertions are not supported")),
HirKind::Look(_) => Err(ExpandRegexError::Assertion)?,
HirKind::Repetition(repetition) => match (repetition.min, repetition.max) {
(0, Some(1)) => self.expand_zero_or_one(&repetition.sub, next_state_id),
(1, None) => self.expand_one_or_more(&repetition.sub, next_state_id),
@ -293,7 +357,7 @@ impl NfaBuilder {
}
}
fn expand_one_or_more(&mut self, hir: &Hir, next_state_id: u32) -> Result<bool> {
fn expand_one_or_more(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
self.nfa.states.push(NfaState::Accept {
variable_index: 0,
precedence: 0,
@ -309,7 +373,7 @@ impl NfaBuilder {
}
}
fn expand_zero_or_one(&mut self, hir: &Hir, next_state_id: u32) -> Result<bool> {
fn expand_zero_or_one(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
if self.expand_regex(hir, next_state_id)? {
self.push_split(next_state_id);
Ok(true)
@ -318,7 +382,7 @@ impl NfaBuilder {
}
}
fn expand_zero_or_more(&mut self, hir: &Hir, next_state_id: u32) -> Result<bool> {
fn expand_zero_or_more(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult<bool> {
if self.expand_one_or_more(hir, next_state_id)? {
self.push_split(next_state_id);
Ok(true)
@ -327,7 +391,12 @@ impl NfaBuilder {
}
}
fn expand_count(&mut self, hir: &Hir, count: u32, mut next_state_id: u32) -> Result<bool> {
fn expand_count(
&mut self,
hir: &Hir,
count: u32,
mut next_state_id: u32,
) -> ExpandRegexResult<bool> {
let mut result = false;
for _ in 0..count {
if self.expand_regex(hir, next_state_id)? {


@ -1,6 +1,8 @@
use std::collections::HashMap;
use anyhow::{anyhow, Result};
use anyhow::Result;
use serde::Serialize;
use thiserror::Error;
use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
use crate::{
@ -8,9 +10,31 @@ use crate::{
rules::{MetadataParams, Rule, Symbol, SymbolType},
};
pub type ExtractTokensResult<T> = Result<T, ExtractTokensError>;
#[derive(Debug, Error, Serialize)]
pub enum ExtractTokensError {
#[error(
"The rule `{0}` contains an empty string.
Tree-sitter does not support syntactic rules that contain an empty string
unless they are used only as the grammar's start rule.
"
)]
EmptyString(String),
#[error("Rule '{0}' cannot be used as both an external token and a non-terminal rule")]
ExternalTokenNonTerminal(String),
#[error("Non-symbol rules cannot be used as external tokens")]
NonSymbolExternalToken,
#[error("Non-terminal symbol '{0}' cannot be used as the word token")]
NonTerminalWordToken(String),
#[error("Reserved words must be tokens")]
NonTokenReservedWord,
}
pub(super) fn extract_tokens(
mut grammar: InternedGrammar,
) -> Result<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
) -> ExtractTokensResult<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> {
let mut extractor = TokenExtractor {
current_variable_name: String::new(),
current_variable_token_count: 0,
@ -110,10 +134,9 @@ pub(super) fn extract_tokens(
let rule = symbol_replacer.replace_symbols_in_rule(&external_token.rule);
if let Rule::Symbol(symbol) = rule {
if symbol.is_non_terminal() {
return Err(anyhow!(
"Rule '{}' cannot be used as both an external token and a non-terminal rule",
&variables[symbol.index].name,
));
Err(ExtractTokensError::ExternalTokenNonTerminal(
variables[symbol.index].name.clone(),
))?;
}
if symbol.is_external() {
@ -130,9 +153,7 @@ pub(super) fn extract_tokens(
});
}
} else {
return Err(anyhow!(
"Non-symbol rules cannot be used as external tokens"
));
Err(ExtractTokensError::NonSymbolExternalToken)?;
}
}
@ -140,10 +161,9 @@ pub(super) fn extract_tokens(
if let Some(token) = grammar.word_token {
let token = symbol_replacer.replace_symbol(token);
if token.is_non_terminal() {
return Err(anyhow!(
"Non-terminal symbol '{}' cannot be used as the word token",
&variables[token.index].name
));
Err(ExtractTokensError::NonTerminalWordToken(
variables[token.index].name.clone(),
))?;
}
word_token = Some(token);
}
@ -160,7 +180,7 @@ pub(super) fn extract_tokens(
{
reserved_words.push(Symbol::terminal(index));
} else {
return Err(anyhow!("Reserved words must be tokens"));
Err(ExtractTokensError::NonTokenReservedWord)?;
}
}
reserved_word_contexts.push(ReservedWordContext {
@ -205,7 +225,7 @@ impl TokenExtractor {
&mut self,
is_first: bool,
variable: &mut Variable,
) -> Result<()> {
) -> ExtractTokensResult<()> {
self.current_variable_name.clear();
self.current_variable_name.push_str(&variable.name);
self.current_variable_token_count = 0;
@ -214,7 +234,7 @@ impl TokenExtractor {
Ok(())
}
fn extract_tokens_in_rule(&mut self, input: &Rule) -> Result<Rule> {
fn extract_tokens_in_rule(&mut self, input: &Rule) -> ExtractTokensResult<Rule> {
match input {
Rule::String(name) => Ok(self.extract_token(input, Some(name))?.into()),
Rule::Pattern(..) => Ok(self.extract_token(input, None)?.into()),
@ -249,13 +269,13 @@ impl TokenExtractor {
elements
.iter()
.map(|e| self.extract_tokens_in_rule(e))
.collect::<Result<Vec<_>>>()?,
.collect::<ExtractTokensResult<Vec<_>>>()?,
)),
Rule::Choice(elements) => Ok(Rule::Choice(
elements
.iter()
.map(|e| self.extract_tokens_in_rule(e))
.collect::<Result<Vec<_>>>()?,
.collect::<ExtractTokensResult<Vec<_>>>()?,
)),
Rule::Reserved { rule, context_name } => Ok(Rule::Reserved {
rule: Box::new(self.extract_tokens_in_rule(rule)?),
@ -265,7 +285,11 @@ impl TokenExtractor {
}
}
fn extract_token(&mut self, rule: &Rule, string_value: Option<&String>) -> Result<Symbol> {
fn extract_token(
&mut self,
rule: &Rule,
string_value: Option<&String>,
) -> ExtractTokensResult<Symbol> {
for (i, variable) in self.extracted_variables.iter_mut().enumerate() {
if variable.rule == *rule {
self.extracted_usage_counts[i] += 1;
@ -276,14 +300,9 @@ impl TokenExtractor {
let index = self.extracted_variables.len();
let variable = if let Some(string_value) = string_value {
if string_value.is_empty() && !self.is_first_rule {
return Err(anyhow!(
"The rule `{}` contains an empty string.
Tree-sitter does not support syntactic rules that contain an empty string
unless they are used only as the grammar's start rule.
",
self.current_variable_name
));
Err(ExtractTokensError::EmptyString(
self.current_variable_name.clone(),
))?;
}
Variable {
name: string_value.clone(),


@ -1,7 +1,8 @@
use std::collections::HashMap;
use anyhow::{anyhow, Result};
use indoc::indoc;
use anyhow::Result;
use serde::Serialize;
use thiserror::Error;
use super::ExtractedSyntaxGrammar;
use crate::{
@ -11,6 +12,24 @@ use crate::{
rules::{Alias, Associativity, Precedence, Rule, Symbol, TokenSet},
};
pub type FlattenGrammarResult<T> = Result<T, FlattenGrammarError>;
#[derive(Debug, Error, Serialize)]
pub enum FlattenGrammarError {
#[error("No such reserved word set: {0}")]
NoReservedWordSet(String),
#[error(
"The rule `{0}` matches the empty string.
Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
"
)]
EmptyString(String),
#[error("Rule `{0}` cannot be inlined because it contains a reference to itself")]
RecursiveInline(String),
}
struct RuleFlattener {
production: Production,
reserved_word_set_ids: HashMap<String, ReservedWordSetId>,
@ -37,7 +56,7 @@ impl RuleFlattener {
}
}
fn flatten_variable(&mut self, variable: Variable) -> Result<SyntaxVariable> {
fn flatten_variable(&mut self, variable: Variable) -> FlattenGrammarResult<SyntaxVariable> {
let mut productions = Vec::new();
for rule in extract_choices(variable.rule) {
let production = self.flatten_rule(rule)?;
@ -52,7 +71,7 @@ impl RuleFlattener {
})
}
fn flatten_rule(&mut self, rule: Rule) -> Result<Production> {
fn flatten_rule(&mut self, rule: Rule) -> FlattenGrammarResult<Production> {
self.production = Production::default();
self.alias_stack.clear();
self.reserved_word_stack.clear();
@ -63,7 +82,7 @@ impl RuleFlattener {
Ok(self.production.clone())
}
fn apply(&mut self, rule: Rule, at_end: bool) -> Result<bool> {
fn apply(&mut self, rule: Rule, at_end: bool) -> FlattenGrammarResult<bool> {
match rule {
Rule::Seq(members) => {
let mut result = false;
@ -138,7 +157,9 @@ impl RuleFlattener {
self.reserved_word_set_ids
.get(&context_name)
.copied()
.ok_or_else(|| anyhow!("no such reserved word set: {context_name}"))?,
.ok_or_else(|| {
FlattenGrammarError::NoReservedWordSet(context_name.clone())
})?,
);
let did_push = self.apply(*rule, at_end)?;
self.reserved_word_stack.pop();
@ -224,7 +245,9 @@ fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
false
}
pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
pub(super) fn flatten_grammar(
grammar: ExtractedSyntaxGrammar,
) -> FlattenGrammarResult<SyntaxGrammar> {
let mut reserved_word_set_ids_by_name = HashMap::new();
for (ix, set) in grammar.reserved_word_sets.iter().enumerate() {
reserved_word_set_ids_by_name.insert(set.name.clone(), ReservedWordSetId(ix));
@ -235,31 +258,20 @@ pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxG
.variables
.into_iter()
.map(|variable| flattener.flatten_variable(variable))
.collect::<Result<Vec<_>>>()?;
.collect::<FlattenGrammarResult<Vec<_>>>()?;
for (i, variable) in variables.iter().enumerate() {
let symbol = Symbol::non_terminal(i);
for production in &variable.productions {
if production.steps.is_empty() && symbol_is_used(&variables, symbol) {
return Err(anyhow!(
indoc! {"
The rule `{}` matches the empty string.
Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
"},
variable.name
));
Err(FlattenGrammarError::EmptyString(variable.name.clone()))?;
}
if grammar.variables_to_inline.contains(&symbol)
&& production.steps.iter().any(|step| step.symbol == symbol)
{
return Err(anyhow!(
"Rule `{}` cannot be inlined because it contains a reference to itself.",
variable.name,
));
Err(FlattenGrammarError::RecursiveInline(variable.name.clone()))?;
}
}
}


@ -1,4 +1,6 @@
use anyhow::{anyhow, Result};
use anyhow::Result;
use serde::Serialize;
use thiserror::Error;
use super::InternedGrammar;
use crate::{
@ -6,11 +8,27 @@ use crate::{
rules::{Rule, Symbol},
};
pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> {
pub type InternSymbolsResult<T> = Result<T, InternSymbolsError>;
#[derive(Debug, Error, Serialize)]
pub enum InternSymbolsError {
#[error("A grammar's start rule must be visible.")]
HiddenStartRule,
#[error("Undefined symbol `{0}`")]
Undefined(String),
#[error("Undefined symbol `{0}` in grammar's supertypes array")]
UndefinedSupertype(String),
#[error("Undefined symbol `{0}` in grammar's conflicts array")]
UndefinedConflict(String),
#[error("Undefined symbol `{0}` as grammar's word token")]
UndefinedWordToken(String),
}
pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult<InternedGrammar> {
let interner = Interner { grammar };
if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
return Err(anyhow!("A grammar's start rule must be visible."));
Err(InternSymbolsError::HiddenStartRule)?;
}
let mut variables = Vec::with_capacity(grammar.variables.len());
@ -41,7 +59,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
for supertype_symbol_name in &grammar.supertype_symbols {
supertype_symbols.push(interner.intern_name(supertype_symbol_name).ok_or_else(|| {
anyhow!("Undefined symbol `{supertype_symbol_name}` in grammar's supertypes array")
InternSymbolsError::UndefinedSupertype(supertype_symbol_name.clone())
})?);
}
@ -61,9 +79,11 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
for conflict in &grammar.expected_conflicts {
let mut interned_conflict = Vec::with_capacity(conflict.len());
for name in conflict {
interned_conflict.push(interner.intern_name(name).ok_or_else(|| {
anyhow!("Undefined symbol `{name}` in grammar's conflicts array")
})?);
interned_conflict.push(
interner
.intern_name(name)
.ok_or_else(|| InternSymbolsError::UndefinedConflict(name.clone()))?,
);
}
expected_conflicts.push(interned_conflict);
}
@ -80,7 +100,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
word_token = Some(
interner
.intern_name(name)
.ok_or_else(|| anyhow!("Undefined symbol `{name}` as grammar's word token"))?,
.ok_or_else(|| InternSymbolsError::UndefinedWordToken(name.clone()))?,
);
}
@ -108,7 +128,7 @@ struct Interner<'a> {
}
impl Interner<'_> {
fn intern_rule(&self, rule: &Rule, name: Option<&str>) -> Result<Rule> {
fn intern_rule(&self, rule: &Rule, name: Option<&str>) -> InternSymbolsResult<Rule> {
match rule {
Rule::Choice(elements) => {
self.check_single(elements, name);
@ -136,7 +156,7 @@ impl Interner<'_> {
context_name: context_name.clone(),
}),
Rule::NamedSymbol(name) => self.intern_name(name).map_or_else(
|| Err(anyhow!("Undefined symbol `{name}`")),
|| Err(InternSymbolsError::Undefined(name.clone())),
|symbol| Ok(Rule::Symbol(symbol)),
),
_ => Ok(rule.clone()),


@ -12,7 +12,14 @@ use std::{
mem,
};
use anyhow::{anyhow, Result};
use anyhow::Result;
pub use expand_tokens::ExpandTokensError;
pub use extract_tokens::ExtractTokensError;
pub use flatten_grammar::FlattenGrammarError;
pub use intern_symbols::InternSymbolsError;
pub use process_inlines::ProcessInlinesError;
use serde::Serialize;
use thiserror::Error;
pub use self::expand_tokens::expand_tokens;
use self::{
@ -67,11 +74,67 @@ impl<T, U> Default for IntermediateGrammar<T, U> {
}
}
pub type PrepareGrammarResult<T> = Result<T, PrepareGrammarError>;
#[derive(Debug, Error, Serialize)]
#[error(transparent)]
pub enum PrepareGrammarError {
ValidatePrecedences(#[from] ValidatePrecedenceError),
InternSymbols(#[from] InternSymbolsError),
ExtractTokens(#[from] ExtractTokensError),
FlattenGrammar(#[from] FlattenGrammarError),
ExpandTokens(#[from] ExpandTokensError),
ProcessInlines(#[from] ProcessInlinesError),
}
pub type ValidatePrecedenceResult<T> = Result<T, ValidatePrecedenceError>;
#[derive(Debug, Error, Serialize)]
#[error(transparent)]
pub enum ValidatePrecedenceError {
Undeclared(#[from] UndeclaredPrecedenceError),
Ordering(#[from] ConflictingPrecedenceOrderingError),
}
#[derive(Debug, Error, Serialize)]
pub struct UndeclaredPrecedenceError {
pub precedence: String,
pub rule: String,
}
impl std::fmt::Display for UndeclaredPrecedenceError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"Undeclared precedence '{}' in rule '{}'",
self.precedence, self.rule
)?;
Ok(())
}
}
#[derive(Debug, Error, Serialize)]
pub struct ConflictingPrecedenceOrderingError {
pub precedence_1: String,
pub precedence_2: String,
}
impl std::fmt::Display for ConflictingPrecedenceOrderingError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"Conflicting orderings for precedences {} and {}",
self.precedence_1, self.precedence_2
)?;
Ok(())
}
}
/// Transform an input grammar into separate components that are ready
/// for parse table construction.
pub fn prepare_grammar(
input_grammar: &InputGrammar,
) -> Result<(
) -> PrepareGrammarResult<(
SyntaxGrammar,
LexicalGrammar,
InlinedProductionMap,
@ -92,10 +155,14 @@ pub fn prepare_grammar(
/// Check that all of the named precedences used in the grammar are declared
/// within the `precedences` lists, and also that there are no conflicting
/// precedence orderings declared in those lists.
fn validate_precedences(grammar: &InputGrammar) -> Result<()> {
fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()> {
// Check that no rule contains a named precedence that is not present in
// any of the `precedences` lists.
fn validate(rule_name: &str, rule: &Rule, names: &HashSet<&String>) -> Result<()> {
fn validate(
rule_name: &str,
rule: &Rule,
names: &HashSet<&String>,
) -> ValidatePrecedenceResult<()> {
match rule {
Rule::Repeat(rule) => validate(rule_name, rule, names),
Rule::Seq(elements) | Rule::Choice(elements) => elements
@ -104,7 +171,10 @@ fn validate_precedences(grammar: &InputGrammar) -> Result<()> {
Rule::Metadata { rule, params } => {
if let Precedence::Name(n) = &params.precedence {
if !names.contains(n) {
return Err(anyhow!("Undeclared precedence '{n}' in rule '{rule_name}'"));
Err(UndeclaredPrecedenceError {
precedence: n.to_string(),
rule: rule_name.to_string(),
})?;
}
}
validate(rule_name, rule, names)?;
@ -134,9 +204,10 @@ fn validate_precedences(grammar: &InputGrammar) -> Result<()> {
}
hash_map::Entry::Occupied(e) => {
if e.get() != &ordering {
return Err(anyhow!(
"Conflicting orderings for precedences {entry1} and {entry2}",
));
Err(ConflictingPrecedenceOrderingError {
precedence_1: entry1.to_string(),
precedence_2: entry2.to_string(),
})?;
}
}
}


@ -1,6 +1,8 @@
use std::collections::HashMap;
use anyhow::{anyhow, Result};
use anyhow::Result;
use serde::Serialize;
use thiserror::Error;
use crate::{
grammars::{InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
@ -187,29 +189,38 @@ impl InlinedProductionMapBuilder {
}
}
pub type ProcessInlinesResult<T> = Result<T, ProcessInlinesError>;
#[derive(Debug, Error, Serialize)]
pub enum ProcessInlinesError {
#[error("External token `{0}` cannot be inlined")]
ExternalToken(String),
#[error("Token `{0}` cannot be inlined")]
Token(String),
#[error("Rule `{0}` cannot be inlined because it is the first rule")]
FirstRule(String),
}
pub(super) fn process_inlines(
grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
) -> Result<InlinedProductionMap> {
) -> ProcessInlinesResult<InlinedProductionMap> {
for symbol in &grammar.variables_to_inline {
match symbol.kind {
SymbolType::External => {
return Err(anyhow!(
"External token `{}` cannot be inlined",
grammar.external_tokens[symbol.index].name
))
Err(ProcessInlinesError::ExternalToken(
grammar.external_tokens[symbol.index].name.clone(),
))?;
}
SymbolType::Terminal => {
return Err(anyhow!(
"Token `{}` cannot be inlined",
lexical_grammar.variables[symbol.index].name,
))
Err(ProcessInlinesError::Token(
lexical_grammar.variables[symbol.index].name.clone(),
))?;
}
SymbolType::NonTerminal if symbol.index == 0 => {
return Err(anyhow!(
"Rule `{}` cannot be inlined because it is the first rule",
grammar.variables[symbol.index].name,
))
Err(ProcessInlinesError::FirstRule(
grammar.variables[symbol.index].name.clone(),
))?;
}
_ => {}
}


@ -1,10 +1,11 @@
use std::{collections::HashMap, fmt};
use serde::Serialize;
use smallbitvec::SmallBitVec;
use super::grammars::VariableType;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
pub enum SymbolType {
External,
End,
@ -13,19 +14,19 @@ pub enum SymbolType {
NonTerminal,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
pub enum Associativity {
Left,
Right,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
pub struct Alias {
pub value: String,
pub is_named: bool,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default, Serialize)]
pub enum Precedence {
#[default]
None,
@ -35,7 +36,7 @@ pub enum Precedence {
pub type AliasMap = HashMap<Symbol, Alias>;
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize)]
pub struct MetadataParams {
pub precedence: Precedence,
pub dynamic_precedence: i32,
@ -46,13 +47,13 @@ pub struct MetadataParams {
pub field_name: Option<String>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
pub struct Symbol {
pub kind: SymbolType,
pub index: usize,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)]
pub enum Rule {
Blank,
String(String),


@ -381,7 +381,8 @@ pub fn generate_grammar_files(
let Some(opts) = opts else { unreachable!() };
let tree_sitter_json = opts.clone().to_tree_sitter_json();
write_file(path, serde_json::to_string_pretty(&tree_sitter_json)?)
write_file(path, serde_json::to_string_pretty(&tree_sitter_json)?)?;
Ok(())
},
|path| {
// updating the config, if needed
@ -523,10 +524,9 @@ pub fn generate_grammar_files(
|path| {
let contents = fs::read_to_string(path)?;
if contents.contains("fs.exists(") {
write_file(path, contents.replace("fs.exists(", "fs.existsSync("))
} else {
Ok(())
write_file(path, contents.replace("fs.exists(", "fs.existsSync("))?;
}
Ok(())
},
)?;
@ -566,10 +566,9 @@ pub fn generate_grammar_files(
let contents = fs::read_to_string(path)?;
let old = "add_custom_target(test";
if contents.contains(old) {
write_file(path, contents.replace(old, "add_custom_target(ts-test"))
} else {
Ok(())
write_file(path, contents.replace(old, "add_custom_target(ts-test"))?;
}
Ok(())
},
)?;
@ -671,10 +670,9 @@ pub fn generate_grammar_files(
"egg_info": EggInfo,
"#},
);
write_file(path, contents)
} else {
Ok(())
write_file(path, contents)?;
}
Ok(())
},
)?;
@ -951,7 +949,8 @@ fn generate_file(
}
}
write_file(path, replacement)
write_file(path, replacement)?;
Ok(())
}
fn create_dir(path: &Path) -> Result<()> {


@ -22,7 +22,7 @@ use tree_sitter_cli::{
init::{generate_grammar_files, get_root_path, migrate_package_json, JsonConfigOpts},
input::{get_input, get_tmp_source_file, CliInput},
logger,
parse::{self, ParseFileOptions, ParseOutput, ParseResult, ParseTheme},
parse::{self, ParseFileOptions, ParseOutput, ParseTheme},
playground, query,
tags::{self, TagsOptions},
test::{self, TestOptions, TestStats},
@ -121,6 +121,9 @@ struct Generate {
/// Produce a report of the states for the given rule, use `-` to report every rule
#[arg(long)]
pub report_states_for_rule: Option<String>,
/// Report conflicts in a JSON format
#[arg(long)]
pub json: bool,
/// The name or path of the JavaScript runtime to use for generating parsers
#[arg(
long,
@ -215,7 +218,7 @@ struct Parse {
pub open_log: bool,
/// Output parsing results in a JSON format
#[arg(long, short = 'j')]
pub output_json_summary: bool,
pub json: bool,
/// The path to an alternative config.json file
#[arg(long)]
pub config_path: Option<PathBuf>,
@ -729,14 +732,22 @@ impl Generate {
version.parse().expect("invalid abi version flag")
}
});
tree_sitter_generate::generate_parser_in_directory(
if let Err(err) = tree_sitter_generate::generate_parser_in_directory(
current_dir,
self.output.as_deref(),
self.grammar_path.as_deref(),
abi_version,
self.report_states_for_rule.as_deref(),
self.js_runtime.as_deref(),
)?;
) {
if self.json {
eprintln!("{}", serde_json::to_string_pretty(&err)?);
// Exit early to prevent errors from being printed a second time in the caller
std::process::exit(1);
} else {
return Err(err.into());
}
}
if self.build {
if let Some(path) = self.libdir {
loader = loader::Loader::with_parser_lib_path(PathBuf::from(path));
@ -815,7 +826,7 @@ impl Parse {
ParseOutput::Xml
} else if self.output_cst {
ParseOutput::Cst
} else if self.quiet || self.output_json_summary {
} else if self.quiet || self.json {
ParseOutput::Quiet
} else {
ParseOutput::Normal
@ -862,9 +873,9 @@ impl Parse {
loader.find_all_languages(&loader_config)?;
let should_track_stats = self.stat;
let mut stats = parse::ParseStats::new();
let mut stats = parse::ParseStats::default();
let options = ParseFileOptions {
let mut options = ParseFileOptions {
edits: &edits
.iter()
.map(std::string::String::as_str)
@ -872,6 +883,7 @@ impl Parse {
output,
print_time: time,
timeout,
stats: &mut stats,
debug: self.debug,
debug_graph: self.debug_graph,
cancellation_flag: Some(&cancellation_flag),
@ -881,14 +893,15 @@ impl Parse {
parse_theme: &parse_theme,
};
let mut update_stats = |parse_result: ParseResult| {
let mut update_stats = |stats: &mut parse::ParseStats| {
let parse_result = stats.parse_summaries.last().unwrap();
if should_track_stats {
stats.total_parses += 1;
stats.cumulative_stats.total_parses += 1;
if parse_result.successful {
stats.cumulative_stats.successful_parses += 1;
}
if let Some(duration) = parse_result.duration {
stats.cumulative_stats.total_bytes += parse_result.bytes;
if let (Some(duration), Some(bytes)) = (parse_result.duration, parse_result.bytes) {
stats.cumulative_stats.total_bytes += bytes;
stats.cumulative_stats.total_duration += duration;
}
}
@ -915,15 +928,15 @@ impl Parse {
let language =
loader.select_language(path, current_dir, self.scope.as_deref())?;
let parse_result = parse::parse_file_at_path(
parse::parse_file_at_path(
&mut parser,
&language,
path,
&path.display().to_string(),
max_path_length,
&options,
&mut options,
)?;
update_stats(parse_result);
update_stats(options.stats);
}
}
@ -941,15 +954,15 @@ impl Parse {
.map(|(l, _)| l.clone())
.ok_or_else(|| anyhow!("No language found"))?;
let parse_result = parse::parse_file_at_path(
parse::parse_file_at_path(
&mut parser,
&language,
&path,
&name,
name.chars().count(),
&options,
&mut options,
)?;
update_stats(parse_result);
update_stats(&mut stats);
fs::remove_file(path)?;
}
@ -961,15 +974,15 @@ impl Parse {
let name = "stdin";
let language = loader.select_language(&path, current_dir, None)?;
let parse_result = parse::parse_file_at_path(
parse::parse_file_at_path(
&mut parser,
&language,
&path,
name,
name.chars().count(),
&options,
&mut options,
)?;
update_stats(parse_result);
update_stats(&mut stats);
fs::remove_file(path)?;
}
}
@ -977,7 +990,7 @@ impl Parse {
if should_track_stats {
println!("\n{}", stats.cumulative_stats);
}
if self.output_json_summary {
if self.json {
println!("{}", serde_json::to_string_pretty(&stats)?);
}


@ -204,10 +204,11 @@ pub struct ParseSummary {
pub start: Option<ParsePoint>,
pub end: Option<ParsePoint>,
pub duration: Option<Duration>,
pub bytes: Option<u128>,
pub bytes: Option<usize>,
}
impl ParseSummary {
#[must_use]
pub fn new(path: &Path) -> Self {
Self {
file: path.to_path_buf(),
@ -217,21 +218,12 @@ impl ParseSummary {
}
}
#[derive(Serialize, Debug)]
#[derive(Serialize, Debug, Default)]
pub struct ParseStats {
pub parse_summaries: Vec<ParseSummary>,
pub cumulative_stats: Stats,
}
impl ParseStats {
pub fn new() -> Self {
Self {
parse_summaries: Vec::new(),
cumulative_stats: Stats::default(),
}
}
}
pub struct ParseFileOptions<'a> {
pub edits: &'a [&'a str],
pub output: ParseOutput,
@ -260,8 +252,8 @@ pub fn parse_file_at_path(
path: &Path,
name: &str,
max_path_length: usize,
opts: &ParseFileOptions,
) -> Result<ParseResult> {
opts: &mut ParseFileOptions,
) -> Result<()> {
let mut _log_session = None;
parser.set_language(language)?;
let mut source_code = fs::read(path).with_context(|| format!("Error reading {name:?}"))?;
@ -398,10 +390,6 @@ pub fn parse_file_at_path(
parser.stop_printing_dot_graphs();
let current_summary = opts.stats.parse_summaries.last_mut().unwrap();
current_summary.start = Some(tree.root_node().start_position().into());
current_summary.end = Some(tree.root_node().end_position().into());
let parse_duration_ms = parse_duration.as_micros() as f64 / 1e3;
let edit_duration_ms = edit_duration.as_micros() as f64 / 1e3;
let mut cursor = tree.walk();
@ -656,11 +644,16 @@ pub fn parse_file_at_path(
writeln!(&mut stdout)?;
}
return Ok(ParseResult {
successful: first_error.is_none(),
bytes: source_code.len(),
opts.stats.parse_summaries.push(ParseSummary {
file: path.to_path_buf(),
successful: true,
start: Some(tree.root_node().start_position().into()),
end: Some(tree.root_node().end_position().into()),
duration: Some(parse_duration),
bytes: Some(source_code.len()),
});
return Ok(());
}
parser.stop_printing_dot_graphs();
@ -675,11 +668,16 @@ pub fn parse_file_at_path(
)?;
}
Ok(ParseResult {
opts.stats.parse_summaries.push(ParseSummary {
file: path.to_path_buf(),
successful: false,
bytes: source_code.len(),
start: None,
end: None,
duration: None,
})
bytes: Some(source_code.len()),
});
Ok(())
}
const fn escape_invisible(c: char) -> Option<&'static str> {