feat: add the semantic version to TSLanguage, and expose an API for retrieving it

This commit is contained in:
Amaan Qureshi 2025-01-21 01:59:24 -05:00
parent f0222107b8
commit 8bb1448a6f
24 changed files with 371 additions and 77 deletions

View file

@ -7,16 +7,10 @@ use std::{
};
use anyhow::Result;
use build_tables::build_tables;
use grammars::InputGrammar;
pub use node_types::VariableInfoError;
use parse_grammar::parse_grammar;
pub use parse_grammar::ParseGrammarError;
use prepare_grammar::prepare_grammar;
pub use prepare_grammar::PrepareGrammarError;
use regex::{Regex, RegexBuilder};
use render::render_c_code;
use semver::Version;
use serde::{Deserialize, Serialize};
use thiserror::Error;
mod build_tables;
mod dedup;
@ -30,9 +24,15 @@ mod render;
mod rules;
mod tables;
use build_tables::build_tables;
pub use build_tables::ParseTableBuilderError;
use serde::Serialize;
use thiserror::Error;
use grammars::InputGrammar;
pub use node_types::VariableInfoError;
use parse_grammar::parse_grammar;
pub use parse_grammar::ParseGrammarError;
use prepare_grammar::prepare_grammar;
pub use prepare_grammar::PrepareGrammarError;
use render::render_c_code;
static JSON_COMMENT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
RegexBuilder::new("^\\s*//.*")
@ -67,6 +67,8 @@ pub enum GenerateError {
VariableInfo(#[from] VariableInfoError),
#[error(transparent)]
BuildTables(#[from] ParseTableBuilderError),
#[error(transparent)]
ParseVersion(#[from] ParseVersionError),
}
impl From<std::io::Error> for GenerateError {
@ -95,6 +97,16 @@ impl From<std::io::Error> for LoadGrammarError {
}
}
#[derive(Debug, Error, Serialize)]
pub enum ParseVersionError {
#[error("{0}")]
Version(String),
#[error("{0}")]
JSON(String),
#[error("{0}")]
IO(String),
}
pub type JSResult<T> = Result<T, JSError>;
#[derive(Debug, Error, Serialize)]
@ -178,11 +190,18 @@ pub fn generate_parser_in_directory(
// Parse and preprocess the grammar.
let input_grammar = parse_grammar(&grammar_json)?;
let semantic_version = read_grammar_version(&repo_path)?;
// Generate the parser and related files.
let GeneratedParser {
c_code,
node_types_json,
} = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?;
} = generate_parser_for_grammar_with_opts(
&input_grammar,
abi_version,
semantic_version.map(|v| (v.major as u8, v.minor as u8, v.patch as u8)),
report_symbol_name,
)?;
write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?;
@ -193,17 +212,25 @@ pub fn generate_parser_in_directory(
Ok(())
}
pub fn generate_parser_for_grammar(grammar_json: &str) -> GenerateResult<(String, String)> {
pub fn generate_parser_for_grammar(
grammar_json: &str,
semantic_version: Option<(u8, u8, u8)>,
) -> GenerateResult<(String, String)> {
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
let input_grammar = parse_grammar(&grammar_json)?;
let parser =
generate_parser_for_grammar_with_opts(&input_grammar, tree_sitter::LANGUAGE_VERSION, None)?;
let parser = generate_parser_for_grammar_with_opts(
&input_grammar,
tree_sitter::LANGUAGE_VERSION,
semantic_version,
None,
)?;
Ok((input_grammar.name, parser.c_code))
}
fn generate_parser_for_grammar_with_opts(
input_grammar: &InputGrammar,
abi_version: usize,
semantic_version: Option<(u8, u8, u8)>,
report_symbol_name: Option<&str>,
) -> GenerateResult<GeneratedParser> {
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
@ -233,6 +260,7 @@ fn generate_parser_for_grammar_with_opts(
lexical_grammar,
simple_aliases,
abi_version,
semantic_version,
supertype_symbol_map,
);
Ok(GeneratedParser {
@ -241,6 +269,55 @@ fn generate_parser_for_grammar_with_opts(
})
}
/// This will read the `tree-sitter.json` config file and attempt to extract the version.
///
/// If the file is not found in the current directory or any of its parent directories, this will
/// return `None` to maintain backwards compatibility. If the file is found but the version cannot
/// be parsed as semver, this will return an error.
fn read_grammar_version(repo_path: &Path) -> Result<Option<Version>, ParseVersionError> {
#[derive(Deserialize)]
struct TreeSitterJson {
metadata: Metadata,
}
#[derive(Deserialize)]
struct Metadata {
version: String,
}
let filename = "tree-sitter.json";
let mut path = repo_path.join(filename);
loop {
let json = path
.exists()
.then(|| {
let contents = fs::read_to_string(path.as_path()).map_err(|e| {
ParseVersionError::IO(format!("Failed to read `{}` -- {e}", path.display()))
})?;
serde_json::from_str::<TreeSitterJson>(&contents).map_err(|e| {
ParseVersionError::JSON(format!("Failed to parse `{}` -- {e}", path.display()))
})
})
.transpose()?;
if let Some(json) = json {
return Version::parse(&json.metadata.version)
.map_err(|e| {
ParseVersionError::Version(format!(
"Failed to parse `{}` version as semver -- {e}",
path.display()
))
})
.map(Some);
}
path.pop(); // filename
if !path.pop() {
return Ok(None);
}
path.push(filename);
}
}
pub fn load_grammar_file(
grammar_path: &Path,
js_runtime: Option<&str>,

View file

@ -5,6 +5,8 @@ use std::{
mem::swap,
};
use indoc::indoc;
use super::{
build_tables::Tables,
grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
@ -83,9 +85,8 @@ struct Generator {
field_names: Vec<String>,
supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
supertype_map: BTreeMap<String, Vec<ChildType>>,
#[allow(unused)]
abi_version: usize,
metadata: Option<Metadata>,
}
struct LargeCharacterSetInfo {
@ -93,6 +94,12 @@ struct LargeCharacterSetInfo {
is_used: bool,
}
struct Metadata {
major_version: u8,
minor_version: u8,
patch_version: u8,
}
impl Generator {
fn generate(mut self) -> String {
self.init();
@ -1539,7 +1546,7 @@ impl Generator {
indent!(self);
add_line!(self, "static const TSLanguage language = {{");
indent!(self);
add_line!(self, ".version = LANGUAGE_VERSION,");
add_line!(self, ".abi_version = LANGUAGE_VERSION,");
// Quantities
add_line!(self, ".symbol_count = SYMBOL_COUNT,");
@ -1629,6 +1636,24 @@ impl Generator {
.max()
.unwrap()
);
let Some(metadata) = &self.metadata else {
panic!(
indoc! {"
Metadata is required to generate ABI version {}.
This means that your grammar doesn't have a tree-sitter.json config file with an appropriate version field in the metadata table.
"},
self.abi_version
);
};
add_line!(self, ".metadata = {{");
indent!(self);
add_line!(self, ".major_version = {},", metadata.major_version);
add_line!(self, ".minor_version = {},", metadata.minor_version);
add_line!(self, ".patch_version = {},", metadata.patch_version);
dedent!(self);
add_line!(self, "}},");
}
dedent!(self);
@ -1914,6 +1939,7 @@ pub fn render_c_code(
lexical_grammar: LexicalGrammar,
default_aliases: AliasMap,
abi_version: usize,
semantic_version: Option<(u8, u8, u8)>,
supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
) -> String {
assert!(
@ -1932,6 +1958,11 @@ pub fn render_c_code(
lexical_grammar,
default_aliases,
abi_version,
metadata: semantic_version.map(|(major_version, minor_version, patch_version)| Metadata {
major_version,
minor_version,
patch_version,
}),
supertype_symbol_map,
..Default::default()
}