feat: add the semantic version to TSLanguage, and expose an API for retrieving it

This commit is contained in:
Amaan Qureshi 2025-01-21 01:59:24 -05:00
parent f0222107b8
commit 8bb1448a6f
24 changed files with 371 additions and 77 deletions

View file

@ -7,16 +7,10 @@ use std::{
};
use anyhow::Result;
use build_tables::build_tables;
use grammars::InputGrammar;
pub use node_types::VariableInfoError;
use parse_grammar::parse_grammar;
pub use parse_grammar::ParseGrammarError;
use prepare_grammar::prepare_grammar;
pub use prepare_grammar::PrepareGrammarError;
use regex::{Regex, RegexBuilder};
use render::render_c_code;
use semver::Version;
use serde::{Deserialize, Serialize};
use thiserror::Error;
mod build_tables;
mod dedup;
@ -30,9 +24,15 @@ mod render;
mod rules;
mod tables;
use build_tables::build_tables;
pub use build_tables::ParseTableBuilderError;
use serde::Serialize;
use thiserror::Error;
use grammars::InputGrammar;
pub use node_types::VariableInfoError;
use parse_grammar::parse_grammar;
pub use parse_grammar::ParseGrammarError;
use prepare_grammar::prepare_grammar;
pub use prepare_grammar::PrepareGrammarError;
use render::render_c_code;
static JSON_COMMENT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
RegexBuilder::new("^\\s*//.*")
@ -67,6 +67,8 @@ pub enum GenerateError {
VariableInfo(#[from] VariableInfoError),
#[error(transparent)]
BuildTables(#[from] ParseTableBuilderError),
#[error(transparent)]
ParseVersion(#[from] ParseVersionError),
}
impl From<std::io::Error> for GenerateError {
@ -95,6 +97,16 @@ impl From<std::io::Error> for LoadGrammarError {
}
}
/// Errors produced while reading the grammar's semantic version from its
/// `tree-sitter.json` config file.
///
/// Every variant carries a pre-formatted message that is displayed verbatim.
#[derive(Debug, Error, Serialize)]
pub enum ParseVersionError {
/// The version string in the config file could not be parsed as semver.
#[error("{0}")]
Version(String),
/// The config file's contents could not be deserialized as JSON.
#[error("{0}")]
JSON(String),
/// The config file could not be read.
#[error("{0}")]
IO(String),
}
pub type JSResult<T> = Result<T, JSError>;
#[derive(Debug, Error, Serialize)]
@ -178,11 +190,18 @@ pub fn generate_parser_in_directory(
// Parse and preprocess the grammar.
let input_grammar = parse_grammar(&grammar_json)?;
let semantic_version = read_grammar_version(&repo_path)?;
// Generate the parser and related files.
let GeneratedParser {
c_code,
node_types_json,
} = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?;
} = generate_parser_for_grammar_with_opts(
&input_grammar,
abi_version,
semantic_version.map(|v| (v.major as u8, v.minor as u8, v.patch as u8)),
report_symbol_name,
)?;
write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?;
@ -193,17 +212,25 @@ pub fn generate_parser_in_directory(
Ok(())
}
pub fn generate_parser_for_grammar(grammar_json: &str) -> GenerateResult<(String, String)> {
pub fn generate_parser_for_grammar(
grammar_json: &str,
semantic_version: Option<(u8, u8, u8)>,
) -> GenerateResult<(String, String)> {
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
let input_grammar = parse_grammar(&grammar_json)?;
let parser =
generate_parser_for_grammar_with_opts(&input_grammar, tree_sitter::LANGUAGE_VERSION, None)?;
let parser = generate_parser_for_grammar_with_opts(
&input_grammar,
tree_sitter::LANGUAGE_VERSION,
semantic_version,
None,
)?;
Ok((input_grammar.name, parser.c_code))
}
fn generate_parser_for_grammar_with_opts(
input_grammar: &InputGrammar,
abi_version: usize,
semantic_version: Option<(u8, u8, u8)>,
report_symbol_name: Option<&str>,
) -> GenerateResult<GeneratedParser> {
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
@ -233,6 +260,7 @@ fn generate_parser_for_grammar_with_opts(
lexical_grammar,
simple_aliases,
abi_version,
semantic_version,
supertype_symbol_map,
);
Ok(GeneratedParser {
@ -241,6 +269,55 @@ fn generate_parser_for_grammar_with_opts(
})
}
/// Finds the nearest `tree-sitter.json` config file and extracts its semantic version.
///
/// The search begins in `repo_path` and then moves up through each parent directory, stopping at
/// the first directory that contains the config file.
///
/// Returns `Ok(None)` when no config file exists in `repo_path` or any of its ancestors, which
/// keeps grammars without a config file working as before.
///
/// # Errors
///
/// - [`ParseVersionError::IO`] if the config file exists but cannot be read.
/// - [`ParseVersionError::JSON`] if its contents cannot be deserialized (this includes a missing
///   `metadata.version` field).
/// - [`ParseVersionError::Version`] if the version string is not valid semver.
fn read_grammar_version(repo_path: &Path) -> Result<Option<Version>, ParseVersionError> {
    #[derive(Deserialize)]
    struct Metadata {
        version: String,
    }

    #[derive(Deserialize)]
    struct TreeSitterJson {
        metadata: Metadata,
    }

    const CONFIG_FILE: &str = "tree-sitter.json";

    // `dir` is the directory currently being inspected; it is moved one level up per iteration.
    let mut dir = repo_path.to_path_buf();
    loop {
        let candidate = dir.join(CONFIG_FILE);

        if candidate.exists() {
            let contents = fs::read_to_string(&candidate).map_err(|e| {
                ParseVersionError::IO(format!("Failed to read `{}` -- {e}", candidate.display()))
            })?;

            let config: TreeSitterJson = serde_json::from_str(&contents).map_err(|e| {
                ParseVersionError::JSON(format!(
                    "Failed to parse `{}` -- {e}",
                    candidate.display()
                ))
            })?;

            // The first config file found is authoritative: the search never continues past it,
            // even when its version turns out to be invalid.
            return match Version::parse(&config.metadata.version) {
                Ok(version) => Ok(Some(version)),
                Err(e) => Err(ParseVersionError::Version(format!(
                    "Failed to parse `{}` version as semver -- {e}",
                    candidate.display()
                ))),
            };
        }

        // No config file here; give up once there is no parent directory left to try.
        if !dir.pop() {
            return Ok(None);
        }
    }
}
pub fn load_grammar_file(
grammar_path: &Path,
js_runtime: Option<&str>,

View file

@ -5,6 +5,8 @@ use std::{
mem::swap,
};
use indoc::indoc;
use super::{
build_tables::Tables,
grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
@ -83,9 +85,8 @@ struct Generator {
field_names: Vec<String>,
supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
supertype_map: BTreeMap<String, Vec<ChildType>>,
#[allow(unused)]
abi_version: usize,
metadata: Option<Metadata>,
}
struct LargeCharacterSetInfo {
@ -93,6 +94,12 @@ struct LargeCharacterSetInfo {
is_used: bool,
}
/// Semantic version components of the grammar, written into the `.metadata`
/// field of the generated `TSLanguage` struct.
struct Metadata {
/// Major component of the semantic version.
major_version: u8,
/// Minor component of the semantic version.
minor_version: u8,
/// Patch component of the semantic version.
patch_version: u8,
}
impl Generator {
fn generate(mut self) -> String {
self.init();
@ -1539,7 +1546,7 @@ impl Generator {
indent!(self);
add_line!(self, "static const TSLanguage language = {{");
indent!(self);
add_line!(self, ".version = LANGUAGE_VERSION,");
add_line!(self, ".abi_version = LANGUAGE_VERSION,");
// Quantities
add_line!(self, ".symbol_count = SYMBOL_COUNT,");
@ -1629,6 +1636,24 @@ impl Generator {
.max()
.unwrap()
);
let Some(metadata) = &self.metadata else {
panic!(
indoc! {"
Metadata is required to generate ABI version {}.
This means that your grammar doesn't have a tree-sitter.json config file with an appropriate version field in the metadata table.
"},
self.abi_version
);
};
add_line!(self, ".metadata = {{");
indent!(self);
add_line!(self, ".major_version = {},", metadata.major_version);
add_line!(self, ".minor_version = {},", metadata.minor_version);
add_line!(self, ".patch_version = {},", metadata.patch_version);
dedent!(self);
add_line!(self, "}},");
}
dedent!(self);
@ -1914,6 +1939,7 @@ pub fn render_c_code(
lexical_grammar: LexicalGrammar,
default_aliases: AliasMap,
abi_version: usize,
semantic_version: Option<(u8, u8, u8)>,
supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
) -> String {
assert!(
@ -1932,6 +1958,11 @@ pub fn render_c_code(
lexical_grammar,
default_aliases,
abi_version,
metadata: semantic_version.map(|(major_version, minor_version, patch_version)| Metadata {
major_version,
minor_version,
patch_version,
}),
supertype_symbol_map,
..Default::default()
}

View file

@ -359,7 +359,8 @@ fn test_feature_corpus_files() {
}
let error_message_path = test_path.join("expected_error.txt");
let grammar_json = tree_sitter_generate::load_grammar_file(&grammar_path, None).unwrap();
let generate_result = tree_sitter_generate::generate_parser_for_grammar(&grammar_json);
let generate_result =
tree_sitter_generate::generate_parser_for_grammar(&grammar_json, Some((0, 0, 0)));
if error_message_path.exists() {
if EXAMPLE_INCLUDE.is_some() || EXAMPLE_EXCLUDE.is_some() {

View file

@ -101,7 +101,7 @@ fn test_supertypes() {
let language = get_language("rust");
let supertypes = language.supertypes();
if language.version() < 15 {
if language.abi_version() < 15 {
return;
}

View file

@ -18,9 +18,17 @@ mod tree_test;
#[cfg(feature = "wasm")]
mod wasm_language_test;
use tree_sitter_generate::GenerateResult;
pub use crate::fuzz::{
allocations,
edits::{get_random_edit, invert_edit},
random::Rand,
ITERATION_COUNT,
};
/// Test helper that generates a parser from grammar JSON alone.
///
/// Forwards to [`tree_sitter_generate::generate_parser_for_grammar`] with a fixed placeholder
/// semantic version of `0.0.0`, since the tests have no version number to supply, and returns
/// that function's result unchanged.
fn generate_parser(grammar_json: &str) -> GenerateResult<(String, String)> {
    // Placeholder (major, minor, patch) triple used for every test grammar.
    const PLACEHOLDER_VERSION: (u8, u8, u8) = (0, 0, 0);

    tree_sitter_generate::generate_parser_for_grammar(grammar_json, Some(PLACEHOLDER_VERSION))
}

View file

@ -1,12 +1,12 @@
use tree_sitter::{Node, Parser, Point, Tree};
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
use tree_sitter_generate::load_grammar_file;
use super::{
get_random_edit,
helpers::fixtures::{fixtures_dir, get_language, get_test_language},
Rand,
};
use crate::parse::perform_edit;
use crate::{parse::perform_edit, tests::generate_parser};
const JSON_EXAMPLE: &str = r#"
@ -317,7 +317,7 @@ fn test_next_sibling_of_zero_width_node() {
)
.unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap();
let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap();
let mut parser = Parser::new();
let language = get_test_language(&parser_name, &parser_code, None);
@ -563,8 +563,7 @@ fn test_node_named_child() {
#[test]
fn test_node_named_child_with_aliases_and_extras() {
let (parser_name, parser_code) =
generate_parser_for_grammar(GRAMMAR_WITH_ALIASES_AND_EXTRAS).unwrap();
let (parser_name, parser_code) = generate_parser(GRAMMAR_WITH_ALIASES_AND_EXTRAS).unwrap();
let mut parser = Parser::new();
parser
@ -871,7 +870,7 @@ fn test_node_sexp() {
#[test]
fn test_node_field_names() {
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"
{
"name": "test_grammar_with_fields",
@ -981,7 +980,7 @@ fn test_node_field_names() {
#[test]
fn test_node_field_calls_in_language_without_fields() {
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"
{
"name": "test_grammar_with_no_fields",
@ -1039,7 +1038,7 @@ fn test_node_is_named_but_aliased_as_anonymous() {
)
.unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap();
let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap();
let mut parser = Parser::new();
let language = get_test_language(&parser_name, &parser_code, None);

View file

@ -7,8 +7,9 @@ use std::{
};
use tree_sitter::Parser;
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
use tree_sitter_generate::load_grammar_file;
use super::generate_parser;
use crate::tests::helpers::fixtures::{fixtures_dir, get_test_language};
// The `sanitizing` cfg is required to avoid running tests under specific sanitizers
@ -90,7 +91,7 @@ fn hang_test() {
.join("get_col_should_hang_not_crash");
let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(grammar_json.as_str()).unwrap();
let (parser_name, parser_code) = generate_parser(grammar_json.as_str()).unwrap();
let language = get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path()));

View file

@ -6,7 +6,7 @@ use std::{
use tree_sitter::{
Decode, IncludedRangesError, InputEdit, LogType, ParseOptions, ParseState, Parser, Point, Range,
};
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
use tree_sitter_generate::load_grammar_file;
use tree_sitter_proc_macro::retry;
use super::helpers::{
@ -17,7 +17,7 @@ use super::helpers::{
use crate::{
fuzz::edits::Edit,
parse::perform_edit,
tests::{helpers::fixtures::fixtures_dir, invert_edit},
tests::{generate_parser, helpers::fixtures::fixtures_dir, invert_edit},
};
#[test]
@ -486,7 +486,7 @@ fn test_parsing_after_editing_tree_that_depends_on_column_values() {
.join("test_grammars")
.join("uses_current_column");
let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap();
let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap();
let (grammar_name, parser_code) = generate_parser(&grammar_json).unwrap();
let mut parser = Parser::new();
parser
@ -564,7 +564,7 @@ fn test_parsing_after_editing_tree_that_depends_on_column_position() {
.join("depends_on_column");
let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap();
let (grammar_name, parser_code) = generate_parser_for_grammar(grammar_json.as_str()).unwrap();
let (grammar_name, parser_code) = generate_parser(grammar_json.as_str()).unwrap();
let mut parser = Parser::new();
parser
@ -1475,7 +1475,7 @@ fn test_parsing_with_a_newly_included_range() {
#[test]
fn test_parsing_with_included_ranges_and_missing_tokens() {
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"{
"name": "test_leading_missing_token",
"rules": {
@ -1536,7 +1536,7 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
#[test]
fn test_grammars_that_can_hang_on_eof() {
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"
{
"name": "test_single_null_char_regex",
@ -1562,7 +1562,7 @@ fn test_grammars_that_can_hang_on_eof() {
.unwrap();
parser.parse("\"", None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"
{
"name": "test_null_char_with_next_char_regex",
@ -1587,7 +1587,7 @@ fn test_grammars_that_can_hang_on_eof() {
.unwrap();
parser.parse("\"", None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"
{
"name": "test_null_char_with_range_regex",
@ -1650,7 +1650,7 @@ if foo && bar || baz {}
fn test_parsing_with_scanner_logging() {
let dir = fixtures_dir().join("test_grammars").join("external_tokens");
let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap();
let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap();
let (grammar_name, parser_code) = generate_parser(&grammar_json).unwrap();
let mut parser = Parser::new();
parser
@ -1674,7 +1674,7 @@ fn test_parsing_with_scanner_logging() {
fn test_parsing_get_column_at_eof() {
let dir = fixtures_dir().join("test_grammars").join("get_col_eof");
let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap();
let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap();
let (grammar_name, parser_code) = generate_parser(&grammar_json).unwrap();
let mut parser = Parser::new();
parser
@ -1884,7 +1884,7 @@ fn test_decode_utf24le() {
#[test]
fn test_grammars_that_should_not_compile() {
assert!(generate_parser_for_grammar(
assert!(generate_parser(
r#"
{
"name": "issue_1111",
@ -1896,7 +1896,7 @@ fn test_grammars_that_should_not_compile() {
)
.is_err());
assert!(generate_parser_for_grammar(
assert!(generate_parser(
r#"
{
"name": "issue_1271",
@ -1911,11 +1911,11 @@ fn test_grammars_that_should_not_compile() {
}
},
}
"#,
"#
)
.is_err());
assert!(generate_parser_for_grammar(
assert!(generate_parser(
r#"
{
"name": "issue_1156_expl_1",
@ -1929,11 +1929,11 @@ fn test_grammars_that_should_not_compile() {
}
},
}
"#
"#
)
.is_err());
assert!(generate_parser_for_grammar(
assert!(generate_parser(
r#"
{
"name": "issue_1156_expl_2",
@ -1950,11 +1950,11 @@ fn test_grammars_that_should_not_compile() {
}
},
}
"#
"#
)
.is_err());
assert!(generate_parser_for_grammar(
assert!(generate_parser(
r#"
{
"name": "issue_1156_expl_3",
@ -1968,11 +1968,11 @@ fn test_grammars_that_should_not_compile() {
}
},
}
"#
"#
)
.is_err());
assert!(generate_parser_for_grammar(
assert!(generate_parser(
r#"
{
"name": "issue_1156_expl_4",
@ -1989,7 +1989,7 @@ fn test_grammars_that_should_not_compile() {
}
},
}
"#
"#
)
.is_err());
}

View file

@ -8,7 +8,6 @@ use tree_sitter::{
QueryCursorOptions, QueryError, QueryErrorKind, QueryPredicate, QueryPredicateArg,
QueryProperty, Range,
};
use tree_sitter_generate::generate_parser_for_grammar;
use unindent::Unindent;
use super::helpers::{
@ -17,6 +16,7 @@ use super::helpers::{
query_helpers::{assert_query_matches, Match, Pattern},
};
use crate::tests::{
generate_parser,
helpers::query_helpers::{collect_captures, collect_matches},
ITERATION_COUNT,
};
@ -532,7 +532,7 @@ fn test_query_errors_on_impossible_patterns() {
}
);
if js_lang.version() >= 15 {
if js_lang.abi_version() >= 15 {
assert_eq!(
Query::new(&js_lang, "(statement/identifier)").unwrap_err(),
QueryError {
@ -5216,7 +5216,7 @@ fn test_grammar_with_aliased_literal_query() {
// expansion: $ => seq('}'),
// },
// });
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"
{
"name": "test",