feat: add the semantic version to TSLanguage, and expose an API for retrieving it

This commit is contained in:
Amaan Qureshi 2025-01-21 01:59:24 -05:00
parent f0222107b8
commit 8bb1448a6f
24 changed files with 371 additions and 77 deletions

View file

@ -7,16 +7,10 @@ use std::{
};
use anyhow::Result;
use build_tables::build_tables;
use grammars::InputGrammar;
pub use node_types::VariableInfoError;
use parse_grammar::parse_grammar;
pub use parse_grammar::ParseGrammarError;
use prepare_grammar::prepare_grammar;
pub use prepare_grammar::PrepareGrammarError;
use regex::{Regex, RegexBuilder};
use render::render_c_code;
use semver::Version;
use serde::{Deserialize, Serialize};
use thiserror::Error;
mod build_tables;
mod dedup;
@ -30,9 +24,15 @@ mod render;
mod rules;
mod tables;
use build_tables::build_tables;
pub use build_tables::ParseTableBuilderError;
use serde::Serialize;
use thiserror::Error;
use grammars::InputGrammar;
pub use node_types::VariableInfoError;
use parse_grammar::parse_grammar;
pub use parse_grammar::ParseGrammarError;
use prepare_grammar::prepare_grammar;
pub use prepare_grammar::PrepareGrammarError;
use render::render_c_code;
static JSON_COMMENT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
RegexBuilder::new("^\\s*//.*")
@ -67,6 +67,8 @@ pub enum GenerateError {
VariableInfo(#[from] VariableInfoError),
#[error(transparent)]
BuildTables(#[from] ParseTableBuilderError),
#[error(transparent)]
ParseVersion(#[from] ParseVersionError),
}
impl From<std::io::Error> for GenerateError {
@ -95,6 +97,16 @@ impl From<std::io::Error> for LoadGrammarError {
}
}
/// Errors produced while locating and reading the grammar's semantic version from
/// `tree-sitter.json`.
///
/// Every variant carries an already-formatted, human-readable message; that message is
/// exactly what `Display` prints (`#[error("{0}")]`).
#[derive(Debug, Error, Serialize)]
pub enum ParseVersionError {
/// The `metadata.version` string was found but is not a valid semantic version.
#[error("{0}")]
Version(String),
/// The config file was read but could not be deserialized as JSON of the expected shape.
#[error("{0}")]
JSON(String),
/// The config file exists but reading it from disk failed.
#[error("{0}")]
IO(String),
}
pub type JSResult<T> = Result<T, JSError>;
#[derive(Debug, Error, Serialize)]
@ -178,11 +190,18 @@ pub fn generate_parser_in_directory(
// Parse and preprocess the grammar.
let input_grammar = parse_grammar(&grammar_json)?;
let semantic_version = read_grammar_version(&repo_path)?;
// Generate the parser and related files.
let GeneratedParser {
c_code,
node_types_json,
} = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?;
} = generate_parser_for_grammar_with_opts(
&input_grammar,
abi_version,
semantic_version.map(|v| (v.major as u8, v.minor as u8, v.patch as u8)),
report_symbol_name,
)?;
write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?;
@ -193,17 +212,25 @@ pub fn generate_parser_in_directory(
Ok(())
}
pub fn generate_parser_for_grammar(grammar_json: &str) -> GenerateResult<(String, String)> {
pub fn generate_parser_for_grammar(
grammar_json: &str,
semantic_version: Option<(u8, u8, u8)>,
) -> GenerateResult<(String, String)> {
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
let input_grammar = parse_grammar(&grammar_json)?;
let parser =
generate_parser_for_grammar_with_opts(&input_grammar, tree_sitter::LANGUAGE_VERSION, None)?;
let parser = generate_parser_for_grammar_with_opts(
&input_grammar,
tree_sitter::LANGUAGE_VERSION,
semantic_version,
None,
)?;
Ok((input_grammar.name, parser.c_code))
}
fn generate_parser_for_grammar_with_opts(
input_grammar: &InputGrammar,
abi_version: usize,
semantic_version: Option<(u8, u8, u8)>,
report_symbol_name: Option<&str>,
) -> GenerateResult<GeneratedParser> {
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
@ -233,6 +260,7 @@ fn generate_parser_for_grammar_with_opts(
lexical_grammar,
simple_aliases,
abi_version,
semantic_version,
supertype_symbol_map,
);
Ok(GeneratedParser {
@ -241,6 +269,55 @@ fn generate_parser_for_grammar_with_opts(
})
}
/// Locates the nearest `tree-sitter.json` and extracts the grammar's semantic version from it.
///
/// The search starts in `repo_path` and then moves up one directory at a time for as long as
/// the path still has a parent component to strip.
///
/// # Returns
///
/// * `Ok(None)` if no `tree-sitter.json` exists at any searched level (kept lenient for
///   backwards compatibility with grammars that have no config file).
/// * `Ok(Some(version))` if the first config file found has a valid `metadata.version`.
///
/// # Errors
///
/// Only the first config file found is considered. An error is returned if that file cannot be
/// read ([`ParseVersionError::IO`]), cannot be deserialized ([`ParseVersionError::JSON`]), or
/// its version is not valid semver ([`ParseVersionError::Version`]).
fn read_grammar_version(repo_path: &Path) -> Result<Option<Version>, ParseVersionError> {
    #[derive(Deserialize)]
    struct Metadata {
        version: String,
    }

    #[derive(Deserialize)]
    struct TreeSitterJson {
        metadata: Metadata,
    }

    const CONFIG_FILE_NAME: &str = "tree-sitter.json";

    // Phase 1: walk upwards until a config file is found or the path runs out of parents.
    let mut candidate = repo_path.join(CONFIG_FILE_NAME);
    while !candidate.exists() {
        // Strip the file name first, then the directory that was just checked.
        candidate.pop();
        if !candidate.pop() {
            return Ok(None);
        }
        candidate.push(CONFIG_FILE_NAME);
    }

    // Phase 2: read, deserialize and validate the file that was found.
    let contents = fs::read_to_string(&candidate).map_err(|e| {
        ParseVersionError::IO(format!("Failed to read `{}` -- {e}", candidate.display()))
    })?;
    let config: TreeSitterJson = serde_json::from_str(&contents).map_err(|e| {
        ParseVersionError::JSON(format!("Failed to parse `{}` -- {e}", candidate.display()))
    })?;

    match Version::parse(&config.metadata.version) {
        Ok(version) => Ok(Some(version)),
        Err(e) => Err(ParseVersionError::Version(format!(
            "Failed to parse `{}` version as semver -- {e}",
            candidate.display()
        ))),
    }
}
pub fn load_grammar_file(
grammar_path: &Path,
js_runtime: Option<&str>,

View file

@ -5,6 +5,8 @@ use std::{
mem::swap,
};
use indoc::indoc;
use super::{
build_tables::Tables,
grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
@ -83,9 +85,8 @@ struct Generator {
field_names: Vec<String>,
supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
supertype_map: BTreeMap<String, Vec<ChildType>>,
#[allow(unused)]
abi_version: usize,
metadata: Option<Metadata>,
}
struct LargeCharacterSetInfo {
@ -93,6 +94,12 @@ struct LargeCharacterSetInfo {
is_used: bool,
}
/// Semantic version components that the generator writes into the `.metadata` initializer of
/// the generated `TSLanguage` struct.
struct Metadata {
// Emitted as `.major_version`.
major_version: u8,
// Emitted as `.minor_version`.
minor_version: u8,
// Emitted as `.patch_version`.
patch_version: u8,
}
impl Generator {
fn generate(mut self) -> String {
self.init();
@ -1539,7 +1546,7 @@ impl Generator {
indent!(self);
add_line!(self, "static const TSLanguage language = {{");
indent!(self);
add_line!(self, ".version = LANGUAGE_VERSION,");
add_line!(self, ".abi_version = LANGUAGE_VERSION,");
// Quantities
add_line!(self, ".symbol_count = SYMBOL_COUNT,");
@ -1629,6 +1636,24 @@ impl Generator {
.max()
.unwrap()
);
let Some(metadata) = &self.metadata else {
panic!(
indoc! {"
Metadata is required to generate ABI version {}.
This means that your grammar doesn't have a tree-sitter.json config file with an appropriate version field in the metadata table.
"},
self.abi_version
);
};
add_line!(self, ".metadata = {{");
indent!(self);
add_line!(self, ".major_version = {},", metadata.major_version);
add_line!(self, ".minor_version = {},", metadata.minor_version);
add_line!(self, ".patch_version = {},", metadata.patch_version);
dedent!(self);
add_line!(self, "}},");
}
dedent!(self);
@ -1914,6 +1939,7 @@ pub fn render_c_code(
lexical_grammar: LexicalGrammar,
default_aliases: AliasMap,
abi_version: usize,
semantic_version: Option<(u8, u8, u8)>,
supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
) -> String {
assert!(
@ -1932,6 +1958,11 @@ pub fn render_c_code(
lexical_grammar,
default_aliases,
abi_version,
metadata: semantic_version.map(|(major_version, minor_version, patch_version)| Metadata {
major_version,
minor_version,
patch_version,
}),
supertype_symbol_map,
..Default::default()
}

View file

@ -359,7 +359,8 @@ fn test_feature_corpus_files() {
}
let error_message_path = test_path.join("expected_error.txt");
let grammar_json = tree_sitter_generate::load_grammar_file(&grammar_path, None).unwrap();
let generate_result = tree_sitter_generate::generate_parser_for_grammar(&grammar_json);
let generate_result =
tree_sitter_generate::generate_parser_for_grammar(&grammar_json, Some((0, 0, 0)));
if error_message_path.exists() {
if EXAMPLE_INCLUDE.is_some() || EXAMPLE_EXCLUDE.is_some() {

View file

@ -101,7 +101,7 @@ fn test_supertypes() {
let language = get_language("rust");
let supertypes = language.supertypes();
if language.version() < 15 {
if language.abi_version() < 15 {
return;
}

View file

@ -18,9 +18,17 @@ mod tree_test;
#[cfg(feature = "wasm")]
mod wasm_language_test;
use tree_sitter_generate::GenerateResult;
pub use crate::fuzz::{
allocations,
edits::{get_random_edit, invert_edit},
random::Rand,
ITERATION_COUNT,
};
/// Test-only shorthand for [`tree_sitter_generate::generate_parser_for_grammar`].
///
/// Tests only care about the grammar JSON, so a fixed placeholder semantic version of `0.0.0`
/// is supplied on their behalf.
fn generate_parser(grammar_json: &str) -> GenerateResult<(String, String)> {
    let placeholder_version = Some((0, 0, 0));
    tree_sitter_generate::generate_parser_for_grammar(grammar_json, placeholder_version)
}

View file

@ -1,12 +1,12 @@
use tree_sitter::{Node, Parser, Point, Tree};
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
use tree_sitter_generate::load_grammar_file;
use super::{
get_random_edit,
helpers::fixtures::{fixtures_dir, get_language, get_test_language},
Rand,
};
use crate::parse::perform_edit;
use crate::{parse::perform_edit, tests::generate_parser};
const JSON_EXAMPLE: &str = r#"
@ -317,7 +317,7 @@ fn test_next_sibling_of_zero_width_node() {
)
.unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap();
let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap();
let mut parser = Parser::new();
let language = get_test_language(&parser_name, &parser_code, None);
@ -563,8 +563,7 @@ fn test_node_named_child() {
#[test]
fn test_node_named_child_with_aliases_and_extras() {
let (parser_name, parser_code) =
generate_parser_for_grammar(GRAMMAR_WITH_ALIASES_AND_EXTRAS).unwrap();
let (parser_name, parser_code) = generate_parser(GRAMMAR_WITH_ALIASES_AND_EXTRAS).unwrap();
let mut parser = Parser::new();
parser
@ -871,7 +870,7 @@ fn test_node_sexp() {
#[test]
fn test_node_field_names() {
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"
{
"name": "test_grammar_with_fields",
@ -981,7 +980,7 @@ fn test_node_field_names() {
#[test]
fn test_node_field_calls_in_language_without_fields() {
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"
{
"name": "test_grammar_with_no_fields",
@ -1039,7 +1038,7 @@ fn test_node_is_named_but_aliased_as_anonymous() {
)
.unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap();
let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap();
let mut parser = Parser::new();
let language = get_test_language(&parser_name, &parser_code, None);

View file

@ -7,8 +7,9 @@ use std::{
};
use tree_sitter::Parser;
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
use tree_sitter_generate::load_grammar_file;
use super::generate_parser;
use crate::tests::helpers::fixtures::{fixtures_dir, get_test_language};
// The `sanitizing` cfg is required so that tests don't run under specific sanitizers
@ -90,7 +91,7 @@ fn hang_test() {
.join("get_col_should_hang_not_crash");
let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(grammar_json.as_str()).unwrap();
let (parser_name, parser_code) = generate_parser(grammar_json.as_str()).unwrap();
let language = get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path()));

View file

@ -6,7 +6,7 @@ use std::{
use tree_sitter::{
Decode, IncludedRangesError, InputEdit, LogType, ParseOptions, ParseState, Parser, Point, Range,
};
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
use tree_sitter_generate::load_grammar_file;
use tree_sitter_proc_macro::retry;
use super::helpers::{
@ -17,7 +17,7 @@ use super::helpers::{
use crate::{
fuzz::edits::Edit,
parse::perform_edit,
tests::{helpers::fixtures::fixtures_dir, invert_edit},
tests::{generate_parser, helpers::fixtures::fixtures_dir, invert_edit},
};
#[test]
@ -486,7 +486,7 @@ fn test_parsing_after_editing_tree_that_depends_on_column_values() {
.join("test_grammars")
.join("uses_current_column");
let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap();
let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap();
let (grammar_name, parser_code) = generate_parser(&grammar_json).unwrap();
let mut parser = Parser::new();
parser
@ -564,7 +564,7 @@ fn test_parsing_after_editing_tree_that_depends_on_column_position() {
.join("depends_on_column");
let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap();
let (grammar_name, parser_code) = generate_parser_for_grammar(grammar_json.as_str()).unwrap();
let (grammar_name, parser_code) = generate_parser(grammar_json.as_str()).unwrap();
let mut parser = Parser::new();
parser
@ -1475,7 +1475,7 @@ fn test_parsing_with_a_newly_included_range() {
#[test]
fn test_parsing_with_included_ranges_and_missing_tokens() {
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"{
"name": "test_leading_missing_token",
"rules": {
@ -1536,7 +1536,7 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
#[test]
fn test_grammars_that_can_hang_on_eof() {
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"
{
"name": "test_single_null_char_regex",
@ -1562,7 +1562,7 @@ fn test_grammars_that_can_hang_on_eof() {
.unwrap();
parser.parse("\"", None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"
{
"name": "test_null_char_with_next_char_regex",
@ -1587,7 +1587,7 @@ fn test_grammars_that_can_hang_on_eof() {
.unwrap();
parser.parse("\"", None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"
{
"name": "test_null_char_with_range_regex",
@ -1650,7 +1650,7 @@ if foo && bar || baz {}
fn test_parsing_with_scanner_logging() {
let dir = fixtures_dir().join("test_grammars").join("external_tokens");
let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap();
let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap();
let (grammar_name, parser_code) = generate_parser(&grammar_json).unwrap();
let mut parser = Parser::new();
parser
@ -1674,7 +1674,7 @@ fn test_parsing_with_scanner_logging() {
fn test_parsing_get_column_at_eof() {
let dir = fixtures_dir().join("test_grammars").join("get_col_eof");
let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap();
let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap();
let (grammar_name, parser_code) = generate_parser(&grammar_json).unwrap();
let mut parser = Parser::new();
parser
@ -1884,7 +1884,7 @@ fn test_decode_utf24le() {
#[test]
fn test_grammars_that_should_not_compile() {
assert!(generate_parser_for_grammar(
assert!(generate_parser(
r#"
{
"name": "issue_1111",
@ -1896,7 +1896,7 @@ fn test_grammars_that_should_not_compile() {
)
.is_err());
assert!(generate_parser_for_grammar(
assert!(generate_parser(
r#"
{
"name": "issue_1271",
@ -1911,11 +1911,11 @@ fn test_grammars_that_should_not_compile() {
}
},
}
"#,
"#
)
.is_err());
assert!(generate_parser_for_grammar(
assert!(generate_parser(
r#"
{
"name": "issue_1156_expl_1",
@ -1929,11 +1929,11 @@ fn test_grammars_that_should_not_compile() {
}
},
}
"#
"#
)
.is_err());
assert!(generate_parser_for_grammar(
assert!(generate_parser(
r#"
{
"name": "issue_1156_expl_2",
@ -1950,11 +1950,11 @@ fn test_grammars_that_should_not_compile() {
}
},
}
"#
"#
)
.is_err());
assert!(generate_parser_for_grammar(
assert!(generate_parser(
r#"
{
"name": "issue_1156_expl_3",
@ -1968,11 +1968,11 @@ fn test_grammars_that_should_not_compile() {
}
},
}
"#
"#
)
.is_err());
assert!(generate_parser_for_grammar(
assert!(generate_parser(
r#"
{
"name": "issue_1156_expl_4",
@ -1989,7 +1989,7 @@ fn test_grammars_that_should_not_compile() {
}
},
}
"#
"#
)
.is_err());
}

View file

@ -8,7 +8,6 @@ use tree_sitter::{
QueryCursorOptions, QueryError, QueryErrorKind, QueryPredicate, QueryPredicateArg,
QueryProperty, Range,
};
use tree_sitter_generate::generate_parser_for_grammar;
use unindent::Unindent;
use super::helpers::{
@ -17,6 +16,7 @@ use super::helpers::{
query_helpers::{assert_query_matches, Match, Pattern},
};
use crate::tests::{
generate_parser,
helpers::query_helpers::{collect_captures, collect_matches},
ITERATION_COUNT,
};
@ -532,7 +532,7 @@ fn test_query_errors_on_impossible_patterns() {
}
);
if js_lang.version() >= 15 {
if js_lang.abi_version() >= 15 {
assert_eq!(
Query::new(&js_lang, "(statement/identifier)").unwrap_err(),
QueryError {
@ -5216,7 +5216,7 @@ fn test_grammar_with_aliased_literal_query() {
// expansion: $ => seq('}'),
// },
// });
let (parser_name, parser_code) = generate_parser_for_grammar(
let (parser_name, parser_code) = generate_parser(
r#"
{
"name": "test",

View file

@ -180,6 +180,14 @@ pub struct TSQueryCursorOptions {
pub progress_callback:
::core::option::Option<unsafe extern "C" fn(state: *mut TSQueryCursorState) -> bool>,
}
#[doc = " The metadata associated with a language.\n\n Currently, this metadata can be used to check the [Semantic Version](https://semver.org/)\n of the language. This version information should be used to signal if a given parser might\n be incompatible with existing queries when upgrading between major versions, or minor versions\n if it's in zerover."]
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSLanguageMetadata {
pub major_version: u8,
pub minor_version: u8,
pub patch_version: u8,
}
extern "C" {
#[doc = " Create a new parser."]
pub fn ts_parser_new() -> *mut TSParser;
@ -193,7 +201,7 @@ extern "C" {
pub fn ts_parser_language(self_: *const TSParser) -> *const TSLanguage;
}
extern "C" {
#[doc = " Set the language that the parser should use for parsing.\n\n Returns a boolean indicating whether or not the language was successfully\n assigned. True means assignment succeeded. False means there was a version\n mismatch: the language was generated with an incompatible version of the\n Tree-sitter CLI. Check the language's version using [`ts_language_version`]\n and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and\n [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants."]
#[doc = " Set the language that the parser should use for parsing.\n\n Returns a boolean indicating whether or not the language was successfully\n assigned. True means assignment succeeded. False means there was a version\n mismatch: the language was generated with an incompatible version of the\n Tree-sitter CLI. Check the language's ABI version using [`ts_language_abi_version`]\n and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and\n [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants."]
pub fn ts_parser_set_language(self_: *mut TSParser, language: *const TSLanguage) -> bool;
}
extern "C" {
@ -807,9 +815,17 @@ extern "C" {
pub fn ts_language_symbol_type(self_: *const TSLanguage, symbol: TSSymbol) -> TSSymbolType;
}
extern "C" {
#[doc = " Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."]
#[doc = " @deprecated use [`ts_language_abi_version`] instead, this will be removed in 0.26.\n\n Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."]
pub fn ts_language_version(self_: *const TSLanguage) -> u32;
}
extern "C" {
#[doc = " Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."]
pub fn ts_language_abi_version(self_: *const TSLanguage) -> u32;
}
extern "C" {
#[doc = " Get the metadata for this language. This information is generated by the\n CLI, and relies on the language author providing the correct metadata in\n the language's `tree-sitter.json` file.\n\n See also [`TSMetadata`]."]
pub fn ts_language_metadata(self_: *const TSLanguage) -> *const TSLanguageMetadata;
}
extern "C" {
#[doc = " Get the next parse state. Combine this with lookahead iterators to generate\n completion suggestions or valid symbols in error nodes. Use\n [`ts_node_grammar_symbol`] for valid symbols."]
pub fn ts_language_next_state(

View file

@ -64,6 +64,29 @@ pub struct Language(*const ffi::TSLanguage);
pub struct LanguageRef<'a>(*const ffi::TSLanguage, PhantomData<&'a ()>);
/// The metadata associated with a language.
///
/// Currently, this metadata can be used to check the [Semantic Version](https://semver.org/)
/// of the language. This version information should be used to signal if a given parser might
/// be incompatible with existing queries when upgrading between major versions, or minor versions
/// if it's in zerover.
///
/// This is a small plain-data value (three `u8`s), so it is `Copy` and can be compared, hashed
/// and debug-printed directly.
#[doc(alias = "TSLanguageMetadata")]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LanguageMetadata {
    /// The `major` component of the language's semantic version.
    pub major_version: u8,
    /// The `minor` component of the language's semantic version.
    pub minor_version: u8,
    /// The `patch` component of the language's semantic version.
    pub patch_version: u8,
}
impl From<ffi::TSLanguageMetadata> for LanguageMetadata {
    /// Copies the three version components out of the raw C struct, field for field.
    fn from(val: ffi::TSLanguageMetadata) -> Self {
        let ffi::TSLanguageMetadata {
            major_version,
            minor_version,
            patch_version,
        } = val;
        Self {
            major_version,
            minor_version,
            patch_version,
        }
    }
}
/// A tree that represents the syntactic structure of a source code file.
#[doc(alias = "TSTree")]
pub struct Tree(NonNull<ffi::TSTree>);
@ -394,7 +417,7 @@ impl Language {
}
/// Get the name of this language. This returns `None` in older parsers.
#[doc(alias = "ts_language_version")]
#[doc(alias = "ts_language_name")]
#[must_use]
pub fn name(&self) -> Option<&'static str> {
let ptr = unsafe { ffi::ts_language_name(self.0) };
@ -404,11 +427,34 @@ impl Language {
/// Get the ABI version number that indicates which version of the
/// Tree-sitter CLI that was used to generate this [`Language`].
#[doc(alias = "ts_language_version")]
#[deprecated(since = "0.25.0", note = "Use abi_version instead")]
#[must_use]
pub fn version(&self) -> usize {
unsafe { ffi::ts_language_version(self.0) as usize }
}
/// Get the ABI version number of this [`Language`], which indicates which version of the
/// Tree-sitter CLI was used to generate it.
#[doc(alias = "ts_language_abi_version")]
#[must_use]
pub fn abi_version(&self) -> usize {
    // SAFETY: assumes `self.0` is a valid language pointer for as long as `self` is alive;
    // that invariant is established wherever `Language` is constructed (not visible here).
    let raw_version = unsafe { ffi::ts_language_abi_version(self.0) };
    raw_version as usize
}
/// Get the metadata for this language, if it has any.
///
/// The information is generated by the CLI and relies on the language author providing the
/// correct metadata in the language's `tree-sitter.json` file. Returns `None` when the
/// underlying C call reports no metadata (a null pointer).
///
/// See also [`LanguageMetadata`].
#[doc(alias = "ts_language_metadata")]
#[must_use]
pub fn metadata(&self) -> Option<LanguageMetadata> {
    // SAFETY: assumes `self.0` is a valid language pointer for as long as `self` is alive;
    // that invariant is established wherever `Language` is constructed (not visible here).
    let raw = unsafe { ffi::ts_language_metadata(self.0) };
    if raw.is_null() {
        return None;
    }
    // SAFETY: `raw` was just checked to be non-null; it is presumed to point at the metadata
    // stored inside the language itself. The struct is `Copy`, so this reads it by value.
    Some(unsafe { *raw }.into())
}
/// Get the number of distinct node types in this language.
#[doc(alias = "ts_language_symbol_count")]
#[must_use]
@ -613,7 +659,7 @@ impl Parser {
/// [`LANGUAGE_VERSION`] and [`MIN_COMPATIBLE_LANGUAGE_VERSION`] constants.
#[doc(alias = "ts_parser_set_language")]
pub fn set_language(&mut self, language: &Language) -> Result<(), LanguageError> {
let version = language.version();
let version = language.abi_version();
if (MIN_COMPATIBLE_LANGUAGE_VERSION..=LANGUAGE_VERSION).contains(&version) {
unsafe {
ffi::ts_parser_set_language(self.0.as_ptr(), language.0);
@ -2360,7 +2406,7 @@ impl Query {
column: 0,
offset: 0,
message: LanguageError {
version: language.version(),
version: language.abi_version(),
}
.to_string(),
kind: QueryErrorKind::Language,

View file

@ -12,6 +12,8 @@
"ts_language_symbol_type",
"ts_language_name",
"ts_language_version",
"ts_language_abi_version",
"ts_language_metadata",
"ts_language_next_state",
"ts_node_field_name_for_child_wasm",
"ts_node_field_name_for_named_child_wasm",

View file

@ -110,6 +110,17 @@ static TSInputEdit unmarshal_edit() {
return edit;
}
// Writes `metadata` into TRANSFER_BUFFER for the JavaScript side.
//
// Slot 0 holds the number of value slots that follow: 3 when metadata is
// present, 0 when `metadata` is NULL. Slots 1-3 then hold the major, minor and
// patch versions, each widened to a 32-bit value.
static void marshal_language_metadata(const TSLanguageMetadata *metadata) {
  if (metadata != NULL) {
    TRANSFER_BUFFER[0] = (const void*)3;
    TRANSFER_BUFFER[1] = (const void*)(uint32_t)metadata->major_version;
    TRANSFER_BUFFER[2] = (const void*)(uint32_t)metadata->minor_version;
    TRANSFER_BUFFER[3] = (const void*)(uint32_t)metadata->patch_version;
  } else {
    TRANSFER_BUFFER[0] = 0;
  }
}
/********************/
/* Section - Parser */
/********************/
@ -242,6 +253,11 @@ int ts_language_type_is_visible_wasm(const TSLanguage *self, TSSymbol typeId) {
return symbolType <= TSSymbolTypeAnonymous;
}
// Looks up the language's metadata and marshals it (or its absence) into the
// transfer buffer for the JavaScript caller to read.
void ts_language_metadata_wasm(const TSLanguage *self) {
  marshal_language_metadata(ts_language_metadata(self));
}
void ts_language_supertypes_wasm(const TSLanguage *self) {
uint32_t length;
const TSSymbol *supertypes = ts_language_supertypes(self, &length);

View file

@ -72,6 +72,8 @@ interface WasmModule {
_ts_language_symbol_count(_0: number): number;
_ts_language_state_count(_0: number): number;
_ts_language_version(_0: number): number;
_ts_language_abi_version(_0: number): number;
_ts_language_metadata(_0: number): number;
_ts_language_name(_0: number): number;
_ts_language_field_count(_0: number): number;
_ts_language_next_state(_0: number, _1: number, _2: number): number;

View file

@ -1,10 +1,17 @@
import { C, INTERNAL, Internal, assertInternal, SIZE_OF_INT, SIZE_OF_SHORT } from './constants';
import { LookaheadIterator } from './lookahead_iterator';
import { unmarshalLanguageMetadata } from './marshal';
import { TRANSFER_BUFFER } from './parser';
import { Query } from './query';
const LANGUAGE_FUNCTION_REGEX = /^tree_sitter_\w+$/;
/**
 * The metadata associated with a language: the three components of the
 * language's semantic version.
 */
export class LanguageMetadata {
/** The `major` component of the language's semantic version. */
readonly major_version: number;
/** The `minor` component of the language's semantic version. */
readonly minor_version: number;
/** The `patch` component of the language's semantic version. */
readonly patch_version: number;
}
/**
* An opaque object that defines how to parse a particular language.
* The code for each `Language` is generated by the Tree-sitter CLI.
@ -46,7 +53,7 @@ export class Language {
}
}
/**
* Gets the name of the language.
*/
@ -57,11 +64,33 @@ export class Language {
}
/**
* @deprecated since version 0.25.0, use {@link Language#abiVersion} instead
* Gets the version of the language.
*/
get version(): number {
return C._ts_language_version(this[0]);
}
/**
* Gets the ABI version of the language.
*/
get abiVersion(): number {
return C._ts_language_abi_version(this[0]);
}
/**
 * Get the metadata for this language. This information is generated by the
 * CLI, and relies on the language author providing the correct metadata in
 * the language's `tree-sitter.json` file.
 *
 * Returns `null` when the language carries no metadata.
 */
get metadata(): LanguageMetadata | null {
  // `_ts_language_metadata` returns a pointer to the language's
  // `TSLanguageMetadata` struct (three consecutive `uint8_t` fields), or NULL
  // when the language has none. It does not write to the transfer buffer, so
  // the values are read straight from the returned pointer.
  const address = C._ts_language_metadata(this[0]);
  if (address === 0) return null;

  // `getValue(..., 'i8')` reads a signed byte; mask it back to 0..255.
  const readByte = (offset: number): number => C.getValue(address + offset, 'i8') & 0xff;

  return {
    major_version: readByte(0),
    minor_version: readByte(1),
    patch_version: readByte(2),
  };
}
/**
* Gets the number of fields in the language.
*/

View file

@ -5,6 +5,7 @@ import { Tree } from "./tree";
import { Query, QueryCapture, type QueryMatch } from "./query";
import { TreeCursor } from "./tree_cursor";
import { TRANSFER_BUFFER } from "./parser";
import { LanguageMetadata } from "./language";
/**
* @internal
@ -161,3 +162,16 @@ export function marshalEdit(edit: Edit, address = TRANSFER_BUFFER) {
C.setValue(address, edit.oldEndIndex, 'i32'); address += SIZE_OF_INT;
C.setValue(address, edit.newEndIndex, 'i32'); address += SIZE_OF_INT;
}
/**
 * @internal
 *
 * Unmarshals a {@link LanguageMetadata} from three consecutive 32-bit slots
 * starting at `address`, in the order major, minor, patch.
 *
 * @param address - Address of the slot holding the major version.
 * @returns The decoded metadata.
 */
export function unmarshalLanguageMetadata(address: number): LanguageMetadata {
  const major_version = C.getValue(address, 'i32');
  const minor_version = C.getValue(address + SIZE_OF_INT, 'i32');
  // The third slot is the patch version (it was previously stored under a
  // non-existent `field_count` key, leaving `patch_version` undefined).
  const patch_version = C.getValue(address + 2 * SIZE_OF_INT, 'i32');
  // Build the object in one go: the fields of `LanguageMetadata` are readonly.
  return { major_version, minor_version, patch_version };
}

View file

@ -12,7 +12,7 @@ describe('Language', () => {
describe('.name, .version', () => {
it('returns the name and version of the language', () => {
expect(JavaScript.name).toBe('javascript');
expect(JavaScript.version).toBe(15);
expect(JavaScript.abiVersion).toBe(15);
});
});

View file

@ -42,6 +42,7 @@ typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
typedef struct TSLanguageMetadata TSLanguageMetadata;
typedef struct TSParser TSParser;
typedef struct TSTree TSTree;
typedef struct TSQuery TSQuery;
@ -182,6 +183,20 @@ typedef struct TSQueryCursorOptions {
bool (*progress_callback)(TSQueryCursorState *state);
} TSQueryCursorOptions;
/**
* The metadata associated with a language.
*
* Currently, this metadata can be used to check the [Semantic Version](https://semver.org/)
* of the language. This version information should be used to signal if a given parser might
* be incompatible with existing queries when upgrading between major versions, or minor versions
* if it's in zerover.
*/
typedef struct TSLanguageMetadata {
uint8_t major_version;
uint8_t minor_version;
uint8_t patch_version;
} TSLanguageMetadata;
/********************/
/* Section - Parser */
/********************/
@ -207,7 +222,7 @@ const TSLanguage *ts_parser_language(const TSParser *self);
* Returns a boolean indicating whether or not the language was successfully
* assigned. True means assignment succeeded. False means there was a version
* mismatch: the language was generated with an incompatible version of the
* Tree-sitter CLI. Check the language's version using [`ts_language_version`]
* Tree-sitter CLI. Check the language's ABI version using [`ts_language_abi_version`]
* and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and
* [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants.
*/
@ -1247,6 +1262,8 @@ const char *ts_language_symbol_name(const TSLanguage *self, TSSymbol symbol);
TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol);
/**
* @deprecated use [`ts_language_abi_version`] instead, this will be removed in 0.26.
*
* Get the ABI version number for this language. This version number is used
* to ensure that languages were generated by a compatible version of
* Tree-sitter.
@ -1255,6 +1272,24 @@ TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol);
*/
uint32_t ts_language_version(const TSLanguage *self);
/**
* Get the ABI version number for this language. This version number is used
* to ensure that languages were generated by a compatible version of
* Tree-sitter.
*
* See also [`ts_parser_set_language`].
*/
uint32_t ts_language_abi_version(const TSLanguage *self);
/**
* Get the metadata for this language. This information is generated by the
* CLI, and relies on the language author providing the correct metadata in
* the language's `tree-sitter.json` file.
*
* See also [`TSLanguageMetadata`].
*/
const TSLanguageMetadata *ts_language_metadata(const TSLanguage *self);
/**
* Get the next parse state. Combine this with lookahead iterators to generate
* completion suggestions or valid symbols in error nodes. Use

View file

@ -25,7 +25,7 @@ uint32_t ts_language_state_count(const TSLanguage *self) {
}
const TSSymbol *ts_language_supertypes(const TSLanguage *self, uint32_t *length) {
if (self->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
if (self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
*length = self->supertype_count;
return self->supertype_symbols;
} else {
@ -39,7 +39,7 @@ const TSSymbol *ts_language_subtypes(
TSSymbol supertype,
uint32_t *length
) {
if (self->version < LANGUAGE_VERSION_WITH_RESERVED_WORDS || !ts_language_symbol_metadata(self, supertype).supertype) {
if (self->abi_version < LANGUAGE_VERSION_WITH_RESERVED_WORDS || !ts_language_symbol_metadata(self, supertype).supertype) {
*length = 0;
return NULL;
}
@ -50,11 +50,19 @@ const TSSymbol *ts_language_subtypes(
}
uint32_t ts_language_version(const TSLanguage *self) {
return self->version;
return self->abi_version;
}
// Returns the ABI version stored in the language's `abi_version` field.
uint32_t ts_language_abi_version(const TSLanguage *self) {
return self->abi_version;
}
// Returns a pointer to the metadata embedded in the language, or NULL for
// languages whose ABI version predates LANGUAGE_VERSION_WITH_RESERVED_WORDS
// (those are treated as having no metadata).
const TSLanguageMetadata *ts_language_metadata(const TSLanguage *self) {
  if (self->abi_version < LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
    return NULL;
  }
  return &self->metadata;
}
const char *ts_language_name(const TSLanguage *self) {
return self->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? self->name : NULL;
return self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? self->name : NULL;
}
uint32_t ts_language_field_count(const TSLanguage *self) {
@ -85,7 +93,7 @@ TSLexerMode ts_language_lex_mode_for_state(
const TSLanguage *self,
TSStateId state
) {
if (self->version < 15) {
if (self->abi_version < 15) {
TSLexMode mode = ((const TSLexMode *)self->lex_modes)[state];
return (TSLexerMode) {
.lex_state = mode.lex_state,

View file

@ -183,7 +183,7 @@ static inline bool ts_language_state_is_primary(
const TSLanguage *self,
TSStateId state
) {
if (self->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) {
if (self->abi_version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) {
return state == self->primary_state_ids[state];
} else {
return true;

View file

@ -1977,8 +1977,8 @@ bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
if (language) {
if (
language->version > TREE_SITTER_LANGUAGE_VERSION ||
language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
language->abi_version > TREE_SITTER_LANGUAGE_VERSION ||
language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
) return false;
if (ts_language_is_wasm(language)) {

View file

@ -18,6 +18,12 @@ typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
typedef struct TSLanguageMetadata TSLanguageMetadata;
// Version information attached to a language, split into the three
// components of a semantic version. Each component is a single byte.
typedef struct TSLanguageMetadata {
// Major component of the version.
uint8_t major_version;
// Minor component of the version.
uint8_t minor_version;
// Patch component of the version.
uint8_t patch_version;
} TSLanguageMetadata;
#endif
typedef struct {
@ -100,7 +106,7 @@ typedef struct {
} TSCharacterRange;
struct TSLanguage {
uint32_t version;
uint32_t abi_version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
@ -143,6 +149,7 @@ struct TSLanguage {
const TSSymbol *supertype_symbols;
const TSMapSlice *supertype_map_slices;
const TSSymbol *supertype_map_entries;
TSLanguageMetadata metadata;
};
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {

View file

@ -2433,7 +2433,7 @@ static TSQueryError ts_query__parse_pattern(
// Get all the possible subtypes for the given supertype,
// and check if the given subtype is valid.
if (self->language->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
if (self->language->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
uint32_t subtype_length;
const TSSymbol *subtypes = ts_language_subtypes(
self->language,
@ -2774,8 +2774,8 @@ TSQuery *ts_query_new(
) {
if (
!language ||
language->version > TREE_SITTER_LANGUAGE_VERSION ||
language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
language->abi_version > TREE_SITTER_LANGUAGE_VERSION ||
language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
) {
*error_type = TSQueryErrorLanguage;
return NULL;

View file

@ -117,7 +117,7 @@ typedef Array(char) StringData;
// LanguageInWasmMemory - The memory layout of a `TSLanguage` when compiled to
// wasm32. This is used to copy static language data out of the wasm memory.
typedef struct {
uint32_t version;
uint32_t abi_version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
@ -160,6 +160,7 @@ typedef struct {
int32_t supertype_symbols;
int32_t supertype_map_slices;
int32_t supertype_map_entries;
TSLanguageMetadata metadata;
} LanguageInWasmMemory;
// LexerInWasmMemory - The memory layout of a `TSLexer` when compiled to wasm32.
@ -1258,7 +1259,7 @@ const TSLanguage *ts_wasm_store_load_language(
StringData field_name_buffer = array_new();
*language = (TSLanguage) {
.version = wasm_language.version,
.abi_version = wasm_language.abi_version,
.symbol_count = wasm_language.symbol_count,
.alias_count = wasm_language.alias_count,
.token_count = wasm_language.token_count,
@ -1270,6 +1271,7 @@ const TSLanguage *ts_wasm_store_load_language(
.supertype_count = wasm_language.supertype_count,
.max_alias_sequence_length = wasm_language.max_alias_sequence_length,
.keyword_capture_token = wasm_language.keyword_capture_token,
.metadata = wasm_language.metadata,
.parse_table = copy(
&memory[wasm_language.parse_table],
wasm_language.large_state_count * wasm_language.symbol_count * sizeof(uint16_t)
@ -1396,14 +1398,14 @@ const TSLanguage *ts_wasm_store_load_language(
);
}
if (language->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) {
if (language->abi_version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) {
language->primary_state_ids = copy(
&memory[wasm_language.primary_state_ids],
wasm_language.state_count * sizeof(TSStateId)
);
}
if (language->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
if (language->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
language->name = copy_string(memory, wasm_language.name);
language->reserved_words = copy(
&memory[wasm_language.reserved_words],