diff --git a/cli/generate/src/lib.rs b/cli/generate/src/lib.rs index 9fdb15db..f1196827 100644 --- a/cli/generate/src/lib.rs +++ b/cli/generate/src/lib.rs @@ -7,16 +7,10 @@ use std::{ }; use anyhow::Result; -use build_tables::build_tables; -use grammars::InputGrammar; -pub use node_types::VariableInfoError; -use parse_grammar::parse_grammar; -pub use parse_grammar::ParseGrammarError; -use prepare_grammar::prepare_grammar; -pub use prepare_grammar::PrepareGrammarError; use regex::{Regex, RegexBuilder}; -use render::render_c_code; use semver::Version; +use serde::{Deserialize, Serialize}; +use thiserror::Error; mod build_tables; mod dedup; @@ -30,9 +24,15 @@ mod render; mod rules; mod tables; +use build_tables::build_tables; pub use build_tables::ParseTableBuilderError; -use serde::Serialize; -use thiserror::Error; +use grammars::InputGrammar; +pub use node_types::VariableInfoError; +use parse_grammar::parse_grammar; +pub use parse_grammar::ParseGrammarError; +use prepare_grammar::prepare_grammar; +pub use prepare_grammar::PrepareGrammarError; +use render::render_c_code; static JSON_COMMENT_REGEX: LazyLock = LazyLock::new(|| { RegexBuilder::new("^\\s*//.*") @@ -67,6 +67,8 @@ pub enum GenerateError { VariableInfo(#[from] VariableInfoError), #[error(transparent)] BuildTables(#[from] ParseTableBuilderError), + #[error(transparent)] + ParseVersion(#[from] ParseVersionError), } impl From for GenerateError { @@ -95,6 +97,16 @@ impl From for LoadGrammarError { } } +#[derive(Debug, Error, Serialize)] +pub enum ParseVersionError { + #[error("{0}")] + Version(String), + #[error("{0}")] + JSON(String), + #[error("{0}")] + IO(String), +} + pub type JSResult = Result; #[derive(Debug, Error, Serialize)] @@ -178,11 +190,18 @@ pub fn generate_parser_in_directory( // Parse and preprocess the grammar. let input_grammar = parse_grammar(&grammar_json)?; + let semantic_version = read_grammar_version(&repo_path)?; + // Generate the parser and related files. 
let GeneratedParser { c_code, node_types_json, - } = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?; + } = generate_parser_for_grammar_with_opts( + &input_grammar, + abi_version, + semantic_version.map(|v| (v.major as u8, v.minor as u8, v.patch as u8)), + report_symbol_name, + )?; write_file(&src_path.join("parser.c"), c_code)?; write_file(&src_path.join("node-types.json"), node_types_json)?; @@ -193,17 +212,25 @@ pub fn generate_parser_in_directory( Ok(()) } -pub fn generate_parser_for_grammar(grammar_json: &str) -> GenerateResult<(String, String)> { +pub fn generate_parser_for_grammar( + grammar_json: &str, + semantic_version: Option<(u8, u8, u8)>, +) -> GenerateResult<(String, String)> { let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n"); let input_grammar = parse_grammar(&grammar_json)?; - let parser = - generate_parser_for_grammar_with_opts(&input_grammar, tree_sitter::LANGUAGE_VERSION, None)?; + let parser = generate_parser_for_grammar_with_opts( + &input_grammar, + tree_sitter::LANGUAGE_VERSION, + semantic_version, + None, + )?; Ok((input_grammar.name, parser.c_code)) } fn generate_parser_for_grammar_with_opts( input_grammar: &InputGrammar, abi_version: usize, + semantic_version: Option<(u8, u8, u8)>, report_symbol_name: Option<&str>, ) -> GenerateResult { let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = @@ -233,6 +260,7 @@ fn generate_parser_for_grammar_with_opts( lexical_grammar, simple_aliases, abi_version, + semantic_version, supertype_symbol_map, ); Ok(GeneratedParser { @@ -241,6 +269,55 @@ fn generate_parser_for_grammar_with_opts( }) } +/// This will read the `tree-sitter.json` config file and attempt to extract the version. +/// +/// If the file is not found in the current directory or any of its parent directories, this will +/// return `None` to maintain backwards compatibility. If the file is found but the version cannot +/// be parsed as semver, this will return an error. 
+fn read_grammar_version(repo_path: &Path) -> Result, ParseVersionError> { + #[derive(Deserialize)] + struct TreeSitterJson { + metadata: Metadata, + } + + #[derive(Deserialize)] + struct Metadata { + version: String, + } + + let filename = "tree-sitter.json"; + let mut path = repo_path.join(filename); + + loop { + let json = path + .exists() + .then(|| { + let contents = fs::read_to_string(path.as_path()).map_err(|e| { + ParseVersionError::IO(format!("Failed to read `{}` -- {e}", path.display())) + })?; + serde_json::from_str::(&contents).map_err(|e| { + ParseVersionError::JSON(format!("Failed to parse `{}` -- {e}", path.display())) + }) + }) + .transpose()?; + if let Some(json) = json { + return Version::parse(&json.metadata.version) + .map_err(|e| { + ParseVersionError::Version(format!( + "Failed to parse `{}` version as semver -- {e}", + path.display() + )) + }) + .map(Some); + } + path.pop(); // filename + if !path.pop() { + return Ok(None); + } + path.push(filename); + } +} + pub fn load_grammar_file( grammar_path: &Path, js_runtime: Option<&str>, diff --git a/cli/generate/src/render.rs b/cli/generate/src/render.rs index 09685526..e2ef2389 100644 --- a/cli/generate/src/render.rs +++ b/cli/generate/src/render.rs @@ -5,6 +5,8 @@ use std::{ mem::swap, }; +use indoc::indoc; + use super::{ build_tables::Tables, grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}, @@ -83,9 +85,8 @@ struct Generator { field_names: Vec, supertype_symbol_map: BTreeMap>, supertype_map: BTreeMap>, - - #[allow(unused)] abi_version: usize, + metadata: Option, } struct LargeCharacterSetInfo { @@ -93,6 +94,12 @@ struct LargeCharacterSetInfo { is_used: bool, } +struct Metadata { + major_version: u8, + minor_version: u8, + patch_version: u8, +} + impl Generator { fn generate(mut self) -> String { self.init(); @@ -1539,7 +1546,7 @@ impl Generator { indent!(self); add_line!(self, "static const TSLanguage language = {{"); indent!(self); - add_line!(self, ".version = 
LANGUAGE_VERSION,"); + add_line!(self, ".abi_version = LANGUAGE_VERSION,"); // Quantities add_line!(self, ".symbol_count = SYMBOL_COUNT,"); @@ -1629,6 +1636,24 @@ impl Generator { .max() .unwrap() ); + + let Some(metadata) = &self.metadata else { + panic!( + indoc! {" + Metadata is required to generate ABI version {}. + This means that your grammar doesn't have a tree-sitter.json config file with an appropriate version field in the metadata table. + "}, + self.abi_version + ); + }; + + add_line!(self, ".metadata = {{"); + indent!(self); + add_line!(self, ".major_version = {},", metadata.major_version); + add_line!(self, ".minor_version = {},", metadata.minor_version); + add_line!(self, ".patch_version = {},", metadata.patch_version); + dedent!(self); + add_line!(self, "}},"); } dedent!(self); @@ -1914,6 +1939,7 @@ pub fn render_c_code( lexical_grammar: LexicalGrammar, default_aliases: AliasMap, abi_version: usize, + semantic_version: Option<(u8, u8, u8)>, supertype_symbol_map: BTreeMap>, ) -> String { assert!( @@ -1932,6 +1958,11 @@ pub fn render_c_code( lexical_grammar, default_aliases, abi_version, + metadata: semantic_version.map(|(major_version, minor_version, patch_version)| Metadata { + major_version, + minor_version, + patch_version, + }), supertype_symbol_map, ..Default::default() } diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index c6da6baf..750bf442 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -359,7 +359,8 @@ fn test_feature_corpus_files() { } let error_message_path = test_path.join("expected_error.txt"); let grammar_json = tree_sitter_generate::load_grammar_file(&grammar_path, None).unwrap(); - let generate_result = tree_sitter_generate::generate_parser_for_grammar(&grammar_json); + let generate_result = + tree_sitter_generate::generate_parser_for_grammar(&grammar_json, Some((0, 0, 0))); if error_message_path.exists() { if EXAMPLE_INCLUDE.is_some() || EXAMPLE_EXCLUDE.is_some() { diff --git 
a/cli/src/tests/language_test.rs b/cli/src/tests/language_test.rs index a479191a..e59b0eb6 100644 --- a/cli/src/tests/language_test.rs +++ b/cli/src/tests/language_test.rs @@ -101,7 +101,7 @@ fn test_supertypes() { let language = get_language("rust"); let supertypes = language.supertypes(); - if language.version() < 15 { + if language.abi_version() < 15 { return; } diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index 234172e4..c64744f8 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -18,9 +18,17 @@ mod tree_test; #[cfg(feature = "wasm")] mod wasm_language_test; +use tree_sitter_generate::GenerateResult; + pub use crate::fuzz::{ allocations, edits::{get_random_edit, invert_edit}, random::Rand, ITERATION_COUNT, }; + +/// This is a simple wrapper around [`tree_sitter_generate::generate_parser_for_grammar`], because +/// our tests do not need to pass in a version number, only the grammar JSON. +fn generate_parser(grammar_json: &str) -> GenerateResult<(String, String)> { + tree_sitter_generate::generate_parser_for_grammar(grammar_json, Some((0, 0, 0))) +} diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index 8242ea41..22e920d6 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -1,12 +1,12 @@ use tree_sitter::{Node, Parser, Point, Tree}; -use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file}; +use tree_sitter_generate::load_grammar_file; use super::{ get_random_edit, helpers::fixtures::{fixtures_dir, get_language, get_test_language}, Rand, }; -use crate::parse::perform_edit; +use crate::{parse::perform_edit, tests::generate_parser}; const JSON_EXAMPLE: &str = r#" @@ -317,7 +317,7 @@ fn test_next_sibling_of_zero_width_node() { ) .unwrap(); - let (parser_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap(); + let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap(); let mut parser = Parser::new(); let language = get_test_language(&parser_name, 
&parser_code, None); @@ -563,8 +563,7 @@ fn test_node_named_child() { #[test] fn test_node_named_child_with_aliases_and_extras() { - let (parser_name, parser_code) = - generate_parser_for_grammar(GRAMMAR_WITH_ALIASES_AND_EXTRAS).unwrap(); + let (parser_name, parser_code) = generate_parser(GRAMMAR_WITH_ALIASES_AND_EXTRAS).unwrap(); let mut parser = Parser::new(); parser @@ -871,7 +870,7 @@ fn test_node_sexp() { #[test] fn test_node_field_names() { - let (parser_name, parser_code) = generate_parser_for_grammar( + let (parser_name, parser_code) = generate_parser( r#" { "name": "test_grammar_with_fields", @@ -981,7 +980,7 @@ fn test_node_field_names() { #[test] fn test_node_field_calls_in_language_without_fields() { - let (parser_name, parser_code) = generate_parser_for_grammar( + let (parser_name, parser_code) = generate_parser( r#" { "name": "test_grammar_with_no_fields", @@ -1039,7 +1038,7 @@ fn test_node_is_named_but_aliased_as_anonymous() { ) .unwrap(); - let (parser_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap(); + let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap(); let mut parser = Parser::new(); let language = get_test_language(&parser_name, &parser_code, None); diff --git a/cli/src/tests/parser_hang_test.rs b/cli/src/tests/parser_hang_test.rs index 3f7b6394..1ff9d17e 100644 --- a/cli/src/tests/parser_hang_test.rs +++ b/cli/src/tests/parser_hang_test.rs @@ -7,8 +7,9 @@ use std::{ }; use tree_sitter::Parser; -use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file}; +use tree_sitter_generate::load_grammar_file; +use super::generate_parser; use crate::tests::helpers::fixtures::{fixtures_dir, get_test_language}; // The `sanitizing` cfg is required to don't run tests under specific sunitizer @@ -90,7 +91,7 @@ fn hang_test() { .join("get_col_should_hang_not_crash"); let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap(); - let (parser_name, parser_code) = 
generate_parser_for_grammar(grammar_json.as_str()).unwrap(); + let (parser_name, parser_code) = generate_parser(grammar_json.as_str()).unwrap(); let language = get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path())); diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 6aff4c42..35f14098 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -6,7 +6,7 @@ use std::{ use tree_sitter::{ Decode, IncludedRangesError, InputEdit, LogType, ParseOptions, ParseState, Parser, Point, Range, }; -use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file}; +use tree_sitter_generate::load_grammar_file; use tree_sitter_proc_macro::retry; use super::helpers::{ @@ -17,7 +17,7 @@ use super::helpers::{ use crate::{ fuzz::edits::Edit, parse::perform_edit, - tests::{helpers::fixtures::fixtures_dir, invert_edit}, + tests::{generate_parser, helpers::fixtures::fixtures_dir, invert_edit}, }; #[test] @@ -486,7 +486,7 @@ fn test_parsing_after_editing_tree_that_depends_on_column_values() { .join("test_grammars") .join("uses_current_column"); let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap(); - let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap(); + let (grammar_name, parser_code) = generate_parser(&grammar_json).unwrap(); let mut parser = Parser::new(); parser @@ -564,7 +564,7 @@ fn test_parsing_after_editing_tree_that_depends_on_column_position() { .join("depends_on_column"); let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap(); - let (grammar_name, parser_code) = generate_parser_for_grammar(grammar_json.as_str()).unwrap(); + let (grammar_name, parser_code) = generate_parser(grammar_json.as_str()).unwrap(); let mut parser = Parser::new(); parser @@ -1475,7 +1475,7 @@ fn test_parsing_with_a_newly_included_range() { #[test] fn test_parsing_with_included_ranges_and_missing_tokens() { - let (parser_name, parser_code) = 
generate_parser_for_grammar( + let (parser_name, parser_code) = generate_parser( r#"{ "name": "test_leading_missing_token", "rules": { @@ -1536,7 +1536,7 @@ fn test_parsing_with_included_ranges_and_missing_tokens() { #[test] fn test_grammars_that_can_hang_on_eof() { - let (parser_name, parser_code) = generate_parser_for_grammar( + let (parser_name, parser_code) = generate_parser( r#" { "name": "test_single_null_char_regex", @@ -1562,7 +1562,7 @@ fn test_grammars_that_can_hang_on_eof() { .unwrap(); parser.parse("\"", None).unwrap(); - let (parser_name, parser_code) = generate_parser_for_grammar( + let (parser_name, parser_code) = generate_parser( r#" { "name": "test_null_char_with_next_char_regex", @@ -1587,7 +1587,7 @@ fn test_grammars_that_can_hang_on_eof() { .unwrap(); parser.parse("\"", None).unwrap(); - let (parser_name, parser_code) = generate_parser_for_grammar( + let (parser_name, parser_code) = generate_parser( r#" { "name": "test_null_char_with_range_regex", @@ -1650,7 +1650,7 @@ if foo && bar || baz {} fn test_parsing_with_scanner_logging() { let dir = fixtures_dir().join("test_grammars").join("external_tokens"); let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap(); - let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap(); + let (grammar_name, parser_code) = generate_parser(&grammar_json).unwrap(); let mut parser = Parser::new(); parser @@ -1674,7 +1674,7 @@ fn test_parsing_with_scanner_logging() { fn test_parsing_get_column_at_eof() { let dir = fixtures_dir().join("test_grammars").join("get_col_eof"); let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap(); - let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap(); + let (grammar_name, parser_code) = generate_parser(&grammar_json).unwrap(); let mut parser = Parser::new(); parser @@ -1884,7 +1884,7 @@ fn test_decode_utf24le() { #[test] fn test_grammars_that_should_not_compile() { - 
assert!(generate_parser_for_grammar( + assert!(generate_parser( r#" { "name": "issue_1111", @@ -1896,7 +1896,7 @@ fn test_grammars_that_should_not_compile() { ) .is_err()); - assert!(generate_parser_for_grammar( + assert!(generate_parser( r#" { "name": "issue_1271", @@ -1911,11 +1911,11 @@ fn test_grammars_that_should_not_compile() { } }, } - "#, + "# ) .is_err()); - assert!(generate_parser_for_grammar( + assert!(generate_parser( r#" { "name": "issue_1156_expl_1", @@ -1929,11 +1929,11 @@ fn test_grammars_that_should_not_compile() { } }, } - "# + "# ) .is_err()); - assert!(generate_parser_for_grammar( + assert!(generate_parser( r#" { "name": "issue_1156_expl_2", @@ -1950,11 +1950,11 @@ fn test_grammars_that_should_not_compile() { } }, } - "# + "# ) .is_err()); - assert!(generate_parser_for_grammar( + assert!(generate_parser( r#" { "name": "issue_1156_expl_3", @@ -1968,11 +1968,11 @@ fn test_grammars_that_should_not_compile() { } }, } - "# + "# ) .is_err()); - assert!(generate_parser_for_grammar( + assert!(generate_parser( r#" { "name": "issue_1156_expl_4", @@ -1989,7 +1989,7 @@ fn test_grammars_that_should_not_compile() { } }, } - "# + "# ) .is_err()); } diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 6f5e7077..f741d88c 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -8,7 +8,6 @@ use tree_sitter::{ QueryCursorOptions, QueryError, QueryErrorKind, QueryPredicate, QueryPredicateArg, QueryProperty, Range, }; -use tree_sitter_generate::generate_parser_for_grammar; use unindent::Unindent; use super::helpers::{ @@ -17,6 +16,7 @@ use super::helpers::{ query_helpers::{assert_query_matches, Match, Pattern}, }; use crate::tests::{ + generate_parser, helpers::query_helpers::{collect_captures, collect_matches}, ITERATION_COUNT, }; @@ -532,7 +532,7 @@ fn test_query_errors_on_impossible_patterns() { } ); - if js_lang.version() >= 15 { + if js_lang.abi_version() >= 15 { assert_eq!( Query::new(&js_lang, 
"(statement/identifier)").unwrap_err(), QueryError { @@ -5216,7 +5216,7 @@ fn test_grammar_with_aliased_literal_query() { // expansion: $ => seq('}'), // }, // }); - let (parser_name, parser_code) = generate_parser_for_grammar( + let (parser_name, parser_code) = generate_parser( r#" { "name": "test", diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 73803082..73cd457e 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -180,6 +180,14 @@ pub struct TSQueryCursorOptions { pub progress_callback: ::core::option::Option bool>, } +#[doc = " The metadata associated with a language.\n\n Currently, this metadata can be used to check the [Semantic Version](https://semver.org/)\n of the language. This version information should be used to signal if a given parser might\n be incompatible with existing queries when upgrading between major versions, or minor versions\n if it's in zerover."] +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSLanguageMetadata { + pub major_version: u8, + pub minor_version: u8, + pub patch_version: u8, +} extern "C" { #[doc = " Create a new parser."] pub fn ts_parser_new() -> *mut TSParser; @@ -193,7 +201,7 @@ extern "C" { pub fn ts_parser_language(self_: *const TSParser) -> *const TSLanguage; } extern "C" { - #[doc = " Set the language that the parser should use for parsing.\n\n Returns a boolean indicating whether or not the language was successfully\n assigned. True means assignment succeeded. False means there was a version\n mismatch: the language was generated with an incompatible version of the\n Tree-sitter CLI. Check the language's version using [`ts_language_version`]\n and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and\n [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants."] + #[doc = " Set the language that the parser should use for parsing.\n\n Returns a boolean indicating whether or not the language was successfully\n assigned. 
True means assignment succeeded. False means there was a version\n mismatch: the language was generated with an incompatible version of the\n Tree-sitter CLI. Check the language's ABI version using [`ts_language_abi_version`]\n and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and\n [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants."] pub fn ts_parser_set_language(self_: *mut TSParser, language: *const TSLanguage) -> bool; } extern "C" { @@ -807,9 +815,17 @@ extern "C" { pub fn ts_language_symbol_type(self_: *const TSLanguage, symbol: TSSymbol) -> TSSymbolType; } extern "C" { - #[doc = " Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."] + #[doc = " @deprecated use [`ts_language_abi_version`] instead, this will be removed in 0.26.\n\n Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."] pub fn ts_language_version(self_: *const TSLanguage) -> u32; } +extern "C" { + #[doc = " Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."] + pub fn ts_language_abi_version(self_: *const TSLanguage) -> u32; +} +extern "C" { + #[doc = " Get the metadata for this language. This information is generated by the\n CLI, and relies on the language author providing the correct metadata in\n the language's `tree-sitter.json` file.\n\n See also [`TSMetadata`]."] + pub fn ts_language_metadata(self_: *const TSLanguage) -> *const TSLanguageMetadata; +} extern "C" { #[doc = " Get the next parse state. Combine this with lookahead iterators to generate\n completion suggestions or valid symbols in error nodes. 
Use\n [`ts_node_grammar_symbol`] for valid symbols."] pub fn ts_language_next_state( diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 6827ba78..f45a40cc 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -64,6 +64,29 @@ pub struct Language(*const ffi::TSLanguage); pub struct LanguageRef<'a>(*const ffi::TSLanguage, PhantomData<&'a ()>); +/// The metadata associated with a language. +/// +/// Currently, this metadata can be used to check the [Semantic Version](https://semver.org/) +/// of the language. This version information should be used to signal if a given parser might +/// be incompatible with existing queries when upgrading between major versions, or minor versions +/// if it's in zerover. +#[doc(alias = "TSLanguageMetadata")] +pub struct LanguageMetadata { + pub major_version: u8, + pub minor_version: u8, + pub patch_version: u8, +} + +impl From for LanguageMetadata { + fn from(val: ffi::TSLanguageMetadata) -> Self { + Self { + major_version: val.major_version, + minor_version: val.minor_version, + patch_version: val.patch_version, + } + } +} + /// A tree that represents the syntactic structure of a source code file. #[doc(alias = "TSTree")] pub struct Tree(NonNull); @@ -394,7 +417,7 @@ impl Language { } /// Get the name of this language. This returns `None` in older parsers. - #[doc(alias = "ts_language_version")] + #[doc(alias = "ts_language_name")] #[must_use] pub fn name(&self) -> Option<&'static str> { let ptr = unsafe { ffi::ts_language_name(self.0) }; @@ -404,11 +427,34 @@ impl Language { /// Get the ABI version number that indicates which version of the /// Tree-sitter CLI that was used to generate this [`Language`]. 
#[doc(alias = "ts_language_version")] + #[deprecated(since = "0.25.0", note = "Use abi_version instead")] #[must_use] pub fn version(&self) -> usize { unsafe { ffi::ts_language_version(self.0) as usize } } + /// Get the ABI version number that indicates which version of the + /// Tree-sitter CLI that was used to generate this [`Language`]. + #[doc(alias = "ts_language_abi_version")] + #[must_use] + pub fn abi_version(&self) -> usize { + unsafe { ffi::ts_language_abi_version(self.0) as usize } + } + + /// Get the metadata for this language. This information is generated by the + /// CLI, and relies on the language author providing the correct metadata in + /// the language's `tree-sitter.json` file. + /// + /// See also [`LanguageMetadata`]. + #[doc(alias = "ts_language_metadata")] + #[must_use] + pub fn metadata(&self) -> Option { + unsafe { + let ptr = ffi::ts_language_metadata(self.0); + (!ptr.is_null()).then(|| (*ptr).into()) + } + } + /// Get the number of distinct node types in this language. #[doc(alias = "ts_language_symbol_count")] #[must_use] @@ -613,7 +659,7 @@ impl Parser { /// [`LANGUAGE_VERSION`] and [`MIN_COMPATIBLE_LANGUAGE_VERSION`] constants. 
#[doc(alias = "ts_parser_set_language")] pub fn set_language(&mut self, language: &Language) -> Result<(), LanguageError> { - let version = language.version(); + let version = language.abi_version(); if (MIN_COMPATIBLE_LANGUAGE_VERSION..=LANGUAGE_VERSION).contains(&version) { unsafe { ffi::ts_parser_set_language(self.0.as_ptr(), language.0); @@ -2360,7 +2406,7 @@ impl Query { column: 0, offset: 0, message: LanguageError { - version: language.version(), + version: language.abi_version(), } .to_string(), kind: QueryErrorKind::Language, diff --git a/lib/binding_web/lib/exports.txt b/lib/binding_web/lib/exports.txt index e8aaf822..eb6f5420 100644 --- a/lib/binding_web/lib/exports.txt +++ b/lib/binding_web/lib/exports.txt @@ -12,6 +12,8 @@ "ts_language_symbol_type", "ts_language_name", "ts_language_version", +"ts_language_abi_version", +"ts_language_metadata_wasm", "ts_language_next_state", "ts_node_field_name_for_child_wasm", "ts_node_field_name_for_named_child_wasm", diff --git a/lib/binding_web/lib/tree-sitter.c b/lib/binding_web/lib/tree-sitter.c index a54cf5ea..70f546aa 100644 --- a/lib/binding_web/lib/tree-sitter.c +++ b/lib/binding_web/lib/tree-sitter.c @@ -110,6 +110,17 @@ static TSInputEdit unmarshal_edit() { return edit; } +static void marshal_language_metadata(const TSLanguageMetadata *metadata) { + if (metadata == NULL) { + TRANSFER_BUFFER[0] = 0; + return; + } + TRANSFER_BUFFER[0] = (const void*)3; + TRANSFER_BUFFER[1] = (const void*)(uint32_t)metadata->major_version; + TRANSFER_BUFFER[2] = (const void*)(uint32_t)metadata->minor_version; + TRANSFER_BUFFER[3] = (const void*)(uint32_t)metadata->patch_version; +} + /********************/ /* Section - Parser */ /********************/ @@ -242,6 +253,11 @@ int ts_language_type_is_visible_wasm(const TSLanguage *self, TSSymbol typeId) { return symbolType <= TSSymbolTypeAnonymous; } +void ts_language_metadata_wasm(const TSLanguage *self) { + const TSLanguageMetadata *metadata = ts_language_metadata(self); + 
marshal_language_metadata(metadata); +} + void ts_language_supertypes_wasm(const TSLanguage *self) { uint32_t length; const TSSymbol *supertypes = ts_language_supertypes(self, &length); diff --git a/lib/binding_web/lib/tree-sitter.d.ts b/lib/binding_web/lib/tree-sitter.d.ts index f34a5aff..c6fdf7d5 100644 --- a/lib/binding_web/lib/tree-sitter.d.ts +++ b/lib/binding_web/lib/tree-sitter.d.ts @@ -72,6 +72,8 @@ interface WasmModule { _ts_language_symbol_count(_0: number): number; _ts_language_state_count(_0: number): number; _ts_language_version(_0: number): number; + _ts_language_abi_version(_0: number): number; + _ts_language_metadata_wasm(_0: number): void; _ts_language_name(_0: number): number; _ts_language_field_count(_0: number): number; _ts_language_next_state(_0: number, _1: number, _2: number): number; diff --git a/lib/binding_web/src/language.ts b/lib/binding_web/src/language.ts index 6ffe3808..0b63d27e 100644 --- a/lib/binding_web/src/language.ts +++ b/lib/binding_web/src/language.ts @@ -1,10 +1,17 @@ import { C, INTERNAL, Internal, assertInternal, SIZE_OF_INT, SIZE_OF_SHORT } from './constants'; import { LookaheadIterator } from './lookahead_iterator'; +import { unmarshalLanguageMetadata } from './marshal'; import { TRANSFER_BUFFER } from './parser'; import { Query } from './query'; const LANGUAGE_FUNCTION_REGEX = /^tree_sitter_\w+$/; +export class LanguageMetadata { + readonly major_version: number; + readonly minor_version: number; + readonly patch_version: number; +} + /** * An opaque object that defines how to parse a particular language. * The code for each `Language` is generated by the Tree-sitter CLI. @@ -46,7 +53,7 @@ export class Language { } } - + /** * Gets the name of the language. */ @@ -57,11 +64,33 @@ export class Language { } /** + * @deprecated since version 0.25.0, use {@link Language#abiVersion} instead * Gets the version of the language. 
*/ get version(): number { return C._ts_language_version(this[0]); } + + /** + * Gets the ABI version of the language. + */ + get abiVersion(): number { + return C._ts_language_abi_version(this[0]); + } + + /** + * Get the metadata for this language. This information is generated by the + * CLI, and relies on the language author providing the correct metadata in + * the language's `tree-sitter.json` file. + */ + get metadata(): LanguageMetadata | null { + C._ts_language_metadata_wasm(this[0]); + const length = C.getValue(TRANSFER_BUFFER, 'i32'); + if (length === 0) return null; + const address = TRANSFER_BUFFER + SIZE_OF_INT; + return unmarshalLanguageMetadata(address); + } + /** * Gets the number of fields in the language. */ diff --git a/lib/binding_web/src/marshal.ts b/lib/binding_web/src/marshal.ts index 87a8c4db..b2e468ec 100644 --- a/lib/binding_web/src/marshal.ts +++ b/lib/binding_web/src/marshal.ts @@ -5,6 +5,7 @@ import { Tree } from "./tree"; import { Query, QueryCapture, type QueryMatch } from "./query"; import { TreeCursor } from "./tree_cursor"; import { TRANSFER_BUFFER } from "./parser"; +import { LanguageMetadata } from "./language"; /** * @internal @@ -161,3 +162,16 @@ export function marshalEdit(edit: Edit, address = TRANSFER_BUFFER) { C.setValue(address, edit.oldEndIndex, 'i32'); address += SIZE_OF_INT; C.setValue(address, edit.newEndIndex, 'i32'); address += SIZE_OF_INT; } + +/** + * @internal + * + * Unmarshals a {@link LanguageMetadata} from the transfer buffer. 
+ */ +export function unmarshalLanguageMetadata(address: number): LanguageMetadata { + const major_version = C.getValue(address, 'i32'); address += SIZE_OF_INT; + const minor_version = C.getValue(address, 'i32'); address += SIZE_OF_INT; + const patch_version = C.getValue(address, 'i32'); + // Build the object in one step: the fields of LanguageMetadata are readonly. + return { major_version, minor_version, patch_version }; +} diff --git a/lib/binding_web/test/language.test.ts b/lib/binding_web/test/language.test.ts index 9d5c49de..1748302b 100644 --- a/lib/binding_web/test/language.test.ts +++ b/lib/binding_web/test/language.test.ts @@ -12,7 +12,7 @@ describe('Language', () => { describe('.name, .version', () => { it('returns the name and version of the language', () => { expect(JavaScript.name).toBe('javascript'); - expect(JavaScript.version).toBe(15); + expect(JavaScript.abiVersion).toBe(15); }); }); diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 54021acf..493a37f9 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -42,6 +42,7 @@ typedef uint16_t TSStateId; typedef uint16_t TSSymbol; typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; +typedef struct TSLanguageMetadata TSLanguageMetadata; typedef struct TSParser TSParser; typedef struct TSTree TSTree; typedef struct TSQuery TSQuery; @@ -182,6 +183,20 @@ typedef struct TSQueryCursorOptions { bool (*progress_callback)(TSQueryCursorState *state); } TSQueryCursorOptions; +/** + * The metadata associated with a language. + * + * Currently, this metadata can be used to check the [Semantic Version](https://semver.org/) + * of the language. This version information should be used to signal if a given parser might + * be incompatible with existing queries when upgrading between major versions, or minor versions + * if it's in zerover. 
+ */ +typedef struct TSLanguageMetadata { + uint8_t major_version; + uint8_t minor_version; + uint8_t patch_version; +} TSLanguageMetadata; + /********************/ /* Section - Parser */ /********************/ @@ -207,7 +222,7 @@ const TSLanguage *ts_parser_language(const TSParser *self); * Returns a boolean indicating whether or not the language was successfully * assigned. True means assignment succeeded. False means there was a version * mismatch: the language was generated with an incompatible version of the - * Tree-sitter CLI. Check the language's version using [`ts_language_version`] + * Tree-sitter CLI. Check the language's ABI version using [`ts_language_abi_version`] * and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and * [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants. */ @@ -1247,6 +1262,8 @@ const char *ts_language_symbol_name(const TSLanguage *self, TSSymbol symbol); TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol); /** + * @deprecated use [`ts_language_abi_version`] instead, this will be removed in 0.26. + * * Get the ABI version number for this language. This version number is used * to ensure that languages were generated by a compatible version of * Tree-sitter. @@ -1255,6 +1272,24 @@ TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol); */ uint32_t ts_language_version(const TSLanguage *self); +/** + * Get the ABI version number for this language. This version number is used + * to ensure that languages were generated by a compatible version of + * Tree-sitter. + * + * See also [`ts_parser_set_language`]. + */ +uint32_t ts_language_abi_version(const TSLanguage *self); + +/** + * Get the metadata for this language. This information is generated by the + * CLI, and relies on the language author providing the correct metadata in + * the language's `tree-sitter.json` file. + * + * See also [`TSLanguageMetadata`]. 
+ */ +const TSLanguageMetadata *ts_language_metadata(const TSLanguage *self); + /** * Get the next parse state. Combine this with lookahead iterators to generate * completion suggestions or valid symbols in error nodes. Use diff --git a/lib/src/language.c b/lib/src/language.c index 93cc21b2..b341a670 100644 --- a/lib/src/language.c +++ b/lib/src/language.c @@ -25,7 +25,7 @@ uint32_t ts_language_state_count(const TSLanguage *self) { } const TSSymbol *ts_language_supertypes(const TSLanguage *self, uint32_t *length) { - if (self->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) { + if (self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) { *length = self->supertype_count; return self->supertype_symbols; } else { @@ -39,7 +39,7 @@ const TSSymbol *ts_language_subtypes( TSSymbol supertype, uint32_t *length ) { - if (self->version < LANGUAGE_VERSION_WITH_RESERVED_WORDS || !ts_language_symbol_metadata(self, supertype).supertype) { + if (self->abi_version < LANGUAGE_VERSION_WITH_RESERVED_WORDS || !ts_language_symbol_metadata(self, supertype).supertype) { *length = 0; return NULL; } @@ -50,11 +50,19 @@ const TSSymbol *ts_language_subtypes( } uint32_t ts_language_version(const TSLanguage *self) { - return self->version; + return self->abi_version; +} + +uint32_t ts_language_abi_version(const TSLanguage *self) { + return self->abi_version; +} + +const TSLanguageMetadata *ts_language_metadata(const TSLanguage *self) { + return self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? &self->metadata : NULL; } const char *ts_language_name(const TSLanguage *self) { - return self->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? self->name : NULL; + return self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? 
self->name : NULL; } uint32_t ts_language_field_count(const TSLanguage *self) { @@ -85,7 +93,7 @@ TSLexerMode ts_language_lex_mode_for_state( const TSLanguage *self, TSStateId state ) { - if (self->version < 15) { + if (self->abi_version < 15) { TSLexMode mode = ((const TSLexMode *)self->lex_modes)[state]; return (TSLexerMode) { .lex_state = mode.lex_state, diff --git a/lib/src/language.h b/lib/src/language.h index 003123a3..518c06bf 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -183,7 +183,7 @@ static inline bool ts_language_state_is_primary( const TSLanguage *self, TSStateId state ) { - if (self->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) { + if (self->abi_version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) { return state == self->primary_state_ids[state]; } else { return true; diff --git a/lib/src/parser.c b/lib/src/parser.c index 3001e4cd..fb7d60d8 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1977,8 +1977,8 @@ bool ts_parser_set_language(TSParser *self, const TSLanguage *language) { if (language) { if ( - language->version > TREE_SITTER_LANGUAGE_VERSION || - language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION + language->abi_version > TREE_SITTER_LANGUAGE_VERSION || + language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION ) return false; if (ts_language_is_wasm(language)) { diff --git a/lib/src/parser.h b/lib/src/parser.h index a61358d1..cdbe64cc 100644 --- a/lib/src/parser.h +++ b/lib/src/parser.h @@ -18,6 +18,12 @@ typedef uint16_t TSStateId; typedef uint16_t TSSymbol; typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; +typedef struct TSLanguageMetadata TSLanguageMetadata; +typedef struct TSLanguageMetadata { + uint8_t major_version; + uint8_t minor_version; + uint8_t patch_version; +} TSLanguageMetadata; #endif typedef struct { @@ -100,7 +106,7 @@ typedef struct { } TSCharacterRange; struct TSLanguage { - uint32_t version; + uint32_t abi_version; uint32_t symbol_count; uint32_t alias_count; 
uint32_t token_count; @@ -143,6 +149,7 @@ struct TSLanguage { const TSSymbol *supertype_symbols; const TSMapSlice *supertype_map_slices; const TSSymbol *supertype_map_entries; + TSLanguageMetadata metadata; }; static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { diff --git a/lib/src/query.c b/lib/src/query.c index 20317d24..05eddef5 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -2433,7 +2433,7 @@ static TSQueryError ts_query__parse_pattern( // Get all the possible subtypes for the given supertype, // and check if the given subtype is valid. - if (self->language->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) { + if (self->language->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) { uint32_t subtype_length; const TSSymbol *subtypes = ts_language_subtypes( self->language, @@ -2774,8 +2774,8 @@ TSQuery *ts_query_new( ) { if ( !language || - language->version > TREE_SITTER_LANGUAGE_VERSION || - language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION + language->abi_version > TREE_SITTER_LANGUAGE_VERSION || + language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION ) { *error_type = TSQueryErrorLanguage; return NULL; diff --git a/lib/src/wasm_store.c b/lib/src/wasm_store.c index b5e0a5c7..d5fff257 100644 --- a/lib/src/wasm_store.c +++ b/lib/src/wasm_store.c @@ -117,7 +117,7 @@ typedef Array(char) StringData; // LanguageInWasmMemory - The memory layout of a `TSLanguage` when compiled to // wasm32. This is used to copy static language data out of the wasm memory. typedef struct { - uint32_t version; + uint32_t abi_version; uint32_t symbol_count; uint32_t alias_count; uint32_t token_count; @@ -160,6 +160,7 @@ typedef struct { int32_t supertype_symbols; int32_t supertype_map_slices; int32_t supertype_map_entries; + TSLanguageMetadata metadata; } LanguageInWasmMemory; // LexerInWasmMemory - The memory layout of a `TSLexer` when compiled to wasm32. 
@@ -1258,7 +1259,7 @@ const TSLanguage *ts_wasm_store_load_language( StringData field_name_buffer = array_new(); *language = (TSLanguage) { - .version = wasm_language.version, + .abi_version = wasm_language.abi_version, .symbol_count = wasm_language.symbol_count, .alias_count = wasm_language.alias_count, .token_count = wasm_language.token_count, @@ -1270,6 +1271,7 @@ const TSLanguage *ts_wasm_store_load_language( .supertype_count = wasm_language.supertype_count, .max_alias_sequence_length = wasm_language.max_alias_sequence_length, .keyword_capture_token = wasm_language.keyword_capture_token, + .metadata = wasm_language.metadata, .parse_table = copy( &memory[wasm_language.parse_table], wasm_language.large_state_count * wasm_language.symbol_count * sizeof(uint16_t) @@ -1396,14 +1398,14 @@ const TSLanguage *ts_wasm_store_load_language( ); } - if (language->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) { + if (language->abi_version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) { language->primary_state_ids = copy( &memory[wasm_language.primary_state_ids], wasm_language.state_count * sizeof(TSStateId) ); } - if (language->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) { + if (language->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) { language->name = copy_string(memory, wasm_language.name); language->reserved_words = copy( &memory[wasm_language.reserved_words],