feat: add the semantic version to TSLanguage, and expose an API for retrieving it

This commit is contained in:
Amaan Qureshi 2025-01-21 01:59:24 -05:00
parent f0222107b8
commit 8bb1448a6f
24 changed files with 371 additions and 77 deletions

View file

@ -180,6 +180,14 @@ pub struct TSQueryCursorOptions {
pub progress_callback:
::core::option::Option<unsafe extern "C" fn(state: *mut TSQueryCursorState) -> bool>,
}
/// The metadata associated with a language.
///
/// Currently, this metadata can be used to check the [Semantic Version](https://semver.org/)
/// of the language. This version information should be used to signal if a given parser might
/// be incompatible with existing queries when upgrading between major versions, or minor versions
/// if it's in zerover.
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct TSLanguageMetadata {
    pub major_version: u8,
    pub minor_version: u8,
    pub patch_version: u8,
}
extern "C" {
#[doc = " Create a new parser."]
pub fn ts_parser_new() -> *mut TSParser;
@ -193,7 +201,7 @@ extern "C" {
pub fn ts_parser_language(self_: *const TSParser) -> *const TSLanguage;
}
extern "C" {
#[doc = " Set the language that the parser should use for parsing.\n\n Returns a boolean indicating whether or not the language was successfully\n assigned. True means assignment succeeded. False means there was a version\n mismatch: the language was generated with an incompatible version of the\n Tree-sitter CLI. Check the language's version using [`ts_language_version`]\n and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and\n [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants."]
#[doc = " Set the language that the parser should use for parsing.\n\n Returns a boolean indicating whether or not the language was successfully\n assigned. True means assignment succeeded. False means there was a version\n mismatch: the language was generated with an incompatible version of the\n Tree-sitter CLI. Check the language's ABI version using [`ts_language_abi_version`]\n and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and\n [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants."]
pub fn ts_parser_set_language(self_: *mut TSParser, language: *const TSLanguage) -> bool;
}
extern "C" {
@ -807,9 +815,17 @@ extern "C" {
pub fn ts_language_symbol_type(self_: *const TSLanguage, symbol: TSSymbol) -> TSSymbolType;
}
extern "C" {
#[doc = " Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."]
#[doc = " @deprecated use [`ts_language_abi_version`] instead, this will be removed in 0.26.\n\n Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."]
pub fn ts_language_version(self_: *const TSLanguage) -> u32;
}
extern "C" {
    /// Get the ABI version number for this language. This version number is used
    /// to ensure that languages were generated by a compatible version of
    /// Tree-sitter.
    ///
    /// See also [`ts_parser_set_language`].
    pub fn ts_language_abi_version(self_: *const TSLanguage) -> u32;
}
extern "C" {
    #[doc = " Get the metadata for this language. This information is generated by the\n CLI, and relies on the language author providing the correct metadata in\n the language's `tree-sitter.json` file.\n\n See also [`TSLanguageMetadata`]."]
    pub fn ts_language_metadata(self_: *const TSLanguage) -> *const TSLanguageMetadata;
}
extern "C" {
#[doc = " Get the next parse state. Combine this with lookahead iterators to generate\n completion suggestions or valid symbols in error nodes. Use\n [`ts_node_grammar_symbol`] for valid symbols."]
pub fn ts_language_next_state(

View file

@ -64,6 +64,29 @@ pub struct Language(*const ffi::TSLanguage);
pub struct LanguageRef<'a>(*const ffi::TSLanguage, PhantomData<&'a ()>);
/// The metadata associated with a language.
///
/// Currently, this metadata can be used to check the [Semantic Version](https://semver.org/)
/// of the language. This version information should be used to signal if a given parser might
/// be incompatible with existing queries when upgrading between major versions, or minor versions
/// if it's in zerover.
#[doc(alias = "TSLanguageMetadata")]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LanguageMetadata {
    /// The major component of the language's semantic version.
    pub major_version: u8,
    /// The minor component of the language's semantic version.
    pub minor_version: u8,
    /// The patch component of the language's semantic version.
    pub patch_version: u8,
}
impl From<ffi::TSLanguageMetadata> for LanguageMetadata {
    /// Copies the three version components out of the raw FFI struct,
    /// field by field.
    fn from(val: ffi::TSLanguageMetadata) -> Self {
        let ffi::TSLanguageMetadata {
            major_version,
            minor_version,
            patch_version,
        } = val;
        Self {
            major_version,
            minor_version,
            patch_version,
        }
    }
}
/// A tree that represents the syntactic structure of a source code file.
#[doc(alias = "TSTree")]
pub struct Tree(NonNull<ffi::TSTree>);
@ -394,7 +417,7 @@ impl Language {
}
/// Get the name of this language. This returns `None` in older parsers.
#[doc(alias = "ts_language_version")]
#[doc(alias = "ts_language_name")]
#[must_use]
pub fn name(&self) -> Option<&'static str> {
let ptr = unsafe { ffi::ts_language_name(self.0) };
@ -404,11 +427,34 @@ impl Language {
/// Get the ABI version number, which indicates the version of the
/// Tree-sitter CLI that was used to generate this [`Language`].
#[doc(alias = "ts_language_version")]
#[deprecated(since = "0.25.0", note = "Use abi_version instead")]
#[must_use]
pub fn version(&self) -> usize {
    // `self.0` is the raw language pointer wrapped by this type; it is
    // presumably valid for as long as `self` exists (confirm against the
    // constructors of `Language`).
    let raw_version = unsafe { ffi::ts_language_version(self.0) };
    raw_version as usize
}
/// Get the ABI version number, which indicates the version of the
/// Tree-sitter CLI that was used to generate this [`Language`].
#[doc(alias = "ts_language_abi_version")]
#[must_use]
pub fn abi_version(&self) -> usize {
    // `self.0` is the raw language pointer wrapped by this type; it is
    // presumably valid for as long as `self` exists (confirm against the
    // constructors of `Language`).
    let raw_version = unsafe { ffi::ts_language_abi_version(self.0) };
    raw_version as usize
}
/// Get the metadata for this language. This information is generated by the
/// CLI, and relies on the language author providing the correct metadata in
/// the language's `tree-sitter.json` file.
///
/// Returns `None` when the underlying C call yields a null pointer.
///
/// See also [`LanguageMetadata`].
#[doc(alias = "ts_language_metadata")]
#[must_use]
pub fn metadata(&self) -> Option<LanguageMetadata> {
    let raw = unsafe { ffi::ts_language_metadata(self.0) };
    if raw.is_null() {
        return None;
    }
    // The pointer was just checked to be non-null; the pointee is copied
    // out before being converted into the safe wrapper type.
    let copied = unsafe { *raw };
    Some(copied.into())
}
/// Get the number of distinct node types in this language.
#[doc(alias = "ts_language_symbol_count")]
#[must_use]
@ -613,7 +659,7 @@ impl Parser {
/// [`LANGUAGE_VERSION`] and [`MIN_COMPATIBLE_LANGUAGE_VERSION`] constants.
#[doc(alias = "ts_parser_set_language")]
pub fn set_language(&mut self, language: &Language) -> Result<(), LanguageError> {
let version = language.version();
let version = language.abi_version();
if (MIN_COMPATIBLE_LANGUAGE_VERSION..=LANGUAGE_VERSION).contains(&version) {
unsafe {
ffi::ts_parser_set_language(self.0.as_ptr(), language.0);
@ -2360,7 +2406,7 @@ impl Query {
column: 0,
offset: 0,
message: LanguageError {
version: language.version(),
version: language.abi_version(),
}
.to_string(),
kind: QueryErrorKind::Language,

View file

@ -12,6 +12,8 @@
"ts_language_symbol_type",
"ts_language_name",
"ts_language_version",
"ts_language_abi_version",
"ts_language_metadata",
"ts_language_next_state",
"ts_node_field_name_for_child_wasm",
"ts_node_field_name_for_named_child_wasm",

View file

@ -110,6 +110,17 @@ static TSInputEdit unmarshal_edit() {
return edit;
}
// Writes a language's metadata into the transfer buffer.
//
// Slot 0 receives 3 when `metadata` is present and 0 when it is NULL.
// When present, slots 1..3 receive the major, minor and patch versions,
// each widened to a 32-bit value.
static void marshal_language_metadata(const TSLanguageMetadata *metadata) {
  if (metadata != NULL) {
    const uint32_t major = (uint32_t)metadata->major_version;
    const uint32_t minor = (uint32_t)metadata->minor_version;
    const uint32_t patch = (uint32_t)metadata->patch_version;

    TRANSFER_BUFFER[0] = (const void*)3;
    TRANSFER_BUFFER[1] = (const void*)major;
    TRANSFER_BUFFER[2] = (const void*)minor;
    TRANSFER_BUFFER[3] = (const void*)patch;
  } else {
    TRANSFER_BUFFER[0] = 0;
  }
}
/********************/
/* Section - Parser */
/********************/
@ -242,6 +253,11 @@ int ts_language_type_is_visible_wasm(const TSLanguage *self, TSSymbol typeId) {
return symbolType <= TSSymbolTypeAnonymous;
}
// Looks up the language's metadata and marshals it into the transfer buffer
// (see `marshal_language_metadata` for the slot layout).
void ts_language_metadata_wasm(const TSLanguage *self) {
  marshal_language_metadata(ts_language_metadata(self));
}
void ts_language_supertypes_wasm(const TSLanguage *self) {
uint32_t length;
const TSSymbol *supertypes = ts_language_supertypes(self, &length);

View file

@ -72,6 +72,8 @@ interface WasmModule {
_ts_language_symbol_count(_0: number): number;
_ts_language_state_count(_0: number): number;
_ts_language_version(_0: number): number;
_ts_language_abi_version(_0: number): number;
_ts_language_metadata(_0: number): number;
_ts_language_name(_0: number): number;
_ts_language_field_count(_0: number): number;
_ts_language_next_state(_0: number, _1: number, _2: number): number;

View file

@ -1,10 +1,17 @@
import { C, INTERNAL, Internal, assertInternal, SIZE_OF_INT, SIZE_OF_SHORT } from './constants';
import { LookaheadIterator } from './lookahead_iterator';
import { unmarshalLanguageMetadata } from './marshal';
import { TRANSFER_BUFFER } from './parser';
import { Query } from './query';
const LANGUAGE_FUNCTION_REGEX = /^tree_sitter_\w+$/;
/**
* The semantic version components of a language.
*/
export class LanguageMetadata {
/** The major component of the language's version. */
readonly major_version: number;
/** The minor component of the language's version. */
readonly minor_version: number;
/** The patch component of the language's version. */
readonly patch_version: number;
}
/**
* An opaque object that defines how to parse a particular language.
* The code for each `Language` is generated by the Tree-sitter CLI.
@ -46,7 +53,7 @@ export class Language {
}
}
/**
* Gets the name of the language.
*/
@ -57,11 +64,33 @@ export class Language {
}
/**
* Gets the version of the language.
*
* @deprecated since version 0.25.0, use {@link Language#abiVersion} instead
*/
get version(): number {
return C._ts_language_version(this[0]);
}
/**
 * Gets the ABI version of the language, as reported by the underlying
 * `ts_language_abi_version` call.
 */
get abiVersion(): number {
  const languageAddress = this[0];
  return C._ts_language_abi_version(languageAddress);
}
/**
 * Get the metadata for this language. This information is generated by the
 * CLI, and relies on the language author providing the correct metadata in
 * the language's `tree-sitter.json` file.
 *
 * Returns `null` when the underlying call yields a null pointer.
 */
get metadata(): LanguageMetadata | null {
  // `_ts_language_metadata` returns a pointer to a `TSLanguageMetadata`
  // struct (three consecutive `uint8_t` fields) and does not write anything
  // to the transfer buffer, so the fields are read through the returned
  // pointer instead of from `TRANSFER_BUFFER`.
  const address = C._ts_language_metadata(this[0]);
  if (address === 0) return null;

  // `getValue(..., 'i8')` yields a signed byte; mask it back to 0..255.
  const readByte = (offset: number): number => C.getValue(address + offset, 'i8') & 0xff;

  return {
    major_version: readByte(0),
    minor_version: readByte(1),
    patch_version: readByte(2),
  };
}
/**
* Gets the number of fields in the language.
*/

View file

@ -5,6 +5,7 @@ import { Tree } from "./tree";
import { Query, QueryCapture, type QueryMatch } from "./query";
import { TreeCursor } from "./tree_cursor";
import { TRANSFER_BUFFER } from "./parser";
import { LanguageMetadata } from "./language";
/**
* @internal
@ -161,3 +162,16 @@ export function marshalEdit(edit: Edit, address = TRANSFER_BUFFER) {
C.setValue(address, edit.oldEndIndex, 'i32'); address += SIZE_OF_INT;
C.setValue(address, edit.newEndIndex, 'i32'); address += SIZE_OF_INT;
}
/**
 * @internal
 *
 * Unmarshals a {@link LanguageMetadata} from three consecutive 32-bit
 * integers (major, minor, patch) starting at `address`.
 */
export function unmarshalLanguageMetadata(address: number): LanguageMetadata {
  const major_version = C.getValue(address, 'i32');
  const minor_version = C.getValue(address + SIZE_OF_INT, 'i32');
  // The third slot is the patch version (previously stored under a
  // nonexistent `field_count` property, leaving `patch_version` undefined).
  const patch_version = C.getValue(address + 2 * SIZE_OF_INT, 'i32');
  return { major_version, minor_version, patch_version };
}

View file

@ -12,7 +12,7 @@ describe('Language', () => {
describe('.name, .version', () => {
it('returns the name and version of the language', () => {
expect(JavaScript.name).toBe('javascript');
expect(JavaScript.version).toBe(15);
expect(JavaScript.abiVersion).toBe(15);
});
});

View file

@ -42,6 +42,7 @@ typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
typedef struct TSLanguageMetadata TSLanguageMetadata;
typedef struct TSParser TSParser;
typedef struct TSTree TSTree;
typedef struct TSQuery TSQuery;
@ -182,6 +183,20 @@ typedef struct TSQueryCursorOptions {
bool (*progress_callback)(TSQueryCursorState *state);
} TSQueryCursorOptions;
/**
* The metadata associated with a language.
*
* Currently, this metadata can be used to check the [Semantic Version](https://semver.org/)
* of the language. This version information should be used to signal if a given parser might
* be incompatible with existing queries when upgrading between major versions, or minor versions
* if it's in zerover.
*/
typedef struct TSLanguageMetadata {
// Major component of the language's semantic version.
uint8_t major_version;
// Minor component of the language's semantic version.
uint8_t minor_version;
// Patch component of the language's semantic version.
uint8_t patch_version;
} TSLanguageMetadata;
/********************/
/* Section - Parser */
/********************/
@ -207,7 +222,7 @@ const TSLanguage *ts_parser_language(const TSParser *self);
* Returns a boolean indicating whether or not the language was successfully
* assigned. True means assignment succeeded. False means there was a version
* mismatch: the language was generated with an incompatible version of the
* Tree-sitter CLI. Check the language's version using [`ts_language_version`]
* Tree-sitter CLI. Check the language's ABI version using [`ts_language_abi_version`]
* and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and
* [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants.
*/
@ -1247,6 +1262,8 @@ const char *ts_language_symbol_name(const TSLanguage *self, TSSymbol symbol);
TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol);
/**
* @deprecated use [`ts_language_abi_version`] instead, this will be removed in 0.26.
*
* Get the ABI version number for this language. This version number is used
* to ensure that languages were generated by a compatible version of
* Tree-sitter.
@ -1255,6 +1272,24 @@ TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol);
*/
uint32_t ts_language_version(const TSLanguage *self);
/**
* Get the ABI version number for this language. This version number is used
* to ensure that languages were generated by a compatible version of
* Tree-sitter.
*
* See also [`ts_parser_set_language`].
*/
uint32_t ts_language_abi_version(const TSLanguage *self);
/**
* Get the metadata for this language. This information is generated by the
* CLI, and relies on the language author providing the correct metadata in
* the language's `tree-sitter.json` file.
*
* Returns `NULL` for languages whose ABI version is too old to carry metadata.
*
* See also [`TSLanguageMetadata`].
*/
const TSLanguageMetadata *ts_language_metadata(const TSLanguage *self);
/**
* Get the next parse state. Combine this with lookahead iterators to generate
* completion suggestions or valid symbols in error nodes. Use

View file

@ -25,7 +25,7 @@ uint32_t ts_language_state_count(const TSLanguage *self) {
}
const TSSymbol *ts_language_supertypes(const TSLanguage *self, uint32_t *length) {
if (self->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
if (self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
*length = self->supertype_count;
return self->supertype_symbols;
} else {
@ -39,7 +39,7 @@ const TSSymbol *ts_language_subtypes(
TSSymbol supertype,
uint32_t *length
) {
if (self->version < LANGUAGE_VERSION_WITH_RESERVED_WORDS || !ts_language_symbol_metadata(self, supertype).supertype) {
if (self->abi_version < LANGUAGE_VERSION_WITH_RESERVED_WORDS || !ts_language_symbol_metadata(self, supertype).supertype) {
*length = 0;
return NULL;
}
@ -50,11 +50,19 @@ const TSSymbol *ts_language_subtypes(
}
uint32_t ts_language_version(const TSLanguage *self) {
return self->version;
return self->abi_version;
}
// Returns the ABI version stored in the language's `abi_version` field.
uint32_t ts_language_abi_version(const TSLanguage *self) {
return self->abi_version;
}
// Returns a pointer to the language's embedded metadata, or NULL when the
// language's ABI version is below LANGUAGE_VERSION_WITH_RESERVED_WORDS.
const TSLanguageMetadata *ts_language_metadata(const TSLanguage *self) {
  if (self->abi_version < LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
    return NULL;
  }
  return &self->metadata;
}
const char *ts_language_name(const TSLanguage *self) {
return self->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? self->name : NULL;
return self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? self->name : NULL;
}
uint32_t ts_language_field_count(const TSLanguage *self) {
@ -85,7 +93,7 @@ TSLexerMode ts_language_lex_mode_for_state(
const TSLanguage *self,
TSStateId state
) {
if (self->version < 15) {
if (self->abi_version < 15) {
TSLexMode mode = ((const TSLexMode *)self->lex_modes)[state];
return (TSLexerMode) {
.lex_state = mode.lex_state,

View file

@ -183,7 +183,7 @@ static inline bool ts_language_state_is_primary(
const TSLanguage *self,
TSStateId state
) {
if (self->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) {
if (self->abi_version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) {
return state == self->primary_state_ids[state];
} else {
return true;

View file

@ -1977,8 +1977,8 @@ bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
if (language) {
if (
language->version > TREE_SITTER_LANGUAGE_VERSION ||
language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
language->abi_version > TREE_SITTER_LANGUAGE_VERSION ||
language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
) return false;
if (ts_language_is_wasm(language)) {

View file

@ -18,6 +18,12 @@ typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
// The semantic version (major, minor, patch) embedded in a generated language.
// NOTE(review): this appears to repeat the public API header's definition for
// builds that do not include that header — confirm against the enclosing guard.
typedef struct TSLanguageMetadata TSLanguageMetadata;
typedef struct TSLanguageMetadata {
uint8_t major_version;
uint8_t minor_version;
uint8_t patch_version;
} TSLanguageMetadata;
#endif
typedef struct {
@ -100,7 +106,7 @@ typedef struct {
} TSCharacterRange;
struct TSLanguage {
uint32_t version;
uint32_t abi_version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
@ -143,6 +149,7 @@ struct TSLanguage {
const TSSymbol *supertype_symbols;
const TSMapSlice *supertype_map_slices;
const TSSymbol *supertype_map_entries;
TSLanguageMetadata metadata;
};
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {

View file

@ -2433,7 +2433,7 @@ static TSQueryError ts_query__parse_pattern(
// Get all the possible subtypes for the given supertype,
// and check if the given subtype is valid.
if (self->language->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
if (self->language->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
uint32_t subtype_length;
const TSSymbol *subtypes = ts_language_subtypes(
self->language,
@ -2774,8 +2774,8 @@ TSQuery *ts_query_new(
) {
if (
!language ||
language->version > TREE_SITTER_LANGUAGE_VERSION ||
language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
language->abi_version > TREE_SITTER_LANGUAGE_VERSION ||
language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
) {
*error_type = TSQueryErrorLanguage;
return NULL;

View file

@ -117,7 +117,7 @@ typedef Array(char) StringData;
// LanguageInWasmMemory - The memory layout of a `TSLanguage` when compiled to
// wasm32. This is used to copy static language data out of the wasm memory.
typedef struct {
uint32_t version;
uint32_t abi_version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
@ -160,6 +160,7 @@ typedef struct {
int32_t supertype_symbols;
int32_t supertype_map_slices;
int32_t supertype_map_entries;
TSLanguageMetadata metadata;
} LanguageInWasmMemory;
// LexerInWasmMemory - The memory layout of a `TSLexer` when compiled to wasm32.
@ -1258,7 +1259,7 @@ const TSLanguage *ts_wasm_store_load_language(
StringData field_name_buffer = array_new();
*language = (TSLanguage) {
.version = wasm_language.version,
.abi_version = wasm_language.abi_version,
.symbol_count = wasm_language.symbol_count,
.alias_count = wasm_language.alias_count,
.token_count = wasm_language.token_count,
@ -1270,6 +1271,7 @@ const TSLanguage *ts_wasm_store_load_language(
.supertype_count = wasm_language.supertype_count,
.max_alias_sequence_length = wasm_language.max_alias_sequence_length,
.keyword_capture_token = wasm_language.keyword_capture_token,
.metadata = wasm_language.metadata,
.parse_table = copy(
&memory[wasm_language.parse_table],
wasm_language.large_state_count * wasm_language.symbol_count * sizeof(uint16_t)
@ -1396,14 +1398,14 @@ const TSLanguage *ts_wasm_store_load_language(
);
}
if (language->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) {
if (language->abi_version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) {
language->primary_state_ids = copy(
&memory[wasm_language.primary_state_ids],
wasm_language.state_count * sizeof(TSStateId)
);
}
if (language->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
if (language->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
language->name = copy_string(memory, wasm_language.name);
language->reserved_words = copy(
&memory[wasm_language.reserved_words],