Compare commits

...
Sign in to create a new pull request.

2 commits

16 changed files with 1895 additions and 2160 deletions

View file

@ -47,6 +47,7 @@ serde_derive.workspace = true
serde_json.workspace = true
smallbitvec.workspace = true
tiny_http.workspace = true
toml.workspace = true
walkdir.workspace = true
wasmparser.workspace = true
webbrowser.workspace = true

View file

@ -11,20 +11,23 @@ mod render;
mod rules;
mod tables;
use std::io::Write;
use std::path::Path;
use std::process::{Command, Stdio};
use std::{env, fs};
use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use semver::Version;
use serde::Deserialize;
use self::build_tables::build_tables;
use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use self::parse_grammar::parse_grammar;
use self::prepare_grammar::prepare_grammar;
use self::render::render_c_code;
use self::rules::AliasMap;
use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use semver::Version;
use std::io::Write;
use std::path::Path;
use std::process::{Command, Stdio};
use std::{env, fs};
lazy_static! {
static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
@ -73,6 +76,19 @@ pub fn generate_parser_in_directory(
prepare_grammar(&input_grammar)?;
let language_name = input_grammar.name;
let language_semver = read_package_json_version()?;
let rust_binding_version = read_rust_binding_version()?;
if language_semver != rust_binding_version {
anyhow::bail!(
"Error:
The version of your language grammar in `package.json` is `{language_semver}`, but the version of your language grammar in `Cargo.toml` is `{rust_binding_version}`.
These versions must match. Please adjust one of these files to match the other, and then try running `tree-sitter generate` again.
Consider delegating this process to the `release` subcommand, which will handle git tags, GitHub releases, and publishing to crates.io, npmjs, and PyPI for you.
Read more here: https://tree-sitter.github.io/tree-sitter/creating-parsers#releasing-a-new-grammar-version",
);
}
// Generate the parser and related files.
let GeneratedParser {
c_code,
@ -85,6 +101,11 @@ pub fn generate_parser_in_directory(
simple_aliases,
abi_version,
report_symbol_name,
(
language_semver.major as u8,
language_semver.minor as u8,
language_semver.patch as u8,
),
)?;
write_file(&src_path.join("parser.c"), c_code)?;
@ -111,6 +132,7 @@ pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String
simple_aliases,
tree_sitter::LANGUAGE_VERSION,
None,
(0, 0, 0),
)?;
Ok((input_grammar.name, parser.c_code))
}
@ -123,6 +145,7 @@ fn generate_parser_for_grammar_with_opts(
simple_aliases: AliasMap,
abi_version: usize,
report_symbol_name: Option<&str>,
semantic_version: (u8, u8, u8),
) -> Result<GeneratedParser> {
let variable_info =
node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
@ -150,6 +173,7 @@ fn generate_parser_for_grammar_with_opts(
lexical_grammar,
simple_aliases,
abi_version,
semantic_version,
);
Ok(GeneratedParser {
c_code,
@ -157,6 +181,30 @@ fn generate_parser_for_grammar_with_opts(
})
}
/// Reads the `version` field of `package.json` (resolved relative to the
/// current working directory) and parses it as a semantic version.
///
/// # Errors
///
/// Returns an error if the file cannot be read, is not valid JSON containing
/// a string `version` field, or if that field is not a valid semantic version.
fn read_package_json_version() -> Result<Version> {
    // Only the `version` field is needed; serde ignores every other key.
    #[derive(Deserialize)]
    struct PackageJSON {
        version: String,
    }

    let path = "package.json";
    let text = fs::read_to_string(path).with_context(|| format!("Failed to read {path:?}"))?;
    let package_json: PackageJSON =
        serde_json::from_str(&text).with_context(|| format!("Failed to parse {path:?} as JSON"))?;

    // Name the file and the offending value so a bare semver parse error is
    // not reported without any indication of where it came from.
    Version::parse(&package_json.version).with_context(|| {
        format!(
            "Invalid semantic version {:?} in {path:?}",
            package_json.version
        )
    })
}
fn read_rust_binding_version() -> Result<Version> {
let path = "Cargo.toml";
let text = fs::read_to_string(path)?;
let cargo_toml = toml::from_str::<toml::Value>(text.as_ref())?;
Ok(Version::parse(
cargo_toml["package"]["version"].as_str().unwrap(),
)?)
}
pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
if grammar_path.is_dir() {
return Err(anyhow!(

View file

@ -17,9 +17,9 @@ use std::{
const LARGE_CHARACTER_RANGE_COUNT: usize = 8;
const SMALL_STATE_THRESHOLD: usize = 64;
const ABI_VERSION_MIN: usize = 13;
const ABI_VERSION_MIN: usize = 15;
const ABI_VERSION_MAX: usize = tree_sitter::LANGUAGE_VERSION;
const ABI_VERSION_WITH_PRIMARY_STATES: usize = 14;
const ABI_VERSION_WITH_PRIMARY_STATES: usize = 15;
macro_rules! add {
($this: tt, $($arg: tt)*) => {{
@ -74,9 +74,10 @@ struct Generator {
unique_aliases: Vec<Alias>,
symbol_map: HashMap<Symbol, Symbol>,
field_names: Vec<String>,
#[allow(unused)]
abi_version: usize,
major_version: u8,
minor_version: u8,
patch_version: u8,
}
struct TransitionSummary {
@ -984,7 +985,7 @@ impl Generator {
if action.in_main_token {
add!(self, "ADVANCE({});", action.state);
} else {
add!(self, "SKIP({})", action.state);
add!(self, "SKIP({});", action.state);
}
}
@ -1342,7 +1343,7 @@ impl Generator {
indent!(self);
add_line!(self, "static const TSLanguage language = {{");
indent!(self);
add_line!(self, ".version = LANGUAGE_VERSION,");
add_line!(self, ".abi_version = LANGUAGE_VERSION,");
// Quantities
add_line!(self, ".symbol_count = SYMBOL_COUNT,");
@ -1406,9 +1407,11 @@ impl Generator {
add_line!(self, "}},");
}
if self.abi_version >= ABI_VERSION_WITH_PRIMARY_STATES {
add_line!(self, ".primary_state_ids = ts_primary_state_ids,");
}
add_line!(self, ".primary_state_ids = ts_primary_state_ids,");
add_line!(self, ".major_version = {},", self.major_version);
add_line!(self, ".minor_version = {},", self.minor_version);
add_line!(self, ".patch_version = {},", self.patch_version);
dedent!(self);
add_line!(self, "}};");
@ -1678,6 +1681,7 @@ pub fn render_c_code(
lexical_grammar: LexicalGrammar,
default_aliases: AliasMap,
abi_version: usize,
semantic_version: (u8, u8, u8),
) -> String {
assert!(
(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version),
@ -1703,6 +1707,9 @@ pub fn render_c_code(
unique_aliases: Vec::new(),
field_names: Vec::new(),
abi_version,
major_version: semantic_version.0,
minor_version: semantic_version.1,
patch_version: semantic_version.2,
}
.generate()
}

View file

@ -20,7 +20,7 @@ use tree_sitter_tags::TagsContext;
const BUILD_VERSION: &str = env!("CARGO_PKG_VERSION");
const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA");
const DEFAULT_GENERATE_ABI_VERSION: usize = 14;
const DEFAULT_GENERATE_ABI_VERSION: usize = 15;
#[derive(Subcommand)]
#[command(about="Generates and tests parsers", author=crate_authors!("\n"), styles=get_styles())]

View file

@ -1,7 +1,7 @@
/* automatically generated by rust-bindgen 0.69.4 */
pub const TREE_SITTER_LANGUAGE_VERSION: u32 = 14;
pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: u32 = 13;
pub const TREE_SITTER_LANGUAGE_VERSION: u32 = 15;
pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: u32 = 15;
pub type TSStateId = u16;
pub type TSSymbol = u16;
pub type TSFieldId = u16;
@ -721,7 +721,11 @@ extern "C" {
}
extern "C" {
#[doc = " Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."]
pub fn ts_language_version(self_: *const TSLanguage) -> u32;
pub fn ts_language_abi_version(self_: *const TSLanguage) -> u32;
}
extern "C" {
#[doc = " Get the [Semantic Version](https://semver.org/) for this language. This\n version number is used to signal if a given parser might be incompatible\n with existing queries when upgraded between major versions.\n\n The Semantic Version is encoded as an unsigned 32-bit integer, where the\n major, minor, and patch version numbers are encoded in the 24 least significant\n bits of the integer, using 8 bits for each number.\n\n Layout of the returned integer:\n\n MSB LSB\n +--------+--------+--------+--------+\n |00000000| Major | Minor | Patch |\n +--------+--------+--------+--------+\n 31 24 23 16 15 8 7 0\n"]
pub fn ts_language_semantic_version(self_: *const TSLanguage) -> u32;
}
extern "C" {
#[doc = " Get the next parse state. Combine this with lookahead iterators to generate\n completion suggestions or valid symbols in error nodes. Use\n [`ts_node_grammar_symbol`] for valid symbols."]

View file

@ -282,10 +282,31 @@ pub struct LossyUtf8<'a> {
impl Language {
/// Get the ABI version number that indicates which version of the Tree-sitter CLI
/// that was used to generate this [`Language`].
#[doc(alias = "ts_language_version")]
#[doc(alias = "ts_language_abi_version")]
#[must_use]
pub fn version(&self) -> usize {
unsafe { ffi::ts_language_version(self.0) as usize }
pub fn abi_version(&self) -> usize {
unsafe { ffi::ts_language_abi_version(self.0) as usize }
}
/// Get the [Semantic Version](https://semver.org/) for this language. This
/// version number is used to signal if a given parser might be incompatible
/// with existing queries when upgraded between major versions.
///
/// The underlying C function returns an unsigned 32-bit integer in which the
/// major, minor, and patch version numbers occupy the 24 least significant
/// bits, using 8 bits for each number. That value is widened to `usize` here.
///
/// Layout of the returned integer:
///
/// ```text
///  MSB                             LSB
/// +--------+--------+--------+--------+
/// |00000000| Major  | Minor  | Patch  |
/// +--------+--------+--------+--------+
///  31    24 23    16 15     8 7      0
/// ```
///
/// The individual components can be recovered with `(v >> 16) & 0xFF`,
/// `(v >> 8) & 0xFF`, and `v & 0xFF` respectively.
#[doc(alias = "ts_language_semantic_version")]
#[must_use]
pub fn semantic_version(&self) -> usize {
    // SAFETY: relies on `self.0` being a valid `TSLanguage` pointer.
    // NOTE(review): that invariant is established outside this view — confirm.
    let packed = unsafe { ffi::ts_language_semantic_version(self.0) };
    packed as usize
}
/// Get the number of distinct node types in this language.
@ -448,7 +469,7 @@ impl Parser {
/// [`MIN_COMPATIBLE_LANGUAGE_VERSION`](MIN_COMPATIBLE_LANGUAGE_VERSION) constants.
#[doc(alias = "ts_parser_set_language")]
pub fn set_language(&mut self, language: &Language) -> Result<(), LanguageError> {
let version = language.version();
let version = language.abi_version();
if (MIN_COMPATIBLE_LANGUAGE_VERSION..=LANGUAGE_VERSION).contains(&version) {
unsafe {
ffi::ts_parser_set_language(self.0.as_ptr(), language.0);
@ -1728,7 +1749,7 @@ impl Query {
column: 0,
offset: 0,
message: LanguageError {
version: language.version(),
version: language.abi_version(),
}
.to_string(),
kind: QueryErrorKind::Language,

View file

@ -48,10 +48,10 @@ class ParserImpl {
language = null;
} else if (language.constructor === Language) {
address = language[0];
const version = C._ts_language_version(address);
const version = C._ts_language_abi_version(address);
if (version < MIN_COMPATIBLE_VERSION || VERSION < version) {
throw new Error(
`Incompatible language version ${version}. ` +
`Incompatible language abi version ${version}. ` +
`Compatibility range ${MIN_COMPATIBLE_VERSION} through ${VERSION}.`,
);
}
@ -680,8 +680,12 @@ class Language {
}
}
get version() {
return C._ts_language_version(this[0]);
get abiVersion() {
return C._ts_language_abi_version(this[0]);
}
get semanticVersion() {
return C._ts_language_semantic_version(this[0]);
}
get fieldCount() {

View file

@ -9,9 +9,9 @@
extern "C" {
#endif
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
/****************************/
/* Section - ABI Versioning */
@ -24,13 +24,13 @@ extern "C" {
* The Tree-sitter library is generally backwards-compatible with languages
* generated using older CLI versions, but is not forwards-compatible.
*/
#define TREE_SITTER_LANGUAGE_VERSION 14
#define TREE_SITTER_LANGUAGE_VERSION 15
/**
* The earliest ABI version that is supported by the current version of the
* library.
*/
#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 13
#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 15
/*******************/
/* Section - Types */
@ -1079,7 +1079,27 @@ TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol);
*
* See also [`ts_parser_set_language`].
*/
uint32_t ts_language_version(const TSLanguage *self);
uint32_t ts_language_abi_version(const TSLanguage *self);
/**
 * Get the [Semantic Version](https://semver.org/) for this language. This
 * version number is used to signal if a given parser might be incompatible
 * with existing queries when upgraded between major versions.
 *
 * The Semantic Version is encoded as an unsigned 32-bit integer, where the
 * major, minor, and patch version numbers are encoded in the 24 least significant
 * bits of the integer, using 8 bits for each number.
 *
 * Layout of the returned integer:
 *
 *    MSB                             LSB
 *   +--------+--------+--------+--------+
 *   |00000000| Major  | Minor  | Patch  |
 *   +--------+--------+--------+--------+
 *    31    24 23    16 15     8 7      0
 *
 */
uint32_t ts_language_semantic_version(const TSLanguage *self);
/**
* Get the next parse state. Combine this with lookahead iterators to generate

View file

@ -1,6 +1,12 @@
#include "alloc.h"
#include <stdlib.h>
#ifdef _WIN32
#define PUBLIC __declspec(dllexport)
#else
#define PUBLIC __attribute__((visibility("default")))
#endif
static void *ts_malloc_default(size_t size) {
void *result = malloc(size);
if (size > 0 && !result) {
@ -29,10 +35,10 @@ static void *ts_realloc_default(void *buffer, size_t size) {
}
// Allow clients to override allocation functions dynamically
void *(*ts_current_malloc)(size_t) = ts_malloc_default;
void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default;
void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default;
void (*ts_current_free)(void *) = free;
PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default;
PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default;
PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default;
PUBLIC void (*ts_current_free)(void *) = free;
void ts_set_allocator(
void *(*new_malloc)(size_t size),

View file

@ -24,8 +24,12 @@ uint32_t ts_language_state_count(const TSLanguage *self) {
return self->state_count;
}
uint32_t ts_language_version(const TSLanguage *self) {
return self->version;
uint32_t ts_language_abi_version(const TSLanguage *self) {
return self->abi_version;
}
// Pack the language's major/minor/patch numbers into one 32-bit value:
// major in bits 16-23, minor in bits 8-15, patch in bits 0-7, top byte zero.
uint32_t ts_language_semantic_version(const TSLanguage *self) {
  // The struct stores each component as a full uint32_t, so mask to 8 bits:
  // an out-of-range field must not bleed into a neighbouring component or
  // into the reserved top byte of the documented layout.
  return ((self->major_version & 0xFFu) << 16) |
         ((self->minor_version & 0xFFu) << 8) |
         (self->patch_version & 0xFFu);
}
uint32_t ts_language_field_count(const TSLanguage *self) {

View file

@ -186,7 +186,7 @@ static inline bool ts_language_state_is_primary(
const TSLanguage *self,
TSStateId state
) {
if (self->version >= 14) {
if (self->abi_version >= 14) {
return state == self->primary_state_ids[state];
} else {
return true;

View file

@ -1875,8 +1875,8 @@ bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
if (language) {
if (
language->version > TREE_SITTER_LANGUAGE_VERSION ||
language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
language->abi_version > TREE_SITTER_LANGUAGE_VERSION ||
language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
) return false;
if (ts_language_is_wasm(language)) {

View file

@ -20,6 +20,29 @@ typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
extern void *(*ts_current_malloc)(size_t);
extern void *(*ts_current_calloc)(size_t, size_t);
extern void *(*ts_current_realloc)(void *, size_t);
extern void (*ts_current_free)(void *);
// Allow consumers to use allocation functions
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#define malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#define calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#define realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#define free ts_current_free
#endif
typedef struct {
TSFieldId field_id;
uint8_t child_index;
@ -87,7 +110,7 @@ typedef union {
} TSParseActionEntry;
struct TSLanguage {
uint32_t version;
uint32_t abi_version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
@ -123,6 +146,9 @@ struct TSLanguage {
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
uint32_t major_version;
uint32_t minor_version;
uint32_t patch_version;
};
/*

View file

@ -2677,8 +2677,8 @@ TSQuery *ts_query_new(
) {
if (
!language ||
language->version > TREE_SITTER_LANGUAGE_VERSION ||
language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
language->abi_version > TREE_SITTER_LANGUAGE_VERSION ||
language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
) {
*error_type = TSQueryErrorLanguage;
return NULL;

View file

@ -1188,7 +1188,7 @@ const TSLanguage *ts_wasm_store_load_language(
);
}
if (language->version >= 14) {
if (language->abi_version >= 14) {
language->primary_state_ids = copy(
&memory[wasm_language.primary_state_ids],
wasm_language.state_count * sizeof(TSStateId)

File diff suppressed because it is too large Load diff