feat!: introduce parser introspection by baking the repo's semantic version into the generated parser

This commit is contained in:
Amaan Qureshi 2024-02-12 02:27:17 -05:00
parent b66b1a7a92
commit 2c192fa038
No known key found for this signature in database
GPG key ID: E67890ADC4227273
15 changed files with 1862 additions and 2156 deletions

View file

@ -11,20 +11,23 @@ mod render;
mod rules;
mod tables;
use std::io::Write;
use std::path::Path;
use std::process::{Command, Stdio};
use std::{env, fs};
use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use semver::Version;
use serde::Deserialize;
use self::build_tables::build_tables;
use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use self::parse_grammar::parse_grammar;
use self::prepare_grammar::prepare_grammar;
use self::render::render_c_code;
use self::rules::AliasMap;
use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use semver::Version;
use std::io::Write;
use std::path::Path;
use std::process::{Command, Stdio};
use std::{env, fs};
lazy_static! {
static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
@ -73,6 +76,19 @@ pub fn generate_parser_in_directory(
prepare_grammar(&input_grammar)?;
let language_name = input_grammar.name;
let language_semver = read_package_json_version()?;
let rust_binding_version = read_rust_binding_version()?;
if language_semver != rust_binding_version {
anyhow::bail!(
"Error:
The version of your language grammar in `package.json` is `{language_semver}`, but the version of your language grammar in `Cargo.toml` is `{rust_binding_version}`.
These versions must match. Please adjust one of these files to match the other, and then try running `tree-sitter generate` again.
Consider delegating this process to the `release` subcommand, which will handle git tags, GitHub releases, and publishing to crates.io, npmjs, and PyPI for you.
Read more here: https://tree-sitter.github.io/tree-sitter/creating-parsers#releasing-a-new-grammar-version",
);
}
// Generate the parser and related files.
let GeneratedParser {
c_code,
@ -85,6 +101,11 @@ pub fn generate_parser_in_directory(
simple_aliases,
abi_version,
report_symbol_name,
(
language_semver.major as u8,
language_semver.minor as u8,
language_semver.patch as u8,
),
)?;
write_file(&src_path.join("parser.c"), c_code)?;
@ -111,6 +132,7 @@ pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String
simple_aliases,
tree_sitter::LANGUAGE_VERSION,
None,
(0, 0, 0),
)?;
Ok((input_grammar.name, parser.c_code))
}
@ -123,6 +145,7 @@ fn generate_parser_for_grammar_with_opts(
simple_aliases: AliasMap,
abi_version: usize,
report_symbol_name: Option<&str>,
semantic_version: (u8, u8, u8),
) -> Result<GeneratedParser> {
let variable_info =
node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
@ -150,6 +173,7 @@ fn generate_parser_for_grammar_with_opts(
lexical_grammar,
simple_aliases,
abi_version,
semantic_version,
);
Ok(GeneratedParser {
c_code,
@ -157,6 +181,30 @@ fn generate_parser_for_grammar_with_opts(
})
}
/// Reads the `version` field of `package.json` and parses it as a semantic version.
///
/// The path is relative, so the file is resolved against the process's current working
/// directory.
///
/// # Errors
///
/// Returns an error if `package.json` cannot be read, is not valid JSON containing a string
/// `version` field, or if that field is not a valid semantic version.
fn read_package_json_version() -> Result<Version> {
    /// Only the `version` key of `package.json` is deserialized.
    #[derive(Deserialize)]
    struct PackageJson {
        version: String,
    }

    let path = "package.json";
    let text = fs::read_to_string(path).with_context(|| format!("Failed to read {path:?}"))?;
    let package_json: PackageJson =
        serde_json::from_str(&text).with_context(|| format!("Failed to parse {path:?} as JSON"))?;

    // Name the file and the offending value so a bad version string is easy to locate.
    Version::parse(&package_json.version).with_context(|| {
        format!(
            "Invalid semantic version {:?} in {path:?}",
            package_json.version
        )
    })
}
fn read_rust_binding_version() -> Result<Version> {
let path = "Cargo.toml";
let text = fs::read_to_string(path)?;
let cargo_toml = toml::from_str::<toml::Value>(text.as_ref())?;
Ok(Version::parse(
cargo_toml["package"]["version"].as_str().unwrap(),
)?)
}
pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
if grammar_path.is_dir() {
return Err(anyhow!(

View file

@ -17,9 +17,9 @@ use std::{
const LARGE_CHARACTER_RANGE_COUNT: usize = 8;
const SMALL_STATE_THRESHOLD: usize = 64;
const ABI_VERSION_MIN: usize = 13;
const ABI_VERSION_MIN: usize = 15;
const ABI_VERSION_MAX: usize = tree_sitter::LANGUAGE_VERSION;
const ABI_VERSION_WITH_PRIMARY_STATES: usize = 14;
const ABI_VERSION_WITH_PRIMARY_STATES: usize = 15;
macro_rules! add {
($this: tt, $($arg: tt)*) => {{
@ -74,9 +74,10 @@ struct Generator {
unique_aliases: Vec<Alias>,
symbol_map: HashMap<Symbol, Symbol>,
field_names: Vec<String>,
#[allow(unused)]
abi_version: usize,
major_version: u8,
minor_version: u8,
patch_version: u8,
}
struct TransitionSummary {
@ -984,7 +985,7 @@ impl Generator {
if action.in_main_token {
add!(self, "ADVANCE({});", action.state);
} else {
add!(self, "SKIP({})", action.state);
add!(self, "SKIP({});", action.state);
}
}
@ -1342,7 +1343,7 @@ impl Generator {
indent!(self);
add_line!(self, "static const TSLanguage language = {{");
indent!(self);
add_line!(self, ".version = LANGUAGE_VERSION,");
add_line!(self, ".abi_version = LANGUAGE_VERSION,");
// Quantities
add_line!(self, ".symbol_count = SYMBOL_COUNT,");
@ -1406,9 +1407,11 @@ impl Generator {
add_line!(self, "}},");
}
if self.abi_version >= ABI_VERSION_WITH_PRIMARY_STATES {
add_line!(self, ".primary_state_ids = ts_primary_state_ids,");
}
add_line!(self, ".primary_state_ids = ts_primary_state_ids,");
add_line!(self, ".major_version = {},", self.major_version);
add_line!(self, ".minor_version = {},", self.minor_version);
add_line!(self, ".patch_version = {},", self.patch_version);
dedent!(self);
add_line!(self, "}};");
@ -1678,6 +1681,7 @@ pub fn render_c_code(
lexical_grammar: LexicalGrammar,
default_aliases: AliasMap,
abi_version: usize,
semantic_version: (u8, u8, u8),
) -> String {
assert!(
(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version),
@ -1703,6 +1707,9 @@ pub fn render_c_code(
unique_aliases: Vec::new(),
field_names: Vec::new(),
abi_version,
major_version: semantic_version.0,
minor_version: semantic_version.1,
patch_version: semantic_version.2,
}
.generate()
}