From 516fd6f6def1615cb5dc004ab41c348c7de6d182 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 17 Jan 2022 14:45:07 -0800 Subject: [PATCH] Add --abi flag to generate command, generate version 13 by default --- cli/src/generate/mod.rs | 12 +++--- cli/src/generate/render.rs | 65 ++++++++++++++++++-------------- cli/src/main.rs | 28 ++++++++++++-- lib/include/tree_sitter/parser.h | 2 +- lib/src/language.h | 2 +- script/generate-fixtures | 2 +- script/generate-fixtures.cmd | 2 +- 7 files changed, 71 insertions(+), 42 deletions(-) diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index d61a63f0..ca2209cb 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -40,7 +40,7 @@ struct GeneratedParser { pub fn generate_parser_in_directory( repo_path: &PathBuf, grammar_path: Option<&str>, - next_abi: bool, + abi_version: usize, generate_bindings: bool, report_symbol_name: Option<&str>, ) -> Result<()> { @@ -80,14 +80,14 @@ pub fn generate_parser_in_directory( lexical_grammar, inlines, simple_aliases, - next_abi, + abi_version, report_symbol_name, )?; write_file(&src_path.join("parser.c"), c_code)?; write_file(&src_path.join("node-types.json"), node_types_json)?; - if next_abi { + if abi_version == tree_sitter::LANGUAGE_VERSION { write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; } @@ -109,7 +109,7 @@ pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String lexical_grammar, inlines, simple_aliases, - true, + tree_sitter::LANGUAGE_VERSION, None, )?; Ok((input_grammar.name, parser.c_code)) @@ -121,7 +121,7 @@ fn generate_parser_for_grammar_with_opts( lexical_grammar: LexicalGrammar, inlines: InlinedProductionMap, simple_aliases: AliasMap, - next_abi: bool, + abi_version: usize, report_symbol_name: Option<&str>, ) -> Result { let variable_info = @@ -149,7 +149,7 @@ fn generate_parser_for_grammar_with_opts( syntax_grammar, lexical_grammar, simple_aliases, - next_abi, + abi_version, ); Ok(GeneratedParser 
{ c_code, diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 6217cce8..06e22fa8 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -1,18 +1,25 @@ -use super::char_tree::{CharacterTree, Comparator}; -use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}; -use super::rules::{Alias, AliasMap, Symbol, SymbolType}; -use super::tables::{ - AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable, - ParseTableEntry, +use super::{ + char_tree::{CharacterTree, Comparator}, + grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}, + rules::{Alias, AliasMap, Symbol, SymbolType}, + tables::{ + AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable, + ParseTableEntry, + }, }; use core::ops::Range; -use std::cmp; -use std::collections::{HashMap, HashSet}; -use std::fmt::Write; -use std::mem::swap; +use std::{ + cmp, + collections::{HashMap, HashSet}, + fmt::Write, + mem::swap, +}; const LARGE_CHARACTER_RANGE_COUNT: usize = 8; const SMALL_STATE_THRESHOLD: usize = 64; +const ABI_VERSION_MIN: usize = 13; +const ABI_VERSION_MAX: usize = tree_sitter::LANGUAGE_VERSION; +const ABI_VERSION_WITH_PRIMARY_STATES: usize = 14; macro_rules! 
add { ($this: tt, $($arg: tt)*) => {{ @@ -69,7 +76,7 @@ struct Generator { field_names: Vec, #[allow(unused)] - next_abi: bool, + abi_version: usize, } struct TransitionSummary { @@ -291,16 +298,7 @@ impl Generator { }) .count(); - add_line!( - self, - "#define LANGUAGE_VERSION {}", - if self.next_abi { - tree_sitter::LANGUAGE_VERSION - } else { - tree_sitter::LANGUAGE_VERSION - 1 - } - ); - + add_line!(self, "#define LANGUAGE_VERSION {}", self.abi_version); add_line!( self, "#define STATE_COUNT {}", @@ -1363,9 +1361,7 @@ impl Generator { add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,"); add_line!(self, ".state_count = STATE_COUNT,"); add_line!(self, ".large_state_count = LARGE_STATE_COUNT,"); - if self.next_abi { - add_line!(self, ".production_id_count = PRODUCTION_ID_COUNT,"); - } + add_line!(self, ".production_id_count = PRODUCTION_ID_COUNT,"); add_line!(self, ".field_count = FIELD_COUNT,"); add_line!( self, @@ -1393,7 +1389,10 @@ impl Generator { if !self.parse_table.production_infos.is_empty() { add_line!(self, ".alias_sequences = &ts_alias_sequences[0][0],"); } - add_line!(self, ".ts_primary_state_ids = ts_primary_state_ids,"); + + if self.abi_version >= ABI_VERSION_WITH_PRIMARY_STATES { + add_line!(self, ".primary_state_ids = ts_primary_state_ids,"); + } // Lexing add_line!(self, ".lex_modes = ts_lex_modes,"); @@ -1627,8 +1626,9 @@ impl Generator { /// * `default_aliases` - A map describing the global rename rules that should apply. /// the keys are symbols that are *always* aliased in the same way, and the values /// are the aliases that are applied to those symbols. -/// * `next_abi` - A boolean indicating whether to opt into the new, unstable parse -/// table format. This is mainly used for testing, when developing Tree-sitter itself. +/// * `abi_version` - The language ABI version that should be generated. 
Usually +/// you want Tree-sitter's current version, but right after making an ABI +/// change, it may be useful to generate code with the previous ABI. pub(crate) fn render_c_code( name: &str, parse_table: ParseTable, @@ -1638,8 +1638,15 @@ pub(crate) fn render_c_code( syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, default_aliases: AliasMap, - next_abi: bool, + abi_version: usize, ) -> String { + if !(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version) { + panic!( + "This version of Tree-sitter can only generate parsers with ABI version {} - {}, not {}", + ABI_VERSION_MIN, ABI_VERSION_MAX, abi_version + ); + } + Generator { buffer: String::new(), indent_level: 0, @@ -1658,7 +1665,7 @@ pub(crate) fn render_c_code( symbol_map: HashMap::new(), unique_aliases: Vec::new(), field_names: Vec::new(), - next_abi, + abi_version, } .generate() } diff --git a/cli/src/main.rs b/cli/src/main.rs index 554eb2c5..e35b101a 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -11,6 +11,7 @@ use tree_sitter_loader as loader; const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION"); const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA"); +const DEFAULT_GENERATE_ABI_VERSION: usize = 13; fn main() { let result = run(); @@ -90,7 +91,19 @@ fn run() -> Result<()> { .about("Generate a parser") .arg(Arg::with_name("grammar-path").index(1)) .arg(Arg::with_name("log").long("log")) - .arg(Arg::with_name("prev-abi").long("prev-abi")) + .arg( + Arg::with_name("abi-version") + .long("abi") + .value_name("version") + .help(&format!( + concat!( + "Select the language ABI version to generate (default {}).\n", + "Use --abi=latest to generate the newest supported version ({}).", + ), + DEFAULT_GENERATE_ABI_VERSION, + tree_sitter::LANGUAGE_VERSION, + )), + ) .arg(Arg::with_name("no-bindings").long("no-bindings")) .arg( Arg::with_name("report-states-for-rule") @@ -266,12 +279,21 @@ fn run() -> Result<()> { if matches.is_present("log") { logger::init(); } - let 
new_abi = !matches.is_present("prev-abi"); + let abi_version = + matches + .value_of("abi-version") + .map_or(DEFAULT_GENERATE_ABI_VERSION, |version| { + if version == "latest" { + tree_sitter::LANGUAGE_VERSION + } else { + version.parse().expect("invalid abi version flag") + } + }); let generate_bindings = !matches.is_present("no-bindings"); generate::generate_parser_in_directory( &current_dir, grammar_path, - new_abi, + abi_version, generate_bindings, report_symbol_name, )?; diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index 66379d3b..fdd2cef7 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -110,7 +110,7 @@ struct TSLanguage { const TSSymbol *public_symbol_map; const uint16_t *alias_map; const TSSymbol *alias_sequences; - const TSStateId *ts_primary_state_ids; + const TSStateId *primary_state_ids; const TSLexMode *lex_modes; bool (*lex_fn)(TSLexer *, TSStateId); bool (*keyword_lex_fn)(TSLexer *, TSStateId); diff --git a/lib/src/language.h b/lib/src/language.h index 4af4592a..1b65f25f 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -207,7 +207,7 @@ static inline bool ts_language_state_is_primary( TSStateId state ) { if (self->version >= 14) { - return state == self->ts_primary_state_ids[state]; + return state == self->primary_state_ids[state]; } else { return true; } diff --git a/script/generate-fixtures b/script/generate-fixtures index 135c4d71..85298c46 100755 --- a/script/generate-fixtures +++ b/script/generate-fixtures @@ -22,6 +22,6 @@ while read -r grammar_file; do echo "Regenerating ${grammar_name} parser" ( cd $grammar_dir - "$tree_sitter" generate src/grammar.json --no-bindings + "$tree_sitter" generate src/grammar.json --no-bindings --abi=latest ) done <<< "$grammar_files" diff --git a/script/generate-fixtures.cmd b/script/generate-fixtures.cmd index 227029b6..6d11a3a3 100644 --- a/script/generate-fixtures.cmd +++ b/script/generate-fixtures.cmd @@ -6,7 +6,7 @@ set 
tree_sitter="%cd%\target\release\tree-sitter" for /f "tokens=*" %%f in ('dir test\fixtures\grammars\grammar.js /b/s') do ( pushd "%%f\.." echo Regenerating parser !cd! - %tree_sitter% generate src\grammar.json --no-bindings + %tree_sitter% generate src\grammar.json --no-bindings --abi=latest popd )