diff --git a/Cargo.lock b/Cargo.lock index 9f92d9ed..c1e8905f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1959,6 +1959,7 @@ name = "tree-sitter-generate" version = "0.26.0" dependencies = [ "anyhow", + "bitflags 2.9.4", "dunce", "indexmap", "indoc", diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 74f33268..6f64c801 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -32,6 +32,7 @@ use tree_sitter_cli::{ wasm, }; use tree_sitter_config::Config; +use tree_sitter_generate::OptLevel; use tree_sitter_highlight::Highlighter; use tree_sitter_loader::{self as loader, Bindings, TreeSitterJSON}; use tree_sitter_tags::TagsContext; @@ -162,6 +163,11 @@ struct Generate { /// The name or path of the JavaScript runtime to use for generating parsers, specify `native` /// to use the native `QuickJS` runtime pub js_runtime: Option, + + /// Disable optimizations when generating the parser. Currently, this only affects + /// the merging of compatible parse states. + #[arg(long)] + pub disable_optimizations: bool, } #[derive(Args)] @@ -868,6 +874,11 @@ impl Generate { self.report_states_for_rule.as_deref(), self.js_runtime.as_deref(), self.emit != GenerationEmit::Json, + if self.disable_optimizations { + OptLevel::empty() + } else { + OptLevel::default() + }, ) { if self.json { eprintln!("{}", serde_json::to_string_pretty(&err)?); diff --git a/crates/generate/Cargo.toml b/crates/generate/Cargo.toml index 61b1686a..1588763d 100644 --- a/crates/generate/Cargo.toml +++ b/crates/generate/Cargo.toml @@ -26,6 +26,7 @@ qjs-rt = ["load", "rquickjs", "pathdiff"] [dependencies] anyhow.workspace = true +bitflags = "2.9.4" dunce = "1.0.5" indexmap.workspace = true indoc.workspace = true diff --git a/crates/generate/src/build_tables.rs b/crates/generate/src/build_tables.rs index f455664b..8c6ef2a4 100644 --- a/crates/generate/src/build_tables.rs +++ b/crates/generate/src/build_tables.rs @@ -27,6 +27,7 @@ use crate::{ node_types::VariableInfo, rules::{AliasMap, Symbol, SymbolType, TokenSet}, tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}, + OptLevel, }; pub struct Tables { @@ -43,6 +44,7 @@ pub fn build_tables( variable_info: &[VariableInfo], inlines: &InlinedProductionMap, report_symbol_name: Option<&str>, + optimizations: OptLevel, ) -> BuildTableResult { let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines); let following_tokens = @@ -78,6 +80,7 @@ pub fn build_tables( simple_aliases, &token_conflict_map, &keywords, + optimizations, ); let lex_tables = build_lex_table( &mut parse_table, diff --git a/crates/generate/src/build_tables/minimize_parse_table.rs b/crates/generate/src/build_tables/minimize_parse_table.rs index 9655cb88..1d70625e 100644 --- a/crates/generate/src/build_tables/minimize_parse_table.rs +++ b/crates/generate/src/build_tables/minimize_parse_table.rs @@ -11,6 +11,7 @@ use crate::{ grammars::{LexicalGrammar, SyntaxGrammar, VariableType}, rules::{AliasMap, Symbol, TokenSet}, tables::{GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry}, + OptLevel, }; pub fn minimize_parse_table( @@ -20,6 +21,7 @@ pub fn minimize_parse_table( simple_aliases: &AliasMap, token_conflict_map: &TokenConflictMap, keywords: &TokenSet, + optimizations: OptLevel, ) { let mut minimizer = Minimizer { parse_table, @@ -29,7 +31,9 @@ pub fn minimize_parse_table( keywords, simple_aliases, }; - minimizer.merge_compatible_states(); + if optimizations.contains(OptLevel::MergeStates) { + minimizer.merge_compatible_states(); + } minimizer.remove_unit_reductions(); minimizer.remove_unused_states(); minimizer.reorder_states_by_descending_size(); diff --git a/crates/generate/src/generate.rs b/crates/generate/src/generate.rs index cf6d1009..09e6e389 100644 --- a/crates/generate/src/generate.rs +++ b/crates/generate/src/generate.rs @@ -8,6 +8,7 @@ use std::{ }; use anyhow::Result; +use bitflags::bitflags; use log::warn; use node_types::VariableInfo; use regex::{Regex, RegexBuilder}; @@ -191,6 +192,19 @@ impl From for JSError { } } +bitflags! { + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct OptLevel: u32 { + const MergeStates = 1 << 0; + } +} + +impl Default for OptLevel { + fn default() -> Self { + Self::MergeStates + } +} + #[cfg(feature = "load")] #[allow(clippy::too_many_arguments)] pub fn generate_parser_in_directory( @@ -201,6 +215,7 @@ pub fn generate_parser_in_directory( report_symbol_name: Option<&str>, js_runtime: Option<&str>, generate_parser: bool, + optimizations: OptLevel, ) -> GenerateResult<()> where T: Into, @@ -278,6 +293,7 @@ where abi_version, semantic_version.map(|v| (v.major as u8, v.minor as u8, v.patch as u8)), report_symbol_name, + optimizations, )?; write_file(&src_path.join("parser.c"), c_code)?; @@ -301,6 +317,7 @@ pub fn generate_parser_for_grammar( LANGUAGE_VERSION, semantic_version, None, + OptLevel::empty(), )?; Ok((input_grammar.name, parser.c_code)) } @@ -334,6 +351,7 @@ fn generate_parser_for_grammar_with_opts( abi_version: usize, semantic_version: Option<(u8, u8, u8)>, report_symbol_name: Option<&str>, + optimizations: OptLevel, ) -> GenerateResult { let JSONOutput { syntax_grammar, @@ -353,6 +371,7 @@ fn generate_parser_for_grammar_with_opts( &variable_info, &inlines, report_symbol_name, + optimizations, )?; let c_code = render_c_code( &input_grammar.name,