feat: add option to disable parse state optimizations

This commit is contained in:
Amaan Qureshi 2025-09-25 02:19:52 -04:00 committed by Amaan Qureshi
parent a9bce7c18a
commit 5f7806f99e
6 changed files with 40 additions and 1 deletions

1
Cargo.lock generated
View file

@ -1959,6 +1959,7 @@ name = "tree-sitter-generate"
version = "0.26.0"
dependencies = [
"anyhow",
"bitflags 2.9.4",
"dunce",
"indexmap",
"indoc",

View file

@ -32,6 +32,7 @@ use tree_sitter_cli::{
wasm,
};
use tree_sitter_config::Config;
use tree_sitter_generate::OptLevel;
use tree_sitter_highlight::Highlighter;
use tree_sitter_loader::{self as loader, Bindings, TreeSitterJSON};
use tree_sitter_tags::TagsContext;
@ -162,6 +163,11 @@ struct Generate {
/// The name or path of the JavaScript runtime to use for generating parsers, specify `native`
/// to use the native `QuickJS` runtime
pub js_runtime: Option<String>,
/// Disable optimizations when generating the parser. Currently, this only affects
/// the merging of compatible parse states.
#[arg(long)]
pub disable_optimizations: bool,
}
#[derive(Args)]
@ -868,6 +874,11 @@ impl Generate {
self.report_states_for_rule.as_deref(),
self.js_runtime.as_deref(),
self.emit != GenerationEmit::Json,
if self.disable_optimizations {
OptLevel::empty()
} else {
OptLevel::default()
},
) {
if self.json {
eprintln!("{}", serde_json::to_string_pretty(&err)?);

View file

@ -26,6 +26,7 @@ qjs-rt = ["load", "rquickjs", "pathdiff"]
[dependencies]
anyhow.workspace = true
bitflags = "2.9.4"
dunce = "1.0.5"
indexmap.workspace = true
indoc.workspace = true

View file

@ -27,6 +27,7 @@ use crate::{
node_types::VariableInfo,
rules::{AliasMap, Symbol, SymbolType, TokenSet},
tables::{LexTable, ParseAction, ParseTable, ParseTableEntry},
OptLevel,
};
pub struct Tables {
@ -43,6 +44,7 @@ pub fn build_tables(
variable_info: &[VariableInfo],
inlines: &InlinedProductionMap,
report_symbol_name: Option<&str>,
optimizations: OptLevel,
) -> BuildTableResult<Tables> {
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
let following_tokens =
@ -78,6 +80,7 @@ pub fn build_tables(
simple_aliases,
&token_conflict_map,
&keywords,
optimizations,
);
let lex_tables = build_lex_table(
&mut parse_table,

View file

@ -11,6 +11,7 @@ use crate::{
grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
rules::{AliasMap, Symbol, TokenSet},
tables::{GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry},
OptLevel,
};
pub fn minimize_parse_table(
@ -20,6 +21,7 @@ pub fn minimize_parse_table(
simple_aliases: &AliasMap,
token_conflict_map: &TokenConflictMap,
keywords: &TokenSet,
optimizations: OptLevel,
) {
let mut minimizer = Minimizer {
parse_table,
@ -29,7 +31,9 @@ pub fn minimize_parse_table(
keywords,
simple_aliases,
};
minimizer.merge_compatible_states();
if optimizations.contains(OptLevel::MergeStates) {
minimizer.merge_compatible_states();
}
minimizer.remove_unit_reductions();
minimizer.remove_unused_states();
minimizer.reorder_states_by_descending_size();

View file

@ -8,6 +8,7 @@ use std::{
};
use anyhow::Result;
use bitflags::bitflags;
use log::warn;
use node_types::VariableInfo;
use regex::{Regex, RegexBuilder};
@ -191,6 +192,19 @@ impl From<rquickjs::Error> for JSError {
}
}
bitflags! {
/// Set of optional optimizations applied while generating a parser.
///
/// The value is threaded from the public generate entry points down to
/// `minimize_parse_table`. `OptLevel::empty()` turns every optimization
/// off; the CLI passes it when `--disable-optimizations` is given.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct OptLevel: u32 {
/// When set, `minimize_parse_table` calls `merge_compatible_states`.
/// When clear, that step is skipped; the other minimization steps
/// (unit-reduction removal, unused-state removal, reordering) still run.
const MergeStates = 1 << 0;
}
}
impl Default for OptLevel {
fn default() -> Self {
Self::MergeStates
}
}
#[cfg(feature = "load")]
#[allow(clippy::too_many_arguments)]
pub fn generate_parser_in_directory<T, U, V>(
@ -201,6 +215,7 @@ pub fn generate_parser_in_directory<T, U, V>(
report_symbol_name: Option<&str>,
js_runtime: Option<&str>,
generate_parser: bool,
optimizations: OptLevel,
) -> GenerateResult<()>
where
T: Into<PathBuf>,
@ -278,6 +293,7 @@ where
abi_version,
semantic_version.map(|v| (v.major as u8, v.minor as u8, v.patch as u8)),
report_symbol_name,
optimizations,
)?;
write_file(&src_path.join("parser.c"), c_code)?;
@ -301,6 +317,7 @@ pub fn generate_parser_for_grammar(
LANGUAGE_VERSION,
semantic_version,
None,
OptLevel::empty(),
)?;
Ok((input_grammar.name, parser.c_code))
}
@ -334,6 +351,7 @@ fn generate_parser_for_grammar_with_opts(
abi_version: usize,
semantic_version: Option<(u8, u8, u8)>,
report_symbol_name: Option<&str>,
optimizations: OptLevel,
) -> GenerateResult<GeneratedParser> {
let JSONOutput {
syntax_grammar,
@ -353,6 +371,7 @@ fn generate_parser_for_grammar_with_opts(
&variable_info,
&inlines,
report_symbol_name,
optimizations,
)?;
let c_code = render_c_code(
&input_grammar.name,