From ab9b098aadcaf9dc81739f863bb05812f4e6d722 Mon Sep 17 00:00:00 2001 From: bglgwyng Date: Sun, 9 Nov 2025 16:19:49 +0900 Subject: [PATCH] refactor: extract grammar introspection into separate module --- crates/generate/src/generate.rs | 63 ++-------------------- crates/generate/src/introspect_grammar.rs | 66 +++++++++++++++++++++++ crates/generate/src/node_types.rs | 27 +++++++--- 3 files changed, 89 insertions(+), 67 deletions(-) create mode 100644 crates/generate/src/introspect_grammar.rs diff --git a/crates/generate/src/generate.rs b/crates/generate/src/generate.rs index 69e9a2b9..ad0c51d4 100644 --- a/crates/generate/src/generate.rs +++ b/crates/generate/src/generate.rs @@ -13,7 +13,6 @@ use std::{ use anyhow::Result; use bitflags::bitflags; use log::warn; -use node_types::VariableInfo; use regex::{Regex, RegexBuilder}; use rules::{Alias, Symbol}; #[cfg(feature = "load")] @@ -26,6 +25,7 @@ use thiserror::Error; mod build_tables; mod dedup; mod grammars; +mod introspect_grammar; mod nfa; mod node_types; pub mod parse_grammar; @@ -36,15 +36,13 @@ mod render; mod rules; mod tables; -use build_tables::build_tables; pub use build_tables::ParseTableBuilderError; -use grammars::{InlinedProductionMap, InputGrammar, LexicalGrammar, SyntaxGrammar}; +use introspect_grammar::{introspect_grammar, GrammarIntrospection}; pub use node_types::{SuperTypeCycleError, VariableInfoError}; use parse_grammar::parse_grammar; pub use parse_grammar::ParseGrammarError; -use prepare_grammar::prepare_grammar; pub use prepare_grammar::PrepareGrammarError; -use render::{generate_symbol_ids, render_c_code}; +use render::render_c_code; pub use render::{ABI_VERSION_MAX, ABI_VERSION_MIN}; use crate::{build_tables::Tables, node_types::ChildType}; @@ -56,18 +54,6 @@ static JSON_COMMENT_REGEX: LazyLock = LazyLock::new(|| { .unwrap() }); -struct GrammarIntrospection { - syntax_grammar: SyntaxGrammar, - lexical_grammar: LexicalGrammar, - simple_aliases: BTreeMap, - variable_info: Vec, - supertype_symbol_map: BTreeMap>, - tables: Tables, - symbol_ids: HashMap, - alias_ids: HashMap, - unique_aliases: Vec, -} - // NOTE: This constant must be kept in sync with the definition of // `TREE_SITTER_LANGUAGE_VERSION` in `lib/include/tree_sitter/api.h`. const LANGUAGE_VERSION: usize = 15; @@ -368,49 +354,6 @@ pub fn generate_parser_for_grammar( Ok((input_grammar.name, c_code)) } -fn introspect_grammar( - input_grammar: &InputGrammar, - report_symbol_name: Option<&str>, - optimizations: OptLevel, -) -> Result { - let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = - prepare_grammar(input_grammar)?; - let variable_info = - node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?; - - let supertype_symbol_map = - node_types::get_supertype_symbol_map(&syntax_grammar, &simple_aliases, &variable_info); - let tables = build_tables( - &syntax_grammar, - &lexical_grammar, - &simple_aliases, - &variable_info, - &inlines, - report_symbol_name, - optimizations, - )?; - - // Generate symbol IDs (both string and numeric) before rendering C code - let (symbol_ids, alias_ids, unique_aliases) = generate_symbol_ids( - &tables.parse_table, - &syntax_grammar, - &lexical_grammar, - &simple_aliases, - ); - - Ok(GrammarIntrospection { - syntax_grammar, - lexical_grammar, - simple_aliases, - variable_info, - supertype_symbol_map, - tables, - symbol_ids, - alias_ids, - unique_aliases, - }) -} - /// This will read the `tree-sitter.json` config file and attempt to extract the version. /// /// If the file is not found in the current directory or any of its parent directories, this will diff --git a/crates/generate/src/introspect_grammar.rs b/crates/generate/src/introspect_grammar.rs new file mode 100644 index 00000000..a654c687 --- /dev/null +++ b/crates/generate/src/introspect_grammar.rs @@ -0,0 +1,66 @@ +use std::collections::{BTreeMap, HashMap}; + +use crate::{ + build_tables::{build_tables, Tables}, + grammars::{InputGrammar, LexicalGrammar, SyntaxGrammar}, + node_types::{self, ChildType, VariableInfo}, + prepare_grammar::prepare_grammar, + render::generate_symbol_ids, + rules::{Alias, Symbol}, + GenerateError, OptLevel, +}; + +pub struct GrammarIntrospection { + pub syntax_grammar: SyntaxGrammar, + pub lexical_grammar: LexicalGrammar, + pub simple_aliases: BTreeMap, + pub variable_info: Vec, + pub supertype_symbol_map: BTreeMap>, + pub tables: Tables, + pub symbol_ids: HashMap, + pub alias_ids: HashMap, + pub unique_aliases: Vec, +} + +pub fn introspect_grammar( + input_grammar: &InputGrammar, + report_symbol_name: Option<&str>, + optimizations: OptLevel, +) -> Result { + let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = + prepare_grammar(input_grammar)?; + let variable_info = + node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?; + + let supertype_symbol_map = + node_types::get_supertype_symbol_map(&syntax_grammar, &simple_aliases, &variable_info); + let tables = build_tables( + &syntax_grammar, + &lexical_grammar, + &simple_aliases, + &variable_info, + &inlines, + report_symbol_name, + optimizations, + )?; + + // Generate symbol IDs (both string and numeric) before rendering C code + let (symbol_ids, alias_ids, unique_aliases) = generate_symbol_ids( + &tables.parse_table, + &syntax_grammar, + &lexical_grammar, + &simple_aliases, + ); + + Ok(GrammarIntrospection { + syntax_grammar, + lexical_grammar, + simple_aliases, + variable_info, + supertype_symbol_map, + tables, + symbol_ids, + alias_ids, + unique_aliases, + }) +} diff --git a/crates/generate/src/node_types.rs b/crates/generate/src/node_types.rs index b559f870..c2cfabe4 100644 --- a/crates/generate/src/node_types.rs +++ b/crates/generate/src/node_types.rs @@ -857,8 +857,10 @@ mod tests { grammars::{ InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable, }, + introspect_grammar, prepare_grammar::prepare_grammar, rules::Rule, + GrammarIntrospection, OptLevel, }; #[test] @@ -2091,17 +2093,28 @@ mod tests { } fn get_node_types(grammar: &InputGrammar) -> SuperTypeCycleResult> { - let (syntax_grammar, lexical_grammar, _, default_aliases) = - prepare_grammar(grammar).unwrap(); - let variable_info = - get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap(); - generate_node_types_json( + let GrammarIntrospection { + syntax_grammar, + lexical_grammar, + simple_aliases, + variable_info, + supertype_symbol_map: _, + tables: _, + symbol_ids: _, + alias_ids: _, + unique_aliases: _, + } = introspect_grammar(grammar, None, OptLevel::default()).unwrap(); + + let x = generate_node_types_json( &syntax_grammar, &lexical_grammar, - &default_aliases, + &simple_aliases, &variable_info, + // TODO: use `symbol_ids` &HashMap::new(), - ) + ); + + return x; } fn build_syntax_grammar(