refactor: extract grammar introspection into separate module
This commit is contained in:
parent
d2a2b4005a
commit
ab9b098aad
3 changed files with 89 additions and 67 deletions
|
|
@ -13,7 +13,6 @@ use std::{
|
|||
use anyhow::Result;
|
||||
use bitflags::bitflags;
|
||||
use log::warn;
|
||||
use node_types::VariableInfo;
|
||||
use regex::{Regex, RegexBuilder};
|
||||
use rules::{Alias, Symbol};
|
||||
#[cfg(feature = "load")]
|
||||
|
|
@ -26,6 +25,7 @@ use thiserror::Error;
|
|||
mod build_tables;
|
||||
mod dedup;
|
||||
mod grammars;
|
||||
mod introspect_grammar;
|
||||
mod nfa;
|
||||
mod node_types;
|
||||
pub mod parse_grammar;
|
||||
|
|
@ -36,15 +36,13 @@ mod render;
|
|||
mod rules;
|
||||
mod tables;
|
||||
|
||||
use build_tables::build_tables;
|
||||
pub use build_tables::ParseTableBuilderError;
|
||||
use grammars::{InlinedProductionMap, InputGrammar, LexicalGrammar, SyntaxGrammar};
|
||||
use introspect_grammar::{introspect_grammar, GrammarIntrospection};
|
||||
pub use node_types::{SuperTypeCycleError, VariableInfoError};
|
||||
use parse_grammar::parse_grammar;
|
||||
pub use parse_grammar::ParseGrammarError;
|
||||
use prepare_grammar::prepare_grammar;
|
||||
pub use prepare_grammar::PrepareGrammarError;
|
||||
use render::{generate_symbol_ids, render_c_code};
|
||||
use render::render_c_code;
|
||||
pub use render::{ABI_VERSION_MAX, ABI_VERSION_MIN};
|
||||
|
||||
use crate::{build_tables::Tables, node_types::ChildType};
|
||||
|
|
@ -56,18 +54,6 @@ static JSON_COMMENT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
|
|||
.unwrap()
|
||||
});
|
||||
|
||||
/// Everything `introspect_grammar` computes for one input grammar, gathered into a single value.
struct GrammarIntrospection {
|
||||
/// Syntax grammar, first element of the tuple returned by `prepare_grammar`.
syntax_grammar: SyntaxGrammar,
|
||||
/// Lexical grammar, second element of the tuple returned by `prepare_grammar`.
lexical_grammar: LexicalGrammar,
|
||||
/// Simple aliases keyed by symbol, fourth element of the tuple returned by `prepare_grammar`.
simple_aliases: BTreeMap<Symbol, Alias>,
|
||||
/// Result of `node_types::get_variable_info` for the two grammars and the simple aliases.
variable_info: Vec<VariableInfo>,
|
||||
/// Result of `node_types::get_supertype_symbol_map`.
supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
|
||||
/// Result of `build_tables`; its `parse_table` is what `generate_symbol_ids` reads.
tables: Tables,
|
||||
/// First value returned by `generate_symbol_ids`: a string id and a numeric id per symbol.
symbol_ids: HashMap<Symbol, (String, u16)>,
|
||||
/// Second value returned by `generate_symbol_ids`: a string id per alias.
alias_ids: HashMap<Alias, String>,
|
||||
/// Third value returned by `generate_symbol_ids`.
unique_aliases: Vec<Alias>,
|
||||
}
|
||||
|
||||
// NOTE: This constant must be kept in sync with the definition of
|
||||
// `TREE_SITTER_LANGUAGE_VERSION` in `lib/include/tree_sitter/api.h`.
|
||||
const LANGUAGE_VERSION: usize = 15;
|
||||
|
|
@ -368,49 +354,6 @@ pub fn generate_parser_for_grammar(
|
|||
Ok((input_grammar.name, c_code))
|
||||
}
|
||||
|
||||
fn introspect_grammar(
|
||||
input_grammar: &InputGrammar,
|
||||
report_symbol_name: Option<&str>,
|
||||
optimizations: OptLevel,
|
||||
) -> Result<GrammarIntrospection, GenerateError> {
|
||||
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
|
||||
prepare_grammar(input_grammar)?;
|
||||
let variable_info =
|
||||
node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
|
||||
|
||||
let supertype_symbol_map =
|
||||
node_types::get_supertype_symbol_map(&syntax_grammar, &simple_aliases, &variable_info);
|
||||
let tables = build_tables(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
&variable_info,
|
||||
&inlines,
|
||||
report_symbol_name,
|
||||
optimizations,
|
||||
)?;
|
||||
|
||||
// Generate symbol IDs (both string and numeric) before rendering C code
|
||||
let (symbol_ids, alias_ids, unique_aliases) = generate_symbol_ids(
|
||||
&tables.parse_table,
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
);
|
||||
|
||||
Ok(GrammarIntrospection {
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
simple_aliases,
|
||||
variable_info,
|
||||
supertype_symbol_map,
|
||||
tables,
|
||||
symbol_ids,
|
||||
alias_ids,
|
||||
unique_aliases,
|
||||
})
|
||||
}
|
||||
|
||||
/// This will read the `tree-sitter.json` config file and attempt to extract the version.
|
||||
///
|
||||
/// If the file is not found in the current directory or any of its parent directories, this will
|
||||
|
|
|
|||
66
crates/generate/src/introspect_grammar.rs
Normal file
66
crates/generate/src/introspect_grammar.rs
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
use std::collections::{BTreeMap, HashMap};
|
||||
|
||||
use crate::{
|
||||
build_tables::{build_tables, Tables},
|
||||
grammars::{InputGrammar, LexicalGrammar, SyntaxGrammar},
|
||||
node_types::{self, ChildType, VariableInfo},
|
||||
prepare_grammar::prepare_grammar,
|
||||
render::generate_symbol_ids,
|
||||
rules::{Alias, Symbol},
|
||||
GenerateError, OptLevel,
|
||||
};
|
||||
|
||||
/// Everything `introspect_grammar` computes for one input grammar, gathered into a single value.
pub struct GrammarIntrospection {
|
||||
/// Syntax grammar, first element of the tuple returned by `prepare_grammar`.
pub syntax_grammar: SyntaxGrammar,
|
||||
/// Lexical grammar, second element of the tuple returned by `prepare_grammar`.
pub lexical_grammar: LexicalGrammar,
|
||||
/// Simple aliases keyed by symbol, fourth element of the tuple returned by `prepare_grammar`.
pub simple_aliases: BTreeMap<Symbol, Alias>,
|
||||
/// Result of `node_types::get_variable_info` for the two grammars and the simple aliases.
pub variable_info: Vec<VariableInfo>,
|
||||
/// Result of `node_types::get_supertype_symbol_map`.
pub supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
|
||||
/// Result of `build_tables`; its `parse_table` is what `generate_symbol_ids` reads.
pub tables: Tables,
|
||||
/// First value returned by `generate_symbol_ids`: a string id and a numeric id per symbol.
pub symbol_ids: HashMap<Symbol, (String, u16)>,
|
||||
/// Second value returned by `generate_symbol_ids`: a string id per alias.
pub alias_ids: HashMap<Alias, String>,
|
||||
/// Third value returned by `generate_symbol_ids`.
pub unique_aliases: Vec<Alias>,
|
||||
}
|
||||
|
||||
pub fn introspect_grammar(
|
||||
input_grammar: &InputGrammar,
|
||||
report_symbol_name: Option<&str>,
|
||||
optimizations: OptLevel,
|
||||
) -> Result<GrammarIntrospection, GenerateError> {
|
||||
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
|
||||
prepare_grammar(input_grammar)?;
|
||||
let variable_info =
|
||||
node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
|
||||
|
||||
let supertype_symbol_map =
|
||||
node_types::get_supertype_symbol_map(&syntax_grammar, &simple_aliases, &variable_info);
|
||||
let tables = build_tables(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
&variable_info,
|
||||
&inlines,
|
||||
report_symbol_name,
|
||||
optimizations,
|
||||
)?;
|
||||
|
||||
// Generate symbol IDs (both string and numeric) before rendering C code
|
||||
let (symbol_ids, alias_ids, unique_aliases) = generate_symbol_ids(
|
||||
&tables.parse_table,
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
);
|
||||
|
||||
Ok(GrammarIntrospection {
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
simple_aliases,
|
||||
variable_info,
|
||||
supertype_symbol_map,
|
||||
tables,
|
||||
symbol_ids,
|
||||
alias_ids,
|
||||
unique_aliases,
|
||||
})
|
||||
}
|
||||
|
|
@ -857,8 +857,10 @@ mod tests {
|
|||
grammars::{
|
||||
InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable,
|
||||
},
|
||||
introspect_grammar,
|
||||
prepare_grammar::prepare_grammar,
|
||||
rules::Rule,
|
||||
GrammarIntrospection, OptLevel,
|
||||
};
|
||||
|
||||
#[test]
|
||||
|
|
@ -2091,17 +2093,28 @@ mod tests {
|
|||
}
|
||||
|
||||
fn get_node_types(grammar: &InputGrammar) -> SuperTypeCycleResult<Vec<NodeInfoJSON>> {
|
||||
let (syntax_grammar, lexical_grammar, _, default_aliases) =
|
||||
prepare_grammar(grammar).unwrap();
|
||||
let variable_info =
|
||||
get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap();
|
||||
generate_node_types_json(
|
||||
let GrammarIntrospection {
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
simple_aliases,
|
||||
variable_info,
|
||||
supertype_symbol_map: _,
|
||||
tables: _,
|
||||
symbol_ids: _,
|
||||
alias_ids: _,
|
||||
unique_aliases: _,
|
||||
} = introspect_grammar(grammar, None, OptLevel::default()).unwrap();
|
||||
|
||||
let x = generate_node_types_json(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&default_aliases,
|
||||
&simple_aliases,
|
||||
&variable_info,
|
||||
// TODO: use `symbol_ids`
|
||||
&HashMap::new(),
|
||||
)
|
||||
);
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
fn build_syntax_grammar(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue