diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 7b26892e..41d3932c 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -25,10 +25,11 @@ struct AuxiliarySymbolInfo { type SymbolSequence = Vec; type AuxiliarySymbolSequence = Vec; +pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>); + struct ParseStateQueueEntry { - preceding_symbols: SymbolSequence, - preceding_auxiliary_symbols: AuxiliarySymbolSequence, state_id: ParseStateId, + preceding_auxiliary_symbols: AuxiliarySymbolSequence, } struct ParseTableBuilder<'a> { @@ -38,13 +39,13 @@ struct ParseTableBuilder<'a> { variable_info: &'a Vec, core_ids_by_core: HashMap, usize>, state_ids_by_item_set: HashMap, ParseStateId>, - item_sets_by_state_id: Vec>, + parse_state_info_by_id: Vec>, parse_state_queue: VecDeque, parse_table: ParseTable, } impl<'a> ParseTableBuilder<'a> { - fn build(mut self) -> Result { + fn build(mut self) -> Result<(ParseTable, Vec>)> { // Ensure that the empty alias sequence has index 0. 
self.parse_table .production_infos @@ -70,9 +71,10 @@ impl<'a> ParseTableBuilder<'a> { while let Some(entry) = self.parse_state_queue.pop_front() { let item_set = self .item_set_builder - .transitive_closure(&self.item_sets_by_state_id[entry.state_id]); + .transitive_closure(&self.parse_state_info_by_id[entry.state_id].1); + self.add_actions( - entry.preceding_symbols, + self.parse_state_info_by_id[entry.state_id].0.clone(), entry.preceding_auxiliary_symbols, entry.state_id, item_set, @@ -81,7 +83,7 @@ impl<'a> ParseTableBuilder<'a> { self.remove_precedences(); - Ok(self.parse_table) + Ok((self.parse_table, self.parse_state_info_by_id)) } fn add_parse_state( @@ -104,7 +106,9 @@ impl<'a> ParseTableBuilder<'a> { }; let state_id = self.parse_table.states.len(); - self.item_sets_by_state_id.push(v.key().clone()); + self.parse_state_info_by_id + .push((preceding_symbols.clone(), v.key().clone())); + self.parse_table.states.push(ParseState { id: state_id, lex_state_id: 0, @@ -115,7 +119,6 @@ impl<'a> ParseTableBuilder<'a> { }); self.parse_state_queue.push_back(ParseStateQueueEntry { state_id, - preceding_symbols: preceding_symbols.clone(), preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(), }); v.insert(state_id); @@ -751,12 +754,12 @@ fn populate_following_tokens( } } -pub(crate) fn build_parse_table( - syntax_grammar: &SyntaxGrammar, - lexical_grammar: &LexicalGrammar, - inlines: &InlinedProductionMap, - variable_info: &Vec, -) -> Result<(ParseTable, Vec)> { +pub(crate) fn build_parse_table<'a>( + syntax_grammar: &'a SyntaxGrammar, + lexical_grammar: &'a LexicalGrammar, + inlines: &'a InlinedProductionMap, + variable_info: &'a Vec, +) -> Result<(ParseTable, Vec, Vec>)> { let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines); let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()]; populate_following_tokens( @@ -766,14 +769,14 @@ pub(crate) fn build_parse_table( &item_set_builder, ); - let table 
= ParseTableBuilder { + let (table, item_sets) = ParseTableBuilder { syntax_grammar, lexical_grammar, item_set_builder, variable_info, state_ids_by_item_set: HashMap::new(), core_ids_by_core: HashMap::new(), - item_sets_by_state_id: Vec::new(), + parse_state_info_by_id: Vec::new(), parse_state_queue: VecDeque::new(), parse_table: ParseTable { states: Vec::new(), @@ -785,5 +788,5 @@ pub(crate) fn build_parse_table( } .build()?; - Ok((table, following_tokens)) + Ok((table, following_tokens, item_sets)) } diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index df712402..f0e5d381 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -1,5 +1,8 @@ -use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}; -use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet}; +use crate::generate::grammars::{ + LexicalGrammar, Production, ProductionStep, SyntaxGrammar, +}; +use crate::generate::rules::Associativity; +use crate::generate::rules::{Symbol, SymbolType, TokenSet}; use lazy_static::lazy_static; use std::cmp::Ordering; use std::fmt; @@ -161,12 +164,14 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { for (i, step) in self.0.production.steps.iter().enumerate() { if i == self.0.step_index as usize { write!(f, " •")?; - if step.precedence != 0 || step.associativity.is_some() { - write!( - f, - " (prec {:?} assoc {:?})", - step.precedence, step.associativity - )?; + if let Some(associativity) = step.associativity { + if step.precedence != 0 { + write!(f, " ({} {:?})", step.precedence, associativity)?; + } else { + write!(f, " ({:?})", associativity)?; + } + } else if step.precedence != 0 { + write!(f, " ({})", step.precedence)?; } } @@ -184,19 +189,21 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { } if let Some(alias) = &step.alias { - write!(f, " (alias {})", alias.value)?; + write!(f, "@{}", alias.value)?; } } if self.0.is_done() { 
write!(f, " •")?; if let Some(step) = self.0.production.steps.last() { - if step.precedence != 0 || step.associativity.is_some() { - write!( - f, - " (prec {:?} assoc {:?})", - step.precedence, step.associativity - )?; + if let Some(associativity) = step.associativity { + if step.precedence != 0 { + write!(f, " ({} {:?})", step.precedence, associativity)?; + } else { + write!(f, " ({:?})", associativity)?; + } + } else if step.precedence != 0 { + write!(f, " ({})", step.precedence)?; } } } diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index af9483eb..e0f84244 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -7,7 +7,7 @@ mod minimize_parse_table; mod token_conflicts; use self::build_lex_table::build_lex_table; -use self::build_parse_table::build_parse_table; +use self::build_parse_table::{build_parse_table, ParseStateInfo}; use self::coincident_tokens::CoincidentTokenIndex; use self::minimize_parse_table::minimize_parse_table; use self::token_conflicts::TokenConflictMap; @@ -18,7 +18,7 @@ use crate::generate::node_types::VariableInfo; use crate::generate::rules::{AliasMap, Symbol, SymbolType, TokenSet}; use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}; use log::info; -use std::collections::HashMap; +use std::collections::{BTreeSet, HashMap}; pub(crate) fn build_tables( syntax_grammar: &SyntaxGrammar, @@ -26,8 +26,9 @@ pub(crate) fn build_tables( simple_aliases: &AliasMap, variable_info: &Vec, inlines: &InlinedProductionMap, + report_symbol_name: Option<&str>, ) -> Result<(ParseTable, LexTable, LexTable, Option)> { - let (mut parse_table, following_tokens) = + let (mut parse_table, following_tokens, parse_state_info) = build_parse_table(syntax_grammar, lexical_grammar, inlines, variable_info)?; let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens); let coincident_token_index = CoincidentTokenIndex::new(&parse_table, 
lexical_grammar); @@ -65,6 +66,16 @@ pub(crate) fn build_tables( ); populate_external_lex_states(&mut parse_table, syntax_grammar); mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map); + + if let Some(report_symbol_name) = report_symbol_name { + report_state_info( + &syntax_grammar, + &lexical_grammar, + &parse_table, + &parse_state_info, + report_symbol_name, + ); + } Ok(( parse_table, main_lex_table, @@ -372,6 +383,90 @@ fn mark_fragile_tokens( } } +fn report_state_info<'a>( + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, + parse_table: &ParseTable, + parse_state_info: &Vec>, + report_symbol_name: &'a str, +) { + let mut all_state_indices = BTreeSet::new(); + let mut symbols_with_state_indices = (0..syntax_grammar.variables.len()) + .map(|i| (Symbol::non_terminal(i), BTreeSet::new())) + .collect::>(); + + for (i, state) in parse_table.states.iter().enumerate() { + all_state_indices.insert(i); + let item_set = &parse_state_info[state.id]; + for (item, _) in item_set.1.entries.iter() { + if !item.is_augmented() { + symbols_with_state_indices[item.variable_index as usize] + .1 + .insert(i); + } + } + } + + symbols_with_state_indices.sort_unstable_by_key(|(_, states)| -(states.len() as i32)); + + let max_symbol_name_length = syntax_grammar + .variables + .iter() + .map(|v| v.name.len()) + .max() + .unwrap(); + for (symbol, states) in &symbols_with_state_indices { + eprintln!( + "{:width$}\t{}", + syntax_grammar.variables[symbol.index].name, + states.len(), + width = max_symbol_name_length + ); + } + eprintln!(""); + + let state_indices = if report_symbol_name == "*" { + Some(&all_state_indices) + } else { + symbols_with_state_indices + .iter() + .find_map(|(symbol, state_indices)| { + if syntax_grammar.variables[symbol.index].name == report_symbol_name { + Some(state_indices) + } else { + None + } + }) + }; + + if let Some(state_indices) = state_indices { + let mut state_indices = 
state_indices.into_iter().cloned().collect::>(); + state_indices.sort_unstable_by_key(|i| (parse_table.states[*i].core_id, *i)); + + for state_index in state_indices { + let id = parse_table.states[state_index].id; + let (preceding_symbols, item_set) = &parse_state_info[id]; + eprintln!("state index: {}", state_index); + eprintln!("state id: {}", id); + eprint!("symbol sequence:"); + for symbol in preceding_symbols { + let name = if symbol.is_terminal() { + &lexical_grammar.variables[symbol.index].name + } else if symbol.is_external() { + &syntax_grammar.external_tokens[symbol.index].name + } else { + &syntax_grammar.variables[symbol.index].name + }; + eprint!(" {}", name); + } + eprintln!( + "\nitems:\n{}", + self::item::ParseItemSetDisplay(&item_set, syntax_grammar, lexical_grammar,), + ); + } + } +} + fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool { cursor.transition_chars().all(|(chars, is_sep)| { if is_sep { diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 64de772c..2afab507 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -42,6 +42,7 @@ pub fn generate_parser_in_directory( repo_path: &PathBuf, grammar_path: Option<&str>, properties_only: bool, + report_symbol_name: Option<&str>, ) -> Result<()> { let src_path = repo_path.join("src"); let header_path = src_path.join("tree_sitter"); @@ -102,6 +103,7 @@ pub fn generate_parser_in_directory( lexical_grammar, inlines, simple_aliases, + report_symbol_name, )?; write_file(&src_path.join("parser.c"), c_code)?; @@ -132,6 +134,7 @@ pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String lexical_grammar, inlines, simple_aliases, + None, )?; Ok((input_grammar.name, parser.c_code)) } @@ -142,6 +145,7 @@ fn generate_parser_for_grammar_with_opts( lexical_grammar: LexicalGrammar, inlines: InlinedProductionMap, simple_aliases: AliasMap, + report_symbol_name: Option<&str>, ) -> Result { let variable_info = node_types::get_variable_info(&syntax_grammar, 
&lexical_grammar, &inlines)?; let node_types_json = node_types::generate_node_types_json( @@ -156,6 +160,7 @@ fn generate_parser_for_grammar_with_opts( &simple_aliases, &variable_info, &inlines, + report_symbol_name, )?; let c_code = render_c_code( name, diff --git a/cli/src/main.rs b/cli/src/main.rs index f4565f34..80be798a 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -39,6 +39,12 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("grammar-path").index(1)) .arg(Arg::with_name("log").long("log")) .arg(Arg::with_name("properties-only").long("properties")) + .arg( + Arg::with_name("report-states-for-rule") + .long("report-states-for-rule") + .value_name("rule-name") + .takes_value(true), + ) .arg(Arg::with_name("no-minimize").long("no-minimize")), ) .subcommand( @@ -121,10 +127,22 @@ fn run() -> error::Result<()> { } else if let Some(matches) = matches.subcommand_matches("generate") { let grammar_path = matches.value_of("grammar-path"); let properties_only = matches.is_present("properties-only"); + let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| { + if matches.is_present("report-states") { + Some("") + } else { + None + } + }); if matches.is_present("log") { logger::init(); } - generate::generate_parser_in_directory(&current_dir, grammar_path, properties_only)?; + generate::generate_parser_in_directory( + &current_dir, + grammar_path, + properties_only, + report_symbol_name, + )?; } else if let Some(matches) = matches.subcommand_matches("test") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph");