Add --report-states-for-rule flag for reporting parse state counts for each rule
This commit is contained in:
parent
82ff542d3b
commit
aeb2f895b4
5 changed files with 165 additions and 37 deletions
|
|
@ -25,10 +25,11 @@ struct AuxiliarySymbolInfo {
|
|||
type SymbolSequence = Vec<Symbol>;
|
||||
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
|
||||
|
||||
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
|
||||
|
||||
struct ParseStateQueueEntry {
|
||||
preceding_symbols: SymbolSequence,
|
||||
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
|
||||
state_id: ParseStateId,
|
||||
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
|
||||
}
|
||||
|
||||
struct ParseTableBuilder<'a> {
|
||||
|
|
@ -38,13 +39,13 @@ struct ParseTableBuilder<'a> {
|
|||
variable_info: &'a Vec<VariableInfo>,
|
||||
core_ids_by_core: HashMap<ParseItemSetCore<'a>, usize>,
|
||||
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
|
||||
item_sets_by_state_id: Vec<ParseItemSet<'a>>,
|
||||
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
|
||||
parse_state_queue: VecDeque<ParseStateQueueEntry>,
|
||||
parse_table: ParseTable,
|
||||
}
|
||||
|
||||
impl<'a> ParseTableBuilder<'a> {
|
||||
fn build(mut self) -> Result<ParseTable> {
|
||||
fn build(mut self) -> Result<(ParseTable, Vec<ParseStateInfo<'a>>)> {
|
||||
// Ensure that the empty alias sequence has index 0.
|
||||
self.parse_table
|
||||
.production_infos
|
||||
|
|
@ -70,9 +71,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
while let Some(entry) = self.parse_state_queue.pop_front() {
|
||||
let item_set = self
|
||||
.item_set_builder
|
||||
.transitive_closure(&self.item_sets_by_state_id[entry.state_id]);
|
||||
.transitive_closure(&self.parse_state_info_by_id[entry.state_id].1);
|
||||
|
||||
self.add_actions(
|
||||
entry.preceding_symbols,
|
||||
self.parse_state_info_by_id[entry.state_id].0.clone(),
|
||||
entry.preceding_auxiliary_symbols,
|
||||
entry.state_id,
|
||||
item_set,
|
||||
|
|
@ -81,7 +83,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
|
||||
self.remove_precedences();
|
||||
|
||||
Ok(self.parse_table)
|
||||
Ok((self.parse_table, self.parse_state_info_by_id))
|
||||
}
|
||||
|
||||
fn add_parse_state(
|
||||
|
|
@ -104,7 +106,9 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
};
|
||||
|
||||
let state_id = self.parse_table.states.len();
|
||||
self.item_sets_by_state_id.push(v.key().clone());
|
||||
self.parse_state_info_by_id
|
||||
.push((preceding_symbols.clone(), v.key().clone()));
|
||||
|
||||
self.parse_table.states.push(ParseState {
|
||||
id: state_id,
|
||||
lex_state_id: 0,
|
||||
|
|
@ -115,7 +119,6 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
});
|
||||
self.parse_state_queue.push_back(ParseStateQueueEntry {
|
||||
state_id,
|
||||
preceding_symbols: preceding_symbols.clone(),
|
||||
preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(),
|
||||
});
|
||||
v.insert(state_id);
|
||||
|
|
@ -751,12 +754,12 @@ fn populate_following_tokens(
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn build_parse_table(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
inlines: &InlinedProductionMap,
|
||||
variable_info: &Vec<VariableInfo>,
|
||||
) -> Result<(ParseTable, Vec<TokenSet>)> {
|
||||
pub(crate) fn build_parse_table<'a>(
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
lexical_grammar: &'a LexicalGrammar,
|
||||
inlines: &'a InlinedProductionMap,
|
||||
variable_info: &'a Vec<VariableInfo>,
|
||||
) -> Result<(ParseTable, Vec<TokenSet>, Vec<ParseStateInfo<'a>>)> {
|
||||
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
|
||||
let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
|
||||
populate_following_tokens(
|
||||
|
|
@ -766,14 +769,14 @@ pub(crate) fn build_parse_table(
|
|||
&item_set_builder,
|
||||
);
|
||||
|
||||
let table = ParseTableBuilder {
|
||||
let (table, item_sets) = ParseTableBuilder {
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
item_set_builder,
|
||||
variable_info,
|
||||
state_ids_by_item_set: HashMap::new(),
|
||||
core_ids_by_core: HashMap::new(),
|
||||
item_sets_by_state_id: Vec::new(),
|
||||
parse_state_info_by_id: Vec::new(),
|
||||
parse_state_queue: VecDeque::new(),
|
||||
parse_table: ParseTable {
|
||||
states: Vec::new(),
|
||||
|
|
@ -785,5 +788,5 @@ pub(crate) fn build_parse_table(
|
|||
}
|
||||
.build()?;
|
||||
|
||||
Ok((table, following_tokens))
|
||||
Ok((table, following_tokens, item_sets))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
|
||||
use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet};
|
||||
use crate::generate::grammars::{
|
||||
LexicalGrammar, Production, ProductionStep, SyntaxGrammar,
|
||||
};
|
||||
use crate::generate::rules::Associativity;
|
||||
use crate::generate::rules::{Symbol, SymbolType, TokenSet};
|
||||
use lazy_static::lazy_static;
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt;
|
||||
|
|
@ -161,12 +164,14 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
|
|||
for (i, step) in self.0.production.steps.iter().enumerate() {
|
||||
if i == self.0.step_index as usize {
|
||||
write!(f, " •")?;
|
||||
if step.precedence != 0 || step.associativity.is_some() {
|
||||
write!(
|
||||
f,
|
||||
" (prec {:?} assoc {:?})",
|
||||
step.precedence, step.associativity
|
||||
)?;
|
||||
if let Some(associativity) = step.associativity {
|
||||
if step.precedence != 0 {
|
||||
write!(f, " ({} {:?})", step.precedence, associativity)?;
|
||||
} else {
|
||||
write!(f, " ({:?})", associativity)?;
|
||||
}
|
||||
} else if step.precedence != 0 {
|
||||
write!(f, " ({})", step.precedence)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -184,19 +189,21 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
|
|||
}
|
||||
|
||||
if let Some(alias) = &step.alias {
|
||||
write!(f, " (alias {})", alias.value)?;
|
||||
write!(f, "@{}", alias.value)?;
|
||||
}
|
||||
}
|
||||
|
||||
if self.0.is_done() {
|
||||
write!(f, " •")?;
|
||||
if let Some(step) = self.0.production.steps.last() {
|
||||
if step.precedence != 0 || step.associativity.is_some() {
|
||||
write!(
|
||||
f,
|
||||
" (prec {:?} assoc {:?})",
|
||||
step.precedence, step.associativity
|
||||
)?;
|
||||
if let Some(associativity) = step.associativity {
|
||||
if step.precedence != 0 {
|
||||
write!(f, " ({} {:?})", step.precedence, associativity)?;
|
||||
} else {
|
||||
write!(f, " ({:?})", associativity)?;
|
||||
}
|
||||
} else if step.precedence != 0 {
|
||||
write!(f, " ({})", step.precedence)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ mod minimize_parse_table;
|
|||
mod token_conflicts;
|
||||
|
||||
use self::build_lex_table::build_lex_table;
|
||||
use self::build_parse_table::build_parse_table;
|
||||
use self::build_parse_table::{build_parse_table, ParseStateInfo};
|
||||
use self::coincident_tokens::CoincidentTokenIndex;
|
||||
use self::minimize_parse_table::minimize_parse_table;
|
||||
use self::token_conflicts::TokenConflictMap;
|
||||
|
|
@ -18,7 +18,7 @@ use crate::generate::node_types::VariableInfo;
|
|||
use crate::generate::rules::{AliasMap, Symbol, SymbolType, TokenSet};
|
||||
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
|
||||
use log::info;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
|
||||
pub(crate) fn build_tables(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
|
|
@ -26,8 +26,9 @@ pub(crate) fn build_tables(
|
|||
simple_aliases: &AliasMap,
|
||||
variable_info: &Vec<VariableInfo>,
|
||||
inlines: &InlinedProductionMap,
|
||||
report_symbol_name: Option<&str>,
|
||||
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
|
||||
let (mut parse_table, following_tokens) =
|
||||
let (mut parse_table, following_tokens, parse_state_info) =
|
||||
build_parse_table(syntax_grammar, lexical_grammar, inlines, variable_info)?;
|
||||
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
|
||||
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
|
||||
|
|
@ -65,6 +66,16 @@ pub(crate) fn build_tables(
|
|||
);
|
||||
populate_external_lex_states(&mut parse_table, syntax_grammar);
|
||||
mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map);
|
||||
|
||||
if let Some(report_symbol_name) = report_symbol_name {
|
||||
report_state_info(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&parse_table,
|
||||
&parse_state_info,
|
||||
report_symbol_name,
|
||||
);
|
||||
}
|
||||
Ok((
|
||||
parse_table,
|
||||
main_lex_table,
|
||||
|
|
@ -372,6 +383,90 @@ fn mark_fragile_tokens(
|
|||
}
|
||||
}
|
||||
|
||||
fn report_state_info<'a>(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
parse_table: &ParseTable,
|
||||
parse_state_info: &Vec<ParseStateInfo<'a>>,
|
||||
report_symbol_name: &'a str,
|
||||
) {
|
||||
let mut all_state_indices = BTreeSet::new();
|
||||
let mut symbols_with_state_indices = (0..syntax_grammar.variables.len())
|
||||
.map(|i| (Symbol::non_terminal(i), BTreeSet::new()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for (i, state) in parse_table.states.iter().enumerate() {
|
||||
all_state_indices.insert(i);
|
||||
let item_set = &parse_state_info[state.id];
|
||||
for (item, _) in item_set.1.entries.iter() {
|
||||
if !item.is_augmented() {
|
||||
symbols_with_state_indices[item.variable_index as usize]
|
||||
.1
|
||||
.insert(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
symbols_with_state_indices.sort_unstable_by_key(|(_, states)| -(states.len() as i32));
|
||||
|
||||
let max_symbol_name_length = syntax_grammar
|
||||
.variables
|
||||
.iter()
|
||||
.map(|v| v.name.len())
|
||||
.max()
|
||||
.unwrap();
|
||||
for (symbol, states) in &symbols_with_state_indices {
|
||||
eprintln!(
|
||||
"{:width$}\t{}",
|
||||
syntax_grammar.variables[symbol.index].name,
|
||||
states.len(),
|
||||
width = max_symbol_name_length
|
||||
);
|
||||
}
|
||||
eprintln!("");
|
||||
|
||||
let state_indices = if report_symbol_name == "*" {
|
||||
Some(&all_state_indices)
|
||||
} else {
|
||||
symbols_with_state_indices
|
||||
.iter()
|
||||
.find_map(|(symbol, state_indices)| {
|
||||
if syntax_grammar.variables[symbol.index].name == report_symbol_name {
|
||||
Some(state_indices)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
};
|
||||
|
||||
if let Some(state_indices) = state_indices {
|
||||
let mut state_indices = state_indices.into_iter().cloned().collect::<Vec<_>>();
|
||||
state_indices.sort_unstable_by_key(|i| (parse_table.states[*i].core_id, *i));
|
||||
|
||||
for state_index in state_indices {
|
||||
let id = parse_table.states[state_index].id;
|
||||
let (preceding_symbols, item_set) = &parse_state_info[id];
|
||||
eprintln!("state index: {}", state_index);
|
||||
eprintln!("state id: {}", id);
|
||||
eprint!("symbol sequence:");
|
||||
for symbol in preceding_symbols {
|
||||
let name = if symbol.is_terminal() {
|
||||
&lexical_grammar.variables[symbol.index].name
|
||||
} else if symbol.is_external() {
|
||||
&syntax_grammar.external_tokens[symbol.index].name
|
||||
} else {
|
||||
&syntax_grammar.variables[symbol.index].name
|
||||
};
|
||||
eprint!(" {}", name);
|
||||
}
|
||||
eprintln!(
|
||||
"\nitems:\n{}",
|
||||
self::item::ParseItemSetDisplay(&item_set, syntax_grammar, lexical_grammar,),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool {
|
||||
cursor.transition_chars().all(|(chars, is_sep)| {
|
||||
if is_sep {
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ pub fn generate_parser_in_directory(
|
|||
repo_path: &PathBuf,
|
||||
grammar_path: Option<&str>,
|
||||
properties_only: bool,
|
||||
report_symbol_name: Option<&str>,
|
||||
) -> Result<()> {
|
||||
let src_path = repo_path.join("src");
|
||||
let header_path = src_path.join("tree_sitter");
|
||||
|
|
@ -102,6 +103,7 @@ pub fn generate_parser_in_directory(
|
|||
lexical_grammar,
|
||||
inlines,
|
||||
simple_aliases,
|
||||
report_symbol_name,
|
||||
)?;
|
||||
|
||||
write_file(&src_path.join("parser.c"), c_code)?;
|
||||
|
|
@ -132,6 +134,7 @@ pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String
|
|||
lexical_grammar,
|
||||
inlines,
|
||||
simple_aliases,
|
||||
None,
|
||||
)?;
|
||||
Ok((input_grammar.name, parser.c_code))
|
||||
}
|
||||
|
|
@ -142,6 +145,7 @@ fn generate_parser_for_grammar_with_opts(
|
|||
lexical_grammar: LexicalGrammar,
|
||||
inlines: InlinedProductionMap,
|
||||
simple_aliases: AliasMap,
|
||||
report_symbol_name: Option<&str>,
|
||||
) -> Result<GeneratedParser> {
|
||||
let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &inlines)?;
|
||||
let node_types_json = node_types::generate_node_types_json(
|
||||
|
|
@ -156,6 +160,7 @@ fn generate_parser_for_grammar_with_opts(
|
|||
&simple_aliases,
|
||||
&variable_info,
|
||||
&inlines,
|
||||
report_symbol_name,
|
||||
)?;
|
||||
let c_code = render_c_code(
|
||||
name,
|
||||
|
|
|
|||
|
|
@ -39,6 +39,12 @@ fn run() -> error::Result<()> {
|
|||
.arg(Arg::with_name("grammar-path").index(1))
|
||||
.arg(Arg::with_name("log").long("log"))
|
||||
.arg(Arg::with_name("properties-only").long("properties"))
|
||||
.arg(
|
||||
Arg::with_name("report-states-for-rule")
|
||||
.long("report-states-for-rule")
|
||||
.value_name("rule-name")
|
||||
.takes_value(true),
|
||||
)
|
||||
.arg(Arg::with_name("no-minimize").long("no-minimize")),
|
||||
)
|
||||
.subcommand(
|
||||
|
|
@ -121,10 +127,22 @@ fn run() -> error::Result<()> {
|
|||
} else if let Some(matches) = matches.subcommand_matches("generate") {
|
||||
let grammar_path = matches.value_of("grammar-path");
|
||||
let properties_only = matches.is_present("properties-only");
|
||||
let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| {
|
||||
if matches.is_present("report-states") {
|
||||
Some("")
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
if matches.is_present("log") {
|
||||
logger::init();
|
||||
}
|
||||
generate::generate_parser_in_directory(&current_dir, grammar_path, properties_only)?;
|
||||
generate::generate_parser_in_directory(
|
||||
&current_dir,
|
||||
grammar_path,
|
||||
properties_only,
|
||||
report_symbol_name,
|
||||
)?;
|
||||
} else if let Some(matches) = matches.subcommand_matches("test") {
|
||||
let debug = matches.is_present("debug");
|
||||
let debug_graph = matches.is_present("debug-graph");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue