Add --report-states flag for reporting state counts for each rule

This commit is contained in:
Max Brunsfeld 2019-05-20 13:25:01 -07:00
parent 82ff542d3b
commit aeb2f895b4
5 changed files with 165 additions and 37 deletions

View file

@ -25,10 +25,11 @@ struct AuxiliarySymbolInfo {
type SymbolSequence = Vec<Symbol>;
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
struct ParseStateQueueEntry {
preceding_symbols: SymbolSequence,
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
state_id: ParseStateId,
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
}
struct ParseTableBuilder<'a> {
@ -38,13 +39,13 @@ struct ParseTableBuilder<'a> {
variable_info: &'a Vec<VariableInfo>,
core_ids_by_core: HashMap<ParseItemSetCore<'a>, usize>,
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
item_sets_by_state_id: Vec<ParseItemSet<'a>>,
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
parse_table: ParseTable,
}
impl<'a> ParseTableBuilder<'a> {
fn build(mut self) -> Result<ParseTable> {
fn build(mut self) -> Result<(ParseTable, Vec<ParseStateInfo<'a>>)> {
// Ensure that the empty alias sequence has index 0.
self.parse_table
.production_infos
@ -70,9 +71,10 @@ impl<'a> ParseTableBuilder<'a> {
while let Some(entry) = self.parse_state_queue.pop_front() {
let item_set = self
.item_set_builder
.transitive_closure(&self.item_sets_by_state_id[entry.state_id]);
.transitive_closure(&self.parse_state_info_by_id[entry.state_id].1);
self.add_actions(
entry.preceding_symbols,
self.parse_state_info_by_id[entry.state_id].0.clone(),
entry.preceding_auxiliary_symbols,
entry.state_id,
item_set,
@ -81,7 +83,7 @@ impl<'a> ParseTableBuilder<'a> {
self.remove_precedences();
Ok(self.parse_table)
Ok((self.parse_table, self.parse_state_info_by_id))
}
fn add_parse_state(
@ -104,7 +106,9 @@ impl<'a> ParseTableBuilder<'a> {
};
let state_id = self.parse_table.states.len();
self.item_sets_by_state_id.push(v.key().clone());
self.parse_state_info_by_id
.push((preceding_symbols.clone(), v.key().clone()));
self.parse_table.states.push(ParseState {
id: state_id,
lex_state_id: 0,
@ -115,7 +119,6 @@ impl<'a> ParseTableBuilder<'a> {
});
self.parse_state_queue.push_back(ParseStateQueueEntry {
state_id,
preceding_symbols: preceding_symbols.clone(),
preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(),
});
v.insert(state_id);
@ -751,12 +754,12 @@ fn populate_following_tokens(
}
}
pub(crate) fn build_parse_table(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
inlines: &InlinedProductionMap,
variable_info: &Vec<VariableInfo>,
) -> Result<(ParseTable, Vec<TokenSet>)> {
pub(crate) fn build_parse_table<'a>(
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
inlines: &'a InlinedProductionMap,
variable_info: &'a Vec<VariableInfo>,
) -> Result<(ParseTable, Vec<TokenSet>, Vec<ParseStateInfo<'a>>)> {
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
populate_following_tokens(
@ -766,14 +769,14 @@ pub(crate) fn build_parse_table(
&item_set_builder,
);
let table = ParseTableBuilder {
let (table, item_sets) = ParseTableBuilder {
syntax_grammar,
lexical_grammar,
item_set_builder,
variable_info,
state_ids_by_item_set: HashMap::new(),
core_ids_by_core: HashMap::new(),
item_sets_by_state_id: Vec::new(),
parse_state_info_by_id: Vec::new(),
parse_state_queue: VecDeque::new(),
parse_table: ParseTable {
states: Vec::new(),
@ -785,5 +788,5 @@ pub(crate) fn build_parse_table(
}
.build()?;
Ok((table, following_tokens))
Ok((table, following_tokens, item_sets))
}

View file

@ -1,5 +1,8 @@
use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet};
use crate::generate::grammars::{
LexicalGrammar, Production, ProductionStep, SyntaxGrammar,
};
use crate::generate::rules::Associativity;
use crate::generate::rules::{Symbol, SymbolType, TokenSet};
use lazy_static::lazy_static;
use std::cmp::Ordering;
use std::fmt;
@ -161,12 +164,14 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
for (i, step) in self.0.production.steps.iter().enumerate() {
if i == self.0.step_index as usize {
write!(f, "")?;
if step.precedence != 0 || step.associativity.is_some() {
write!(
f,
" (prec {:?} assoc {:?})",
step.precedence, step.associativity
)?;
if let Some(associativity) = step.associativity {
if step.precedence != 0 {
write!(f, " ({} {:?})", step.precedence, associativity)?;
} else {
write!(f, " ({:?})", associativity)?;
}
} else if step.precedence != 0 {
write!(f, " ({})", step.precedence)?;
}
}
@ -184,19 +189,21 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
}
if let Some(alias) = &step.alias {
write!(f, " (alias {})", alias.value)?;
write!(f, "@{}", alias.value)?;
}
}
if self.0.is_done() {
write!(f, "")?;
if let Some(step) = self.0.production.steps.last() {
if step.precedence != 0 || step.associativity.is_some() {
write!(
f,
" (prec {:?} assoc {:?})",
step.precedence, step.associativity
)?;
if let Some(associativity) = step.associativity {
if step.precedence != 0 {
write!(f, " ({} {:?})", step.precedence, associativity)?;
} else {
write!(f, " ({:?})", associativity)?;
}
} else if step.precedence != 0 {
write!(f, " ({})", step.precedence)?;
}
}
}

View file

@ -7,7 +7,7 @@ mod minimize_parse_table;
mod token_conflicts;
use self::build_lex_table::build_lex_table;
use self::build_parse_table::build_parse_table;
use self::build_parse_table::{build_parse_table, ParseStateInfo};
use self::coincident_tokens::CoincidentTokenIndex;
use self::minimize_parse_table::minimize_parse_table;
use self::token_conflicts::TokenConflictMap;
@ -18,7 +18,7 @@ use crate::generate::node_types::VariableInfo;
use crate::generate::rules::{AliasMap, Symbol, SymbolType, TokenSet};
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
use log::info;
use std::collections::HashMap;
use std::collections::{BTreeSet, HashMap};
pub(crate) fn build_tables(
syntax_grammar: &SyntaxGrammar,
@ -26,8 +26,9 @@ pub(crate) fn build_tables(
simple_aliases: &AliasMap,
variable_info: &Vec<VariableInfo>,
inlines: &InlinedProductionMap,
report_symbol_name: Option<&str>,
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
let (mut parse_table, following_tokens) =
let (mut parse_table, following_tokens, parse_state_info) =
build_parse_table(syntax_grammar, lexical_grammar, inlines, variable_info)?;
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
@ -65,6 +66,16 @@ pub(crate) fn build_tables(
);
populate_external_lex_states(&mut parse_table, syntax_grammar);
mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map);
if let Some(report_symbol_name) = report_symbol_name {
report_state_info(
&syntax_grammar,
&lexical_grammar,
&parse_table,
&parse_state_info,
report_symbol_name,
);
}
Ok((
parse_table,
main_lex_table,
@ -372,6 +383,90 @@ fn mark_fragile_tokens(
}
}
/// Prints (to stderr) how many parse states involve each grammar rule,
/// then dumps every state associated with `report_symbol_name` — or all
/// states when the name is `"*"`. Used by the `--report-states-for-rule`
/// CLI flag; purely diagnostic, no return value.
fn report_state_info<'a>(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    parse_table: &ParseTable,
    parse_state_info: &Vec<ParseStateInfo<'a>>,
    report_symbol_name: &'a str,
) {
    // For each non-terminal, collect the set of parse-state indices whose
    // item set contains an item for that rule. BTreeSet keeps the indices
    // ordered for stable output.
    let mut all_state_indices = BTreeSet::new();
    let mut symbols_with_state_indices = (0..syntax_grammar.variables.len())
        .map(|i| (Symbol::non_terminal(i), BTreeSet::new()))
        .collect::<Vec<_>>();

    for (i, state) in parse_table.states.iter().enumerate() {
        all_state_indices.insert(i);
        let item_set = &parse_state_info[state.id];
        for (item, _) in item_set.1.entries.iter() {
            if !item.is_augmented() {
                symbols_with_state_indices[item.variable_index as usize]
                    .1
                    .insert(i);
            }
        }
    }

    // Report the rules with the most states first.
    symbols_with_state_indices.sort_unstable_by_key(|(_, states)| -(states.len() as i32));

    // Column width for aligning the counts after the longest rule name.
    // `unwrap_or(0)` avoids a panic on a grammar with zero variables.
    let max_symbol_name_length = syntax_grammar
        .variables
        .iter()
        .map(|v| v.name.len())
        .max()
        .unwrap_or(0);
    for (symbol, states) in &symbols_with_state_indices {
        eprintln!(
            "{:width$}\t{}",
            syntax_grammar.variables[symbol.index].name,
            states.len(),
            width = max_symbol_name_length
        );
    }
    eprintln!();

    // "*" selects every state; otherwise look up the state set for the
    // named rule (None if no rule matches, in which case nothing more
    // is printed).
    let state_indices = if report_symbol_name == "*" {
        Some(&all_state_indices)
    } else {
        symbols_with_state_indices
            .iter()
            .find_map(|(symbol, state_indices)| {
                if syntax_grammar.variables[symbol.index].name == report_symbol_name {
                    Some(state_indices)
                } else {
                    None
                }
            })
    };

    if let Some(state_indices) = state_indices {
        // Group related states together: sort by core id, then by index.
        let mut state_indices = state_indices.iter().copied().collect::<Vec<_>>();
        state_indices.sort_unstable_by_key(|i| (parse_table.states[*i].core_id, *i));

        for state_index in state_indices {
            let id = parse_table.states[state_index].id;
            let (preceding_symbols, item_set) = &parse_state_info[id];

            eprintln!("state index: {}", state_index);
            eprintln!("state id: {}", id);

            // Print the symbol sequence that leads to this state. The
            // name lookup depends on the symbol kind: terminal, external
            // token, or non-terminal.
            eprint!("symbol sequence:");
            for symbol in preceding_symbols {
                let name = if symbol.is_terminal() {
                    &lexical_grammar.variables[symbol.index].name
                } else if symbol.is_external() {
                    &syntax_grammar.external_tokens[symbol.index].name
                } else {
                    &syntax_grammar.variables[symbol.index].name
                };
                eprint!(" {}", name);
            }
            eprintln!(
                "\nitems:\n{}",
                self::item::ParseItemSetDisplay(&item_set, syntax_grammar, lexical_grammar,),
            );
        }
    }
}
fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool {
cursor.transition_chars().all(|(chars, is_sep)| {
if is_sep {

View file

@ -42,6 +42,7 @@ pub fn generate_parser_in_directory(
repo_path: &PathBuf,
grammar_path: Option<&str>,
properties_only: bool,
report_symbol_name: Option<&str>,
) -> Result<()> {
let src_path = repo_path.join("src");
let header_path = src_path.join("tree_sitter");
@ -102,6 +103,7 @@ pub fn generate_parser_in_directory(
lexical_grammar,
inlines,
simple_aliases,
report_symbol_name,
)?;
write_file(&src_path.join("parser.c"), c_code)?;
@ -132,6 +134,7 @@ pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String
lexical_grammar,
inlines,
simple_aliases,
None,
)?;
Ok((input_grammar.name, parser.c_code))
}
@ -142,6 +145,7 @@ fn generate_parser_for_grammar_with_opts(
lexical_grammar: LexicalGrammar,
inlines: InlinedProductionMap,
simple_aliases: AliasMap,
report_symbol_name: Option<&str>,
) -> Result<GeneratedParser> {
let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &inlines)?;
let node_types_json = node_types::generate_node_types_json(
@ -156,6 +160,7 @@ fn generate_parser_for_grammar_with_opts(
&simple_aliases,
&variable_info,
&inlines,
report_symbol_name,
)?;
let c_code = render_c_code(
name,

View file

@ -39,6 +39,12 @@ fn run() -> error::Result<()> {
.arg(Arg::with_name("grammar-path").index(1))
.arg(Arg::with_name("log").long("log"))
.arg(Arg::with_name("properties-only").long("properties"))
.arg(
Arg::with_name("report-states-for-rule")
.long("report-states-for-rule")
.value_name("rule-name")
.takes_value(true),
)
.arg(Arg::with_name("no-minimize").long("no-minimize")),
)
.subcommand(
@ -121,10 +127,22 @@ fn run() -> error::Result<()> {
} else if let Some(matches) = matches.subcommand_matches("generate") {
let grammar_path = matches.value_of("grammar-path");
let properties_only = matches.is_present("properties-only");
let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| {
if matches.is_present("report-states") {
Some("")
} else {
None
}
});
if matches.is_present("log") {
logger::init();
}
generate::generate_parser_in_directory(&current_dir, grammar_path, properties_only)?;
generate::generate_parser_in_directory(
&current_dir,
grammar_path,
properties_only,
report_symbol_name,
)?;
} else if let Some(matches) = matches.subcommand_matches("test") {
let debug = matches.is_present("debug");
let debug_graph = matches.is_present("debug-graph");