From 108ca989ea372464426999ba2aae3f33a706b87d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 23 Jan 2019 15:13:06 -0800 Subject: [PATCH] Start work on including child refs in generated parsers --- .../build_tables/build_parse_table.rs | 29 ++++++----- cli/src/generate/build_tables/item.rs | 1 + .../build_tables/minimize_parse_table.rs | 2 +- cli/src/generate/grammars.rs | 13 +++++ cli/src/generate/parse_grammar.rs | 2 +- .../prepare_grammar/flatten_grammar.rs | 51 +++++++++++++++++++ cli/src/generate/render.rs | 24 ++++----- cli/src/generate/rules.rs | 7 +++ cli/src/generate/tables.rs | 12 +++-- 9 files changed, 111 insertions(+), 30 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 5351f72e..463dca97 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -6,7 +6,7 @@ use crate::generate::grammars::{ }; use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType}; use crate::generate::tables::{ - AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, + ChildInfoSequenceId, ChildInfo, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, }; use core::ops::Range; use hashbrown::hash_map::Entry; @@ -47,7 +47,7 @@ struct ParseTableBuilder<'a> { impl<'a> ParseTableBuilder<'a> { fn build(mut self) -> Result { // Ensure that the empty alias sequence has index 0. - self.parse_table.alias_sequences.push(Vec::new()); + self.parse_table.child_info_sequences.push(Vec::new()); // Add the error state at index 0. self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); @@ -176,7 +176,7 @@ impl<'a> ParseTableBuilder<'a> { precedence: item.precedence(), associativity: item.associativity(), dynamic_precedence: item.production.dynamic_precedence, - alias_sequence_id: self.get_alias_sequence_id(item), + child_info_sequence_id: self.get_child_info_sequence_id(item), } }; @@ -645,29 +645,32 @@ impl<'a> ParseTableBuilder<'a> { } } - fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId { - let mut alias_sequence: Vec> = item + fn get_child_info_sequence_id(&mut self, item: &ParseItem) -> ChildInfoSequenceId { + let mut child_info_sequence: Vec = item .production .steps .iter() - .map(|s| s.alias.clone()) + .map(|s| ChildInfo { + alias: s.alias.clone(), + child_ref: s.child_ref.clone(), + }) .collect(); - while alias_sequence.last() == Some(&None) { - alias_sequence.pop(); + while child_info_sequence.last() == Some(&ChildInfo::default()) { + child_info_sequence.pop(); } if item.production.steps.len() > self.parse_table.max_aliased_production_length { self.parse_table.max_aliased_production_length = item.production.steps.len() } if let Some(index) = self .parse_table - .alias_sequences + .child_info_sequences .iter() - .position(|seq| *seq == alias_sequence) + .position(|seq| *seq == child_info_sequence) { index } else { - self.parse_table.alias_sequences.push(alias_sequence); - self.parse_table.alias_sequences.len() - 1 + self.parse_table.child_info_sequences.push(child_info_sequence); + self.parse_table.child_info_sequences.len() - 1 } } @@ -740,7 +743,7 @@ pub(crate) fn build_parse_table( parse_table: ParseTable { states: Vec::new(), symbols: Vec::new(), - alias_sequences: Vec::new(), + child_info_sequences: Vec::new(), max_aliased_production_length: 0, }, } diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index b450bb75..0fc9c5f8 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -20,6 +20,7 @@ lazy_static! { precedence: 0, associativity: None, alias: None, + child_ref: None, }], }; } diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index 9b012afe..81a153d3 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -59,7 +59,7 @@ impl<'a> Minimizer<'a> { ParseAction::ShiftExtra => continue, ParseAction::Reduce { child_count: 1, - alias_sequence_id: 0, + child_info_sequence_id: 0, symbol, .. } => { diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index c9282da3..f4862449 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -54,6 +54,7 @@ pub(crate) struct ProductionStep { pub precedence: i32, pub associativity: Option, pub alias: Option, + pub child_ref: Option, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -99,6 +100,7 @@ impl ProductionStep { precedence: 0, associativity: None, alias: None, + child_ref: None, } } @@ -108,6 +110,7 @@ impl ProductionStep { precedence, associativity, alias: self.alias, + child_ref: self.child_ref, } } @@ -120,6 +123,16 @@ impl ProductionStep { value: value.to_string(), is_named, }), + child_ref: self.child_ref, + } + } + pub(crate) fn with_child_ref(self, name: &str) -> Self { + Self { + symbol: self.symbol, + precedence: self.precedence, + associativity: self.associativity, + alias: self.alias, + child_ref: Some(name.to_string()), } } } diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index 4f049572..a11140ac 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -124,7 +124,7 @@ fn parse_rule(json: RuleJSON) -> Rule { RuleJSON::PATTERN { value } => Rule::Pattern(value), RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name), RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()), - RuleJSON::REF { content, value } => parse_rule(*content), + RuleJSON::REF { content, value } => Rule::child_ref(value, parse_rule(*content)), RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()), RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)), RuleJSON::REPEAT { content } => { diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index 98276b7e..95071937 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -11,6 +11,7 @@ struct RuleFlattener { precedence_stack: Vec, associativity_stack: Vec, alias_stack: Vec, + child_ref_stack: Vec, } impl RuleFlattener { @@ -23,6 +24,7 @@ impl RuleFlattener { precedence_stack: Vec::new(), associativity_stack: Vec::new(), alias_stack: Vec::new(), + child_ref_stack: Vec::new(), } } @@ -60,6 +62,12 @@ impl RuleFlattener { self.alias_stack.push(alias); } + let mut has_child_ref = false; + if let Some(child_ref) = params.child_ref { + has_child_ref = true; + self.child_ref_stack.push(child_ref); + } + if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() { self.production.dynamic_precedence = params.dynamic_precedence; } @@ -86,6 +94,10 @@ impl RuleFlattener { self.alias_stack.pop(); } + if has_child_ref { + self.child_ref_stack.pop(); + } + did_push } Rule::Symbol(symbol) => { @@ -94,6 +106,7 @@ impl RuleFlattener { precedence: self.precedence_stack.last().cloned().unwrap_or(0), associativity: self.associativity_stack.last().cloned(), alias: self.alias_stack.last().cloned(), + child_ref: self.child_ref_stack.last().cloned(), }); true } @@ -355,4 +368,42 @@ mod tests { }] ); } + + #[test] + fn test_flatten_grammar_with_child_refs() { + let result = flatten_variable(Variable { + name: "test".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::child_ref("first-thing".to_string(), Rule::terminal(1)), + Rule::terminal(2), + Rule::choice(vec![ + Rule::Blank, + Rule::child_ref("second-thing".to_string(), Rule::terminal(3)), + ]), + ]), + }) + .unwrap(); + + assert_eq!( + result.productions, + vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(1)).with_child_ref("first-thing"), + ProductionStep::new(Symbol::terminal(2)) + ] + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(1)).with_child_ref("first-thing"), + ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(3)).with_child_ref("second-thing"), + ] + }, + ] + ); + } } diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 86ed3dc7..05153a0c 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -67,7 +67,7 @@ impl Generator { self.add_symbol_names_list(); self.add_symbol_metadata_list(); - if self.parse_table.alias_sequences.len() > 1 { + if self.parse_table.child_info_sequences.len() > 1 { self.add_alias_sequences(); } @@ -148,9 +148,9 @@ impl Generator { self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); } - for alias_sequence in &self.parse_table.alias_sequences { - for entry in alias_sequence { - if let Some(alias) = entry { + for child_info_sequence in &self.parse_table.child_info_sequences { + for entry in child_info_sequence { + if let Some(alias) = &entry.alias { let alias_kind = if alias.is_named { VariableType::Named } else { @@ -307,14 +307,14 @@ impl Generator { add_line!( self, "static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{", - self.parse_table.alias_sequences.len() + self.parse_table.child_info_sequences.len() ); indent!(self); - for (i, sequence) in self.parse_table.alias_sequences.iter().enumerate().skip(1) { + for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate().skip(1) { add_line!(self, "[{}] = {{", i); indent!(self); - for (j, alias) in sequence.iter().enumerate() { - if let Some(alias) = alias { + for (j, child_info) in sequence.iter().enumerate() { + if let Some(alias) = &child_info.alias { add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]); } } @@ -686,15 +686,15 @@ impl Generator { symbol, child_count, dynamic_precedence, - alias_sequence_id, + child_info_sequence_id, .. } => { add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count); if dynamic_precedence != 0 { add!(self, ", .dynamic_precedence = {}", dynamic_precedence); } - if alias_sequence_id != 0 { - add!(self, ", .alias_sequence_id = {}", alias_sequence_id); + if child_info_sequence_id != 0 { + add!(self, ", .alias_sequence_id = {}", child_info_sequence_id); } add!(self, ")"); } @@ -759,7 +759,7 @@ impl Generator { add_line!(self, ".lex_modes = ts_lex_modes,"); add_line!(self, ".symbol_names = ts_symbol_names,"); - if self.parse_table.alias_sequences.len() > 1 { + if self.parse_table.child_info_sequences.len() > 1 { add_line!( self, ".alias_sequences = (const TSSymbol *)ts_alias_sequences," diff --git a/cli/src/generate/rules.rs b/cli/src/generate/rules.rs index 09a20294..f1939cb1 100644 --- a/cli/src/generate/rules.rs +++ b/cli/src/generate/rules.rs @@ -32,6 +32,7 @@ pub(crate) struct MetadataParams { pub is_active: bool, pub is_main_token: bool, pub alias: Option, + pub child_ref: Option, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -57,6 +58,12 @@ pub(crate) enum Rule { } impl Rule { + pub fn child_ref(name: String, content: Rule) -> Self { + add_metadata(content, move |params| { + params.child_ref = Some(name); + }) + } + pub fn alias(content: Rule, value: String, is_named: bool) -> Self { add_metadata(content, move |params| { params.alias = Some(Alias { is_named, value }); diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index 6c3da68e..f798544b 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -2,7 +2,7 @@ use super::nfa::CharacterSet; use super::rules::{Alias, Associativity, Symbol}; use hashbrown::HashMap; -pub(crate) type AliasSequenceId = usize; +pub(crate) type ChildInfoSequenceId = usize; pub(crate) type ParseStateId = usize; pub(crate) type LexStateId = usize; @@ -21,7 +21,7 @@ pub(crate) enum ParseAction { precedence: i32, dynamic_precedence: i32, associativity: Option, - alias_sequence_id: AliasSequenceId, + child_info_sequence_id: ChildInfoSequenceId, }, } @@ -39,11 +39,17 @@ pub(crate) struct ParseState { pub unfinished_item_signature: u64, } +#[derive(Debug, Default, PartialEq, Eq)] +pub(crate) struct ChildInfo { + pub alias: Option, + pub child_ref: Option, +} + #[derive(Debug, PartialEq, Eq)] pub(crate) struct ParseTable { pub states: Vec, pub symbols: Vec, - pub alias_sequences: Vec>>, + pub child_info_sequences: Vec>, pub max_aliased_production_length: usize, }