From 7300249d20204999b545d8509bdf9e6a98e36070 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 5 Mar 2021 14:49:28 -0800 Subject: [PATCH] Fix incorrect merging of states with different inherited fields Co-authored-by: Douglas Creager --- .../build_tables/build_parse_table.rs | 22 ++++++- cli/src/generate/build_tables/item.rs | 60 +++++++++++++++---- .../generate/build_tables/item_set_builder.rs | 13 +--- cli/src/generate/grammars.rs | 4 ++ 4 files changed, 77 insertions(+), 22 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 3b4c8cd8..2231d5b0 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -100,6 +100,7 @@ impl<'a> ParseTableBuilder<'a> { variable_index: extra_non_terminal.index as u32, production, step_index: 1, + has_preceding_inherited_fields: false, }, &[Symbol::end_of_nonterminal_extra()] .iter() @@ -197,16 +198,33 @@ impl<'a> ParseTableBuilder<'a> { // next symbol. Advance the item to its next step and insert the resulting // item into the successor item set. if let Some(next_symbol) = item.symbol() { - let successor = item.successor(); + let mut successor = item.successor(); if next_symbol.is_non_terminal() { + let variable = &self.syntax_grammar.variables[next_symbol.index]; + // Keep track of where auxiliary non-terminals (repeat symbols) are // used within visible symbols. This information may be needed later // for conflict resolution. - if self.syntax_grammar.variables[next_symbol.index].is_auxiliary() { + if variable.is_auxiliary() { preceding_auxiliary_symbols .push(self.get_auxiliary_node_info(&item_set, next_symbol)); } + // For most parse items, the symbols associated with the preceding children + // don't matter: they have no effect on the REDUCE action that would be + // performed at the end of the item. But the symbols *do* matter for + // children that are hidden and have fields, because those fields are + // "inherited" by the parent node. + // + // If this item has consumed a hidden child with fields, then the symbols + // of its preceding children need to be taken into account when comparing + // it with other items. + if variable.is_hidden() + && !self.variable_info[next_symbol.index].fields.is_empty() + { + successor.has_preceding_inherited_fields = true; + } + non_terminal_successors .entry(next_symbol) .or_insert_with(|| ParseItemSet::default()) diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index e3205cf8..8917f5c5 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -22,18 +22,42 @@ lazy_static! { }; } +/// A ParseItem represents an in-progress match of a single production in a grammar. #[derive(Clone, Copy, Debug)] pub(crate) struct ParseItem<'a> { + /// The index of the parent rule within the grammar. pub variable_index: u32, + /// The number of symbols that have already been matched. pub step_index: u32, + /// The production being matched. pub production: &'a Production, + /// A boolean indicating whether any of the already-matched children were + /// hidden nodes and had fields. Ordinarily, a parse item's behavior is not + /// affected by the symbols of its preceding children; it only needs to + /// keep track of their fields and aliases. + /// + /// Take for example these two items: + /// X -> a b • c + /// X -> a g • c + /// + /// They can be considered equivalent, for the purposes of parse table + /// generation, because they entail the same actions. But if this flag is + /// true, then the item's set of inherited fields may depend on the specific + /// symbols of its preceding children. + pub has_preceding_inherited_fields: bool, } +/// A ParseItemSet represents a set of in-progress matches of productions in a +/// grammar, and for each in-progress match, a set of "lookaheads" - tokens that +/// are allowed to *follow* the in-progress rule. This object corresponds directly +/// to a state in the final parse table. #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct ParseItemSet<'a> { pub entries: Vec<(ParseItem<'a>, TokenSet)>, } +/// A ParseItemSetCore is like a ParseItemSet, but without the lookahead +/// information. Parse states with the same core are candidates for merging. #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct ParseItemSetCore<'a> { pub entries: Vec>, @@ -64,6 +88,7 @@ impl<'a> ParseItem<'a> { variable_index: u32::MAX, production: &START_PRODUCTION, step_index: 0, + has_preceding_inherited_fields: false, } } @@ -100,13 +125,23 @@ impl<'a> ParseItem<'a> { self.variable_index == u32::MAX } + /// Create an item like this one, but advanced by one step. pub fn successor(&self) -> ParseItem<'a> { ParseItem { variable_index: self.variable_index, production: self.production, step_index: self.step_index + 1, + has_preceding_inherited_fields: self.has_preceding_inherited_fields, } } + + /// Create an item identical to this one, but with a different production. + /// This is used when dynamically "inlining" certain symbols in a production. + pub fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> { + let mut result = self.clone(); + result.production = production; + result + } } impl<'a> ParseItemSet<'a> { @@ -258,19 +293,18 @@ impl<'a> Hash for ParseItem<'a> { self.precedence().hash(hasher); self.associativity().hash(hasher); - // When comparing two parse items, the symbols that were already consumed by - // both items don't matter. Take for example these two items: - // X -> a b • c - // X -> a d • c - // These two items can be considered equivalent, for the purposes of parse - // table generation, because they entail the same actions. However, if the - // productions have different aliases or field names, they *cannot* be - // treated as equivalent, because those details are ultimately stored as - // attributes of the `REDUCE` action that will be performed when the item - // is finished. + // The already-matched children don't play any role in the parse state for + // this item, unless any of the following are true: + // * the children have fields + // * the children have aliases + // * the children are hidden and + // See the docs for `has_preceding_inherited_fields`. for step in &self.production.steps[0..self.step_index as usize] { step.alias.hash(hasher); step.field_name.hash(hasher); + if self.has_preceding_inherited_fields { + step.symbol.hash(hasher); + } } for step in &self.production.steps[self.step_index as usize..] { step.hash(hasher); @@ -286,6 +320,7 @@ impl<'a> PartialEq for ParseItem<'a> { || self.production.steps.len() != other.production.steps.len() || self.precedence() != other.precedence() || self.associativity() != other.associativity() + || self.has_preceding_inherited_fields != other.has_preceding_inherited_fields { return false; } @@ -300,6 +335,11 @@ impl<'a> PartialEq for ParseItem<'a> { if step.field_name != other.production.steps[i].field_name { return false; } + if self.has_preceding_inherited_fields + && step.symbol != other.production.steps[i].symbol + { + return false; + } } else if *step != other.production.steps[i] { return false; } diff --git a/cli/src/generate/build_tables/item_set_builder.rs b/cli/src/generate/build_tables/item_set_builder.rs index 29690829..18283576 100644 --- a/cli/src/generate/build_tables/item_set_builder.rs +++ b/cli/src/generate/build_tables/item_set_builder.rs @@ -204,6 +204,7 @@ impl<'a> ParseItemSetBuilder<'a> { variable_index, production, step_index: 0, + has_preceding_inherited_fields: false, }; if let Some(inlined_productions) = @@ -213,11 +214,7 @@ impl<'a> ParseItemSetBuilder<'a> { find_or_push( additions_for_non_terminal, TransitiveClosureAddition { - item: ParseItem { - variable_index, - production, - step_index: item.step_index, - }, + item: item.substitute_production(production), info: follow_set_info.clone(), }, ); @@ -248,11 +245,7 @@ impl<'a> ParseItemSetBuilder<'a> { for production in productions { self.add_item( &mut result, - ParseItem { - variable_index: item.variable_index, - production, - step_index: item.step_index, - }, + item.substitute_production(production), lookaheads, ); } diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index 33a5dcb4..08ce8da8 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -238,6 +238,10 @@ impl SyntaxVariable { pub fn is_auxiliary(&self) -> bool { self.kind == VariableType::Auxiliary } + + pub fn is_hidden(&self) -> bool { + self.kind == VariableType::Hidden || self.kind == VariableType::Auxiliary + } } impl InlinedProductionMap {