Fix incorrect merging of states with different inherited fields

Co-authored-by: Douglas Creager <dcreager@dcreager.net>
This commit is contained in:
Max Brunsfeld 2021-03-05 14:49:28 -08:00
parent e20aff9a9c
commit 7300249d20
4 changed files with 77 additions and 22 deletions

View file

@ -100,6 +100,7 @@ impl<'a> ParseTableBuilder<'a> {
variable_index: extra_non_terminal.index as u32,
production,
step_index: 1,
has_preceding_inherited_fields: false,
},
&[Symbol::end_of_nonterminal_extra()]
.iter()
@ -197,16 +198,33 @@ impl<'a> ParseTableBuilder<'a> {
// next symbol. Advance the item to its next step and insert the resulting
// item into the successor item set.
if let Some(next_symbol) = item.symbol() {
let successor = item.successor();
let mut successor = item.successor();
if next_symbol.is_non_terminal() {
let variable = &self.syntax_grammar.variables[next_symbol.index];
// Keep track of where auxiliary non-terminals (repeat symbols) are
// used within visible symbols. This information may be needed later
// for conflict resolution.
if self.syntax_grammar.variables[next_symbol.index].is_auxiliary() {
if variable.is_auxiliary() {
preceding_auxiliary_symbols
.push(self.get_auxiliary_node_info(&item_set, next_symbol));
}
// For most parse items, the symbols associated with the preceding children
// don't matter: they have no effect on the REDUCE action that would be
// performed at the end of the item. But the symbols *do* matter for
// children that are hidden and have fields, because those fields are
// "inherited" by the parent node.
//
// If this item has consumed a hidden child with fields, then the symbols
// of its preceding children need to be taken into account when comparing
// it with other items.
if variable.is_hidden()
&& !self.variable_info[next_symbol.index].fields.is_empty()
{
successor.has_preceding_inherited_fields = true;
}
non_terminal_successors
.entry(next_symbol)
.or_insert_with(|| ParseItemSet::default())

View file

@ -22,18 +22,42 @@ lazy_static! {
};
}
/// A ParseItem represents an in-progress match of a single production in a grammar.
#[derive(Clone, Copy, Debug)]
pub(crate) struct ParseItem<'a> {
/// The index of the parent rule within the grammar.
pub variable_index: u32,
/// The number of symbols that have already been matched.
pub step_index: u32,
/// The production being matched.
pub production: &'a Production,
/// A boolean indicating whether any of the already-matched children were
/// hidden nodes and had fields. Ordinarily, a parse item's behavior is not
/// affected by the symbols of its preceding children; it only needs to
/// keep track of their fields and aliases.
///
/// Take for example these two items:
/// X -> a b • c
/// X -> a g • c
///
/// They can be considered equivalent, for the purposes of parse table
/// generation, because they entail the same actions. But if this flag is
/// true, then the item's set of inherited fields may depend on the specific
/// symbols of its preceding children.
pub has_preceding_inherited_fields: bool,
}
/// A ParseItemSet represents a set of in-progress matches of productions in a
/// grammar, and for each in-progress match, a set of "lookaheads" - tokens that
/// are allowed to *follow* the in-progress rule. This object corresponds directly
/// to a state in the final parse table.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseItemSet<'a> {
pub entries: Vec<(ParseItem<'a>, TokenSet)>,
}
/// A ParseItemSetCore is like a ParseItemSet, but without the lookahead
/// information. Parse states with the same core are candidates for merging.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseItemSetCore<'a> {
pub entries: Vec<ParseItem<'a>>,
@ -64,6 +88,7 @@ impl<'a> ParseItem<'a> {
variable_index: u32::MAX,
production: &START_PRODUCTION,
step_index: 0,
has_preceding_inherited_fields: false,
}
}
@ -100,13 +125,23 @@ impl<'a> ParseItem<'a> {
self.variable_index == u32::MAX
}
/// Create an item like this one, but advanced by one step.
pub fn successor(&self) -> ParseItem<'a> {
ParseItem {
variable_index: self.variable_index,
production: self.production,
step_index: self.step_index + 1,
has_preceding_inherited_fields: self.has_preceding_inherited_fields,
}
}
/// Create an item identical to this one, but with a different production.
/// This is used when dynamically "inlining" certain symbols in a production.
pub fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> {
let mut result = self.clone();
result.production = production;
result
}
}
impl<'a> ParseItemSet<'a> {
@ -258,19 +293,18 @@ impl<'a> Hash for ParseItem<'a> {
self.precedence().hash(hasher);
self.associativity().hash(hasher);
// When comparing two parse items, the symbols that were already consumed by
// both items don't matter. Take for example these two items:
// X -> a b • c
// X -> a d • c
// These two items can be considered equivalent, for the purposes of parse
// table generation, because they entail the same actions. However, if the
// productions have different aliases or field names, they *cannot* be
// treated as equivalent, because those details are ultimately stored as
// attributes of the `REDUCE` action that will be performed when the item
// is finished.
// The already-matched children don't play any role in the parse state for
// this item, unless any of the following are true:
// * the children have fields
// * the children have aliases
// * the children are hidden and
// See the docs for `has_preceding_inherited_fields`.
for step in &self.production.steps[0..self.step_index as usize] {
step.alias.hash(hasher);
step.field_name.hash(hasher);
if self.has_preceding_inherited_fields {
step.symbol.hash(hasher);
}
}
for step in &self.production.steps[self.step_index as usize..] {
step.hash(hasher);
@ -286,6 +320,7 @@ impl<'a> PartialEq for ParseItem<'a> {
|| self.production.steps.len() != other.production.steps.len()
|| self.precedence() != other.precedence()
|| self.associativity() != other.associativity()
|| self.has_preceding_inherited_fields != other.has_preceding_inherited_fields
{
return false;
}
@ -300,6 +335,11 @@ impl<'a> PartialEq for ParseItem<'a> {
if step.field_name != other.production.steps[i].field_name {
return false;
}
if self.has_preceding_inherited_fields
&& step.symbol != other.production.steps[i].symbol
{
return false;
}
} else if *step != other.production.steps[i] {
return false;
}

View file

@ -204,6 +204,7 @@ impl<'a> ParseItemSetBuilder<'a> {
variable_index,
production,
step_index: 0,
has_preceding_inherited_fields: false,
};
if let Some(inlined_productions) =
@ -213,11 +214,7 @@ impl<'a> ParseItemSetBuilder<'a> {
find_or_push(
additions_for_non_terminal,
TransitiveClosureAddition {
item: ParseItem {
variable_index,
production,
step_index: item.step_index,
},
item: item.substitute_production(production),
info: follow_set_info.clone(),
},
);
@ -248,11 +245,7 @@ impl<'a> ParseItemSetBuilder<'a> {
for production in productions {
self.add_item(
&mut result,
ParseItem {
variable_index: item.variable_index,
production,
step_index: item.step_index,
},
item.substitute_production(production),
lookaheads,
);
}

View file

@ -238,6 +238,10 @@ impl SyntaxVariable {
pub fn is_auxiliary(&self) -> bool {
self.kind == VariableType::Auxiliary
}
pub fn is_hidden(&self) -> bool {
self.kind == VariableType::Hidden || self.kind == VariableType::Auxiliary
}
}
impl InlinedProductionMap {