Fix incorrect merging of states with different inherited fields
Co-authored-by: Douglas Creager <dcreager@dcreager.net>
This commit is contained in:
parent
e20aff9a9c
commit
7300249d20
4 changed files with 77 additions and 22 deletions
|
|
@ -100,6 +100,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
variable_index: extra_non_terminal.index as u32,
|
||||
production,
|
||||
step_index: 1,
|
||||
has_preceding_inherited_fields: false,
|
||||
},
|
||||
&[Symbol::end_of_nonterminal_extra()]
|
||||
.iter()
|
||||
|
|
@ -197,16 +198,33 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
// next symbol. Advance the item to its next step and insert the resulting
|
||||
// item into the successor item set.
|
||||
if let Some(next_symbol) = item.symbol() {
|
||||
let successor = item.successor();
|
||||
let mut successor = item.successor();
|
||||
if next_symbol.is_non_terminal() {
|
||||
let variable = &self.syntax_grammar.variables[next_symbol.index];
|
||||
|
||||
// Keep track of where auxiliary non-terminals (repeat symbols) are
|
||||
// used within visible symbols. This information may be needed later
|
||||
// for conflict resolution.
|
||||
if self.syntax_grammar.variables[next_symbol.index].is_auxiliary() {
|
||||
if variable.is_auxiliary() {
|
||||
preceding_auxiliary_symbols
|
||||
.push(self.get_auxiliary_node_info(&item_set, next_symbol));
|
||||
}
|
||||
|
||||
// For most parse items, the symbols associated with the preceding children
|
||||
// don't matter: they have no effect on the REDUCE action that would be
|
||||
// performed at the end of the item. But the symbols *do* matter for
|
||||
// children that are hidden and have fields, because those fields are
|
||||
// "inherited" by the parent node.
|
||||
//
|
||||
// If this item has consumed a hidden child with fields, then the symbols
|
||||
// of its preceding children need to be taken into account when comparing
|
||||
// it with other items.
|
||||
if variable.is_hidden()
|
||||
&& !self.variable_info[next_symbol.index].fields.is_empty()
|
||||
{
|
||||
successor.has_preceding_inherited_fields = true;
|
||||
}
|
||||
|
||||
non_terminal_successors
|
||||
.entry(next_symbol)
|
||||
.or_insert_with(|| ParseItemSet::default())
|
||||
|
|
|
|||
|
|
@ -22,18 +22,42 @@ lazy_static! {
|
|||
};
|
||||
}
|
||||
|
||||
/// A ParseItem represents an in-progress match of a single production in a grammar.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(crate) struct ParseItem<'a> {
|
||||
/// The index of the parent rule within the grammar.
|
||||
pub variable_index: u32,
|
||||
/// The number of symbols that have already been matched.
|
||||
pub step_index: u32,
|
||||
/// The production being matched.
|
||||
pub production: &'a Production,
|
||||
/// A boolean indicating whether any of the already-matched children were
|
||||
/// hidden nodes and had fields. Ordinarily, a parse item's behavior is not
|
||||
/// affected by the symbols of its preceding children; it only needs to
|
||||
/// keep track of their fields and aliases.
|
||||
///
|
||||
/// Take for example these two items:
|
||||
/// X -> a b • c
|
||||
/// X -> a g • c
|
||||
///
|
||||
/// They can be considered equivalent, for the purposes of parse table
|
||||
/// generation, because they entail the same actions. But if this flag is
|
||||
/// true, then the item's set of inherited fields may depend on the specific
|
||||
/// symbols of its preceding children.
|
||||
pub has_preceding_inherited_fields: bool,
|
||||
}
|
||||
|
||||
/// A ParseItemSet represents a set of in-progress matches of productions in a
|
||||
/// grammar, and for each in-progress match, a set of "lookaheads" - tokens that
|
||||
/// are allowed to *follow* the in-progress rule. This object corresponds directly
|
||||
/// to a state in the final parse table.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseItemSet<'a> {
|
||||
pub entries: Vec<(ParseItem<'a>, TokenSet)>,
|
||||
}
|
||||
|
||||
/// A ParseItemSetCore is like a ParseItemSet, but without the lookahead
|
||||
/// information. Parse states with the same core are candidates for merging.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseItemSetCore<'a> {
|
||||
pub entries: Vec<ParseItem<'a>>,
|
||||
|
|
@ -64,6 +88,7 @@ impl<'a> ParseItem<'a> {
|
|||
variable_index: u32::MAX,
|
||||
production: &START_PRODUCTION,
|
||||
step_index: 0,
|
||||
has_preceding_inherited_fields: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -100,13 +125,23 @@ impl<'a> ParseItem<'a> {
|
|||
self.variable_index == u32::MAX
|
||||
}
|
||||
|
||||
/// Create an item like this one, but advanced by one step.
|
||||
pub fn successor(&self) -> ParseItem<'a> {
|
||||
ParseItem {
|
||||
variable_index: self.variable_index,
|
||||
production: self.production,
|
||||
step_index: self.step_index + 1,
|
||||
has_preceding_inherited_fields: self.has_preceding_inherited_fields,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an item identical to this one, but with a different production.
|
||||
/// This is used when dynamically "inlining" certain symbols in a production.
|
||||
pub fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> {
|
||||
let mut result = self.clone();
|
||||
result.production = production;
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ParseItemSet<'a> {
|
||||
|
|
@ -258,19 +293,18 @@ impl<'a> Hash for ParseItem<'a> {
|
|||
self.precedence().hash(hasher);
|
||||
self.associativity().hash(hasher);
|
||||
|
||||
// When comparing two parse items, the symbols that were already consumed by
|
||||
// both items don't matter. Take for example these two items:
|
||||
// X -> a b • c
|
||||
// X -> a d • c
|
||||
// These two items can be considered equivalent, for the purposes of parse
|
||||
// table generation, because they entail the same actions. However, if the
|
||||
// productions have different aliases or field names, they *cannot* be
|
||||
// treated as equivalent, because those details are ultimately stored as
|
||||
// attributes of the `REDUCE` action that will be performed when the item
|
||||
// is finished.
|
||||
// The already-matched children don't play any role in the parse state for
|
||||
// this item, unless any of the following are true:
|
||||
// * the children have fields
|
||||
// * the children have aliases
|
||||
// * the children are hidden and
|
||||
// See the docs for `has_preceding_inherited_fields`.
|
||||
for step in &self.production.steps[0..self.step_index as usize] {
|
||||
step.alias.hash(hasher);
|
||||
step.field_name.hash(hasher);
|
||||
if self.has_preceding_inherited_fields {
|
||||
step.symbol.hash(hasher);
|
||||
}
|
||||
}
|
||||
for step in &self.production.steps[self.step_index as usize..] {
|
||||
step.hash(hasher);
|
||||
|
|
@ -286,6 +320,7 @@ impl<'a> PartialEq for ParseItem<'a> {
|
|||
|| self.production.steps.len() != other.production.steps.len()
|
||||
|| self.precedence() != other.precedence()
|
||||
|| self.associativity() != other.associativity()
|
||||
|| self.has_preceding_inherited_fields != other.has_preceding_inherited_fields
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
|
@ -300,6 +335,11 @@ impl<'a> PartialEq for ParseItem<'a> {
|
|||
if step.field_name != other.production.steps[i].field_name {
|
||||
return false;
|
||||
}
|
||||
if self.has_preceding_inherited_fields
|
||||
&& step.symbol != other.production.steps[i].symbol
|
||||
{
|
||||
return false;
|
||||
}
|
||||
} else if *step != other.production.steps[i] {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -204,6 +204,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
variable_index,
|
||||
production,
|
||||
step_index: 0,
|
||||
has_preceding_inherited_fields: false,
|
||||
};
|
||||
|
||||
if let Some(inlined_productions) =
|
||||
|
|
@ -213,11 +214,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
find_or_push(
|
||||
additions_for_non_terminal,
|
||||
TransitiveClosureAddition {
|
||||
item: ParseItem {
|
||||
variable_index,
|
||||
production,
|
||||
step_index: item.step_index,
|
||||
},
|
||||
item: item.substitute_production(production),
|
||||
info: follow_set_info.clone(),
|
||||
},
|
||||
);
|
||||
|
|
@ -248,11 +245,7 @@ impl<'a> ParseItemSetBuilder<'a> {
|
|||
for production in productions {
|
||||
self.add_item(
|
||||
&mut result,
|
||||
ParseItem {
|
||||
variable_index: item.variable_index,
|
||||
production,
|
||||
step_index: item.step_index,
|
||||
},
|
||||
item.substitute_production(production),
|
||||
lookaheads,
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -238,6 +238,10 @@ impl SyntaxVariable {
|
|||
pub fn is_auxiliary(&self) -> bool {
|
||||
self.kind == VariableType::Auxiliary
|
||||
}
|
||||
|
||||
pub fn is_hidden(&self) -> bool {
|
||||
self.kind == VariableType::Hidden || self.kind == VariableType::Auxiliary
|
||||
}
|
||||
}
|
||||
|
||||
impl InlinedProductionMap {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue