From d40f11837095634243b0a8d15e1e679ef86f4dc3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 23 Feb 2021 20:48:32 -0800 Subject: [PATCH 1/3] Generalize precedence datatype to include strings Right now, the strings are not used in comparisons, but they are passed through the grammar processing pipeline, and are available to the parse table construction algorithm. This also cleans up a confusing aspect of the parse table construction, in which precedences and associativities were temporarily stored in the parse table data structure itself. --- .../build_tables/build_parse_table.rs | 156 +++++++++--------- cli/src/generate/build_tables/item.rs | 24 ++- .../generate/build_tables/token_conflicts.rs | 4 +- cli/src/generate/grammars.rs | 12 +- cli/src/generate/parse_grammar.rs | 34 +++- .../generate/prepare_grammar/expand_tokens.rs | 35 ++-- .../prepare_grammar/flatten_grammar.rs | 49 +++--- .../prepare_grammar/process_inlines.rs | 16 +- cli/src/generate/rules.rs | 53 +++++- cli/src/generate/tables.rs | 14 +- 10 files changed, 235 insertions(+), 162 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 205c8f0c..bfbae9a4 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -5,16 +5,15 @@ use crate::generate::grammars::{ InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType, }; use crate::generate::node_types::VariableInfo; -use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet}; +use crate::generate::rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet}; use crate::generate::tables::{ FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, ProductionInfo, ProductionInfoId, }; -use core::ops::Range; -use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::fmt::Write; use std::u32; +use std::{cmp::Ordering, collections::hash_map::Entry}; // For conflict reporting, each parse state is associated with an example // sequence of symbols that could lead to that parse state. @@ -29,6 +28,14 @@ struct AuxiliarySymbolInfo { parent_symbols: Vec, } +#[derive(Debug, Default)] +struct ReductionInfo { + precedence: Precedence, + has_left_assoc: bool, + has_right_assoc: bool, + has_non_assoc: bool, +} + struct ParseStateQueueEntry { state_id: ParseStateId, preceding_auxiliary_symbols: AuxiliarySymbolSequence, @@ -118,8 +125,6 @@ impl<'a> ParseTableBuilder<'a> { )?; } - self.remove_precedences(); - Ok((self.parse_table, self.parse_state_info_by_id)) } @@ -179,6 +184,7 @@ impl<'a> ParseTableBuilder<'a> { let mut terminal_successors = BTreeMap::new(); let mut non_terminal_successors = BTreeMap::new(); let mut lookaheads_with_conflicts = TokenSet::new(); + let mut reduction_infos = HashMap::::new(); // Each item in the item set contributes to either or a Shift action or a Reduce // action in this state. @@ -217,31 +223,50 @@ impl<'a> ParseTableBuilder<'a> { ParseAction::Reduce { symbol: Symbol::non_terminal(item.variable_index as usize), child_count: item.step_index as usize, - precedence: item.precedence(), - associativity: item.associativity(), dynamic_precedence: item.production.dynamic_precedence, production_id: self.get_production_id(item), } }; + let precedence = item.precedence(); + let associativity = item.associativity(); for lookahead in lookaheads.iter() { - let entry = self.parse_table.states[state_id] + let table_entry = self.parse_table.states[state_id] .terminal_entries - .entry(lookahead); - let entry = entry.or_insert_with(|| ParseTableEntry::new()); + .entry(lookahead) + .or_insert_with(|| ParseTableEntry::new()); + let reduction_info = reduction_infos.entry(lookahead).or_default(); // While inserting Reduce actions, eagerly resolve conflicts related // to precedence: avoid inserting lower-precedence reductions, and // clear the action list when inserting higher-precedence reductions. - if entry.actions.is_empty() { - entry.actions.push(action); - } else if action.precedence() > entry.actions[0].precedence() { - entry.actions.clear(); - entry.actions.push(action); - lookaheads_with_conflicts.remove(&lookahead); - } else if action.precedence() == entry.actions[0].precedence() { - entry.actions.push(action); - lookaheads_with_conflicts.insert(lookahead); + if table_entry.actions.is_empty() { + table_entry.actions.push(action); + } else { + match Self::compare_precedence( + &self.syntax_grammar, + precedence, + &reduction_info.precedence, + ) { + Ordering::Greater => { + table_entry.actions.clear(); + table_entry.actions.push(action); + lookaheads_with_conflicts.remove(&lookahead); + *reduction_info = ReductionInfo::default(); + } + Ordering::Equal => { + table_entry.actions.push(action); + lookaheads_with_conflicts.insert(lookahead); + } + Ordering::Less => continue, + } + } + + reduction_info.precedence = precedence.clone(); + match associativity { + Some(Associativity::Left) => reduction_info.has_left_assoc = true, + Some(Associativity::Right) => reduction_info.has_right_assoc = true, + None => reduction_info.has_non_assoc = true, } } } @@ -302,6 +327,7 @@ impl<'a> ParseTableBuilder<'a> { &preceding_symbols, &preceding_auxiliary_symbols, symbol, + reduction_infos.get(&symbol).unwrap(), )?; } @@ -381,6 +407,7 @@ impl<'a> ParseTableBuilder<'a> { preceding_symbols: &SymbolSequence, preceding_auxiliary_symbols: &Vec, conflicting_lookahead: Symbol, + reduction_info: &ReductionInfo, ) -> Result<()> { let entry = self.parse_table.states[state_id] .terminal_entries @@ -393,9 +420,8 @@ impl<'a> ParseTableBuilder<'a> { // sorted out ahead of time in `add_actions`. But there can still be // REDUCE-REDUCE conflicts where all actions have the *same* // precedence, and there can still be SHIFT/REDUCE conflicts. - let reduce_precedence = entry.actions[0].precedence(); let mut considered_associativity = false; - let mut shift_precedence: Option> = None; + let mut shift_precedence: Vec<&Precedence> = Vec::new(); let mut conflicting_items = HashSet::new(); for (item, lookaheads) in &item_set.entries { if let Some(step) = item.step() { @@ -409,15 +435,9 @@ impl<'a> ParseTableBuilder<'a> { conflicting_items.insert(item); } - let precedence = item.precedence(); - if let Some(range) = &mut shift_precedence { - if precedence < range.start { - range.start = precedence; - } else if precedence > range.end { - range.end = precedence; - } - } else { - shift_precedence = Some(precedence..precedence); + let p = item.precedence(); + if let Err(i) = shift_precedence.binary_search(&p) { + shift_precedence.insert(i, p); } } } @@ -429,8 +449,6 @@ impl<'a> ParseTableBuilder<'a> { } if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() { - let shift_precedence = shift_precedence.unwrap_or(0..0); - // If all of the items in the conflict have the same parent symbol, // and that parent symbols is auxiliary, then this is just the intentional // ambiguity associated with a repeat rule. Resolve that class of ambiguity @@ -448,40 +466,37 @@ impl<'a> ParseTableBuilder<'a> { } // If the SHIFT action has higher precedence, remove all the REDUCE actions. - if shift_precedence.start > reduce_precedence - || (shift_precedence.start == reduce_precedence - && shift_precedence.end > reduce_precedence) - { + let mut shift_is_less = false; + let mut shift_is_more = false; + for p in shift_precedence { + match Self::compare_precedence(&self.syntax_grammar, p, &reduction_info.precedence) + { + Ordering::Greater => shift_is_more = true, + Ordering::Less => shift_is_less = true, + Ordering::Equal => {} + } + } + + if shift_is_more && !shift_is_less { entry.actions.drain(0..entry.actions.len() - 1); } // If the REDUCE actions have higher precedence, remove the SHIFT action. - else if shift_precedence.end < reduce_precedence - || (shift_precedence.end == reduce_precedence - && shift_precedence.start < reduce_precedence) - { + else if shift_is_less && !shift_is_more { entry.actions.pop(); conflicting_items.retain(|item| item.is_done()); } // If the SHIFT and REDUCE actions have the same predence, consider // the REDUCE actions' associativity. - else if shift_precedence == (reduce_precedence..reduce_precedence) { + else if !shift_is_less && !shift_is_more { considered_associativity = true; - let mut has_left = false; - let mut has_right = false; - let mut has_non = false; - for action in &entry.actions { - if let ParseAction::Reduce { associativity, .. } = action { - match associativity { - Some(Associativity::Left) => has_left = true, - Some(Associativity::Right) => has_right = true, - None => has_non = true, - } - } - } // If all Reduce actions are left associative, remove the SHIFT action. // If all Reduce actions are right associative, remove the REDUCE actions. - match (has_left, has_non, has_right) { + match ( + reduction_info.has_left_assoc, + reduction_info.has_non_assoc, + reduction_info.has_right_assoc, + ) { (true, false, false) => { entry.actions.pop(); conflicting_items.retain(|item| item.is_done()); @@ -595,7 +610,7 @@ impl<'a> ParseTableBuilder<'a> { "(precedence: {}, associativity: {:?})", precedence, associativity )) - } else if precedence != 0 { + } else if !precedence.is_none() { Some(format!("(precedence: {})", precedence)) } else { None @@ -714,6 +729,17 @@ impl<'a> ParseTableBuilder<'a> { Err(Error::new(msg)) } + fn compare_precedence( + _grammar: &SyntaxGrammar, + left: &Precedence, + right: &Precedence, + ) -> Ordering { + // TODO - compare named precedence + let left_integer = left.as_integer(); + let right_integer = right.as_integer(); + left_integer.cmp(&right_integer) + } + fn get_auxiliary_node_info( &self, item_set: &ParseItemSet, @@ -739,26 +765,6 @@ impl<'a> ParseTableBuilder<'a> { } } - fn remove_precedences(&mut self) { - for state in self.parse_table.states.iter_mut() { - for (_, entry) in state.terminal_entries.iter_mut() { - for action in entry.actions.iter_mut() { - match action { - ParseAction::Reduce { - precedence, - associativity, - .. - } => { - *precedence = 0; - *associativity = None; - } - _ => {} - } - } - } - } - } - fn get_production_id(&mut self, item: &ParseItem) -> ProductionInfoId { let mut production_info = ProductionInfo { alias_sequence: Vec::new(), diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index f0e5d381..e3205cf8 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -1,8 +1,5 @@ -use crate::generate::grammars::{ - LexicalGrammar, Production, ProductionStep, SyntaxGrammar, -}; -use crate::generate::rules::Associativity; -use crate::generate::rules::{Symbol, SymbolType, TokenSet}; +use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}; +use crate::generate::rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet}; use lazy_static::lazy_static; use std::cmp::Ordering; use std::fmt; @@ -17,7 +14,7 @@ lazy_static! { index: 0, kind: SymbolType::NonTerminal, }, - precedence: 0, + precedence: Precedence::None, associativity: None, alias: None, field_name: None, @@ -82,8 +79,9 @@ impl<'a> ParseItem<'a> { self.prev_step().and_then(|step| step.associativity) } - pub fn precedence(&self) -> i32 { - self.prev_step().map_or(0, |step| step.precedence) + pub fn precedence(&self) -> &Precedence { + self.prev_step() + .map_or(&Precedence::None, |step| &step.precedence) } pub fn prev_step(&self) -> Option<&'a ProductionStep> { @@ -165,12 +163,12 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { if i == self.0.step_index as usize { write!(f, " •")?; if let Some(associativity) = step.associativity { - if step.precedence != 0 { + if !step.precedence.is_none() { write!(f, " ({} {:?})", step.precedence, associativity)?; } else { write!(f, " ({:?})", associativity)?; } - } else if step.precedence != 0 { + } else if !step.precedence.is_none() { write!(f, " ({})", step.precedence)?; } } @@ -197,12 +195,12 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { write!(f, " •")?; if let Some(step) = self.0.production.steps.last() { if let Some(associativity) = step.associativity { - if step.precedence != 0 { + if !step.precedence.is_none() { write!(f, " ({} {:?})", step.precedence, associativity)?; } else { write!(f, " ({:?})", associativity)?; } - } else if step.precedence != 0 { + } else if !step.precedence.is_none() { write!(f, " ({})", step.precedence)?; } } @@ -257,7 +255,7 @@ impl<'a> Hash for ParseItem<'a> { hasher.write_u32(self.step_index); hasher.write_i32(self.production.dynamic_precedence); hasher.write_usize(self.production.steps.len()); - hasher.write_i32(self.precedence()); + self.precedence().hash(hasher); self.associativity().hash(hasher); // When comparing two parse items, the symbols that were already consumed by diff --git a/cli/src/generate/build_tables/token_conflicts.rs b/cli/src/generate/build_tables/token_conflicts.rs index 64e7564b..223d3481 100644 --- a/cli/src/generate/build_tables/token_conflicts.rs +++ b/cli/src/generate/build_tables/token_conflicts.rs @@ -380,7 +380,7 @@ mod tests { use super::*; use crate::generate::grammars::{Variable, VariableType}; use crate::generate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar}; - use crate::generate::rules::{Rule, Symbol}; + use crate::generate::rules::{Precedence, Rule, Symbol}; #[test] fn test_starting_characters() { @@ -508,7 +508,7 @@ mod tests { Variable { name: "anything".to_string(), kind: VariableType::Named, - rule: Rule::prec(-1, Rule::pattern(".*")), + rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*")), }, ], }) diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index 6cf325dd..99f518cb 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -1,5 +1,5 @@ use super::nfa::Nfa; -use super::rules::{Alias, Associativity, Rule, Symbol}; +use super::rules::{Alias, Associativity, Precedence, Rule, Symbol}; use std::collections::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -52,7 +52,7 @@ pub(crate) struct LexicalGrammar { #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) struct ProductionStep { pub symbol: Symbol, - pub precedence: i32, + pub precedence: Precedence, pub associativity: Option, pub alias: Option, pub field_name: Option, @@ -100,14 +100,18 @@ impl ProductionStep { pub(crate) fn new(symbol: Symbol) -> Self { Self { symbol, - precedence: 0, + precedence: Precedence::None, associativity: None, alias: None, field_name: None, } } - pub(crate) fn with_prec(self, precedence: i32, associativity: Option) -> Self { + pub(crate) fn with_prec( + self, + precedence: Precedence, + associativity: Option, + ) -> Self { Self { symbol: self.symbol, precedence, diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index c01dbd99..1e638006 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -1,5 +1,5 @@ use super::grammars::{InputGrammar, Variable, VariableType}; -use super::rules::Rule; +use super::rules::{Precedence, Rule}; use crate::error::Result; use serde_derive::Deserialize; use serde_json::{Map, Value}; @@ -44,15 +44,15 @@ enum RuleJSON { content: Box, }, PREC_LEFT { - value: i32, + value: PrecedenceJSON, content: Box, }, PREC_RIGHT { - value: i32, + value: PrecedenceJSON, content: Box, }, PREC { - value: i32, + value: PrecedenceJSON, content: Box, }, TOKEN { @@ -63,6 +63,13 @@ enum RuleJSON { }, } +#[derive(Deserialize)] +#[serde(untagged)] +enum PrecedenceJSON { + Integer(i32), + Name(String), +} + #[derive(Deserialize)] pub(crate) struct GrammarJSON { pub(crate) name: String, @@ -133,9 +140,13 @@ fn parse_rule(json: RuleJSON) -> Rule { RuleJSON::REPEAT { content } => { Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank]) } - RuleJSON::PREC { value, content } => Rule::prec(value, parse_rule(*content)), - RuleJSON::PREC_LEFT { value, content } => Rule::prec_left(value, parse_rule(*content)), - RuleJSON::PREC_RIGHT { value, content } => Rule::prec_right(value, parse_rule(*content)), + RuleJSON::PREC { value, content } => Rule::prec(value.into(), parse_rule(*content)), + RuleJSON::PREC_LEFT { value, content } => { + Rule::prec_left(value.into(), parse_rule(*content)) + } + RuleJSON::PREC_RIGHT { value, content } => { + Rule::prec_right(value.into(), parse_rule(*content)) + } RuleJSON::PREC_DYNAMIC { value, content } => { Rule::prec_dynamic(value, parse_rule(*content)) } @@ -144,6 +155,15 @@ fn parse_rule(json: RuleJSON) -> Rule { } } +impl Into for PrecedenceJSON { + fn into(self) -> Precedence { + match self { + PrecedenceJSON::Integer(i) => Precedence::Integer(i), + PrecedenceJSON::Name(i) => Precedence::Name(i), + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs index 23866551..b4c53a27 100644 --- a/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -1,8 +1,11 @@ use super::ExtractedLexicalGrammar; -use crate::error::{Error, Result}; use crate::generate::grammars::{LexicalGrammar, LexicalVariable}; use crate::generate::nfa::{CharacterSet, Nfa, NfaState}; use crate::generate::rules::Rule; +use crate::{ + error::{Error, Result}, + generate::rules::Precedence, +}; use lazy_static::lazy_static; use regex::Regex; use regex_syntax::ast::{ @@ -46,10 +49,12 @@ fn get_implicit_precedence(rule: &Rule) -> i32 { } fn get_completion_precedence(rule: &Rule) -> i32 { - match rule { - Rule::Metadata { params, .. } => params.precedence.unwrap_or(0), - _ => 0, + if let Rule::Metadata { params, .. } = rule { + if let Precedence::Integer(p) = params.precedence { + return p; + } } + 0 } fn preprocess_regex(content: &str) -> String { @@ -187,11 +192,14 @@ impl NfaBuilder { } } Rule::Metadata { rule, params } => { - if let Some(precedence) = params.precedence { - self.precedence_stack.push(precedence); - } + let has_precedence = if let Precedence::Integer(precedence) = ¶ms.precedence { + self.precedence_stack.push(*precedence); + true + } else { + false + }; let result = self.expand_rule(rule, next_state_id); - if params.precedence.is_some() { + if has_precedence { self.precedence_stack.pop(); } result @@ -626,8 +634,8 @@ mod tests { // shorter tokens with higher precedence Row { rules: vec![ - Rule::prec(2, Rule::pattern("abc")), - Rule::prec(1, Rule::pattern("ab[cd]e")), + Rule::prec(Precedence::Integer(2), Rule::pattern("abc")), + Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e")), Rule::pattern("[a-e]+"), ], separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")], @@ -640,8 +648,11 @@ mod tests { // immediate tokens with higher precedence Row { rules: vec![ - Rule::prec(1, Rule::pattern("[^a]+")), - Rule::immediate_token(Rule::prec(2, Rule::pattern("[^ab]+"))), + Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+")), + Rule::immediate_token(Rule::prec( + Precedence::Integer(2), + Rule::pattern("[^ab]+"), + )), ], separators: vec![Rule::pattern("\\s")], examples: vec![("cccb", Some((1, "ccc")))], diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index f2b43a04..e7d792ca 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -3,12 +3,11 @@ use crate::error::{Error, Result}; use crate::generate::grammars::{ Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable, }; -use crate::generate::rules::Symbol; -use crate::generate::rules::{Alias, Associativity, Rule}; +use crate::generate::rules::{Alias, Associativity, Precedence, Rule, Symbol}; struct RuleFlattener { production: Production, - precedence_stack: Vec, + precedence_stack: Vec, associativity_stack: Vec, alias_stack: Vec, field_name_stack: Vec, @@ -45,9 +44,9 @@ impl RuleFlattener { } Rule::Metadata { rule, params } => { let mut has_precedence = false; - if let Some(precedence) = params.precedence { + if !params.precedence.is_none() { has_precedence = true; - self.precedence_stack.push(precedence); + self.precedence_stack.push(params.precedence); } let mut has_associativity = false; @@ -77,8 +76,11 @@ impl RuleFlattener { if has_precedence { self.precedence_stack.pop(); if did_push && !at_end { - self.production.steps.last_mut().unwrap().precedence = - self.precedence_stack.last().cloned().unwrap_or(0); + self.production.steps.last_mut().unwrap().precedence = self + .precedence_stack + .last() + .cloned() + .unwrap_or(Precedence::None); } } @@ -103,7 +105,11 @@ impl RuleFlattener { Rule::Symbol(symbol) => { self.production.steps.push(ProductionStep { symbol, - precedence: self.precedence_stack.last().cloned().unwrap_or(0), + precedence: self + .precedence_stack + .last() + .cloned() + .unwrap_or(Precedence::None), associativity: self.associativity_stack.last().cloned(), alias: self.alias_stack.last().cloned(), field_name: self.field_name_stack.last().cloned(), @@ -223,12 +229,12 @@ mod tests { rule: Rule::seq(vec![ Rule::non_terminal(1), Rule::prec_left( - 101, + Precedence::Integer(101), Rule::seq(vec![ Rule::non_terminal(2), Rule::choice(vec![ Rule::prec_right( - 102, + Precedence::Integer(102), Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]), ), Rule::non_terminal(5), @@ -249,11 +255,11 @@ mod tests { steps: vec![ ProductionStep::new(Symbol::non_terminal(1)), ProductionStep::new(Symbol::non_terminal(2)) - .with_prec(101, Some(Associativity::Left)), + .with_prec(Precedence::Integer(101), Some(Associativity::Left)), ProductionStep::new(Symbol::non_terminal(3)) - .with_prec(102, Some(Associativity::Right)), + .with_prec(Precedence::Integer(102), Some(Associativity::Right)), ProductionStep::new(Symbol::non_terminal(4)) - .with_prec(101, Some(Associativity::Left)), + .with_prec(Precedence::Integer(101), Some(Associativity::Left)), ProductionStep::new(Symbol::non_terminal(6)), ProductionStep::new(Symbol::non_terminal(7)), ] @@ -263,9 +269,9 @@ mod tests { steps: vec![ ProductionStep::new(Symbol::non_terminal(1)), ProductionStep::new(Symbol::non_terminal(2)) - .with_prec(101, Some(Associativity::Left)), + .with_prec(Precedence::Integer(101), Some(Associativity::Left)), ProductionStep::new(Symbol::non_terminal(5)) - .with_prec(101, Some(Associativity::Left)), + .with_prec(Precedence::Integer(101), Some(Associativity::Left)), ProductionStep::new(Symbol::non_terminal(6)), ProductionStep::new(Symbol::non_terminal(7)), ] @@ -334,7 +340,7 @@ mod tests { name: "test".to_string(), kind: VariableType::Named, rule: Rule::prec_left( - 101, + Precedence::Integer(101), Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]), ), }) @@ -346,9 +352,9 @@ mod tests { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::non_terminal(1)) - .with_prec(101, Some(Associativity::Left)), + .with_prec(Precedence::Integer(101), Some(Associativity::Left)), ProductionStep::new(Symbol::non_terminal(2)) - .with_prec(101, Some(Associativity::Left)), + .with_prec(Precedence::Integer(101), Some(Associativity::Left)), ] }] ); @@ -356,7 +362,10 @@ mod tests { let result = flatten_variable(Variable { name: "test".to_string(), kind: VariableType::Named, - rule: Rule::prec_left(101, Rule::seq(vec![Rule::non_terminal(1)])), + rule: Rule::prec_left( + Precedence::Integer(101), + Rule::seq(vec![Rule::non_terminal(1)]), + ), }) .unwrap(); @@ -365,7 +374,7 @@ mod tests { vec![Production { dynamic_precedence: 0, steps: vec![ProductionStep::new(Symbol::non_terminal(1)) - .with_prec(101, Some(Associativity::Left)),] + .with_prec(Precedence::Integer(101), Some(Associativity::Left)),] }] ); } diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index f83658b2..91d730e6 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -120,7 +120,7 @@ impl InlinedProductionMapBuilder { } } if let Some(last_inserted_step) = inserted_steps.last_mut() { - if last_inserted_step.precedence == 0 { + if last_inserted_step.precedence.is_none() { last_inserted_step.precedence = removed_step.precedence; } if last_inserted_step.associativity == None { @@ -193,7 +193,7 @@ pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap { mod tests { use super::*; use crate::generate::grammars::{ProductionStep, SyntaxVariable, VariableType}; - use crate::generate::rules::{Associativity, Symbol}; + use crate::generate::rules::{Associativity, Precedence, Symbol}; #[test] fn test_basic_inlining() { @@ -401,7 +401,7 @@ mod tests { steps: vec![ // inlined ProductionStep::new(Symbol::non_terminal(1)) - .with_prec(1, Some(Associativity::Left)), + .with_prec(Precedence::Integer(1), Some(Associativity::Left)), ProductionStep::new(Symbol::terminal(10)), // inlined ProductionStep::new(Symbol::non_terminal(2)) @@ -416,7 +416,7 @@ mod tests { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(11)) - .with_prec(2, None) + .with_prec(Precedence::Integer(2), None) .with_alias("inner_alias", true), ProductionStep::new(Symbol::terminal(12)), ], @@ -453,12 +453,12 @@ mod tests { // The first step in the inlined production retains its precedence // and alias. ProductionStep::new(Symbol::terminal(11)) - .with_prec(2, None) + .with_prec(Precedence::Integer(2), None) .with_alias("inner_alias", true), // The final step of the inlined production inherits the precedence of // the inlined step. ProductionStep::new(Symbol::terminal(12)) - .with_prec(1, Some(Associativity::Left)), + .with_prec(Precedence::Integer(1), Some(Associativity::Left)), ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::non_terminal(2)).with_alias("outer_alias", true), ] @@ -475,10 +475,10 @@ mod tests { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(11)) - .with_prec(2, None) + .with_prec(Precedence::Integer(2), None) .with_alias("inner_alias", true), ProductionStep::new(Symbol::terminal(12)) - .with_prec(1, Some(Associativity::Left)), + .with_prec(Precedence::Integer(1), Some(Associativity::Left)), ProductionStep::new(Symbol::terminal(10)), // All steps of the inlined production inherit their alias from the // inlined step. diff --git a/cli/src/generate/rules.rs b/cli/src/generate/rules.rs index e99f0197..60a1abc7 100644 --- a/cli/src/generate/rules.rs +++ b/cli/src/generate/rules.rs @@ -1,7 +1,7 @@ use super::grammars::VariableType; use smallbitvec::SmallBitVec; -use std::collections::HashMap; use std::iter::FromIterator; +use std::{collections::HashMap, fmt}; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) enum SymbolType { @@ -24,11 +24,18 @@ pub(crate) struct Alias { pub is_named: bool, } +#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum Precedence { + None, + Integer(i32), + Name(String), +} + pub(crate) type AliasMap = HashMap; #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] pub(crate) struct MetadataParams { - pub precedence: Option, + pub precedence: Precedence, pub dynamic_precedence: i32, pub associativity: Option, pub is_token: bool, @@ -99,23 +106,23 @@ impl Rule { }) } - pub fn prec(value: i32, content: Rule) -> Self { + pub fn prec(value: Precedence, content: Rule) -> Self { add_metadata(content, |params| { - params.precedence = Some(value); + params.precedence = value; }) } - pub fn prec_left(value: i32, content: Rule) -> Self { + pub fn prec_left(value: Precedence, content: Rule) -> Self { add_metadata(content, |params| { params.associativity = Some(Associativity::Left); - params.precedence = Some(value); + params.precedence = value; }) } - pub fn prec_right(value: i32, content: Rule) -> Self { + pub fn prec_right(value: Precedence, content: Rule) -> Self { add_metadata(content, |params| { params.associativity = Some(Associativity::Right); - params.precedence = Some(value); + params.precedence = value; }) } @@ -152,6 +159,20 @@ impl Alias { } } +impl Precedence { + pub fn as_integer(&self) -> i32 { + if let Precedence::Integer(i) = self { + *i + } else { + 0 + } + } + + pub fn is_none(&self) -> bool { + matches!(self, Precedence::None) + } +} + #[cfg(test)] impl Rule { pub fn terminal(index: usize) -> Self { @@ -437,3 +458,19 @@ fn choice_helper(result: &mut Vec, rule: Rule) { } } } + +impl fmt::Display for Precedence { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Precedence::Integer(i) => write!(f, "{}", i), + Precedence::Name(s) => write!(f, "'{}'", s), + Precedence::None => write!(f, "none"), + } + } +} + +impl Default for Precedence { + fn default() -> Self { + Precedence::None + } +} diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index e74ec977..ccbf8895 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -1,5 +1,5 @@ use super::nfa::CharacterSet; -use super::rules::{Alias, Associativity, Symbol, TokenSet}; +use super::rules::{Alias, Symbol, TokenSet}; use std::collections::{BTreeMap, HashMap}; pub(crate) type ProductionInfoId = usize; pub(crate) type ParseStateId = usize; @@ -17,9 +17,7 @@ pub(crate) enum ParseAction { Reduce { symbol: Symbol, child_count: usize, - precedence: i32, dynamic_precedence: i32, - associativity: Option, production_id: ProductionInfoId, }, } @@ -163,13 +161,3 @@ impl ParseState { } } } - -impl ParseAction { - pub fn precedence(&self) -> i32 { - if let ParseAction::Reduce { precedence, .. } = self { - *precedence - } else { - 0 - } - } -} From 344797c110e6549380a8c247fdc7c82612bc450e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 24 Feb 2021 16:02:56 -0800 Subject: [PATCH 2/3] Implement named precedence comparison --- .../build_tables/build_parse_table.rs | 40 ++++- cli/src/generate/dsl.js | 13 +- cli/src/generate/grammars.rs | 2 + cli/src/generate/node_types.rs | 88 +++++---- cli/src/generate/parse_grammar.rs | 41 ++--- .../prepare_grammar/expand_repeats.rs | 3 +- .../extract_default_aliases.rs | 11 +- .../prepare_grammar/extract_tokens.rs | 4 +- .../prepare_grammar/flatten_grammar.rs | 1 + .../prepare_grammar/intern_symbols.rs | 4 +- cli/src/generate/prepare_grammar/mod.rs | 47 +++++ .../prepare_grammar/process_inlines.rs | 27 +-- cli/src/generate/rules.rs | 8 - cli/src/tests/corpus_test.rs | 14 +- .../named_precedences/corpus.txt | 12 ++ .../named_precedences/grammar.json | 167 ++++++++++++++++++ .../named_precedences/readme.txt | 3 + 17 files changed, 387 insertions(+), 98 deletions(-) create mode 100644 test/fixtures/test_grammars/named_precedences/corpus.txt create mode 100644 test/fixtures/test_grammars/named_precedences/grammar.json create mode 100644 test/fixtures/test_grammars/named_precedences/readme.txt diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index bfbae9a4..4f3ffe28 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -730,14 +730,44 @@ impl<'a> ParseTableBuilder<'a> { } fn compare_precedence( - _grammar: &SyntaxGrammar, + grammar: &SyntaxGrammar, left: &Precedence, right: &Precedence, ) -> Ordering { - // TODO - compare named precedence - let left_integer = left.as_integer(); - let right_integer = right.as_integer(); - left_integer.cmp(&right_integer) + match (left, right) { + // Integer precedences can be compared to other integer precedences, + // and to the default precedence, which is zero. + (Precedence::Integer(l), Precedence::Integer(r)) => l.cmp(r), + (Precedence::Integer(l), Precedence::None) => l.cmp(&0), + (Precedence::None, Precedence::Integer(r)) => 0.cmp(&r), + + // Named precedences can be compared to other named precedences. + (Precedence::Name(l), Precedence::Name(r)) => grammar + .precedence_orderings + .iter() + .find_map(|list| { + let mut saw_left = false; + let mut saw_right = false; + for name in list { + if name == l { + saw_left = true; + if saw_right { + return Some(Ordering::Less); + } + } else if name == r { + saw_right = true; + if saw_left { + return Some(Ordering::Greater); + } + } + } + None + }) + .unwrap_or(Ordering::Equal), + + // Other combinations of precedence types are not comparable. + _ => Ordering::Equal, + } } fn get_auxiliary_node_info( diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index 62fb1d70..1ecf0c66 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -225,7 +225,8 @@ function grammar(baseGrammar, options) { conflicts: [], externals: [], inline: [], - supertypes: [] + supertypes: [], + precedences: [], }; } @@ -362,11 +363,19 @@ function grammar(baseGrammar, options) { supertypes = supertypeRules.map(symbol => symbol.name); } + let precedences = baseGrammar.precedences; + if (options.precedences) { + if (typeof options.precedences !== "function") { + throw new Error("Grammar's 'precedences' property must be a function"); + } + precedences = options.precedences.call(null, baseGrammar.precedences); + } + if (Object.keys(rules).length == 0) { throw new Error("Grammar must have at least one rule."); } - return {name, word, rules, extras, conflicts, externals, inline, supertypes}; + return {name, word, rules, extras, conflicts, precedences, externals, inline, supertypes}; } function checkArguments(ruleCount, caller, callerName, suffix = '') { diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index 99f518cb..ea9f6259 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -25,6 +25,7 @@ pub(crate) struct InputGrammar { pub variables: Vec, pub extra_symbols: Vec, pub expected_conflicts: Vec>, + pub precedence_orderings: Vec>, pub external_tokens: Vec, pub variables_to_inline: Vec, pub supertype_symbols: Vec, @@ -93,6 +94,7 @@ pub(crate) struct SyntaxGrammar { pub supertype_symbols: Vec, pub variables_to_inline: Vec, pub word_token: Option, + pub precedence_orderings: Vec>, } #[cfg(test)] diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index bc5a836f..243f10fe 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -713,12 +713,13 @@ mod tests { fn test_node_types_simple() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "v1".to_string(), @@ -809,11 +810,12 @@ mod tests { let node_types = get_node_types(InputGrammar { name: String::new(), extra_symbols: vec![Rule::named("v3")], - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "v1".to_string(), @@ -914,11 +916,12 @@ mod tests { fn test_node_types_with_supertypes() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], supertype_symbols: vec!["_v2".to_string()], variables: vec![ Variable { @@ -1001,12 +1004,13 @@ mod tests { fn test_node_types_for_children_without_fields() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "v1".to_string(), @@ -1100,11 +1104,12 @@ mod tests { let node_types = get_node_types(InputGrammar { name: String::new(), word_token: None, - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: vec!["v2".to_string()], + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + precedence_orderings: vec![], + variables_to_inline: vec!["v2".to_string()], variables: vec![ Variable { name: "v1".to_string(), @@ -1154,12 +1159,13 @@ mod tests { fn test_node_types_for_aliased_nodes() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "thing".to_string(), @@ -1230,12 +1236,13 @@ mod tests { fn test_node_types_with_multiple_valued_fields() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "a".to_string(), @@ -1298,12 +1305,13 @@ mod tests { fn test_node_types_with_fields_on_hidden_tokens() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![Variable { name: "script".to_string(), kind: VariableType::Named, @@ -1330,12 +1338,13 @@ mod tests { fn test_node_types_with_multiple_rules_same_alias_name() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "script".to_string(), @@ -1456,12 +1465,13 @@ mod tests { fn test_node_types_with_tokens_aliased_to_match_rules() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "a".to_string(), diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index 1e638006..47217a69 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -74,11 +74,18 @@ enum PrecedenceJSON { pub(crate) struct GrammarJSON { pub(crate) name: String, rules: Map, - conflicts: Option>>, - externals: Option>, - extras: Option>, - inline: Option>, - supertypes: Option>, + #[serde(default)] + precedences: Vec>, + #[serde(default)] + conflicts: Vec>, + #[serde(default)] + externals: Vec, + #[serde(default)] + extras: Vec, + #[serde(default)] + inline: Vec, + #[serde(default)] + supertypes: Vec, word: Option, } @@ -94,31 +101,19 @@ pub(crate) fn parse_grammar(input: &str) -> Result { }) } - let extra_symbols = grammar_json - .extras - .unwrap_or(Vec::new()) - .into_iter() - .map(parse_rule) - .collect(); - let external_tokens = grammar_json - .externals - .unwrap_or(Vec::new()) - .into_iter() - .map(parse_rule) - .collect(); - let expected_conflicts = grammar_json.conflicts.unwrap_or(Vec::new()); - let variables_to_inline = grammar_json.inline.unwrap_or(Vec::new()); - let supertype_symbols = grammar_json.supertypes.unwrap_or(Vec::new()); + let extra_symbols = grammar_json.extras.into_iter().map(parse_rule).collect(); + let external_tokens = grammar_json.externals.into_iter().map(parse_rule).collect(); Ok(InputGrammar { name: grammar_json.name, word_token: grammar_json.word, + expected_conflicts: grammar_json.conflicts, + supertype_symbols: grammar_json.supertypes, + variables_to_inline: grammar_json.inline, + precedence_orderings: grammar_json.precedences, variables, extra_symbols, - expected_conflicts, external_tokens, - supertype_symbols, - variables_to_inline, }) } diff --git a/cli/src/generate/prepare_grammar/expand_repeats.rs b/cli/src/generate/prepare_grammar/expand_repeats.rs index 0660f06e..5a678365 100644 --- a/cli/src/generate/prepare_grammar/expand_repeats.rs +++ b/cli/src/generate/prepare_grammar/expand_repeats.rs @@ -285,9 +285,10 @@ mod tests { variables, extra_symbols: Vec::new(), external_tokens: Vec::new(), + supertype_symbols: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), - supertype_symbols: Vec::new(), + precedence_orderings: Vec::new(), word_token: None, } } diff --git a/cli/src/generate/prepare_grammar/extract_default_aliases.rs b/cli/src/generate/prepare_grammar/extract_default_aliases.rs index 9dabdf60..eb0062d0 100644 --- a/cli/src/generate/prepare_grammar/extract_default_aliases.rs +++ b/cli/src/generate/prepare_grammar/extract_default_aliases.rs @@ -197,11 +197,12 @@ mod tests { }], }, ], - extra_symbols: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), - supertype_symbols: Vec::new(), - external_tokens: Vec::new(), + extra_symbols: vec![], + external_tokens: vec![], + supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], word_token: None, }; diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index ae6e7244..ae9e4547 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -155,6 +155,7 @@ pub(super) fn extract_tokens( supertype_symbols, external_tokens, word_token, + precedence_orderings: grammar.precedence_orderings, }, ExtractedLexicalGrammar { variables: lexical_variables, @@ -494,9 +495,10 @@ mod test { variables, extra_symbols: Vec::new(), external_tokens: Vec::new(), + supertype_symbols: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), - supertype_symbols: Vec::new(), + precedence_orderings: Vec::new(), word_token: None, } } diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index e7d792ca..4d50335f 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -208,6 +208,7 @@ unless they are used only as the grammar's start rule. extra_symbols: grammar.extra_symbols, expected_conflicts: grammar.expected_conflicts, variables_to_inline: grammar.variables_to_inline, + precedence_orderings: grammar.precedence_orderings, external_tokens: grammar.external_tokens, supertype_symbols: grammar.supertype_symbols, word_token: grammar.word_token, diff --git a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index 276f13ff..ace81b34 100644 --- a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -87,6 +87,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result variables_to_inline, supertype_symbols, word_token, + precedence_orderings: grammar.precedence_orderings.clone(), }) } @@ -244,9 +245,10 @@ mod tests { name: "the_language".to_string(), extra_symbols: Vec::new(), external_tokens: Vec::new(), + supertype_symbols: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), - supertype_symbols: Vec::new(), + precedence_orderings: Vec::new(), word_token: None, } } diff --git a/cli/src/generate/prepare_grammar/mod.rs b/cli/src/generate/prepare_grammar/mod.rs index 8b094c56..963727c4 100644 --- a/cli/src/generate/prepare_grammar/mod.rs +++ b/cli/src/generate/prepare_grammar/mod.rs @@ -6,6 +6,13 @@ mod flatten_grammar; mod intern_symbols; mod process_inlines; +use super::Error; +use std::{ + cmp::Ordering, + collections::{hash_map, HashMap}, + mem, +}; + use self::expand_repeats::expand_repeats; pub(crate) use self::expand_tokens::expand_tokens; use self::extract_default_aliases::extract_default_aliases; @@ -23,6 +30,7 @@ pub(crate) struct IntermediateGrammar { variables: Vec, extra_symbols: Vec, expected_conflicts: Vec>, + precedence_orderings: Vec>, external_tokens: Vec, variables_to_inline: Vec, supertype_symbols: Vec, @@ -39,6 +47,8 @@ pub(crate) struct ExtractedLexicalGrammar { pub separators: Vec, } +/// Transform an input grammar into separate components that are ready +/// for parse table construction. pub(crate) fn prepare_grammar( input_grammar: &InputGrammar, ) -> Result<( @@ -47,6 +57,8 @@ pub(crate) fn prepare_grammar( InlinedProductionMap, AliasMap, )> { + validate_precedence_orderings(&input_grammar.precedence_orderings)?; + let interned_grammar = intern_symbols(input_grammar)?; let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?; let syntax_grammar = expand_repeats(syntax_grammar); @@ -56,3 +68,38 @@ pub(crate) fn prepare_grammar( let inlines = process_inlines(&syntax_grammar); Ok((syntax_grammar, lexical_grammar, inlines, default_aliases)) } + +/// Make sure that there are no conflicting orderings. For any two precedence +/// names `a` and `b`, if `a` comes before `b` in some list, then it cannot come +// *after* `b` in any list. +fn validate_precedence_orderings(order_lists: &[Vec]) -> Result<()> { + let mut pairs = HashMap::new(); + for list in order_lists { + for (i, mut name1) in list.iter().enumerate() { + for mut name2 in list.iter().skip(i + 1) { + if name2 == name1 { + continue; + } + let mut ordering = Ordering::Greater; + if name1 > name2 { + ordering = Ordering::Less; + mem::swap(&mut name1, &mut name2); + } + match pairs.entry((name1, name2)) { + hash_map::Entry::Vacant(e) => { + e.insert(ordering); + } + hash_map::Entry::Occupied(e) => { + if e.get() != &ordering { + return Err(Error::new(format!( + "Conflicting orderings for precedences '{}' and '{}'", + name1, name2 + ))); + } + } + } + } + } + } + Ok(()) +} diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index 91d730e6..510f6b7c 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -198,11 +198,12 @@ mod tests { #[test] fn test_basic_inlining() { let grammar = SyntaxGrammar { - expected_conflicts: Vec::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - supertype_symbols: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], + supertype_symbols: vec![], + expected_conflicts: vec![], + precedence_orderings: vec![], variables_to_inline: vec![Symbol::non_terminal(1)], variables: vec![ SyntaxVariable { @@ -329,10 +330,11 @@ mod tests { Symbol::non_terminal(2), Symbol::non_terminal(3), ], - expected_conflicts: Vec::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - supertype_symbols: Vec::new(), + extra_symbols: vec![], + external_tokens: vec![], + supertype_symbols: vec![], + expected_conflicts: vec![], + precedence_orderings: vec![], word_token: None, }; let inline_map = process_inlines(&grammar); @@ -431,10 +433,11 @@ mod tests { }], }, ], - expected_conflicts: Vec::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - supertype_symbols: Vec::new(), + extra_symbols: vec![], + external_tokens: vec![], + supertype_symbols: vec![], + expected_conflicts: vec![], + precedence_orderings: vec![], word_token: None, }; diff --git a/cli/src/generate/rules.rs b/cli/src/generate/rules.rs index 60a1abc7..6f410207 100644 --- a/cli/src/generate/rules.rs +++ b/cli/src/generate/rules.rs @@ -160,14 +160,6 @@ impl Alias { } impl Precedence { - pub fn as_integer(&self) -> i32 { - if let Precedence::Integer(i) = self { - *i - } else { - 0 - } - } - pub fn is_none(&self) -> bool { matches!(self, Precedence::None) } diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 202dcd70..cb64a5cd 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -248,6 +248,16 @@ fn test_feature_corpus_files() { failure_count += 1; } } else { + if let Err(e) = &generate_result { + eprintln!( + "Unexpected error for test grammar '{}':\n{}", + language_name, + e.message() + ); + failure_count += 1; + continue; + } + let corpus_path = test_path.join("corpus.txt"); let c_code = generate_result.unwrap().1; let language = get_test_language(language_name, &c_code, Some(&test_path)); @@ -390,7 +400,9 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec, String, bool)> { } result.push((name, input, output, has_fields)); } - TestEntry::Group { mut name, children, .. } => { + TestEntry::Group { + mut name, children, .. + } => { if !prefix.is_empty() { name.insert_str(0, " - "); name.insert_str(0, prefix); diff --git a/test/fixtures/test_grammars/named_precedences/corpus.txt b/test/fixtures/test_grammars/named_precedences/corpus.txt new file mode 100644 index 00000000..bc64e925 --- /dev/null +++ b/test/fixtures/test_grammars/named_precedences/corpus.txt @@ -0,0 +1,12 @@ +============= +Declarations +============= + +A||B c = d; +E.F g = h; + +============= +Expressions +============= + +a || b.c; diff --git a/test/fixtures/test_grammars/named_precedences/grammar.json b/test/fixtures/test_grammars/named_precedences/grammar.json new file mode 100644 index 00000000..ad64ce2c --- /dev/null +++ b/test/fixtures/test_grammars/named_precedences/grammar.json @@ -0,0 +1,167 @@ +{ + "name": "named_precedences", + + "extras": [ + { + "type": "PATTERN", + "value": "\\s+" + } + ], + + "precedences": [ + [ + "member", + "and", + "or" + ], + [ + "type_member", + "type_intersection", + "type_union" + ] + ], + + "conflicts": [ + ["expression", "type"], + ["expression", "nested_type"] + ], + + "rules": { + "program": { + "type": "REPEAT", + "content": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "expression_statement"}, + {"type": "SYMBOL", "name": "declaration_statement"} + ] + } + }, + + "expression_statement": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": ";"} + ] + }, + + "declaration_statement": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "type"}, + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": ";"} + ] + }, + + "expression": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "member_expression"}, + {"type": "SYMBOL", "name": "binary_expression"}, + {"type": "SYMBOL", "name": "identifier"} + ] + }, + + "member_expression": { + "type": "PREC", + "value": "member", + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": "."}, + {"type": "SYMBOL", "name": "identifier"} + ] + } + }, + + "binary_expression": { + "type": "CHOICE", + "members": [ + { + "type": "PREC_LEFT", + "value": "or", + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": "||"}, + {"type": "SYMBOL", "name": "expression"} + ] + } + }, + { + "type": "PREC_LEFT", + "value": "and", + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": "&&"}, + {"type": "SYMBOL", "name": "expression"} + ] + } + } + ] + }, + + "type": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "nested_type"}, + {"type": "SYMBOL", "name": "binary_type"}, + {"type": "SYMBOL", "name": "identifier"} + ] + }, + + "nested_type": { + "type": "PREC", + "value": "type_member", + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "identifier"}, + {"type": "STRING", "value": "."}, + {"type": "SYMBOL", "name": "identifier"} + ] + } + }, + + "binary_type": { + "type": "CHOICE", + "members": [ + { + "type": "PREC_LEFT", + "value": "type_union", + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "type"}, + {"type": "STRING", "value": "||"}, + {"type": "SYMBOL", "name": "type"} + ] + } + }, + { + "type": "PREC_LEFT", + "value": "type_intersection", + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "type"}, + {"type": "STRING", "value": "&&"}, + {"type": "SYMBOL", "name": "type"} + ] + } + } + ] + }, + + "identifier": { + "type": "PATTERN", + "value": "[a-z]\\w+" + } + } +} diff --git a/test/fixtures/test_grammars/named_precedences/readme.txt b/test/fixtures/test_grammars/named_precedences/readme.txt new file mode 100644 index 00000000..d3fdfd4e --- /dev/null +++ b/test/fixtures/test_grammars/named_precedences/readme.txt @@ -0,0 +1,3 @@ +This grammar uses named precedences, which have a partial order specified via the grammar's `precedences` field. Named +precedences allow certain conflicts to be resolved statically without accidentally resolving *other* conflicts, which +are intended to be resolved dynamically. From d8a235faa1cc1b75c4d00abc61c9fd05c21f6377 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 25 Feb 2021 11:54:21 -0800 Subject: [PATCH 3/3] Add further static validation of named precedences --- cli/src/generate/prepare_grammar/mod.rs | 141 ++++++++++++++++++++++-- 1 file changed, 133 insertions(+), 8 deletions(-) diff --git a/cli/src/generate/prepare_grammar/mod.rs b/cli/src/generate/prepare_grammar/mod.rs index 963727c4..92683c41 100644 --- a/cli/src/generate/prepare_grammar/mod.rs +++ b/cli/src/generate/prepare_grammar/mod.rs @@ -6,10 +6,10 @@ mod flatten_grammar; mod intern_symbols; mod process_inlines; -use super::Error; +use super::{rules::Precedence, Error}; use std::{ cmp::Ordering, - collections::{hash_map, HashMap}, + collections::{hash_map, HashMap, HashSet}, mem, }; @@ -57,7 +57,7 @@ pub(crate) fn prepare_grammar( InlinedProductionMap, AliasMap, )> { - validate_precedence_orderings(&input_grammar.precedence_orderings)?; + validate_named_precedences(input_grammar)?; let interned_grammar = intern_symbols(input_grammar)?; let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?; @@ -69,12 +69,14 @@ pub(crate) fn prepare_grammar( Ok((syntax_grammar, lexical_grammar, inlines, default_aliases)) } -/// Make sure that there are no conflicting orderings. For any two precedence -/// names `a` and `b`, if `a` comes before `b` in some list, then it cannot come -// *after* `b` in any list. -fn validate_precedence_orderings(order_lists: &[Vec]) -> Result<()> { +/// Check that all of the named precedences used in the grammar are declared +/// within the `precedences` lists, and also that there are no conflicting +/// precedence orderings declared in those lists. +fn validate_named_precedences(grammar: &InputGrammar) -> Result<()> { + // For any two precedence names `a` and `b`, if `a` comes before `b` + // in some list, then it cannot come *after* `b` in any list. let mut pairs = HashMap::new(); - for list in order_lists { + for list in &grammar.precedence_orderings { for (i, mut name1) in list.iter().enumerate() { for mut name2 in list.iter().skip(i + 1) { if name2 == name1 { @@ -101,5 +103,128 @@ fn validate_precedence_orderings(order_lists: &[Vec]) -> Result<()> { } } } + + // Check that no rule contains a named precedence that is not present in + // any of the `precedences` lists. + fn validate(rule_name: &str, rule: &Rule, names: &HashSet<&String>) -> Result<()> { + match rule { + Rule::Repeat(rule) => validate(rule_name, rule, names), + Rule::Seq(elements) | Rule::Choice(elements) => elements + .iter() + .map(|e| validate(rule_name, e, names)) + .collect(), + Rule::Metadata { rule, params } => { + if let Precedence::Name(n) = ¶ms.precedence { + if !names.contains(n) { + return Err(Error::new(format!( + "Undeclared precedence '{}' in rule '{}'", + n, rule_name + ))); + } + } + validate(rule_name, rule, names)?; + Ok(()) + } + _ => Ok(()), + } + } + + let precedence_names = grammar + .precedence_orderings + .iter() + .flat_map(|l| l.iter()) + .collect::>(); + for variable in &grammar.variables { + validate(&variable.name, &variable.rule, &precedence_names)?; + } + Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::generate::grammars::{InputGrammar, Variable, VariableType}; + + #[test] + fn test_validate_named_precedences_with_undeclared_precedence() { + let grammar = InputGrammar { + name: String::new(), + word_token: None, + extra_symbols: vec![], + external_tokens: vec![], + supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![ + vec!["a".to_string(), "b".to_string()], + vec!["b".to_string(), "c".to_string(), "d".to_string()], + ], + variables: vec![ + Variable { + name: "v1".to_string(), + kind: VariableType::Named, + rule: Rule::Seq(vec![ + Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")), + Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")), + ]), + }, + Variable { + name: "v2".to_string(), + kind: VariableType::Named, + rule: Rule::repeat(Rule::Choice(vec![ + Rule::prec_left(Precedence::Name("omg".to_string()), Rule::string("y")), + Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")), + ])), + }, + ], + }; + + let result = validate_named_precedences(&grammar); + assert_eq!( + result.unwrap_err().message(), + "Undeclared precedence 'omg' in rule 'v2'", + ); + } + + #[test] + fn test_validate_named_precedences_with_conflicting_order() { + let grammar = InputGrammar { + name: String::new(), + word_token: None, + extra_symbols: vec![], + external_tokens: vec![], + supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![ + vec!["a".to_string(), "b".to_string()], + vec!["b".to_string(), "c".to_string(), "a".to_string()], + ], + variables: vec![ + Variable { + name: "v1".to_string(), + kind: VariableType::Named, + rule: Rule::Seq(vec![ + Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")), + Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")), + ]), + }, + Variable { + name: "v2".to_string(), + kind: VariableType::Named, + rule: Rule::repeat(Rule::Choice(vec![ + Rule::prec_left(Precedence::Name("a".to_string()), Rule::string("y")), + Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")), + ])), + }, + ], + }; + + let result = validate_named_precedences(&grammar); + assert_eq!( + result.unwrap_err().message(), + "Conflicting orderings for precedences 'a' and 'b'", + ); + } +}