diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 205c8f0c..4f3ffe28 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -5,16 +5,15 @@ use crate::generate::grammars::{ InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType, }; use crate::generate::node_types::VariableInfo; -use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet}; +use crate::generate::rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet}; use crate::generate::tables::{ FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, ProductionInfo, ProductionInfoId, }; -use core::ops::Range; -use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::fmt::Write; use std::u32; +use std::{cmp::Ordering, collections::hash_map::Entry}; // For conflict reporting, each parse state is associated with an example // sequence of symbols that could lead to that parse state. @@ -29,6 +28,14 @@ struct AuxiliarySymbolInfo { parent_symbols: Vec, } +#[derive(Debug, Default)] +struct ReductionInfo { + precedence: Precedence, + has_left_assoc: bool, + has_right_assoc: bool, + has_non_assoc: bool, +} + struct ParseStateQueueEntry { state_id: ParseStateId, preceding_auxiliary_symbols: AuxiliarySymbolSequence, @@ -118,8 +125,6 @@ impl<'a> ParseTableBuilder<'a> { )?; } - self.remove_precedences(); - Ok((self.parse_table, self.parse_state_info_by_id)) } @@ -179,6 +184,7 @@ impl<'a> ParseTableBuilder<'a> { let mut terminal_successors = BTreeMap::new(); let mut non_terminal_successors = BTreeMap::new(); let mut lookaheads_with_conflicts = TokenSet::new(); + let mut reduction_infos = HashMap::::new(); // Each item in the item set contributes to either or a Shift action or a Reduce // action in this state. @@ -217,31 +223,50 @@ impl<'a> ParseTableBuilder<'a> { ParseAction::Reduce { symbol: Symbol::non_terminal(item.variable_index as usize), child_count: item.step_index as usize, - precedence: item.precedence(), - associativity: item.associativity(), dynamic_precedence: item.production.dynamic_precedence, production_id: self.get_production_id(item), } }; + let precedence = item.precedence(); + let associativity = item.associativity(); for lookahead in lookaheads.iter() { - let entry = self.parse_table.states[state_id] + let table_entry = self.parse_table.states[state_id] .terminal_entries - .entry(lookahead); - let entry = entry.or_insert_with(|| ParseTableEntry::new()); + .entry(lookahead) + .or_insert_with(|| ParseTableEntry::new()); + let reduction_info = reduction_infos.entry(lookahead).or_default(); // While inserting Reduce actions, eagerly resolve conflicts related // to precedence: avoid inserting lower-precedence reductions, and // clear the action list when inserting higher-precedence reductions. - if entry.actions.is_empty() { - entry.actions.push(action); - } else if action.precedence() > entry.actions[0].precedence() { - entry.actions.clear(); - entry.actions.push(action); - lookaheads_with_conflicts.remove(&lookahead); - } else if action.precedence() == entry.actions[0].precedence() { - entry.actions.push(action); - lookaheads_with_conflicts.insert(lookahead); + if table_entry.actions.is_empty() { + table_entry.actions.push(action); + } else { + match Self::compare_precedence( + &self.syntax_grammar, + precedence, + &reduction_info.precedence, + ) { + Ordering::Greater => { + table_entry.actions.clear(); + table_entry.actions.push(action); + lookaheads_with_conflicts.remove(&lookahead); + *reduction_info = ReductionInfo::default(); + } + Ordering::Equal => { + table_entry.actions.push(action); + lookaheads_with_conflicts.insert(lookahead); + } + Ordering::Less => continue, + } + } + + reduction_info.precedence = precedence.clone(); + match associativity { + Some(Associativity::Left) => reduction_info.has_left_assoc = true, + Some(Associativity::Right) => reduction_info.has_right_assoc = true, + None => reduction_info.has_non_assoc = true, } } } @@ -302,6 +327,7 @@ impl<'a> ParseTableBuilder<'a> { &preceding_symbols, &preceding_auxiliary_symbols, symbol, + reduction_infos.get(&symbol).unwrap(), )?; } @@ -381,6 +407,7 @@ impl<'a> ParseTableBuilder<'a> { preceding_symbols: &SymbolSequence, preceding_auxiliary_symbols: &Vec, conflicting_lookahead: Symbol, + reduction_info: &ReductionInfo, ) -> Result<()> { let entry = self.parse_table.states[state_id] .terminal_entries @@ -393,9 +420,8 @@ impl<'a> ParseTableBuilder<'a> { // sorted out ahead of time in `add_actions`. But there can still be // REDUCE-REDUCE conflicts where all actions have the *same* // precedence, and there can still be SHIFT/REDUCE conflicts. - let reduce_precedence = entry.actions[0].precedence(); let mut considered_associativity = false; - let mut shift_precedence: Option> = None; + let mut shift_precedence: Vec<&Precedence> = Vec::new(); let mut conflicting_items = HashSet::new(); for (item, lookaheads) in &item_set.entries { if let Some(step) = item.step() { @@ -409,15 +435,9 @@ impl<'a> ParseTableBuilder<'a> { conflicting_items.insert(item); } - let precedence = item.precedence(); - if let Some(range) = &mut shift_precedence { - if precedence < range.start { - range.start = precedence; - } else if precedence > range.end { - range.end = precedence; - } - } else { - shift_precedence = Some(precedence..precedence); + let p = item.precedence(); + if let Err(i) = shift_precedence.binary_search(&p) { + shift_precedence.insert(i, p); } } } @@ -429,8 +449,6 @@ impl<'a> ParseTableBuilder<'a> { } if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() { - let shift_precedence = shift_precedence.unwrap_or(0..0); - // If all of the items in the conflict have the same parent symbol, // and that parent symbols is auxiliary, then this is just the intentional // ambiguity associated with a repeat rule. Resolve that class of ambiguity @@ -448,40 +466,37 @@ impl<'a> ParseTableBuilder<'a> { } // If the SHIFT action has higher precedence, remove all the REDUCE actions. - if shift_precedence.start > reduce_precedence - || (shift_precedence.start == reduce_precedence - && shift_precedence.end > reduce_precedence) - { + let mut shift_is_less = false; + let mut shift_is_more = false; + for p in shift_precedence { + match Self::compare_precedence(&self.syntax_grammar, p, &reduction_info.precedence) + { + Ordering::Greater => shift_is_more = true, + Ordering::Less => shift_is_less = true, + Ordering::Equal => {} + } + } + + if shift_is_more && !shift_is_less { entry.actions.drain(0..entry.actions.len() - 1); } // If the REDUCE actions have higher precedence, remove the SHIFT action. - else if shift_precedence.end < reduce_precedence - || (shift_precedence.end == reduce_precedence - && shift_precedence.start < reduce_precedence) - { + else if shift_is_less && !shift_is_more { entry.actions.pop(); conflicting_items.retain(|item| item.is_done()); } // If the SHIFT and REDUCE actions have the same predence, consider // the REDUCE actions' associativity. - else if shift_precedence == (reduce_precedence..reduce_precedence) { + else if !shift_is_less && !shift_is_more { considered_associativity = true; - let mut has_left = false; - let mut has_right = false; - let mut has_non = false; - for action in &entry.actions { - if let ParseAction::Reduce { associativity, .. } = action { - match associativity { - Some(Associativity::Left) => has_left = true, - Some(Associativity::Right) => has_right = true, - None => has_non = true, - } - } - } // If all Reduce actions are left associative, remove the SHIFT action. // If all Reduce actions are right associative, remove the REDUCE actions. - match (has_left, has_non, has_right) { + match ( + reduction_info.has_left_assoc, + reduction_info.has_non_assoc, + reduction_info.has_right_assoc, + ) { (true, false, false) => { entry.actions.pop(); conflicting_items.retain(|item| item.is_done()); @@ -595,7 +610,7 @@ impl<'a> ParseTableBuilder<'a> { "(precedence: {}, associativity: {:?})", precedence, associativity )) - } else if precedence != 0 { + } else if !precedence.is_none() { Some(format!("(precedence: {})", precedence)) } else { None @@ -714,6 +729,47 @@ impl<'a> ParseTableBuilder<'a> { Err(Error::new(msg)) } + fn compare_precedence( + grammar: &SyntaxGrammar, + left: &Precedence, + right: &Precedence, + ) -> Ordering { + match (left, right) { + // Integer precedences can be compared to other integer precedences, + // and to the default precedence, which is zero. + (Precedence::Integer(l), Precedence::Integer(r)) => l.cmp(r), + (Precedence::Integer(l), Precedence::None) => l.cmp(&0), + (Precedence::None, Precedence::Integer(r)) => 0.cmp(&r), + + // Named precedences can be compared to other named precedences. + (Precedence::Name(l), Precedence::Name(r)) => grammar + .precedence_orderings + .iter() + .find_map(|list| { + let mut saw_left = false; + let mut saw_right = false; + for name in list { + if name == l { + saw_left = true; + if saw_right { + return Some(Ordering::Less); + } + } else if name == r { + saw_right = true; + if saw_left { + return Some(Ordering::Greater); + } + } + } + None + }) + .unwrap_or(Ordering::Equal), + + // Other combinations of precedence types are not comparable. + _ => Ordering::Equal, + } + } + fn get_auxiliary_node_info( &self, item_set: &ParseItemSet, @@ -739,26 +795,6 @@ impl<'a> ParseTableBuilder<'a> { } } - fn remove_precedences(&mut self) { - for state in self.parse_table.states.iter_mut() { - for (_, entry) in state.terminal_entries.iter_mut() { - for action in entry.actions.iter_mut() { - match action { - ParseAction::Reduce { - precedence, - associativity, - .. - } => { - *precedence = 0; - *associativity = None; - } - _ => {} - } - } - } - } - } - fn get_production_id(&mut self, item: &ParseItem) -> ProductionInfoId { let mut production_info = ProductionInfo { alias_sequence: Vec::new(), diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index f0e5d381..e3205cf8 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -1,8 +1,5 @@ -use crate::generate::grammars::{ - LexicalGrammar, Production, ProductionStep, SyntaxGrammar, -}; -use crate::generate::rules::Associativity; -use crate::generate::rules::{Symbol, SymbolType, TokenSet}; +use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}; +use crate::generate::rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet}; use lazy_static::lazy_static; use std::cmp::Ordering; use std::fmt; @@ -17,7 +14,7 @@ lazy_static! { index: 0, kind: SymbolType::NonTerminal, }, - precedence: 0, + precedence: Precedence::None, associativity: None, alias: None, field_name: None, @@ -82,8 +79,9 @@ impl<'a> ParseItem<'a> { self.prev_step().and_then(|step| step.associativity) } - pub fn precedence(&self) -> i32 { - self.prev_step().map_or(0, |step| step.precedence) + pub fn precedence(&self) -> &Precedence { + self.prev_step() + .map_or(&Precedence::None, |step| &step.precedence) } pub fn prev_step(&self) -> Option<&'a ProductionStep> { @@ -165,12 +163,12 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { if i == self.0.step_index as usize { write!(f, " •")?; if let Some(associativity) = step.associativity { - if step.precedence != 0 { + if !step.precedence.is_none() { write!(f, " ({} {:?})", step.precedence, associativity)?; } else { write!(f, " ({:?})", associativity)?; } - } else if step.precedence != 0 { + } else if !step.precedence.is_none() { write!(f, " ({})", step.precedence)?; } } @@ -197,12 +195,12 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { write!(f, " •")?; if let Some(step) = self.0.production.steps.last() { if let Some(associativity) = step.associativity { - if step.precedence != 0 { + if !step.precedence.is_none() { write!(f, " ({} {:?})", step.precedence, associativity)?; } else { write!(f, " ({:?})", associativity)?; } - } else if step.precedence != 0 { + } else if !step.precedence.is_none() { write!(f, " ({})", step.precedence)?; } } @@ -257,7 +255,7 @@ impl<'a> Hash for ParseItem<'a> { hasher.write_u32(self.step_index); hasher.write_i32(self.production.dynamic_precedence); hasher.write_usize(self.production.steps.len()); - hasher.write_i32(self.precedence()); + self.precedence().hash(hasher); self.associativity().hash(hasher); // When comparing two parse items, the symbols that were already consumed by diff --git a/cli/src/generate/build_tables/token_conflicts.rs b/cli/src/generate/build_tables/token_conflicts.rs index 64e7564b..223d3481 100644 --- a/cli/src/generate/build_tables/token_conflicts.rs +++ b/cli/src/generate/build_tables/token_conflicts.rs @@ -380,7 +380,7 @@ mod tests { use super::*; use crate::generate::grammars::{Variable, VariableType}; use crate::generate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar}; - use crate::generate::rules::{Rule, Symbol}; + use crate::generate::rules::{Precedence, Rule, Symbol}; #[test] fn test_starting_characters() { @@ -508,7 +508,7 @@ mod tests { Variable { name: "anything".to_string(), kind: VariableType::Named, - rule: Rule::prec(-1, Rule::pattern(".*")), + rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*")), }, ], }) diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index 62fb1d70..1ecf0c66 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -225,7 +225,8 @@ function grammar(baseGrammar, options) { conflicts: [], externals: [], inline: [], - supertypes: [] + supertypes: [], + precedences: [], }; } @@ -362,11 +363,19 @@ function grammar(baseGrammar, options) { supertypes = supertypeRules.map(symbol => symbol.name); } + let precedences = baseGrammar.precedences; + if (options.precedences) { + if (typeof options.precedences !== "function") { + throw new Error("Grammar's 'precedences' property must be a function"); + } + precedences = options.precedences.call(null, baseGrammar.precedences); + } + if (Object.keys(rules).length == 0) { throw new Error("Grammar must have at least one rule."); } - return {name, word, rules, extras, conflicts, externals, inline, supertypes}; + return {name, word, rules, extras, conflicts, precedences, externals, inline, supertypes}; } function checkArguments(ruleCount, caller, callerName, suffix = '') { diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index 6cf325dd..ea9f6259 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -1,5 +1,5 @@ use super::nfa::Nfa; -use super::rules::{Alias, Associativity, Rule, Symbol}; +use super::rules::{Alias, Associativity, Precedence, Rule, Symbol}; use std::collections::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -25,6 +25,7 @@ pub(crate) struct InputGrammar { pub variables: Vec, pub extra_symbols: Vec, pub expected_conflicts: Vec>, + pub precedence_orderings: Vec>, pub external_tokens: Vec, pub variables_to_inline: Vec, pub supertype_symbols: Vec, @@ -52,7 +53,7 @@ pub(crate) struct LexicalGrammar { #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) struct ProductionStep { pub symbol: Symbol, - pub precedence: i32, + pub precedence: Precedence, pub associativity: Option, pub alias: Option, pub field_name: Option, @@ -93,6 +94,7 @@ pub(crate) struct SyntaxGrammar { pub supertype_symbols: Vec, pub variables_to_inline: Vec, pub word_token: Option, + pub precedence_orderings: Vec>, } #[cfg(test)] @@ -100,14 +102,18 @@ impl ProductionStep { pub(crate) fn new(symbol: Symbol) -> Self { Self { symbol, - precedence: 0, + precedence: Precedence::None, associativity: None, alias: None, field_name: None, } } - pub(crate) fn with_prec(self, precedence: i32, associativity: Option) -> Self { + pub(crate) fn with_prec( + self, + precedence: Precedence, + associativity: Option, + ) -> Self { Self { symbol: self.symbol, precedence, diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index bc5a836f..243f10fe 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -713,12 +713,13 @@ mod tests { fn test_node_types_simple() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "v1".to_string(), @@ -809,11 +810,12 @@ mod tests { let node_types = get_node_types(InputGrammar { name: String::new(), extra_symbols: vec![Rule::named("v3")], - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "v1".to_string(), @@ -914,11 +916,12 @@ mod tests { fn test_node_types_with_supertypes() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], supertype_symbols: vec!["_v2".to_string()], variables: vec![ Variable { @@ -1001,12 +1004,13 @@ mod tests { fn test_node_types_for_children_without_fields() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "v1".to_string(), @@ -1100,11 +1104,12 @@ mod tests { let node_types = get_node_types(InputGrammar { name: String::new(), word_token: None, - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: vec!["v2".to_string()], + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + precedence_orderings: vec![], + variables_to_inline: vec!["v2".to_string()], variables: vec![ Variable { name: "v1".to_string(), @@ -1154,12 +1159,13 @@ mod tests { fn test_node_types_for_aliased_nodes() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "thing".to_string(), @@ -1230,12 +1236,13 @@ mod tests { fn test_node_types_with_multiple_valued_fields() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "a".to_string(), @@ -1298,12 +1305,13 @@ mod tests { fn test_node_types_with_fields_on_hidden_tokens() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![Variable { name: "script".to_string(), kind: VariableType::Named, @@ -1330,12 +1338,13 @@ mod tests { fn test_node_types_with_multiple_rules_same_alias_name() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "script".to_string(), @@ -1456,12 +1465,13 @@ mod tests { fn test_node_types_with_tokens_aliased_to_match_rules() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], variables: vec![ Variable { name: "a".to_string(), diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index c01dbd99..47217a69 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -1,5 +1,5 @@ use super::grammars::{InputGrammar, Variable, VariableType}; -use super::rules::Rule; +use super::rules::{Precedence, Rule}; use crate::error::Result; use serde_derive::Deserialize; use serde_json::{Map, Value}; @@ -44,15 +44,15 @@ enum RuleJSON { content: Box, }, PREC_LEFT { - value: i32, + value: PrecedenceJSON, content: Box, }, PREC_RIGHT { - value: i32, + value: PrecedenceJSON, content: Box, }, PREC { - value: i32, + value: PrecedenceJSON, content: Box, }, TOKEN { @@ -63,15 +63,29 @@ enum RuleJSON { }, } +#[derive(Deserialize)] +#[serde(untagged)] +enum PrecedenceJSON { + Integer(i32), + Name(String), +} + #[derive(Deserialize)] pub(crate) struct GrammarJSON { pub(crate) name: String, rules: Map, - conflicts: Option>>, - externals: Option>, - extras: Option>, - inline: Option>, - supertypes: Option>, + #[serde(default)] + precedences: Vec>, + #[serde(default)] + conflicts: Vec>, + #[serde(default)] + externals: Vec, + #[serde(default)] + extras: Vec, + #[serde(default)] + inline: Vec, + #[serde(default)] + supertypes: Vec, word: Option, } @@ -87,31 +101,19 @@ pub(crate) fn parse_grammar(input: &str) -> Result { }) } - let extra_symbols = grammar_json - .extras - .unwrap_or(Vec::new()) - .into_iter() - .map(parse_rule) - .collect(); - let external_tokens = grammar_json - .externals - .unwrap_or(Vec::new()) - .into_iter() - .map(parse_rule) - .collect(); - let expected_conflicts = grammar_json.conflicts.unwrap_or(Vec::new()); - let variables_to_inline = grammar_json.inline.unwrap_or(Vec::new()); - let supertype_symbols = grammar_json.supertypes.unwrap_or(Vec::new()); + let extra_symbols = grammar_json.extras.into_iter().map(parse_rule).collect(); + let external_tokens = grammar_json.externals.into_iter().map(parse_rule).collect(); Ok(InputGrammar { name: grammar_json.name, word_token: grammar_json.word, + expected_conflicts: grammar_json.conflicts, + supertype_symbols: grammar_json.supertypes, + variables_to_inline: grammar_json.inline, + precedence_orderings: grammar_json.precedences, variables, extra_symbols, - expected_conflicts, external_tokens, - supertype_symbols, - variables_to_inline, }) } @@ -133,9 +135,13 @@ fn parse_rule(json: RuleJSON) -> Rule { RuleJSON::REPEAT { content } => { Rule::choice(vec![Rule::repeat(parse_rule(*content)), Rule::Blank]) } - RuleJSON::PREC { value, content } => Rule::prec(value, parse_rule(*content)), - RuleJSON::PREC_LEFT { value, content } => Rule::prec_left(value, parse_rule(*content)), - RuleJSON::PREC_RIGHT { value, content } => Rule::prec_right(value, parse_rule(*content)), + RuleJSON::PREC { value, content } => Rule::prec(value.into(), parse_rule(*content)), + RuleJSON::PREC_LEFT { value, content } => { + Rule::prec_left(value.into(), parse_rule(*content)) + } + RuleJSON::PREC_RIGHT { value, content } => { + Rule::prec_right(value.into(), parse_rule(*content)) + } RuleJSON::PREC_DYNAMIC { value, content } => { Rule::prec_dynamic(value, parse_rule(*content)) } @@ -144,6 +150,15 @@ fn parse_rule(json: RuleJSON) -> Rule { } } +impl Into for PrecedenceJSON { + fn into(self) -> Precedence { + match self { + PrecedenceJSON::Integer(i) => Precedence::Integer(i), + PrecedenceJSON::Name(i) => Precedence::Name(i), + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/cli/src/generate/prepare_grammar/expand_repeats.rs b/cli/src/generate/prepare_grammar/expand_repeats.rs index 0660f06e..5a678365 100644 --- a/cli/src/generate/prepare_grammar/expand_repeats.rs +++ b/cli/src/generate/prepare_grammar/expand_repeats.rs @@ -285,9 +285,10 @@ mod tests { variables, extra_symbols: Vec::new(), external_tokens: Vec::new(), + supertype_symbols: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), - supertype_symbols: Vec::new(), + precedence_orderings: Vec::new(), word_token: None, } } diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs index 23866551..b4c53a27 100644 --- a/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -1,8 +1,11 @@ use super::ExtractedLexicalGrammar; -use crate::error::{Error, Result}; use crate::generate::grammars::{LexicalGrammar, LexicalVariable}; use crate::generate::nfa::{CharacterSet, Nfa, NfaState}; use crate::generate::rules::Rule; +use crate::{ + error::{Error, Result}, + generate::rules::Precedence, +}; use lazy_static::lazy_static; use regex::Regex; use regex_syntax::ast::{ @@ -46,10 +49,12 @@ fn get_implicit_precedence(rule: &Rule) -> i32 { } fn get_completion_precedence(rule: &Rule) -> i32 { - match rule { - Rule::Metadata { params, .. } => params.precedence.unwrap_or(0), - _ => 0, + if let Rule::Metadata { params, .. } = rule { + if let Precedence::Integer(p) = params.precedence { + return p; + } } + 0 } fn preprocess_regex(content: &str) -> String { @@ -187,11 +192,14 @@ impl NfaBuilder { } } Rule::Metadata { rule, params } => { - if let Some(precedence) = params.precedence { - self.precedence_stack.push(precedence); - } + let has_precedence = if let Precedence::Integer(precedence) = ¶ms.precedence { + self.precedence_stack.push(*precedence); + true + } else { + false + }; let result = self.expand_rule(rule, next_state_id); - if params.precedence.is_some() { + if has_precedence { self.precedence_stack.pop(); } result @@ -626,8 +634,8 @@ mod tests { // shorter tokens with higher precedence Row { rules: vec![ - Rule::prec(2, Rule::pattern("abc")), - Rule::prec(1, Rule::pattern("ab[cd]e")), + Rule::prec(Precedence::Integer(2), Rule::pattern("abc")), + Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e")), Rule::pattern("[a-e]+"), ], separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")], @@ -640,8 +648,11 @@ mod tests { // immediate tokens with higher precedence Row { rules: vec![ - Rule::prec(1, Rule::pattern("[^a]+")), - Rule::immediate_token(Rule::prec(2, Rule::pattern("[^ab]+"))), + Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+")), + Rule::immediate_token(Rule::prec( + Precedence::Integer(2), + Rule::pattern("[^ab]+"), + )), ], separators: vec![Rule::pattern("\\s")], examples: vec![("cccb", Some((1, "ccc")))], diff --git a/cli/src/generate/prepare_grammar/extract_default_aliases.rs b/cli/src/generate/prepare_grammar/extract_default_aliases.rs index 9dabdf60..eb0062d0 100644 --- a/cli/src/generate/prepare_grammar/extract_default_aliases.rs +++ b/cli/src/generate/prepare_grammar/extract_default_aliases.rs @@ -197,11 +197,12 @@ mod tests { }], }, ], - extra_symbols: Vec::new(), - expected_conflicts: Vec::new(), - variables_to_inline: Vec::new(), - supertype_symbols: Vec::new(), - external_tokens: Vec::new(), + extra_symbols: vec![], + external_tokens: vec![], + supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![], word_token: None, }; diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index ae6e7244..ae9e4547 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -155,6 +155,7 @@ pub(super) fn extract_tokens( supertype_symbols, external_tokens, word_token, + precedence_orderings: grammar.precedence_orderings, }, ExtractedLexicalGrammar { variables: lexical_variables, @@ -494,9 +495,10 @@ mod test { variables, extra_symbols: Vec::new(), external_tokens: Vec::new(), + supertype_symbols: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), - supertype_symbols: Vec::new(), + precedence_orderings: Vec::new(), word_token: None, } } diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index f2b43a04..4d50335f 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -3,12 +3,11 @@ use crate::error::{Error, Result}; use crate::generate::grammars::{ Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable, }; -use crate::generate::rules::Symbol; -use crate::generate::rules::{Alias, Associativity, Rule}; +use crate::generate::rules::{Alias, Associativity, Precedence, Rule, Symbol}; struct RuleFlattener { production: Production, - precedence_stack: Vec, + precedence_stack: Vec, associativity_stack: Vec, alias_stack: Vec, field_name_stack: Vec, @@ -45,9 +44,9 @@ impl RuleFlattener { } Rule::Metadata { rule, params } => { let mut has_precedence = false; - if let Some(precedence) = params.precedence { + if !params.precedence.is_none() { has_precedence = true; - self.precedence_stack.push(precedence); + self.precedence_stack.push(params.precedence); } let mut has_associativity = false; @@ -77,8 +76,11 @@ impl RuleFlattener { if has_precedence { self.precedence_stack.pop(); if did_push && !at_end { - self.production.steps.last_mut().unwrap().precedence = - self.precedence_stack.last().cloned().unwrap_or(0); + self.production.steps.last_mut().unwrap().precedence = self + .precedence_stack + .last() + .cloned() + .unwrap_or(Precedence::None); } } @@ -103,7 +105,11 @@ impl RuleFlattener { Rule::Symbol(symbol) => { self.production.steps.push(ProductionStep { symbol, - precedence: self.precedence_stack.last().cloned().unwrap_or(0), + precedence: self + .precedence_stack + .last() + .cloned() + .unwrap_or(Precedence::None), associativity: self.associativity_stack.last().cloned(), alias: self.alias_stack.last().cloned(), field_name: self.field_name_stack.last().cloned(), @@ -202,6 +208,7 @@ unless they are used only as the grammar's start rule. extra_symbols: grammar.extra_symbols, expected_conflicts: grammar.expected_conflicts, variables_to_inline: grammar.variables_to_inline, + precedence_orderings: grammar.precedence_orderings, external_tokens: grammar.external_tokens, supertype_symbols: grammar.supertype_symbols, word_token: grammar.word_token, @@ -223,12 +230,12 @@ mod tests { rule: Rule::seq(vec![ Rule::non_terminal(1), Rule::prec_left( - 101, + Precedence::Integer(101), Rule::seq(vec![ Rule::non_terminal(2), Rule::choice(vec![ Rule::prec_right( - 102, + Precedence::Integer(102), Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]), ), Rule::non_terminal(5), @@ -249,11 +256,11 @@ mod tests { steps: vec![ ProductionStep::new(Symbol::non_terminal(1)), ProductionStep::new(Symbol::non_terminal(2)) - .with_prec(101, Some(Associativity::Left)), + .with_prec(Precedence::Integer(101), Some(Associativity::Left)), ProductionStep::new(Symbol::non_terminal(3)) - .with_prec(102, Some(Associativity::Right)), + .with_prec(Precedence::Integer(102), Some(Associativity::Right)), ProductionStep::new(Symbol::non_terminal(4)) - .with_prec(101, Some(Associativity::Left)), + .with_prec(Precedence::Integer(101), Some(Associativity::Left)), ProductionStep::new(Symbol::non_terminal(6)), ProductionStep::new(Symbol::non_terminal(7)), ] @@ -263,9 +270,9 @@ mod tests { steps: vec![ ProductionStep::new(Symbol::non_terminal(1)), ProductionStep::new(Symbol::non_terminal(2)) - .with_prec(101, Some(Associativity::Left)), + .with_prec(Precedence::Integer(101), Some(Associativity::Left)), ProductionStep::new(Symbol::non_terminal(5)) - .with_prec(101, Some(Associativity::Left)), + .with_prec(Precedence::Integer(101), Some(Associativity::Left)), ProductionStep::new(Symbol::non_terminal(6)), ProductionStep::new(Symbol::non_terminal(7)), ] @@ -334,7 +341,7 @@ mod tests { name: "test".to_string(), kind: VariableType::Named, rule: Rule::prec_left( - 101, + Precedence::Integer(101), Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]), ), }) @@ -346,9 +353,9 @@ mod tests { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::non_terminal(1)) - .with_prec(101, Some(Associativity::Left)), + .with_prec(Precedence::Integer(101), Some(Associativity::Left)), ProductionStep::new(Symbol::non_terminal(2)) - .with_prec(101, Some(Associativity::Left)), + .with_prec(Precedence::Integer(101), Some(Associativity::Left)), ] }] ); @@ -356,7 +363,10 @@ mod tests { let result = flatten_variable(Variable { name: "test".to_string(), kind: VariableType::Named, - rule: Rule::prec_left(101, Rule::seq(vec![Rule::non_terminal(1)])), + rule: Rule::prec_left( + Precedence::Integer(101), + Rule::seq(vec![Rule::non_terminal(1)]), + ), }) .unwrap(); @@ -365,7 +375,7 @@ mod tests { vec![Production { dynamic_precedence: 0, steps: vec![ProductionStep::new(Symbol::non_terminal(1)) - .with_prec(101, Some(Associativity::Left)),] + .with_prec(Precedence::Integer(101), Some(Associativity::Left)),] }] ); } diff --git a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index 276f13ff..ace81b34 100644 --- a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -87,6 +87,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result variables_to_inline, supertype_symbols, word_token, + precedence_orderings: grammar.precedence_orderings.clone(), }) } @@ -244,9 +245,10 @@ mod tests { name: "the_language".to_string(), extra_symbols: Vec::new(), external_tokens: Vec::new(), + supertype_symbols: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), - supertype_symbols: Vec::new(), + precedence_orderings: Vec::new(), word_token: None, } } diff --git a/cli/src/generate/prepare_grammar/mod.rs b/cli/src/generate/prepare_grammar/mod.rs index 8b094c56..92683c41 100644 --- a/cli/src/generate/prepare_grammar/mod.rs +++ b/cli/src/generate/prepare_grammar/mod.rs @@ -6,6 +6,13 @@ mod flatten_grammar; mod intern_symbols; mod process_inlines; +use super::{rules::Precedence, Error}; +use std::{ + cmp::Ordering, + collections::{hash_map, HashMap, HashSet}, + mem, +}; + use self::expand_repeats::expand_repeats; pub(crate) use self::expand_tokens::expand_tokens; use self::extract_default_aliases::extract_default_aliases; @@ -23,6 +30,7 @@ pub(crate) struct IntermediateGrammar { variables: Vec, extra_symbols: Vec, expected_conflicts: Vec>, + precedence_orderings: Vec>, external_tokens: Vec, variables_to_inline: Vec, supertype_symbols: Vec, @@ -39,6 +47,8 @@ pub(crate) struct ExtractedLexicalGrammar { pub separators: Vec, } +/// Transform an input grammar into separate components that are ready +/// for parse table construction. pub(crate) fn prepare_grammar( input_grammar: &InputGrammar, ) -> Result<( @@ -47,6 +57,8 @@ pub(crate) fn prepare_grammar( InlinedProductionMap, AliasMap, )> { + validate_named_precedences(input_grammar)?; + let interned_grammar = intern_symbols(input_grammar)?; let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?; let syntax_grammar = expand_repeats(syntax_grammar); @@ -56,3 +68,163 @@ pub(crate) fn prepare_grammar( let inlines = process_inlines(&syntax_grammar); Ok((syntax_grammar, lexical_grammar, inlines, default_aliases)) } + +/// Check that all of the named precedences used in the grammar are declared +/// within the `precedences` lists, and also that there are no conflicting +/// precedence orderings declared in those lists. +fn validate_named_precedences(grammar: &InputGrammar) -> Result<()> { + // For any two precedence names `a` and `b`, if `a` comes before `b` + // in some list, then it cannot come *after* `b` in any list. + let mut pairs = HashMap::new(); + for list in &grammar.precedence_orderings { + for (i, mut name1) in list.iter().enumerate() { + for mut name2 in list.iter().skip(i + 1) { + if name2 == name1 { + continue; + } + let mut ordering = Ordering::Greater; + if name1 > name2 { + ordering = Ordering::Less; + mem::swap(&mut name1, &mut name2); + } + match pairs.entry((name1, name2)) { + hash_map::Entry::Vacant(e) => { + e.insert(ordering); + } + hash_map::Entry::Occupied(e) => { + if e.get() != &ordering { + return Err(Error::new(format!( + "Conflicting orderings for precedences '{}' and '{}'", + name1, name2 + ))); + } + } + } + } + } + } + + // Check that no rule contains a named precedence that is not present in + // any of the `precedences` lists. + fn validate(rule_name: &str, rule: &Rule, names: &HashSet<&String>) -> Result<()> { + match rule { + Rule::Repeat(rule) => validate(rule_name, rule, names), + Rule::Seq(elements) | Rule::Choice(elements) => elements + .iter() + .map(|e| validate(rule_name, e, names)) + .collect(), + Rule::Metadata { rule, params } => { + if let Precedence::Name(n) = ¶ms.precedence { + if !names.contains(n) { + return Err(Error::new(format!( + "Undeclared precedence '{}' in rule '{}'", + n, rule_name + ))); + } + } + validate(rule_name, rule, names)?; + Ok(()) + } + _ => Ok(()), + } + } + + let precedence_names = grammar + .precedence_orderings + .iter() + .flat_map(|l| l.iter()) + .collect::>(); + for variable in &grammar.variables { + validate(&variable.name, &variable.rule, &precedence_names)?; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::generate::grammars::{InputGrammar, Variable, VariableType}; + + #[test] + fn test_validate_named_precedences_with_undeclared_precedence() { + let grammar = InputGrammar { + name: String::new(), + word_token: None, + extra_symbols: vec![], + external_tokens: vec![], + supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![ + vec!["a".to_string(), "b".to_string()], + vec!["b".to_string(), "c".to_string(), "d".to_string()], + ], + variables: vec![ + Variable { + name: "v1".to_string(), + kind: VariableType::Named, + rule: Rule::Seq(vec![ + Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")), + Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")), + ]), + }, + Variable { + name: "v2".to_string(), + kind: VariableType::Named, + rule: Rule::repeat(Rule::Choice(vec![ + Rule::prec_left(Precedence::Name("omg".to_string()), Rule::string("y")), + Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")), + ])), + }, + ], + }; + + let result = validate_named_precedences(&grammar); + assert_eq!( + result.unwrap_err().message(), + "Undeclared precedence 'omg' in rule 'v2'", + ); + } + + #[test] + fn test_validate_named_precedences_with_conflicting_order() { + let grammar = InputGrammar { + name: String::new(), + word_token: None, + extra_symbols: vec![], + external_tokens: vec![], + supertype_symbols: vec![], + expected_conflicts: vec![], + variables_to_inline: vec![], + precedence_orderings: vec![ + vec!["a".to_string(), "b".to_string()], + vec!["b".to_string(), "c".to_string(), "a".to_string()], + ], + variables: vec![ + Variable { + name: "v1".to_string(), + kind: VariableType::Named, + rule: Rule::Seq(vec![ + Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")), + Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")), + ]), + }, + Variable { + name: "v2".to_string(), + kind: VariableType::Named, + rule: Rule::repeat(Rule::Choice(vec![ + Rule::prec_left(Precedence::Name("a".to_string()), Rule::string("y")), + Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")), + ])), + }, + ], + }; + + let result = validate_named_precedences(&grammar); + assert_eq!( + result.unwrap_err().message(), + "Conflicting orderings for precedences 'a' and 'b'", + ); + } +} diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index f83658b2..510f6b7c 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -120,7 +120,7 @@ impl InlinedProductionMapBuilder { } } if let Some(last_inserted_step) = inserted_steps.last_mut() { - if last_inserted_step.precedence == 0 { + if last_inserted_step.precedence.is_none() { last_inserted_step.precedence = removed_step.precedence; } if last_inserted_step.associativity == None { @@ -193,16 +193,17 @@ pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap { mod tests { use super::*; use crate::generate::grammars::{ProductionStep, SyntaxVariable, VariableType}; - use crate::generate::rules::{Associativity, Symbol}; + use crate::generate::rules::{Associativity, Precedence, Symbol}; #[test] fn test_basic_inlining() { let grammar = SyntaxGrammar { - expected_conflicts: Vec::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - supertype_symbols: Vec::new(), word_token: None, + extra_symbols: vec![], + external_tokens: vec![], + supertype_symbols: vec![], + expected_conflicts: vec![], + precedence_orderings: vec![], variables_to_inline: vec![Symbol::non_terminal(1)], variables: vec![ SyntaxVariable { @@ -329,10 +330,11 @@ mod tests { Symbol::non_terminal(2), Symbol::non_terminal(3), ], - expected_conflicts: Vec::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - supertype_symbols: Vec::new(), + extra_symbols: vec![], + external_tokens: vec![], + supertype_symbols: vec![], + expected_conflicts: vec![], + precedence_orderings: vec![], word_token: None, }; let inline_map = process_inlines(&grammar); @@ -401,7 +403,7 @@ mod tests { steps: vec![ // inlined ProductionStep::new(Symbol::non_terminal(1)) - .with_prec(1, Some(Associativity::Left)), + .with_prec(Precedence::Integer(1), Some(Associativity::Left)), ProductionStep::new(Symbol::terminal(10)), // inlined ProductionStep::new(Symbol::non_terminal(2)) @@ -416,7 +418,7 @@ mod tests { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(11)) - .with_prec(2, None) + .with_prec(Precedence::Integer(2), None) .with_alias("inner_alias", true), ProductionStep::new(Symbol::terminal(12)), ], @@ -431,10 +433,11 @@ mod tests { }], }, ], - expected_conflicts: Vec::new(), - extra_symbols: Vec::new(), - external_tokens: Vec::new(), - supertype_symbols: Vec::new(), + extra_symbols: vec![], + external_tokens: vec![], + supertype_symbols: vec![], + expected_conflicts: vec![], + precedence_orderings: vec![], word_token: None, }; @@ -453,12 +456,12 @@ mod tests { // The first step in the inlined production retains its precedence // and alias. ProductionStep::new(Symbol::terminal(11)) - .with_prec(2, None) + .with_prec(Precedence::Integer(2), None) .with_alias("inner_alias", true), // The final step of the inlined production inherits the precedence of // the inlined step. ProductionStep::new(Symbol::terminal(12)) - .with_prec(1, Some(Associativity::Left)), + .with_prec(Precedence::Integer(1), Some(Associativity::Left)), ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::non_terminal(2)).with_alias("outer_alias", true), ] @@ -475,10 +478,10 @@ mod tests { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(11)) - .with_prec(2, None) + .with_prec(Precedence::Integer(2), None) .with_alias("inner_alias", true), ProductionStep::new(Symbol::terminal(12)) - .with_prec(1, Some(Associativity::Left)), + .with_prec(Precedence::Integer(1), Some(Associativity::Left)), ProductionStep::new(Symbol::terminal(10)), // All steps of the inlined production inherit their alias from the // inlined step. diff --git a/cli/src/generate/rules.rs b/cli/src/generate/rules.rs index e99f0197..6f410207 100644 --- a/cli/src/generate/rules.rs +++ b/cli/src/generate/rules.rs @@ -1,7 +1,7 @@ use super::grammars::VariableType; use smallbitvec::SmallBitVec; -use std::collections::HashMap; use std::iter::FromIterator; +use std::{collections::HashMap, fmt}; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) enum SymbolType { @@ -24,11 +24,18 @@ pub(crate) struct Alias { pub is_named: bool, } +#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum Precedence { + None, + Integer(i32), + Name(String), +} + pub(crate) type AliasMap = HashMap; #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] pub(crate) struct MetadataParams { - pub precedence: Option, + pub precedence: Precedence, pub dynamic_precedence: i32, pub associativity: Option, pub is_token: bool, @@ -99,23 +106,23 @@ impl Rule { }) } - pub fn prec(value: i32, content: Rule) -> Self { + pub fn prec(value: Precedence, content: Rule) -> Self { add_metadata(content, |params| { - params.precedence = Some(value); + params.precedence = value; }) } - pub fn prec_left(value: i32, content: Rule) -> Self { + pub fn prec_left(value: Precedence, content: Rule) -> Self { add_metadata(content, |params| { params.associativity = Some(Associativity::Left); - params.precedence = Some(value); + params.precedence = value; }) } - pub fn prec_right(value: i32, content: Rule) -> Self { + pub fn prec_right(value: Precedence, content: Rule) -> Self { add_metadata(content, |params| { params.associativity = Some(Associativity::Right); - params.precedence = Some(value); + params.precedence = value; }) } @@ -152,6 +159,12 @@ impl Alias { } } +impl Precedence { + pub fn is_none(&self) -> bool { + matches!(self, Precedence::None) + } +} + #[cfg(test)] impl Rule { pub fn terminal(index: usize) -> Self { @@ -437,3 +450,19 @@ fn choice_helper(result: &mut Vec, rule: Rule) { } } } + +impl fmt::Display for Precedence { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Precedence::Integer(i) => write!(f, "{}", i), + Precedence::Name(s) => write!(f, "'{}'", s), + Precedence::None => write!(f, "none"), + } + } +} + +impl Default for Precedence { + fn default() -> Self { + Precedence::None + } +} diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index e74ec977..ccbf8895 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -1,5 +1,5 @@ use super::nfa::CharacterSet; -use super::rules::{Alias, Associativity, Symbol, TokenSet}; +use super::rules::{Alias, Symbol, TokenSet}; use std::collections::{BTreeMap, HashMap}; pub(crate) type ProductionInfoId = usize; pub(crate) type ParseStateId = usize; @@ -17,9 +17,7 @@ pub(crate) enum ParseAction { Reduce { symbol: Symbol, child_count: usize, - precedence: i32, dynamic_precedence: i32, - associativity: Option, production_id: ProductionInfoId, }, } @@ -163,13 +161,3 @@ impl ParseState { } } } - -impl ParseAction { - pub fn precedence(&self) -> i32 { - if let ParseAction::Reduce { precedence, .. } = self { - *precedence - } else { - 0 - } - } -} diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 61206a21..cd6ad061 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -247,6 +247,16 @@ fn test_feature_corpus_files() { failure_count += 1; } } else { + if let Err(e) = &generate_result { + eprintln!( + "Unexpected error for test grammar '{}':\n{}", + language_name, + e.message() + ); + failure_count += 1; + continue; + } + let corpus_path = test_path.join("corpus.txt"); let c_code = generate_result.unwrap().1; let language = get_test_language(language_name, &c_code, Some(&test_path)); diff --git a/test/fixtures/test_grammars/named_precedences/corpus.txt b/test/fixtures/test_grammars/named_precedences/corpus.txt new file mode 100644 index 00000000..bc64e925 --- /dev/null +++ b/test/fixtures/test_grammars/named_precedences/corpus.txt @@ -0,0 +1,12 @@ +============= +Declarations +============= + +A||B c = d; +E.F g = h; + +============= +Expressions +============= + +a || b.c; diff --git a/test/fixtures/test_grammars/named_precedences/grammar.json b/test/fixtures/test_grammars/named_precedences/grammar.json new file mode 100644 index 00000000..ad64ce2c --- /dev/null +++ b/test/fixtures/test_grammars/named_precedences/grammar.json @@ -0,0 +1,167 @@ +{ + "name": "named_precedences", + + "extras": [ + { + "type": "PATTERN", + "value": "\\s+" + } + ], + + "precedences": [ + [ + "member", + "and", + "or" + ], + [ + "type_member", + "type_intersection", + "type_union" + ] + ], + + "conflicts": [ + ["expression", "type"], + ["expression", "nested_type"] + ], + + "rules": { + "program": { + "type": "REPEAT", + "content": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "expression_statement"}, + {"type": "SYMBOL", "name": "declaration_statement"} + ] + } + }, + + "expression_statement": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": ";"} + ] + }, + + "declaration_statement": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "type"}, + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": ";"} + ] + }, + + "expression": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "member_expression"}, + {"type": "SYMBOL", "name": "binary_expression"}, + {"type": "SYMBOL", "name": "identifier"} + ] + }, + + "member_expression": { + "type": "PREC", + "value": "member", + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": "."}, + {"type": "SYMBOL", "name": "identifier"} + ] + } + }, + + "binary_expression": { + "type": "CHOICE", + "members": [ + { + "type": "PREC_LEFT", + "value": "or", + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": "||"}, + {"type": "SYMBOL", "name": "expression"} + ] + } + }, + { + "type": "PREC_LEFT", + "value": "and", + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "expression"}, + {"type": "STRING", "value": "&&"}, + {"type": "SYMBOL", "name": "expression"} + ] + } + } + ] + }, + + "type": { + "type": "CHOICE", + "members": [ + {"type": "SYMBOL", "name": "nested_type"}, + {"type": "SYMBOL", "name": "binary_type"}, + {"type": "SYMBOL", "name": "identifier"} + ] + }, + + "nested_type": { + "type": "PREC", + "value": "type_member", + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "identifier"}, + {"type": "STRING", "value": "."}, + {"type": "SYMBOL", "name": "identifier"} + ] + } + }, + + "binary_type": { + "type": "CHOICE", + "members": [ + { + "type": "PREC_LEFT", + "value": "type_union", + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "type"}, + {"type": "STRING", "value": "||"}, + {"type": "SYMBOL", "name": "type"} + ] + } + }, + { + "type": "PREC_LEFT", + "value": "type_intersection", + "content": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "type"}, + {"type": "STRING", "value": "&&"}, + {"type": "SYMBOL", "name": "type"} + ] + } + } + ] + }, + + "identifier": { + "type": "PATTERN", + "value": "[a-z]\\w+" + } + } +} diff --git a/test/fixtures/test_grammars/named_precedences/readme.txt b/test/fixtures/test_grammars/named_precedences/readme.txt new file mode 100644 index 00000000..d3fdfd4e --- /dev/null +++ b/test/fixtures/test_grammars/named_precedences/readme.txt @@ -0,0 +1,3 @@ +This grammar uses named precedences, which have a partial order specified via the grammar's `precedences` field. Named +precedences allow certain conflicts to be resolved statically without accidentally resolving *other* conflicts, which +are intended to be resolved dynamically.