Allow symbols to be used in precedence lists

This commit is contained in:
Max Brunsfeld 2021-03-03 13:10:19 -08:00
parent 24d0a6a817
commit dd4cba2625
9 changed files with 200 additions and 97 deletions

View file

@ -1,6 +1,5 @@
use super::item::{ParseItem, ParseItemSet, ParseItemSetCore};
use super::item_set_builder::ParseItemSetBuilder;
use crate::error::{Error, Result};
use crate::generate::grammars::{
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
};
@ -10,6 +9,10 @@ use crate::generate::tables::{
FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
ProductionInfo, ProductionInfoId,
};
use crate::{
error::{Error, Result},
generate::grammars::PrecedenceEntry,
};
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::fmt::Write;
use std::u32;
@ -31,6 +34,7 @@ struct AuxiliarySymbolInfo {
#[derive(Debug, Default)]
struct ReductionInfo {
precedence: Precedence,
symbols: Vec<Symbol>,
has_left_assoc: bool,
has_right_assoc: bool,
has_non_assoc: bool,
@ -217,11 +221,12 @@ impl<'a> ParseTableBuilder<'a> {
// If the item is finished, then add a Reduce action to this state based
// on this item.
else {
let symbol = Symbol::non_terminal(item.variable_index as usize);
let action = if item.is_augmented() {
ParseAction::Accept
} else {
ParseAction::Reduce {
symbol: Symbol::non_terminal(item.variable_index as usize),
symbol,
child_count: item.step_index as usize,
dynamic_precedence: item.production.dynamic_precedence,
production_id: self.get_production_id(item),
@ -246,7 +251,9 @@ impl<'a> ParseTableBuilder<'a> {
match Self::compare_precedence(
&self.syntax_grammar,
precedence,
&[symbol],
&reduction_info.precedence,
&reduction_info.symbols,
) {
Ordering::Greater => {
table_entry.actions.clear();
@ -263,6 +270,9 @@ impl<'a> ParseTableBuilder<'a> {
}
reduction_info.precedence = precedence.clone();
if let Err(i) = reduction_info.symbols.binary_search(&symbol) {
reduction_info.symbols.insert(i, symbol);
}
match associativity {
Some(Associativity::Left) => reduction_info.has_left_assoc = true,
Some(Associativity::Right) => reduction_info.has_right_assoc = true,
@ -421,7 +431,7 @@ impl<'a> ParseTableBuilder<'a> {
// REDUCE-REDUCE conflicts where all actions have the *same*
// precedence, and there can still be SHIFT/REDUCE conflicts.
let mut considered_associativity = false;
let mut shift_precedence: Vec<&Precedence> = Vec::new();
let mut shift_precedence: Vec<(&Precedence, Symbol)> = Vec::new();
let mut conflicting_items = HashSet::new();
for (item, lookaheads) in &item_set.entries {
if let Some(step) = item.step() {
@ -435,7 +445,10 @@ impl<'a> ParseTableBuilder<'a> {
conflicting_items.insert(item);
}
let p = item.precedence();
let p = (
item.precedence(),
Symbol::non_terminal(item.variable_index as usize),
);
if let Err(i) = shift_precedence.binary_search(&p) {
shift_precedence.insert(i, p);
}
@ -469,8 +482,13 @@ impl<'a> ParseTableBuilder<'a> {
let mut shift_is_less = false;
let mut shift_is_more = false;
for p in shift_precedence {
match Self::compare_precedence(&self.syntax_grammar, p, &reduction_info.precedence)
{
match Self::compare_precedence(
&self.syntax_grammar,
p.0,
&[p.1],
&reduction_info.precedence,
&reduction_info.symbols,
) {
Ordering::Greater => shift_is_more = true,
Ordering::Less => shift_is_less = true,
Ordering::Equal => {}
@ -732,29 +750,49 @@ impl<'a> ParseTableBuilder<'a> {
fn compare_precedence(
grammar: &SyntaxGrammar,
left: &Precedence,
left_symbols: &[Symbol],
right: &Precedence,
right_symbols: &[Symbol],
) -> Ordering {
let precedence_entry_matches =
|entry: &PrecedenceEntry, precedence: &Precedence, symbols: &[Symbol]| -> bool {
match entry {
PrecedenceEntry::Name(n) => {
if let Precedence::Name(p) = precedence {
n == p
} else {
false
}
}
PrecedenceEntry::Symbol(n) => symbols
.iter()
.any(|s| &grammar.variables[s.index].name == n),
}
};
match (left, right) {
// Integer precedences can be compared to other integer precedences,
// and to the default precedence, which is zero.
(Precedence::Integer(l), Precedence::Integer(r)) => l.cmp(r),
(Precedence::Integer(l), Precedence::None) => l.cmp(&0),
(Precedence::None, Precedence::Integer(r)) => 0.cmp(&r),
(Precedence::Integer(l), Precedence::Integer(r)) if *l != 0 || *r != 0 => l.cmp(r),
(Precedence::Integer(l), Precedence::None) if *l != 0 => l.cmp(&0),
(Precedence::None, Precedence::Integer(r)) if *r != 0 => 0.cmp(&r),
// Named precedences can be compared to other named precedences.
(Precedence::Name(l), Precedence::Name(r)) => grammar
_ => grammar
.precedence_orderings
.iter()
.find_map(|list| {
let mut saw_left = false;
let mut saw_right = false;
for name in list {
if name == l {
for entry in list {
let matches_left = precedence_entry_matches(entry, left, left_symbols);
let matches_right = precedence_entry_matches(entry, right, right_symbols);
if matches_left {
saw_left = true;
if saw_right {
return Some(Ordering::Less);
}
} else if name == r {
} else if matches_right {
saw_right = true;
if saw_left {
return Some(Ordering::Greater);
@ -764,9 +802,6 @@ impl<'a> ParseTableBuilder<'a> {
None
})
.unwrap_or(Ordering::Equal),
// Other combinations of precedence types are not comparable.
_ => Ordering::Equal,
}
}

View file

@ -368,7 +368,16 @@ function grammar(baseGrammar, options) {
if (typeof options.precedences !== "function") {
throw new Error("Grammar's 'precedences' property must be a function");
}
precedences = options.precedences.call(null, baseGrammar.precedences);
precedences = options.precedences.call(ruleBuilder, ruleBuilder, baseGrammar.precedences);
if (!Array.isArray(precedences)) {
throw new Error("Grammar's precedences must be an array of arrays of rules.");
}
precedences = precedences.map(list => {
if (!Array.isArray(list)) {
throw new Error("Grammar's precedences must be an array of arrays of rules.");
}
return list.map(normalize);
});
}
if (Object.keys(rules).length == 0) {

View file

@ -1,6 +1,7 @@
use super::nfa::Nfa;
use super::rules::{Alias, Associativity, Precedence, Rule, Symbol};
use std::collections::HashMap;
use std::fmt;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum VariableType {
@ -19,13 +20,19 @@ pub(crate) struct Variable {
pub rule: Rule,
}
/// A single entry in one of a grammar's precedence ordering lists.
///
/// Entries are parsed from the grammar JSON `precedences` array: a `STRING`
/// rule becomes `Name`, a `SYMBOL` rule becomes `Symbol`, and any other kind
/// of rule is rejected with an error.
///
/// The derived `Ord` and `Hash` impls are relied upon when validating the
/// orderings: pairs of entries are put into a canonical order and used as
/// hash-map keys to detect conflicting orderings.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum PrecedenceEntry {
// A named precedence; matches a `Precedence::Name` holding the same string.
// Displayed as `'name'`.
Name(String),
// The name of a grammar variable; matches when any of the symbols being
// compared refers to a variable with this name. Displayed as `$.name`.
Symbol(String),
}
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct InputGrammar {
pub name: String,
pub variables: Vec<Variable>,
pub extra_symbols: Vec<Rule>,
pub expected_conflicts: Vec<Vec<String>>,
pub precedence_orderings: Vec<Vec<String>>,
pub precedence_orderings: Vec<Vec<PrecedenceEntry>>,
pub external_tokens: Vec<Rule>,
pub variables_to_inline: Vec<String>,
pub supertype_symbols: Vec<String>,
@ -94,7 +101,7 @@ pub(crate) struct SyntaxGrammar {
pub supertype_symbols: Vec<Symbol>,
pub variables_to_inline: Vec<Symbol>,
pub word_token: Option<Symbol>,
pub precedence_orderings: Vec<Vec<String>>,
pub precedence_orderings: Vec<Vec<PrecedenceEntry>>,
}
#[cfg(test)]
@ -249,3 +256,12 @@ impl InlinedProductionMap {
})
}
}
/// Renders a precedence entry the way it would be written in a grammar:
/// named precedences are wrapped in single quotes, symbols get a `$.` prefix.
impl fmt::Display for PrecedenceEntry {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Pick the decoration for each variant, then emit everything with a
        // single formatting call.
        let (prefix, text, suffix) = match self {
            PrecedenceEntry::Name(name) => ("'", name, "'"),
            PrecedenceEntry::Symbol(symbol) => ("$.", symbol, ""),
        };
        write!(f, "{}{}{}", prefix, text, suffix)
    }
}

View file

@ -1,6 +1,6 @@
use super::grammars::{InputGrammar, Variable, VariableType};
use super::grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType};
use super::rules::{Precedence, Rule};
use crate::error::Result;
use crate::error::{Error, Result};
use serde_derive::Deserialize;
use serde_json::{Map, Value};
@ -44,15 +44,15 @@ enum RuleJSON {
content: Box<RuleJSON>,
},
PREC_LEFT {
value: PrecedenceJSON,
value: PrecedenceValueJSON,
content: Box<RuleJSON>,
},
PREC_RIGHT {
value: PrecedenceJSON,
value: PrecedenceValueJSON,
content: Box<RuleJSON>,
},
PREC {
value: PrecedenceJSON,
value: PrecedenceValueJSON,
content: Box<RuleJSON>,
},
TOKEN {
@ -65,7 +65,7 @@ enum RuleJSON {
#[derive(Deserialize)]
#[serde(untagged)]
enum PrecedenceJSON {
enum PrecedenceValueJSON {
Integer(i32),
Name(String),
}
@ -75,7 +75,7 @@ pub(crate) struct GrammarJSON {
pub(crate) name: String,
rules: Map<String, Value>,
#[serde(default)]
precedences: Vec<Vec<String>>,
precedences: Vec<Vec<RuleJSON>>,
#[serde(default)]
conflicts: Vec<Vec<String>>,
#[serde(default)]
@ -101,6 +101,24 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
})
}
let mut precedence_orderings = Vec::with_capacity(grammar_json.precedences.len());
for list in grammar_json.precedences {
let mut ordering = Vec::with_capacity(list.len());
for entry in list {
ordering.push(match entry {
RuleJSON::STRING { value } => PrecedenceEntry::Name(value),
RuleJSON::SYMBOL { name } => PrecedenceEntry::Symbol(name),
_ => {
return Err(Error::new(
"Invalid rule in precedences array. Only strings and symbols are allowed"
.to_string(),
))
}
})
}
precedence_orderings.push(ordering);
}
let extra_symbols = grammar_json.extras.into_iter().map(parse_rule).collect();
let external_tokens = grammar_json.externals.into_iter().map(parse_rule).collect();
@ -110,7 +128,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
expected_conflicts: grammar_json.conflicts,
supertype_symbols: grammar_json.supertypes,
variables_to_inline: grammar_json.inline,
precedence_orderings: grammar_json.precedences,
precedence_orderings,
variables,
extra_symbols,
external_tokens,
@ -150,11 +168,11 @@ fn parse_rule(json: RuleJSON) -> Rule {
}
}
impl Into<Precedence> for PrecedenceJSON {
impl Into<Precedence> for PrecedenceValueJSON {
fn into(self) -> Precedence {
match self {
PrecedenceJSON::Integer(i) => Precedence::Integer(i),
PrecedenceJSON::Name(i) => Precedence::Name(i),
PrecedenceValueJSON::Integer(i) => Precedence::Integer(i),
PrecedenceValueJSON::Name(i) => Precedence::Name(i),
}
}
}

View file

@ -6,31 +6,31 @@ mod flatten_grammar;
mod intern_symbols;
mod process_inlines;
use super::{rules::Precedence, Error};
pub(crate) use self::expand_tokens::expand_tokens;
use self::expand_repeats::expand_repeats;
use self::extract_default_aliases::extract_default_aliases;
use self::extract_tokens::extract_tokens;
use self::flatten_grammar::flatten_grammar;
use self::intern_symbols::intern_symbols;
use self::process_inlines::process_inlines;
use super::grammars::{
ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry,
SyntaxGrammar, Variable,
};
use super::rules::{AliasMap, Precedence, Rule, Symbol};
use super::{Error, Result};
use std::{
cmp::Ordering,
collections::{hash_map, HashMap, HashSet},
mem,
};
use self::expand_repeats::expand_repeats;
pub(crate) use self::expand_tokens::expand_tokens;
use self::extract_default_aliases::extract_default_aliases;
use self::extract_tokens::extract_tokens;
use self::flatten_grammar::flatten_grammar;
use self::intern_symbols::intern_symbols;
use self::process_inlines::process_inlines;
use crate::error::Result;
use crate::generate::grammars::{
ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, SyntaxGrammar, Variable,
};
use crate::generate::rules::{AliasMap, Rule, Symbol};
pub(crate) struct IntermediateGrammar<T, U> {
variables: Vec<Variable>,
extra_symbols: Vec<T>,
expected_conflicts: Vec<Vec<Symbol>>,
precedence_orderings: Vec<Vec<String>>,
precedence_orderings: Vec<Vec<PrecedenceEntry>>,
external_tokens: Vec<U>,
variables_to_inline: Vec<Symbol>,
supertype_symbols: Vec<Symbol>,
@ -57,7 +57,7 @@ pub(crate) fn prepare_grammar(
InlinedProductionMap,
AliasMap,
)> {
validate_named_precedences(input_grammar)?;
validate_precedences(input_grammar)?;
let interned_grammar = intern_symbols(input_grammar)?;
let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
@ -72,30 +72,30 @@ pub(crate) fn prepare_grammar(
/// Check that all of the named precedences used in the grammar are declared
/// within the `precedences` lists, and also that there are no conflicting
/// precedence orderings declared in those lists.
fn validate_named_precedences(grammar: &InputGrammar) -> Result<()> {
fn validate_precedences(grammar: &InputGrammar) -> Result<()> {
// For any two precedence names `a` and `b`, if `a` comes before `b`
// in some list, then it cannot come *after* `b` in any list.
let mut pairs = HashMap::new();
for list in &grammar.precedence_orderings {
for (i, mut name1) in list.iter().enumerate() {
for mut name2 in list.iter().skip(i + 1) {
if name2 == name1 {
for (i, mut entry1) in list.iter().enumerate() {
for mut entry2 in list.iter().skip(i + 1) {
if entry2 == entry1 {
continue;
}
let mut ordering = Ordering::Greater;
if name1 > name2 {
if entry1 > entry2 {
ordering = Ordering::Less;
mem::swap(&mut name1, &mut name2);
mem::swap(&mut entry1, &mut entry2);
}
match pairs.entry((name1, name2)) {
match pairs.entry((entry1, entry2)) {
hash_map::Entry::Vacant(e) => {
e.insert(ordering);
}
hash_map::Entry::Occupied(e) => {
if e.get() != &ordering {
return Err(Error::new(format!(
"Conflicting orderings for precedences '{}' and '{}'",
name1, name2
"Conflicting orderings for precedences {} and {}",
entry1, entry2
)));
}
}
@ -133,6 +133,13 @@ fn validate_named_precedences(grammar: &InputGrammar) -> Result<()> {
.precedence_orderings
.iter()
.flat_map(|l| l.iter())
.filter_map(|p| {
if let PrecedenceEntry::Name(n) = p {
Some(n)
} else {
None
}
})
.collect::<HashSet<&String>>();
for variable in &grammar.variables {
validate(&variable.name, &variable.rule, &precedence_names)?;
@ -147,7 +154,7 @@ mod tests {
use crate::generate::grammars::{InputGrammar, Variable, VariableType};
#[test]
fn test_validate_named_precedences_with_undeclared_precedence() {
fn test_validate_precedences_with_undeclared_precedence() {
let grammar = InputGrammar {
name: String::new(),
word_token: None,
@ -157,8 +164,15 @@ mod tests {
expected_conflicts: vec![],
variables_to_inline: vec![],
precedence_orderings: vec![
vec!["a".to_string(), "b".to_string()],
vec!["b".to_string(), "c".to_string(), "d".to_string()],
vec![
PrecedenceEntry::Name("a".to_string()),
PrecedenceEntry::Name("b".to_string()),
],
vec![
PrecedenceEntry::Name("b".to_string()),
PrecedenceEntry::Name("c".to_string()),
PrecedenceEntry::Name("d".to_string()),
],
],
variables: vec![
Variable {
@ -180,7 +194,7 @@ mod tests {
],
};
let result = validate_named_precedences(&grammar);
let result = validate_precedences(&grammar);
assert_eq!(
result.unwrap_err().message(),
"Undeclared precedence 'omg' in rule 'v2'",
@ -188,7 +202,7 @@ mod tests {
}
#[test]
fn test_validate_named_precedences_with_conflicting_order() {
fn test_validate_precedences_with_conflicting_order() {
let grammar = InputGrammar {
name: String::new(),
word_token: None,
@ -198,8 +212,15 @@ mod tests {
expected_conflicts: vec![],
variables_to_inline: vec![],
precedence_orderings: vec![
vec!["a".to_string(), "b".to_string()],
vec!["b".to_string(), "c".to_string(), "a".to_string()],
vec![
PrecedenceEntry::Name("a".to_string()),
PrecedenceEntry::Name("b".to_string()),
],
vec![
PrecedenceEntry::Name("b".to_string()),
PrecedenceEntry::Name("c".to_string()),
PrecedenceEntry::Name("a".to_string()),
],
],
variables: vec![
Variable {
@ -221,7 +242,7 @@ mod tests {
],
};
let result = validate_named_precedences(&grammar);
let result = validate_precedences(&grammar);
assert_eq!(
result.unwrap_err().message(),
"Conflicting orderings for precedences 'a' and 'b'",

View file

@ -339,23 +339,35 @@ impl TokenSet {
vec.set(other.index, true);
}
pub fn remove(&mut self, other: &Symbol) {
pub fn remove(&mut self, other: &Symbol) -> bool {
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
self.eof = false;
return;
return if self.eof {
self.eof = false;
true
} else {
false
}
}
SymbolType::EndOfNonTerminalExtra => {
self.end_of_nonterminal_extra = false;
return;
return if self.end_of_nonterminal_extra {
self.end_of_nonterminal_extra = false;
true
} else {
false
};
}
};
if other.index < vec.len() {
vec.set(other.index, false);
if vec[other.index] {
vec.set(other.index, false);
return true;
}
}
false
}
pub fn is_empty(&self) -> bool {

View file

@ -28,7 +28,7 @@ fetch_grammar embedded-template master
fetch_grammar go master
fetch_grammar html master
fetch_grammar java master
fetch_grammar javascript master
fetch_grammar javascript partial-order-precedences
fetch_grammar jsdoc master
fetch_grammar json master
fetch_grammar php master

View file

@ -7,7 +7,7 @@ call:fetch_grammar embedded-template master
call:fetch_grammar go master
call:fetch_grammar html master
call:fetch_grammar java master
call:fetch_grammar javascript master
call:fetch_grammar javascript partial-order-precedences
call:fetch_grammar jsdoc master
call:fetch_grammar json master
call:fetch_grammar php master

View file

@ -10,14 +10,14 @@
"precedences": [
[
"member",
"and",
"or"
{"type": "SYMBOL", "name": "member_expression"},
{"type": "STRING", "value": "and"},
{"type": "STRING", "value": "or"}
],
[
"type_member",
"type_intersection",
"type_union"
{"type": "SYMBOL", "name": "nested_type"},
{"type": "STRING", "value": "type_intersection"},
{"type": "STRING", "value": "type_union"}
]
],
@ -65,16 +65,12 @@
},
"member_expression": {
"type": "PREC",
"value": "member",
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "."},
{"type": "SYMBOL", "name": "identifier"}
]
}
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "."},
{"type": "SYMBOL", "name": "identifier"}
]
},
"binary_expression": {
@ -117,16 +113,12 @@
},
"nested_type": {
"type": "PREC",
"value": "type_member",
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "STRING", "value": "."},
{"type": "SYMBOL", "name": "identifier"}
]
}
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "STRING", "value": "."},
{"type": "SYMBOL", "name": "identifier"}
]
},
"binary_type": {