Implement named precedence comparison

This commit is contained in:
Max Brunsfeld 2021-02-24 16:02:56 -08:00
parent d40f118370
commit 344797c110
17 changed files with 387 additions and 98 deletions

View file

@ -730,14 +730,44 @@ impl<'a> ParseTableBuilder<'a> {
}
/// Compares two precedence values and returns the ordering of `left` relative
/// to `right`.
///
/// NOTE(review): this listing is a flattened diff, so the pre-change lines (the
/// `_grammar` parameter, the TODO and the `as_integer` comparison) appear next to
/// their replacements. The rules below describe the `match`-based version.
///
/// * Integer vs. integer: plain numeric comparison. `Precedence::None` is
///   treated as zero when it is compared against an integer.
/// * Name vs. name: the lists in `grammar.precedence_orderings` are searched in
///   order, and the first list in which both (distinct) names occur decides. The
///   name that appears earlier in that list compares as `Ordering::Greater`.
/// * Any other combination, including two names that never share a list, yields
///   `Ordering::Equal`.
fn compare_precedence(
_grammar: &SyntaxGrammar,
grammar: &SyntaxGrammar,
left: &Precedence,
right: &Precedence,
) -> Ordering {
// TODO - compare named precedence
let left_integer = left.as_integer();
let right_integer = right.as_integer();
left_integer.cmp(&right_integer)
match (left, right) {
// Integer precedences can be compared to other integer precedences,
// and to the default precedence, which is zero.
(Precedence::Integer(l), Precedence::Integer(r)) => l.cmp(r),
(Precedence::Integer(l), Precedence::None) => l.cmp(&0),
(Precedence::None, Precedence::Integer(r)) => 0.cmp(&r),
// Named precedences can be compared to other named precedences.
(Precedence::Name(l), Precedence::Name(r)) => grammar
.precedence_orderings
.iter()
.find_map(|list| {
// Walk the list once, remembering which of the two names has been
// seen so far. Whichever name is reached second is the lower one.
let mut saw_left = false;
let mut saw_right = false;
for name in list {
if name == l {
saw_left = true;
if saw_right {
// `right` was listed first, so `left` ranks below it.
return Some(Ordering::Less);
}
} else if name == r {
saw_right = true;
if saw_left {
// `left` was listed first, so it ranks above `right`.
return Some(Ordering::Greater);
}
}
}
// This list does not relate the two names; try the next one.
None
})
.unwrap_or(Ordering::Equal),
// Other combinations of precedence types are not comparable.
_ => Ordering::Equal,
}
}
fn get_auxiliary_node_info(

View file

@ -225,7 +225,8 @@ function grammar(baseGrammar, options) {
conflicts: [],
externals: [],
inline: [],
supertypes: []
supertypes: [],
precedences: [],
};
}
@ -362,11 +363,19 @@ function grammar(baseGrammar, options) {
supertypes = supertypeRules.map(symbol => symbol.name);
}
let precedences = baseGrammar.precedences;
if (options.precedences) {
if (typeof options.precedences !== "function") {
throw new Error("Grammar's 'precedences' property must be a function");
}
precedences = options.precedences.call(null, baseGrammar.precedences);
}
if (Object.keys(rules).length == 0) {
throw new Error("Grammar must have at least one rule.");
}
return {name, word, rules, extras, conflicts, externals, inline, supertypes};
return {name, word, rules, extras, conflicts, precedences, externals, inline, supertypes};
}
function checkArguments(ruleCount, caller, callerName, suffix = '') {

View file

@ -25,6 +25,7 @@ pub(crate) struct InputGrammar {
pub variables: Vec<Variable>,
pub extra_symbols: Vec<Rule>,
pub expected_conflicts: Vec<Vec<String>>,
pub precedence_orderings: Vec<Vec<String>>,
pub external_tokens: Vec<Rule>,
pub variables_to_inline: Vec<String>,
pub supertype_symbols: Vec<String>,
@ -93,6 +94,7 @@ pub(crate) struct SyntaxGrammar {
pub supertype_symbols: Vec<Symbol>,
pub variables_to_inline: Vec<Symbol>,
pub word_token: Option<Symbol>,
pub precedence_orderings: Vec<Vec<String>>,
}
#[cfg(test)]

View file

@ -713,12 +713,13 @@ mod tests {
fn test_node_types_simple() {
let node_types = get_node_types(InputGrammar {
name: String::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
extra_symbols: vec![],
external_tokens: vec![],
supertype_symbols: vec![],
expected_conflicts: vec![],
variables_to_inline: vec![],
precedence_orderings: vec![],
variables: vec![
Variable {
name: "v1".to_string(),
@ -809,11 +810,12 @@ mod tests {
let node_types = get_node_types(InputGrammar {
name: String::new(),
extra_symbols: vec![Rule::named("v3")],
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
external_tokens: vec![],
supertype_symbols: vec![],
expected_conflicts: vec![],
variables_to_inline: vec![],
precedence_orderings: vec![],
variables: vec![
Variable {
name: "v1".to_string(),
@ -914,11 +916,12 @@ mod tests {
fn test_node_types_with_supertypes() {
let node_types = get_node_types(InputGrammar {
name: String::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
extra_symbols: vec![],
external_tokens: vec![],
expected_conflicts: vec![],
variables_to_inline: vec![],
precedence_orderings: vec![],
supertype_symbols: vec!["_v2".to_string()],
variables: vec![
Variable {
@ -1001,12 +1004,13 @@ mod tests {
fn test_node_types_for_children_without_fields() {
let node_types = get_node_types(InputGrammar {
name: String::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
extra_symbols: vec![],
external_tokens: vec![],
supertype_symbols: vec![],
expected_conflicts: vec![],
variables_to_inline: vec![],
precedence_orderings: vec![],
variables: vec![
Variable {
name: "v1".to_string(),
@ -1100,11 +1104,12 @@ mod tests {
let node_types = get_node_types(InputGrammar {
name: String::new(),
word_token: None,
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: vec!["v2".to_string()],
extra_symbols: vec![],
external_tokens: vec![],
supertype_symbols: vec![],
expected_conflicts: vec![],
precedence_orderings: vec![],
variables_to_inline: vec!["v2".to_string()],
variables: vec![
Variable {
name: "v1".to_string(),
@ -1154,12 +1159,13 @@ mod tests {
fn test_node_types_for_aliased_nodes() {
let node_types = get_node_types(InputGrammar {
name: String::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
extra_symbols: vec![],
external_tokens: vec![],
supertype_symbols: vec![],
expected_conflicts: vec![],
variables_to_inline: vec![],
precedence_orderings: vec![],
variables: vec![
Variable {
name: "thing".to_string(),
@ -1230,12 +1236,13 @@ mod tests {
fn test_node_types_with_multiple_valued_fields() {
let node_types = get_node_types(InputGrammar {
name: String::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
extra_symbols: vec![],
external_tokens: vec![],
supertype_symbols: vec![],
expected_conflicts: vec![],
variables_to_inline: vec![],
precedence_orderings: vec![],
variables: vec![
Variable {
name: "a".to_string(),
@ -1298,12 +1305,13 @@ mod tests {
fn test_node_types_with_fields_on_hidden_tokens() {
let node_types = get_node_types(InputGrammar {
name: String::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
extra_symbols: vec![],
external_tokens: vec![],
supertype_symbols: vec![],
expected_conflicts: vec![],
variables_to_inline: vec![],
precedence_orderings: vec![],
variables: vec![Variable {
name: "script".to_string(),
kind: VariableType::Named,
@ -1330,12 +1338,13 @@ mod tests {
fn test_node_types_with_multiple_rules_same_alias_name() {
let node_types = get_node_types(InputGrammar {
name: String::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
extra_symbols: vec![],
external_tokens: vec![],
supertype_symbols: vec![],
expected_conflicts: vec![],
variables_to_inline: vec![],
precedence_orderings: vec![],
variables: vec![
Variable {
name: "script".to_string(),
@ -1456,12 +1465,13 @@ mod tests {
fn test_node_types_with_tokens_aliased_to_match_rules() {
let node_types = get_node_types(InputGrammar {
name: String::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
word_token: None,
extra_symbols: vec![],
external_tokens: vec![],
supertype_symbols: vec![],
expected_conflicts: vec![],
variables_to_inline: vec![],
precedence_orderings: vec![],
variables: vec![
Variable {
name: "a".to_string(),

View file

@ -74,11 +74,18 @@ enum PrecedenceJSON {
pub(crate) struct GrammarJSON {
pub(crate) name: String,
rules: Map<String, Value>,
conflicts: Option<Vec<Vec<String>>>,
externals: Option<Vec<RuleJSON>>,
extras: Option<Vec<RuleJSON>>,
inline: Option<Vec<String>>,
supertypes: Option<Vec<String>>,
#[serde(default)]
precedences: Vec<Vec<String>>,
#[serde(default)]
conflicts: Vec<Vec<String>>,
#[serde(default)]
externals: Vec<RuleJSON>,
#[serde(default)]
extras: Vec<RuleJSON>,
#[serde(default)]
inline: Vec<String>,
#[serde(default)]
supertypes: Vec<String>,
word: Option<String>,
}
@ -94,31 +101,19 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
})
}
let extra_symbols = grammar_json
.extras
.unwrap_or(Vec::new())
.into_iter()
.map(parse_rule)
.collect();
let external_tokens = grammar_json
.externals
.unwrap_or(Vec::new())
.into_iter()
.map(parse_rule)
.collect();
let expected_conflicts = grammar_json.conflicts.unwrap_or(Vec::new());
let variables_to_inline = grammar_json.inline.unwrap_or(Vec::new());
let supertype_symbols = grammar_json.supertypes.unwrap_or(Vec::new());
let extra_symbols = grammar_json.extras.into_iter().map(parse_rule).collect();
let external_tokens = grammar_json.externals.into_iter().map(parse_rule).collect();
Ok(InputGrammar {
name: grammar_json.name,
word_token: grammar_json.word,
expected_conflicts: grammar_json.conflicts,
supertype_symbols: grammar_json.supertypes,
variables_to_inline: grammar_json.inline,
precedence_orderings: grammar_json.precedences,
variables,
extra_symbols,
expected_conflicts,
external_tokens,
supertype_symbols,
variables_to_inline,
})
}

View file

@ -285,9 +285,10 @@ mod tests {
variables,
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
supertype_symbols: Vec::new(),
precedence_orderings: Vec::new(),
word_token: None,
}
}

View file

@ -197,11 +197,12 @@ mod tests {
}],
},
],
extra_symbols: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
supertype_symbols: Vec::new(),
external_tokens: Vec::new(),
extra_symbols: vec![],
external_tokens: vec![],
supertype_symbols: vec![],
expected_conflicts: vec![],
variables_to_inline: vec![],
precedence_orderings: vec![],
word_token: None,
};

View file

@ -155,6 +155,7 @@ pub(super) fn extract_tokens(
supertype_symbols,
external_tokens,
word_token,
precedence_orderings: grammar.precedence_orderings,
},
ExtractedLexicalGrammar {
variables: lexical_variables,
@ -494,9 +495,10 @@ mod test {
variables,
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
supertype_symbols: Vec::new(),
precedence_orderings: Vec::new(),
word_token: None,
}
}

View file

@ -208,6 +208,7 @@ unless they are used only as the grammar's start rule.
extra_symbols: grammar.extra_symbols,
expected_conflicts: grammar.expected_conflicts,
variables_to_inline: grammar.variables_to_inline,
precedence_orderings: grammar.precedence_orderings,
external_tokens: grammar.external_tokens,
supertype_symbols: grammar.supertype_symbols,
word_token: grammar.word_token,

View file

@ -87,6 +87,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
variables_to_inline,
supertype_symbols,
word_token,
precedence_orderings: grammar.precedence_orderings.clone(),
})
}
@ -244,9 +245,10 @@ mod tests {
name: "the_language".to_string(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
supertype_symbols: Vec::new(),
precedence_orderings: Vec::new(),
word_token: None,
}
}

View file

@ -6,6 +6,13 @@ mod flatten_grammar;
mod intern_symbols;
mod process_inlines;
use super::Error;
use std::{
cmp::Ordering,
collections::{hash_map, HashMap},
mem,
};
use self::expand_repeats::expand_repeats;
pub(crate) use self::expand_tokens::expand_tokens;
use self::extract_default_aliases::extract_default_aliases;
@ -23,6 +30,7 @@ pub(crate) struct IntermediateGrammar<T, U> {
variables: Vec<Variable>,
extra_symbols: Vec<T>,
expected_conflicts: Vec<Vec<Symbol>>,
precedence_orderings: Vec<Vec<String>>,
external_tokens: Vec<U>,
variables_to_inline: Vec<Symbol>,
supertype_symbols: Vec<Symbol>,
@ -39,6 +47,8 @@ pub(crate) struct ExtractedLexicalGrammar {
pub separators: Vec<Rule>,
}
/// Transform an input grammar into separate components that are ready
/// for parse table construction.
pub(crate) fn prepare_grammar(
input_grammar: &InputGrammar,
) -> Result<(
@ -47,6 +57,8 @@ pub(crate) fn prepare_grammar(
InlinedProductionMap,
AliasMap,
)> {
validate_precedence_orderings(&input_grammar.precedence_orderings)?;
let interned_grammar = intern_symbols(input_grammar)?;
let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?;
let syntax_grammar = expand_repeats(syntax_grammar);
@ -56,3 +68,38 @@ pub(crate) fn prepare_grammar(
let inlines = process_inlines(&syntax_grammar);
Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
}
/// Make sure that there are no conflicting orderings. For any two precedence
/// names `a` and `b`, if `a` comes before `b` in some list, then it cannot come
// *after* `b` in any list.
fn validate_precedence_orderings(order_lists: &[Vec<String>]) -> Result<()> {
let mut pairs = HashMap::new();
for list in order_lists {
for (i, mut name1) in list.iter().enumerate() {
for mut name2 in list.iter().skip(i + 1) {
if name2 == name1 {
continue;
}
let mut ordering = Ordering::Greater;
if name1 > name2 {
ordering = Ordering::Less;
mem::swap(&mut name1, &mut name2);
}
match pairs.entry((name1, name2)) {
hash_map::Entry::Vacant(e) => {
e.insert(ordering);
}
hash_map::Entry::Occupied(e) => {
if e.get() != &ordering {
return Err(Error::new(format!(
"Conflicting orderings for precedences '{}' and '{}'",
name1, name2
)));
}
}
}
}
}
}
Ok(())
}

View file

@ -198,11 +198,12 @@ mod tests {
#[test]
fn test_basic_inlining() {
let grammar = SyntaxGrammar {
expected_conflicts: Vec::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None,
extra_symbols: vec![],
external_tokens: vec![],
supertype_symbols: vec![],
expected_conflicts: vec![],
precedence_orderings: vec![],
variables_to_inline: vec![Symbol::non_terminal(1)],
variables: vec![
SyntaxVariable {
@ -329,10 +330,11 @@ mod tests {
Symbol::non_terminal(2),
Symbol::non_terminal(3),
],
expected_conflicts: Vec::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
extra_symbols: vec![],
external_tokens: vec![],
supertype_symbols: vec![],
expected_conflicts: vec![],
precedence_orderings: vec![],
word_token: None,
};
let inline_map = process_inlines(&grammar);
@ -431,10 +433,11 @@ mod tests {
}],
},
],
expected_conflicts: Vec::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
extra_symbols: vec![],
external_tokens: vec![],
supertype_symbols: vec![],
expected_conflicts: vec![],
precedence_orderings: vec![],
word_token: None,
};

View file

@ -160,14 +160,6 @@ impl Alias {
}
impl Precedence {
pub fn as_integer(&self) -> i32 {
if let Precedence::Integer(i) = self {
*i
} else {
0
}
}
pub fn is_none(&self) -> bool {
matches!(self, Precedence::None)
}

View file

@ -248,6 +248,16 @@ fn test_feature_corpus_files() {
failure_count += 1;
}
} else {
if let Err(e) = &generate_result {
eprintln!(
"Unexpected error for test grammar '{}':\n{}",
language_name,
e.message()
);
failure_count += 1;
continue;
}
let corpus_path = test_path.join("corpus.txt");
let c_code = generate_result.unwrap().1;
let language = get_test_language(language_name, &c_code, Some(&test_path));
@ -390,7 +400,9 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
}
result.push((name, input, output, has_fields));
}
TestEntry::Group { mut name, children, .. } => {
TestEntry::Group {
mut name, children, ..
} => {
if !prefix.is_empty() {
name.insert_str(0, " - ");
name.insert_str(0, prefix);

View file

@ -0,0 +1,12 @@
=============
Declarations
=============
A||B c = d;
E.F g = h;
=============
Expressions
=============
a || b.c;

View file

@ -0,0 +1,167 @@
{
"name": "named_precedences",
"extras": [
{
"type": "PATTERN",
"value": "\\s+"
}
],
"precedences": [
[
"member",
"and",
"or"
],
[
"type_member",
"type_intersection",
"type_union"
]
],
"conflicts": [
["expression", "type"],
["expression", "nested_type"]
],
"rules": {
"program": {
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "expression_statement"},
{"type": "SYMBOL", "name": "declaration_statement"}
]
}
},
"expression_statement": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": ";"}
]
},
"declaration_statement": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "type"},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": ";"}
]
},
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "member_expression"},
{"type": "SYMBOL", "name": "binary_expression"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"member_expression": {
"type": "PREC",
"value": "member",
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "."},
{"type": "SYMBOL", "name": "identifier"}
]
}
},
"binary_expression": {
"type": "CHOICE",
"members": [
{
"type": "PREC_LEFT",
"value": "or",
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "||"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
{
"type": "PREC_LEFT",
"value": "and",
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "&&"},
{"type": "SYMBOL", "name": "expression"}
]
}
}
]
},
"type": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "nested_type"},
{"type": "SYMBOL", "name": "binary_type"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"nested_type": {
"type": "PREC",
"value": "type_member",
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "STRING", "value": "."},
{"type": "SYMBOL", "name": "identifier"}
]
}
},
"binary_type": {
"type": "CHOICE",
"members": [
{
"type": "PREC_LEFT",
"value": "type_union",
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "type"},
{"type": "STRING", "value": "||"},
{"type": "SYMBOL", "name": "type"}
]
}
},
{
"type": "PREC_LEFT",
"value": "type_intersection",
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "type"},
{"type": "STRING", "value": "&&"},
{"type": "SYMBOL", "name": "type"}
]
}
}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-z]\\w+"
}
}
}

View file

@ -0,0 +1,3 @@
This grammar uses named precedences, which have a partial order specified via the grammar's `precedences` field. Named
precedences allow certain conflicts to be resolved statically without accidentally resolving *other* conflicts, which
are intended to be resolved dynamically.