feat: add 'reserved word' construct
Co-authored-by: Amaan Qureshi <amaanq12@gmail.com>
This commit is contained in:
parent
2a63077cac
commit
201b41cf11
31 changed files with 2367 additions and 1628 deletions
|
|
@ -1,10 +1,10 @@
|
|||
use std::{collections::HashMap, mem};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
|
||||
use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
|
||||
use crate::{
|
||||
grammars::{ExternalToken, Variable, VariableType},
|
||||
grammars::{ExternalToken, ReservedWordContext, Variable, VariableType},
|
||||
rules::{MetadataParams, Rule, Symbol, SymbolType},
|
||||
};
|
||||
|
||||
|
|
@ -148,6 +148,27 @@ pub(super) fn extract_tokens(
|
|||
word_token = Some(token);
|
||||
}
|
||||
|
||||
let mut reserved_word_contexts = Vec::new();
|
||||
for reserved_word_context in grammar.reserved_word_sets {
|
||||
let mut reserved_words = Vec::new();
|
||||
for reserved_rule in reserved_word_context.reserved_words {
|
||||
if let Rule::Symbol(symbol) = reserved_rule {
|
||||
reserved_words.push(symbol_replacer.replace_symbol(symbol));
|
||||
} else if let Some(index) = lexical_variables
|
||||
.iter()
|
||||
.position(|v| v.rule == reserved_rule)
|
||||
{
|
||||
reserved_words.push(Symbol::terminal(index));
|
||||
} else {
|
||||
return Err(anyhow!("Reserved words must be tokens"));
|
||||
}
|
||||
}
|
||||
reserved_word_contexts.push(ReservedWordContext {
|
||||
name: reserved_word_context.name,
|
||||
reserved_words,
|
||||
});
|
||||
}
|
||||
|
||||
Ok((
|
||||
ExtractedSyntaxGrammar {
|
||||
variables,
|
||||
|
|
@ -158,6 +179,7 @@ pub(super) fn extract_tokens(
|
|||
external_tokens,
|
||||
word_token,
|
||||
precedence_orderings: grammar.precedence_orderings,
|
||||
reserved_word_sets: reserved_word_contexts,
|
||||
},
|
||||
ExtractedLexicalGrammar {
|
||||
variables: lexical_variables,
|
||||
|
|
@ -188,9 +210,7 @@ impl TokenExtractor {
|
|||
self.current_variable_name.push_str(&variable.name);
|
||||
self.current_variable_token_count = 0;
|
||||
self.is_first_rule = is_first;
|
||||
let mut rule = Rule::Blank;
|
||||
mem::swap(&mut rule, &mut variable.rule);
|
||||
variable.rule = self.extract_tokens_in_rule(&rule)?;
|
||||
variable.rule = self.extract_tokens_in_rule(&variable.rule)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -237,6 +257,10 @@ impl TokenExtractor {
|
|||
.map(|e| self.extract_tokens_in_rule(e))
|
||||
.collect::<Result<Vec<_>>>()?,
|
||||
)),
|
||||
Rule::Reserved { rule, context_name } => Ok(Rule::Reserved {
|
||||
rule: Box::new(self.extract_tokens_in_rule(rule)?),
|
||||
context_name: context_name.clone(),
|
||||
}),
|
||||
_ => Ok(input.clone()),
|
||||
}
|
||||
}
|
||||
|
|
@ -305,6 +329,10 @@ impl SymbolReplacer {
|
|||
params: params.clone(),
|
||||
rule: Box::new(self.replace_symbols_in_rule(rule)),
|
||||
},
|
||||
Rule::Reserved { rule, context_name } => Rule::Reserved {
|
||||
rule: Box::new(self.replace_symbols_in_rule(rule)),
|
||||
context_name: context_name.clone(),
|
||||
},
|
||||
_ => rule.clone(),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,48 +1,77 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use indoc::indoc;
|
||||
|
||||
use super::ExtractedSyntaxGrammar;
|
||||
use crate::{
|
||||
grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable},
|
||||
rules::{Alias, Associativity, Precedence, Rule, Symbol},
|
||||
grammars::{
|
||||
Production, ProductionStep, ReservedWordSetId, SyntaxGrammar, SyntaxVariable, Variable,
|
||||
},
|
||||
rules::{Alias, Associativity, Precedence, Rule, Symbol, TokenSet},
|
||||
};
|
||||
|
||||
struct RuleFlattener {
|
||||
production: Production,
|
||||
reserved_word_set_ids: HashMap<String, ReservedWordSetId>,
|
||||
precedence_stack: Vec<Precedence>,
|
||||
associativity_stack: Vec<Associativity>,
|
||||
reserved_word_stack: Vec<ReservedWordSetId>,
|
||||
alias_stack: Vec<Alias>,
|
||||
field_name_stack: Vec<String>,
|
||||
}
|
||||
|
||||
impl RuleFlattener {
|
||||
const fn new() -> Self {
|
||||
const fn new(reserved_word_set_ids: HashMap<String, ReservedWordSetId>) -> Self {
|
||||
Self {
|
||||
production: Production {
|
||||
steps: Vec::new(),
|
||||
dynamic_precedence: 0,
|
||||
},
|
||||
reserved_word_set_ids,
|
||||
precedence_stack: Vec::new(),
|
||||
associativity_stack: Vec::new(),
|
||||
reserved_word_stack: Vec::new(),
|
||||
alias_stack: Vec::new(),
|
||||
field_name_stack: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn flatten(mut self, rule: Rule) -> Production {
|
||||
self.apply(rule, true);
|
||||
self.production
|
||||
fn flatten_variable(&mut self, variable: Variable) -> Result<SyntaxVariable> {
|
||||
let mut productions = Vec::new();
|
||||
for rule in extract_choices(variable.rule) {
|
||||
let production = self.flatten_rule(rule)?;
|
||||
if !productions.contains(&production) {
|
||||
productions.push(production);
|
||||
}
|
||||
}
|
||||
Ok(SyntaxVariable {
|
||||
name: variable.name,
|
||||
kind: variable.kind,
|
||||
productions,
|
||||
})
|
||||
}
|
||||
|
||||
fn apply(&mut self, rule: Rule, at_end: bool) -> bool {
|
||||
fn flatten_rule(&mut self, rule: Rule) -> Result<Production> {
|
||||
self.production = Production::default();
|
||||
self.alias_stack.clear();
|
||||
self.reserved_word_stack.clear();
|
||||
self.precedence_stack.clear();
|
||||
self.associativity_stack.clear();
|
||||
self.field_name_stack.clear();
|
||||
self.apply(rule, true)?;
|
||||
Ok(self.production.clone())
|
||||
}
|
||||
|
||||
fn apply(&mut self, rule: Rule, at_end: bool) -> Result<bool> {
|
||||
match rule {
|
||||
Rule::Seq(members) => {
|
||||
let mut result = false;
|
||||
let last_index = members.len() - 1;
|
||||
for (i, member) in members.into_iter().enumerate() {
|
||||
result |= self.apply(member, i == last_index && at_end);
|
||||
result |= self.apply(member, i == last_index && at_end)?;
|
||||
}
|
||||
result
|
||||
Ok(result)
|
||||
}
|
||||
Rule::Metadata { rule, params } => {
|
||||
let mut has_precedence = false;
|
||||
|
|
@ -73,7 +102,7 @@ impl RuleFlattener {
|
|||
self.production.dynamic_precedence = params.dynamic_precedence;
|
||||
}
|
||||
|
||||
let did_push = self.apply(*rule, at_end);
|
||||
let did_push = self.apply(*rule, at_end)?;
|
||||
|
||||
if has_precedence {
|
||||
self.precedence_stack.pop();
|
||||
|
|
@ -102,7 +131,18 @@ impl RuleFlattener {
|
|||
self.field_name_stack.pop();
|
||||
}
|
||||
|
||||
did_push
|
||||
Ok(did_push)
|
||||
}
|
||||
Rule::Reserved { rule, context_name } => {
|
||||
self.reserved_word_stack.push(
|
||||
self.reserved_word_set_ids
|
||||
.get(&context_name)
|
||||
.copied()
|
||||
.ok_or_else(|| anyhow!("no such reserved word set: {context_name}"))?,
|
||||
);
|
||||
let did_push = self.apply(*rule, at_end)?;
|
||||
self.reserved_word_stack.pop();
|
||||
Ok(did_push)
|
||||
}
|
||||
Rule::Symbol(symbol) => {
|
||||
self.production.steps.push(ProductionStep {
|
||||
|
|
@ -113,12 +153,17 @@ impl RuleFlattener {
|
|||
.cloned()
|
||||
.unwrap_or(Precedence::None),
|
||||
associativity: self.associativity_stack.last().copied(),
|
||||
reserved_word_set_id: self
|
||||
.reserved_word_stack
|
||||
.last()
|
||||
.copied()
|
||||
.unwrap_or(ReservedWordSetId::default()),
|
||||
alias: self.alias_stack.last().cloned(),
|
||||
field_name: self.field_name_stack.last().cloned(),
|
||||
});
|
||||
true
|
||||
Ok(true)
|
||||
}
|
||||
_ => false,
|
||||
_ => Ok(false),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -155,25 +200,17 @@ fn extract_choices(rule: Rule) -> Vec<Rule> {
|
|||
params: params.clone(),
|
||||
})
|
||||
.collect(),
|
||||
Rule::Reserved { rule, context_name } => extract_choices(*rule)
|
||||
.into_iter()
|
||||
.map(|rule| Rule::Reserved {
|
||||
rule: Box::new(rule),
|
||||
context_name: context_name.clone(),
|
||||
})
|
||||
.collect(),
|
||||
_ => vec![rule],
|
||||
}
|
||||
}
|
||||
|
||||
fn flatten_variable(variable: Variable) -> SyntaxVariable {
|
||||
let mut productions = Vec::new();
|
||||
for rule in extract_choices(variable.rule) {
|
||||
let production = RuleFlattener::new().flatten(rule);
|
||||
if !productions.contains(&production) {
|
||||
productions.push(production);
|
||||
}
|
||||
}
|
||||
SyntaxVariable {
|
||||
name: variable.name,
|
||||
kind: variable.kind,
|
||||
productions,
|
||||
}
|
||||
}
|
||||
|
||||
fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
|
||||
for variable in variables {
|
||||
for production in &variable.productions {
|
||||
|
|
@ -188,10 +225,18 @@ fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
|
|||
}
|
||||
|
||||
pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
|
||||
let mut variables = Vec::new();
|
||||
for variable in grammar.variables {
|
||||
variables.push(flatten_variable(variable));
|
||||
let mut reserved_word_set_ids_by_name = HashMap::new();
|
||||
for (ix, set) in grammar.reserved_word_sets.iter().enumerate() {
|
||||
reserved_word_set_ids_by_name.insert(set.name.clone(), ReservedWordSetId(ix));
|
||||
}
|
||||
|
||||
let mut flattener = RuleFlattener::new(reserved_word_set_ids_by_name);
|
||||
let variables = grammar
|
||||
.variables
|
||||
.into_iter()
|
||||
.map(|variable| flattener.flatten_variable(variable))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
for (i, variable) in variables.iter().enumerate() {
|
||||
let symbol = Symbol::non_terminal(i);
|
||||
|
||||
|
|
@ -218,6 +263,17 @@ pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxG
|
|||
}
|
||||
}
|
||||
}
|
||||
let mut reserved_word_sets = grammar
|
||||
.reserved_word_sets
|
||||
.into_iter()
|
||||
.map(|set| set.reserved_words.into_iter().collect())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// If no default reserved word set is specified, there are no reserved words.
|
||||
if reserved_word_sets.is_empty() {
|
||||
reserved_word_sets.push(TokenSet::default());
|
||||
}
|
||||
|
||||
Ok(SyntaxGrammar {
|
||||
extra_symbols: grammar.extra_symbols,
|
||||
expected_conflicts: grammar.expected_conflicts,
|
||||
|
|
@ -226,6 +282,7 @@ pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxG
|
|||
external_tokens: grammar.external_tokens,
|
||||
supertype_symbols: grammar.supertype_symbols,
|
||||
word_token: grammar.word_token,
|
||||
reserved_word_sets,
|
||||
variables,
|
||||
})
|
||||
}
|
||||
|
|
@ -237,28 +294,31 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_flatten_grammar() {
|
||||
let result = flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::seq(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::prec_left(
|
||||
Precedence::Integer(101),
|
||||
Rule::seq(vec![
|
||||
Rule::non_terminal(2),
|
||||
Rule::choice(vec![
|
||||
Rule::prec_right(
|
||||
Precedence::Integer(102),
|
||||
Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
|
||||
),
|
||||
Rule::non_terminal(5),
|
||||
let mut flattener = RuleFlattener::new(HashMap::default());
|
||||
let result = flattener
|
||||
.flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::seq(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::prec_left(
|
||||
Precedence::Integer(101),
|
||||
Rule::seq(vec![
|
||||
Rule::non_terminal(2),
|
||||
Rule::choice(vec![
|
||||
Rule::prec_right(
|
||||
Precedence::Integer(102),
|
||||
Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
|
||||
),
|
||||
Rule::non_terminal(5),
|
||||
]),
|
||||
Rule::non_terminal(6),
|
||||
]),
|
||||
Rule::non_terminal(6),
|
||||
]),
|
||||
),
|
||||
Rule::non_terminal(7),
|
||||
]),
|
||||
});
|
||||
),
|
||||
Rule::non_terminal(7),
|
||||
]),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
result.productions,
|
||||
|
|
@ -295,28 +355,31 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_flatten_grammar_with_maximum_dynamic_precedence() {
|
||||
let result = flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::seq(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::prec_dynamic(
|
||||
101,
|
||||
Rule::seq(vec![
|
||||
Rule::non_terminal(2),
|
||||
Rule::choice(vec![
|
||||
Rule::prec_dynamic(
|
||||
102,
|
||||
Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
|
||||
),
|
||||
Rule::non_terminal(5),
|
||||
let mut flattener = RuleFlattener::new(HashMap::default());
|
||||
let result = flattener
|
||||
.flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::seq(vec![
|
||||
Rule::non_terminal(1),
|
||||
Rule::prec_dynamic(
|
||||
101,
|
||||
Rule::seq(vec![
|
||||
Rule::non_terminal(2),
|
||||
Rule::choice(vec![
|
||||
Rule::prec_dynamic(
|
||||
102,
|
||||
Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
|
||||
),
|
||||
Rule::non_terminal(5),
|
||||
]),
|
||||
Rule::non_terminal(6),
|
||||
]),
|
||||
Rule::non_terminal(6),
|
||||
]),
|
||||
),
|
||||
Rule::non_terminal(7),
|
||||
]),
|
||||
});
|
||||
),
|
||||
Rule::non_terminal(7),
|
||||
]),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
result.productions,
|
||||
|
|
@ -348,14 +411,17 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_flatten_grammar_with_final_precedence() {
|
||||
let result = flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::prec_left(
|
||||
Precedence::Integer(101),
|
||||
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
|
||||
),
|
||||
});
|
||||
let mut flattener = RuleFlattener::new(HashMap::default());
|
||||
let result = flattener
|
||||
.flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::prec_left(
|
||||
Precedence::Integer(101),
|
||||
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
|
||||
),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
result.productions,
|
||||
|
|
@ -370,14 +436,16 @@ mod tests {
|
|||
}]
|
||||
);
|
||||
|
||||
let result = flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::prec_left(
|
||||
Precedence::Integer(101),
|
||||
Rule::seq(vec![Rule::non_terminal(1)]),
|
||||
),
|
||||
});
|
||||
let result = flattener
|
||||
.flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::prec_left(
|
||||
Precedence::Integer(101),
|
||||
Rule::seq(vec![Rule::non_terminal(1)]),
|
||||
),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
result.productions,
|
||||
|
|
@ -391,18 +459,21 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_flatten_grammar_with_field_names() {
|
||||
let result = flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::seq(vec![
|
||||
Rule::field("first-thing".to_string(), Rule::terminal(1)),
|
||||
Rule::terminal(2),
|
||||
Rule::choice(vec![
|
||||
Rule::Blank,
|
||||
Rule::field("second-thing".to_string(), Rule::terminal(3)),
|
||||
let mut flattener = RuleFlattener::new(HashMap::default());
|
||||
let result = flattener
|
||||
.flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::seq(vec![
|
||||
Rule::field("first-thing".to_string(), Rule::terminal(1)),
|
||||
Rule::terminal(2),
|
||||
Rule::choice(vec![
|
||||
Rule::Blank,
|
||||
Rule::field("second-thing".to_string(), Rule::terminal(3)),
|
||||
]),
|
||||
]),
|
||||
]),
|
||||
});
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
result.productions,
|
||||
|
|
@ -436,6 +507,7 @@ mod tests {
|
|||
external_tokens: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
word_token: None,
|
||||
reserved_word_sets: Vec::new(),
|
||||
variables: vec![Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ use anyhow::{anyhow, Result};
|
|||
|
||||
use super::InternedGrammar;
|
||||
use crate::{
|
||||
grammars::{InputGrammar, Variable, VariableType},
|
||||
grammars::{InputGrammar, ReservedWordContext, Variable, VariableType},
|
||||
rules::{Rule, Symbol},
|
||||
};
|
||||
|
||||
|
|
@ -45,6 +45,18 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
|
|||
})?);
|
||||
}
|
||||
|
||||
let mut reserved_words = Vec::with_capacity(grammar.reserved_words.len());
|
||||
for reserved_word_set in &grammar.reserved_words {
|
||||
let mut interned_set = Vec::new();
|
||||
for rule in &reserved_word_set.reserved_words {
|
||||
interned_set.push(interner.intern_rule(rule, None)?);
|
||||
}
|
||||
reserved_words.push(ReservedWordContext {
|
||||
name: reserved_word_set.name.clone(),
|
||||
reserved_words: interned_set,
|
||||
});
|
||||
}
|
||||
|
||||
let mut expected_conflicts = Vec::new();
|
||||
for conflict in &grammar.expected_conflicts {
|
||||
let mut interned_conflict = Vec::with_capacity(conflict.len());
|
||||
|
|
@ -87,6 +99,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
|
|||
supertype_symbols,
|
||||
word_token,
|
||||
precedence_orderings: grammar.precedence_orderings.clone(),
|
||||
reserved_word_sets: reserved_words,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -118,6 +131,10 @@ impl Interner<'_> {
|
|||
rule: Box::new(self.intern_rule(rule, name)?),
|
||||
params: params.clone(),
|
||||
}),
|
||||
Rule::Reserved { rule, context_name } => Ok(Rule::Reserved {
|
||||
rule: Box::new(self.intern_rule(rule, name)?),
|
||||
context_name: context_name.clone(),
|
||||
}),
|
||||
Rule::NamedSymbol(name) => self.intern_name(name).map_or_else(
|
||||
|| Err(anyhow!("Undefined symbol `{name}`")),
|
||||
|symbol| Ok(Rule::Symbol(symbol)),
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ use super::{
|
|||
},
|
||||
rules::{AliasMap, Precedence, Rule, Symbol},
|
||||
};
|
||||
use crate::grammars::ReservedWordContext;
|
||||
|
||||
pub struct IntermediateGrammar<T, U> {
|
||||
variables: Vec<Variable>,
|
||||
|
|
@ -37,6 +38,7 @@ pub struct IntermediateGrammar<T, U> {
|
|||
variables_to_inline: Vec<Symbol>,
|
||||
supertype_symbols: Vec<Symbol>,
|
||||
word_token: Option<Symbol>,
|
||||
reserved_word_sets: Vec<ReservedWordContext<T>>,
|
||||
}
|
||||
|
||||
pub type InternedGrammar = IntermediateGrammar<Rule, Variable>;
|
||||
|
|
@ -60,6 +62,7 @@ impl<T, U> Default for IntermediateGrammar<T, U> {
|
|||
variables_to_inline: Vec::default(),
|
||||
supertype_symbols: Vec::default(),
|
||||
word_token: Option::default(),
|
||||
reserved_word_sets: Vec::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue