Start work on including child refs in generated parsers
This commit is contained in:
parent
bf4e1304f8
commit
108ca989ea
9 changed files with 111 additions and 30 deletions
|
|
@ -6,7 +6,7 @@ use crate::generate::grammars::{
|
|||
};
|
||||
use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType};
|
||||
use crate::generate::tables::{
|
||||
AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
ChildInfoSequenceId, ChildInfo, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
};
|
||||
use core::ops::Range;
|
||||
use hashbrown::hash_map::Entry;
|
||||
|
|
@ -47,7 +47,7 @@ struct ParseTableBuilder<'a> {
|
|||
impl<'a> ParseTableBuilder<'a> {
|
||||
fn build(mut self) -> Result<ParseTable> {
|
||||
// Ensure that the empty alias sequence has index 0.
|
||||
self.parse_table.alias_sequences.push(Vec::new());
|
||||
self.parse_table.child_info_sequences.push(Vec::new());
|
||||
|
||||
// Add the error state at index 0.
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
|
||||
|
|
@ -176,7 +176,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
precedence: item.precedence(),
|
||||
associativity: item.associativity(),
|
||||
dynamic_precedence: item.production.dynamic_precedence,
|
||||
alias_sequence_id: self.get_alias_sequence_id(item),
|
||||
child_info_sequence_id: self.get_child_info_sequence_id(item),
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -645,29 +645,32 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId {
|
||||
let mut alias_sequence: Vec<Option<Alias>> = item
|
||||
fn get_child_info_sequence_id(&mut self, item: &ParseItem) -> ChildInfoSequenceId {
|
||||
let mut child_info_sequence: Vec<ChildInfo> = item
|
||||
.production
|
||||
.steps
|
||||
.iter()
|
||||
.map(|s| s.alias.clone())
|
||||
.map(|s| ChildInfo {
|
||||
alias: s.alias.clone(),
|
||||
child_ref: s.child_ref.clone(),
|
||||
})
|
||||
.collect();
|
||||
while alias_sequence.last() == Some(&None) {
|
||||
alias_sequence.pop();
|
||||
while child_info_sequence.last() == Some(&ChildInfo::default()) {
|
||||
child_info_sequence.pop();
|
||||
}
|
||||
if item.production.steps.len() > self.parse_table.max_aliased_production_length {
|
||||
self.parse_table.max_aliased_production_length = item.production.steps.len()
|
||||
}
|
||||
if let Some(index) = self
|
||||
.parse_table
|
||||
.alias_sequences
|
||||
.child_info_sequences
|
||||
.iter()
|
||||
.position(|seq| *seq == alias_sequence)
|
||||
.position(|seq| *seq == child_info_sequence)
|
||||
{
|
||||
index
|
||||
} else {
|
||||
self.parse_table.alias_sequences.push(alias_sequence);
|
||||
self.parse_table.alias_sequences.len() - 1
|
||||
self.parse_table.child_info_sequences.push(child_info_sequence);
|
||||
self.parse_table.child_info_sequences.len() - 1
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -740,7 +743,7 @@ pub(crate) fn build_parse_table(
|
|||
parse_table: ParseTable {
|
||||
states: Vec::new(),
|
||||
symbols: Vec::new(),
|
||||
alias_sequences: Vec::new(),
|
||||
child_info_sequences: Vec::new(),
|
||||
max_aliased_production_length: 0,
|
||||
},
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ lazy_static! {
|
|||
precedence: 0,
|
||||
associativity: None,
|
||||
alias: None,
|
||||
child_ref: None,
|
||||
}],
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ impl<'a> Minimizer<'a> {
|
|||
ParseAction::ShiftExtra => continue,
|
||||
ParseAction::Reduce {
|
||||
child_count: 1,
|
||||
alias_sequence_id: 0,
|
||||
child_info_sequence_id: 0,
|
||||
symbol,
|
||||
..
|
||||
} => {
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ pub(crate) struct ProductionStep {
|
|||
pub precedence: i32,
|
||||
pub associativity: Option<Associativity>,
|
||||
pub alias: Option<Alias>,
|
||||
pub child_ref: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
|
|
@ -99,6 +100,7 @@ impl ProductionStep {
|
|||
precedence: 0,
|
||||
associativity: None,
|
||||
alias: None,
|
||||
child_ref: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -108,6 +110,7 @@ impl ProductionStep {
|
|||
precedence,
|
||||
associativity,
|
||||
alias: self.alias,
|
||||
child_ref: self.child_ref,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -120,6 +123,16 @@ impl ProductionStep {
|
|||
value: value.to_string(),
|
||||
is_named,
|
||||
}),
|
||||
child_ref: self.child_ref,
|
||||
}
|
||||
}
|
||||
pub(crate) fn with_child_ref(self, name: &str) -> Self {
|
||||
Self {
|
||||
symbol: self.symbol,
|
||||
precedence: self.precedence,
|
||||
associativity: self.associativity,
|
||||
alias: self.alias,
|
||||
child_ref: Some(name.to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ fn parse_rule(json: RuleJSON) -> Rule {
|
|||
RuleJSON::PATTERN { value } => Rule::Pattern(value),
|
||||
RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
|
||||
RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
|
||||
RuleJSON::REF { content, value } => parse_rule(*content),
|
||||
RuleJSON::REF { content, value } => Rule::child_ref(value, parse_rule(*content)),
|
||||
RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()),
|
||||
RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)),
|
||||
RuleJSON::REPEAT { content } => {
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ struct RuleFlattener {
|
|||
precedence_stack: Vec<i32>,
|
||||
associativity_stack: Vec<Associativity>,
|
||||
alias_stack: Vec<Alias>,
|
||||
child_ref_stack: Vec<String>,
|
||||
}
|
||||
|
||||
impl RuleFlattener {
|
||||
|
|
@ -23,6 +24,7 @@ impl RuleFlattener {
|
|||
precedence_stack: Vec::new(),
|
||||
associativity_stack: Vec::new(),
|
||||
alias_stack: Vec::new(),
|
||||
child_ref_stack: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -60,6 +62,12 @@ impl RuleFlattener {
|
|||
self.alias_stack.push(alias);
|
||||
}
|
||||
|
||||
let mut has_child_ref = false;
|
||||
if let Some(child_ref) = params.child_ref {
|
||||
has_child_ref = true;
|
||||
self.child_ref_stack.push(child_ref);
|
||||
}
|
||||
|
||||
if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() {
|
||||
self.production.dynamic_precedence = params.dynamic_precedence;
|
||||
}
|
||||
|
|
@ -86,6 +94,10 @@ impl RuleFlattener {
|
|||
self.alias_stack.pop();
|
||||
}
|
||||
|
||||
if has_child_ref {
|
||||
self.child_ref_stack.pop();
|
||||
}
|
||||
|
||||
did_push
|
||||
}
|
||||
Rule::Symbol(symbol) => {
|
||||
|
|
@ -94,6 +106,7 @@ impl RuleFlattener {
|
|||
precedence: self.precedence_stack.last().cloned().unwrap_or(0),
|
||||
associativity: self.associativity_stack.last().cloned(),
|
||||
alias: self.alias_stack.last().cloned(),
|
||||
child_ref: self.child_ref_stack.last().cloned(),
|
||||
});
|
||||
true
|
||||
}
|
||||
|
|
@ -355,4 +368,42 @@ mod tests {
|
|||
}]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_flatten_grammar_with_child_refs() {
|
||||
let result = flatten_variable(Variable {
|
||||
name: "test".to_string(),
|
||||
kind: VariableType::Named,
|
||||
rule: Rule::seq(vec![
|
||||
Rule::child_ref("first-thing".to_string(), Rule::terminal(1)),
|
||||
Rule::terminal(2),
|
||||
Rule::choice(vec![
|
||||
Rule::Blank,
|
||||
Rule::child_ref("second-thing".to_string(), Rule::terminal(3)),
|
||||
]),
|
||||
]),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
result.productions,
|
||||
vec![
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(1)).with_child_ref("first-thing"),
|
||||
ProductionStep::new(Symbol::terminal(2))
|
||||
]
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(1)).with_child_ref("first-thing"),
|
||||
ProductionStep::new(Symbol::terminal(2)),
|
||||
ProductionStep::new(Symbol::terminal(3)).with_child_ref("second-thing"),
|
||||
]
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ impl Generator {
|
|||
self.add_symbol_names_list();
|
||||
self.add_symbol_metadata_list();
|
||||
|
||||
if self.parse_table.alias_sequences.len() > 1 {
|
||||
if self.parse_table.child_info_sequences.len() > 1 {
|
||||
self.add_alias_sequences();
|
||||
}
|
||||
|
||||
|
|
@ -148,9 +148,9 @@ impl Generator {
|
|||
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
|
||||
}
|
||||
|
||||
for alias_sequence in &self.parse_table.alias_sequences {
|
||||
for entry in alias_sequence {
|
||||
if let Some(alias) = entry {
|
||||
for child_info_sequence in &self.parse_table.child_info_sequences {
|
||||
for entry in child_info_sequence {
|
||||
if let Some(alias) = &entry.alias {
|
||||
let alias_kind = if alias.is_named {
|
||||
VariableType::Named
|
||||
} else {
|
||||
|
|
@ -307,14 +307,14 @@ impl Generator {
|
|||
add_line!(
|
||||
self,
|
||||
"static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{",
|
||||
self.parse_table.alias_sequences.len()
|
||||
self.parse_table.child_info_sequences.len()
|
||||
);
|
||||
indent!(self);
|
||||
for (i, sequence) in self.parse_table.alias_sequences.iter().enumerate().skip(1) {
|
||||
for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate().skip(1) {
|
||||
add_line!(self, "[{}] = {{", i);
|
||||
indent!(self);
|
||||
for (j, alias) in sequence.iter().enumerate() {
|
||||
if let Some(alias) = alias {
|
||||
for (j, child_info) in sequence.iter().enumerate() {
|
||||
if let Some(alias) = &child_info.alias {
|
||||
add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]);
|
||||
}
|
||||
}
|
||||
|
|
@ -686,15 +686,15 @@ impl Generator {
|
|||
symbol,
|
||||
child_count,
|
||||
dynamic_precedence,
|
||||
alias_sequence_id,
|
||||
child_info_sequence_id,
|
||||
..
|
||||
} => {
|
||||
add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count);
|
||||
if dynamic_precedence != 0 {
|
||||
add!(self, ", .dynamic_precedence = {}", dynamic_precedence);
|
||||
}
|
||||
if alias_sequence_id != 0 {
|
||||
add!(self, ", .alias_sequence_id = {}", alias_sequence_id);
|
||||
if child_info_sequence_id != 0 {
|
||||
add!(self, ", .alias_sequence_id = {}", child_info_sequence_id);
|
||||
}
|
||||
add!(self, ")");
|
||||
}
|
||||
|
|
@ -759,7 +759,7 @@ impl Generator {
|
|||
add_line!(self, ".lex_modes = ts_lex_modes,");
|
||||
add_line!(self, ".symbol_names = ts_symbol_names,");
|
||||
|
||||
if self.parse_table.alias_sequences.len() > 1 {
|
||||
if self.parse_table.child_info_sequences.len() > 1 {
|
||||
add_line!(
|
||||
self,
|
||||
".alias_sequences = (const TSSymbol *)ts_alias_sequences,"
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ pub(crate) struct MetadataParams {
|
|||
pub is_active: bool,
|
||||
pub is_main_token: bool,
|
||||
pub alias: Option<Alias>,
|
||||
pub child_ref: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
|
|
@ -57,6 +58,12 @@ pub(crate) enum Rule {
|
|||
}
|
||||
|
||||
impl Rule {
|
||||
pub fn child_ref(name: String, content: Rule) -> Self {
|
||||
add_metadata(content, move |params| {
|
||||
params.child_ref = Some(name);
|
||||
})
|
||||
}
|
||||
|
||||
pub fn alias(content: Rule, value: String, is_named: bool) -> Self {
|
||||
add_metadata(content, move |params| {
|
||||
params.alias = Some(Alias { is_named, value });
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ use super::nfa::CharacterSet;
|
|||
use super::rules::{Alias, Associativity, Symbol};
|
||||
use hashbrown::HashMap;
|
||||
|
||||
pub(crate) type AliasSequenceId = usize;
|
||||
pub(crate) type ChildInfoSequenceId = usize;
|
||||
pub(crate) type ParseStateId = usize;
|
||||
pub(crate) type LexStateId = usize;
|
||||
|
||||
|
|
@ -21,7 +21,7 @@ pub(crate) enum ParseAction {
|
|||
precedence: i32,
|
||||
dynamic_precedence: i32,
|
||||
associativity: Option<Associativity>,
|
||||
alias_sequence_id: AliasSequenceId,
|
||||
child_info_sequence_id: ChildInfoSequenceId,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
@ -39,11 +39,17 @@ pub(crate) struct ParseState {
|
|||
pub unfinished_item_signature: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
pub(crate) struct ChildInfo {
|
||||
pub alias: Option<Alias>,
|
||||
pub child_ref: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseTable {
|
||||
pub states: Vec<ParseState>,
|
||||
pub symbols: Vec<Symbol>,
|
||||
pub alias_sequences: Vec<Vec<Option<Alias>>>,
|
||||
pub child_info_sequences: Vec<Vec<ChildInfo>>,
|
||||
pub max_aliased_production_length: usize,
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue