Implement more of parse table generation
This commit is contained in:
parent
5eb8806959
commit
a3dcfa0a52
14 changed files with 1515 additions and 107 deletions
|
|
@ -1,10 +1,10 @@
|
|||
use super::inline_variables::InlinedProductionMap;
|
||||
use crate::grammars::{Production, ProductionStep, SyntaxGrammar};
|
||||
use crate::rules::{Symbol, SymbolType};
|
||||
use crate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
|
||||
use crate::rules::{Associativity, Symbol, SymbolType};
|
||||
use smallbitvec::SmallBitVec;
|
||||
use std::collections::HashMap;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::collections::{HashMap, BTreeMap};
|
||||
use std::fmt;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
lazy_static! {
|
||||
static ref START_PRODUCTION: Production = Production {
|
||||
|
|
@ -28,7 +28,7 @@ pub(crate) struct LookaheadSet {
|
|||
eof: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub(crate) enum ParseItem {
|
||||
Start {
|
||||
step_index: u32,
|
||||
|
|
@ -47,10 +47,29 @@ pub(crate) enum ParseItem {
|
|||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseItemSet {
|
||||
pub entries: HashMap<ParseItem, LookaheadSet>,
|
||||
pub entries: BTreeMap<ParseItem, LookaheadSet>,
|
||||
}
|
||||
|
||||
pub(crate) struct ParseItemDisplay<'a>(&'a ParseItem, &'a SyntaxGrammar, &'a InlinedProductionMap);
|
||||
pub(crate) struct ParseItemDisplay<'a>(
|
||||
&'a ParseItem,
|
||||
&'a SyntaxGrammar,
|
||||
&'a LexicalGrammar,
|
||||
&'a InlinedProductionMap,
|
||||
);
|
||||
|
||||
pub(crate) struct LookaheadSetDisplay<'a>(&'a LookaheadSet, &'a SyntaxGrammar, &'a LexicalGrammar);
|
||||
|
||||
pub(crate) struct ParseItemSetDisplay<'a>(
|
||||
&'a ParseItemSet,
|
||||
&'a SyntaxGrammar,
|
||||
&'a LexicalGrammar,
|
||||
&'a InlinedProductionMap,
|
||||
);
|
||||
|
||||
struct ParseItemSetMapEntry(ParseItemSet, u64);
|
||||
pub(crate) struct ParseItemSetMap<T> {
|
||||
map: HashMap<ParseItemSetMapEntry, T>
|
||||
}
|
||||
|
||||
impl LookaheadSet {
|
||||
pub fn new() -> Self {
|
||||
|
|
@ -61,12 +80,61 @@ impl LookaheadSet {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, other: Symbol) {
|
||||
match other.kind {
|
||||
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"),
|
||||
SymbolType::Terminal => self.terminal_bits.set(other.index, true),
|
||||
SymbolType::External => self.external_bits.set(other.index, true),
|
||||
pub fn iter<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
|
||||
self.terminal_bits
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, value)| {
|
||||
if value {
|
||||
Some(Symbol::terminal(i))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.chain(
|
||||
self.external_bits
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, value)| {
|
||||
if value {
|
||||
Some(Symbol::external(i))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}),
|
||||
)
|
||||
.chain(if self.eof { Some(Symbol::end()) } else { None })
|
||||
}
|
||||
|
||||
pub fn with<'a>(symbols: impl IntoIterator<Item = &'a Symbol>) -> Self {
|
||||
let mut result = Self::new();
|
||||
for symbol in symbols {
|
||||
result.insert(*symbol);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn contains(&self, symbol: &Symbol) -> bool {
|
||||
match symbol.kind {
|
||||
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"),
|
||||
SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false),
|
||||
SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false),
|
||||
SymbolType::End => self.eof,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, other: Symbol) {
|
||||
let vec = match other.kind {
|
||||
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"),
|
||||
SymbolType::Terminal => &mut self.terminal_bits,
|
||||
SymbolType::External => &mut self.external_bits,
|
||||
SymbolType::End => {
|
||||
self.eof = true;
|
||||
return;
|
||||
}
|
||||
};
|
||||
vec.resize(other.index + 1, false);
|
||||
vec.set(other.index, true);
|
||||
}
|
||||
|
||||
pub fn insert_all(&mut self, other: &LookaheadSet) -> bool {
|
||||
|
|
@ -95,6 +163,14 @@ impl LookaheadSet {
|
|||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn display_with<'a>(
|
||||
&'a self,
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
lexical_grammar: &'a LexicalGrammar,
|
||||
) -> LookaheadSetDisplay<'a> {
|
||||
LookaheadSetDisplay(self, syntax_grammar, lexical_grammar)
|
||||
}
|
||||
}
|
||||
|
||||
impl ParseItem {
|
||||
|
|
@ -126,18 +202,53 @@ impl ParseItem {
|
|||
&grammar.variables[*variable_index as usize].productions[*production_index as usize]
|
||||
}
|
||||
ParseItem::Inlined {
|
||||
production_index,
|
||||
..
|
||||
production_index, ..
|
||||
} => &inlined_productions.inlined_productions[*production_index as usize],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn symbol(
|
||||
&self,
|
||||
grammar: &SyntaxGrammar,
|
||||
inlined_productions: &InlinedProductionMap,
|
||||
) -> Option<Symbol> {
|
||||
self.step(grammar, inlined_productions).map(|s| s.symbol)
|
||||
}
|
||||
|
||||
pub fn step<'a>(
|
||||
&self,
|
||||
grammar: &'a SyntaxGrammar,
|
||||
inlined_productions: &'a InlinedProductionMap,
|
||||
) -> Option<&'a ProductionStep> {
|
||||
self.production(grammar, inlined_productions).steps.get(self.step_index())
|
||||
self.production(grammar, inlined_productions)
|
||||
.steps
|
||||
.get(self.step_index())
|
||||
}
|
||||
|
||||
pub fn precedence<'a>(
|
||||
&self,
|
||||
grammar: &'a SyntaxGrammar,
|
||||
inlines: &'a InlinedProductionMap,
|
||||
) -> i32 {
|
||||
self.production(grammar, inlines)
|
||||
.steps
|
||||
.get(self.step_index() - 1)
|
||||
.map(|s| s.precedence)
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
pub fn associativity<'a>(
|
||||
&self,
|
||||
grammar: &'a SyntaxGrammar,
|
||||
inlines: &'a InlinedProductionMap,
|
||||
) -> Option<Associativity> {
|
||||
let production = self.production(grammar, inlines);
|
||||
let step_index = self.step_index();
|
||||
if step_index == production.steps.len() {
|
||||
production.steps.last().and_then(|s| s.associativity)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn variable_index(&self) -> u32 {
|
||||
|
|
@ -156,6 +267,14 @@ impl ParseItem {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn is_final(&self) -> bool {
|
||||
if let ParseItem::Start { step_index: 1 } = self {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn step_index_mut(&mut self) -> &mut u32 {
|
||||
match self {
|
||||
ParseItem::Start { step_index }
|
||||
|
|
@ -164,8 +283,13 @@ impl ParseItem {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn with<'a>(&'a self, grammar: &'a SyntaxGrammar, inlines: &'a InlinedProductionMap) -> ParseItemDisplay<'a> {
|
||||
ParseItemDisplay(self, grammar, inlines)
|
||||
pub fn display_with<'a>(
|
||||
&'a self,
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
lexical_grammar: &'a LexicalGrammar,
|
||||
inlines: &'a InlinedProductionMap,
|
||||
) -> ParseItemDisplay<'a> {
|
||||
ParseItemDisplay(self, syntax_grammar, lexical_grammar, inlines)
|
||||
}
|
||||
|
||||
pub fn successor(&self) -> ParseItem {
|
||||
|
|
@ -176,33 +300,107 @@ impl ParseItem {
|
|||
}
|
||||
|
||||
impl ParseItemSet {
|
||||
pub fn new() -> Self {
|
||||
Self { entries: HashMap::new() }
|
||||
pub fn with<'a>(elements: impl IntoIterator<Item = &'a (ParseItem, LookaheadSet)>) -> Self {
|
||||
let mut result = Self::default();
|
||||
for (item, lookaheads) in elements {
|
||||
result.entries.insert(*item, lookaheads.clone());
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn display_with<'a>(
|
||||
&'a self,
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
lexical_grammar: &'a LexicalGrammar,
|
||||
inlines: &'a InlinedProductionMap,
|
||||
) -> ParseItemSetDisplay<'a> {
|
||||
ParseItemSetDisplay(self, syntax_grammar, lexical_grammar, inlines)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ParseItemSet {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
entries: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> fmt::Display for ParseItemDisplay<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
if let ParseItem::Start { .. } = &self.0 {
|
||||
write!(f, "START →")?;
|
||||
} else {
|
||||
write!(
|
||||
f,
|
||||
"{} →",
|
||||
&self.1.variables[self.0.variable_index() as usize].name
|
||||
)?;
|
||||
}
|
||||
|
||||
let step_index = self.0.step_index();
|
||||
let production = self.0.production(self.1, self.2);
|
||||
let production = self.0.production(self.1, self.3);
|
||||
for (i, step) in production.steps.iter().enumerate() {
|
||||
if i > 0 {
|
||||
write!(f, " ")?;
|
||||
}
|
||||
|
||||
if i == step_index {
|
||||
write!(f, "• ")?;
|
||||
write!(f, " •")?;
|
||||
}
|
||||
|
||||
let name = if step.symbol.is_terminal() {
|
||||
"terminal"
|
||||
write!(f, " ")?;
|
||||
if step.symbol.is_terminal() {
|
||||
if let Some(variable) = self.2.variables.get(step.symbol.index) {
|
||||
write!(f, "{}", &variable.name)?;
|
||||
} else {
|
||||
write!(f, "{}-{}", "terminal", step.symbol.index)?;
|
||||
}
|
||||
} else if step.symbol.is_external() {
|
||||
"external"
|
||||
write!(f, "{}", &self.1.external_tokens[step.symbol.index].name)?;
|
||||
} else {
|
||||
"non-terminal"
|
||||
};
|
||||
write!(f, "{}", &self.1.variables[step.symbol.index].name)?;
|
||||
}
|
||||
}
|
||||
|
||||
write!(f, "{}-{}", name, step.symbol.index)?;
|
||||
if production.steps.len() == step_index {
|
||||
write!(f, " •")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> fmt::Display for LookaheadSetDisplay<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
write!(f, "[")?;
|
||||
for (i, symbol) in self.0.iter().enumerate() {
|
||||
if i > 0 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
|
||||
if symbol.is_terminal() {
|
||||
if let Some(variable) = self.2.variables.get(symbol.index) {
|
||||
write!(f, "{}", &variable.name)?;
|
||||
} else {
|
||||
write!(f, "{}-{}", "terminal", symbol.index)?;
|
||||
}
|
||||
} else if symbol.is_external() {
|
||||
write!(f, "{}", &self.1.external_tokens[symbol.index].name)?;
|
||||
} else {
|
||||
write!(f, "{}", &self.1.variables[symbol.index].name)?;
|
||||
}
|
||||
}
|
||||
write!(f, "]")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> fmt::Display for ParseItemSetDisplay<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
for (item, lookaheads) in self.0.entries.iter() {
|
||||
writeln!(
|
||||
f,
|
||||
"{}\t{}",
|
||||
item.display_with(self.1, self.2, self.3),
|
||||
lookaheads.display_with(self.1, self.2)
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ pub(crate) struct ParseItemSetBuilder {
|
|||
first_sets: HashMap<Symbol, LookaheadSet>,
|
||||
last_sets: HashMap<Symbol, LookaheadSet>,
|
||||
transitive_closure_additions: Vec<Vec<TransitiveClosureAddition>>,
|
||||
inlined_production_map: InlinedProductionMap,
|
||||
pub inlines: InlinedProductionMap,
|
||||
}
|
||||
|
||||
fn find_or_push<T: Eq>(vector: &mut Vec<T>, value: T) {
|
||||
|
|
@ -35,7 +35,7 @@ impl ParseItemSetBuilder {
|
|||
first_sets: HashMap::new(),
|
||||
last_sets: HashMap::new(),
|
||||
transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()],
|
||||
inlined_production_map: InlinedProductionMap::new(syntax_grammar),
|
||||
inlines: InlinedProductionMap::new(syntax_grammar),
|
||||
};
|
||||
|
||||
// For each grammar symbol, populate the FIRST and LAST sets: the set of
|
||||
|
|
@ -192,6 +192,10 @@ impl ParseItemSetBuilder {
|
|||
let additions_for_non_terminal = &mut result.transitive_closure_additions[i];
|
||||
for (variable_index, follow_set_info) in follow_set_info_by_non_terminal {
|
||||
let variable = &syntax_grammar.variables[variable_index];
|
||||
let non_terminal = Symbol::non_terminal(variable_index);
|
||||
if syntax_grammar.variables_to_inline.contains(&non_terminal) {
|
||||
continue;
|
||||
}
|
||||
for production_index in 0..variable.productions.len() {
|
||||
let item = ParseItem::Normal {
|
||||
variable_index: variable_index as u32,
|
||||
|
|
@ -199,7 +203,7 @@ impl ParseItemSetBuilder {
|
|||
step_index: 0,
|
||||
};
|
||||
|
||||
if let Some(inlined_items) = result.inlined_production_map.inlined_items(item) {
|
||||
if let Some(inlined_items) = result.inlines.inlined_items(item) {
|
||||
for inlined_item in inlined_items {
|
||||
find_or_push(
|
||||
additions_for_non_terminal,
|
||||
|
|
@ -227,32 +231,36 @@ impl ParseItemSetBuilder {
|
|||
|
||||
pub(crate) fn transitive_closure(
|
||||
&mut self,
|
||||
item_set: ParseItemSet,
|
||||
item_set: &ParseItemSet,
|
||||
grammar: &SyntaxGrammar,
|
||||
) -> ParseItemSet {
|
||||
let mut result = ParseItemSet::new();
|
||||
for (item, lookaheads) in item_set.entries {
|
||||
if let Some(items) = self.inlined_production_map.inlined_items(item) {
|
||||
let mut result = ParseItemSet::default();
|
||||
for (item, lookaheads) in &item_set.entries {
|
||||
if let Some(items) = self.inlines.inlined_items(*item) {
|
||||
for item in items {
|
||||
self.add_item(&mut result, item, lookaheads.clone(), grammar);
|
||||
self.add_item(&mut result, item, lookaheads, grammar);
|
||||
}
|
||||
} else {
|
||||
self.add_item(&mut result, item, lookaheads, grammar);
|
||||
self.add_item(&mut result, *item, lookaheads, grammar);
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn first_set(&self, symbol: &Symbol) -> &LookaheadSet {
|
||||
&self.first_sets[symbol]
|
||||
}
|
||||
|
||||
fn add_item(
|
||||
&self,
|
||||
set: &mut ParseItemSet,
|
||||
item: ParseItem,
|
||||
lookaheads: LookaheadSet,
|
||||
lookaheads: &LookaheadSet,
|
||||
grammar: &SyntaxGrammar,
|
||||
) {
|
||||
if let Some(step) = item.step(grammar, &self.inlined_production_map) {
|
||||
if let Some(step) = item.step(grammar, &self.inlines) {
|
||||
if step.symbol.is_non_terminal() {
|
||||
let next_step = item.successor().step(grammar, &self.inlined_production_map);
|
||||
let next_step = item.successor().step(grammar, &self.inlines);
|
||||
|
||||
// Determine which tokens can follow this non-terminal.
|
||||
let following_tokens = if let Some(next_step) = next_step {
|
||||
|
|
@ -274,6 +282,6 @@ impl ParseItemSetBuilder {
|
|||
}
|
||||
}
|
||||
}
|
||||
set.entries.insert(item, lookaheads);
|
||||
set.entries.insert(item, lookaheads.clone());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,37 +1,611 @@
|
|||
mod item;
|
||||
mod inline_variables;
|
||||
mod item;
|
||||
mod item_set_builder;
|
||||
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use crate::grammars::{SyntaxGrammar, LexicalGrammar};
|
||||
use crate::tables::{ParseTable, LexTable, ParseStateId};
|
||||
use crate::rules::{AliasMap, Symbol};
|
||||
use crate::error::Result;
|
||||
use self::item::ParseItemSet;
|
||||
use self::item::{LookaheadSet, ParseItem, ParseItemSet};
|
||||
use self::item_set_builder::ParseItemSetBuilder;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use crate::rules::{AliasMap, Associativity, Symbol, SymbolType};
|
||||
use crate::tables::ParseTableEntry;
|
||||
use crate::tables::{AliasSequenceId, LexTable, ParseAction, ParseState, ParseStateId, ParseTable};
|
||||
use core::ops::Range;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
use std::fmt::Write;
|
||||
|
||||
#[derive(Clone)]
|
||||
struct AuxiliarySymbolInfo {
|
||||
auxiliary_symbol: Symbol,
|
||||
parent_symbols: Vec<Symbol>,
|
||||
}
|
||||
|
||||
type SymbolSequence = Vec<Symbol>;
|
||||
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
|
||||
|
||||
struct ParseStateQueueEntry {
|
||||
preceding_symbols: SymbolSequence,
|
||||
item_set: ParseItemSet,
|
||||
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
|
||||
state_id: ParseStateId,
|
||||
}
|
||||
|
||||
struct ParseTableBuilder<'a> {
|
||||
item_set_builder: ParseItemSetBuilder,
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
lexical_grammar: &'a LexicalGrammar,
|
||||
simple_aliases: &'a AliasMap,
|
||||
state_ids_by_item_set: HashMap<ParseItemSet, ParseStateId>,
|
||||
item_sets_by_state_id: Vec<&'a ParseItemSet>,
|
||||
item_sets_by_state_id: Vec<ParseItemSet>,
|
||||
parse_state_queue: VecDeque<ParseStateQueueEntry>,
|
||||
parse_table: ParseTable,
|
||||
}
|
||||
|
||||
impl<'a> ParseTableBuilder<'a> {
|
||||
fn build(mut self) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
|
||||
// Ensure that the empty rename sequence has index 0.
|
||||
self.parse_table.alias_sequences.push(Vec::new());
|
||||
|
||||
// Ensure that the error state has index 0.
|
||||
let error_state_id = self.add_parse_state(
|
||||
&Vec::new(),
|
||||
&Vec::new(),
|
||||
ParseItemSet::default(),
|
||||
);
|
||||
|
||||
self.add_parse_state(
|
||||
&Vec::new(),
|
||||
&Vec::new(),
|
||||
ParseItemSet::with(&[(ParseItem::start(), LookaheadSet::with(&[Symbol::end()]))]),
|
||||
);
|
||||
|
||||
self.process_part_state_queue()?;
|
||||
self.populate_used_symbols();
|
||||
|
||||
Err(Error::grammar("oh no"))
|
||||
}
|
||||
|
||||
fn add_parse_state(
|
||||
&mut self,
|
||||
preceding_symbols: &SymbolSequence,
|
||||
preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
|
||||
item_set: ParseItemSet,
|
||||
) -> ParseStateId {
|
||||
match self.state_ids_by_item_set.entry(item_set) {
|
||||
Entry::Occupied(o) => {
|
||||
// eprintln!("Item set already processed at state {}", *o.get());
|
||||
*o.get()
|
||||
}
|
||||
Entry::Vacant(v) => {
|
||||
// eprintln!("Item set not yet processed");
|
||||
let state_id = self.parse_table.states.len();
|
||||
self.item_sets_by_state_id.push(v.key().clone());
|
||||
self.parse_table.states.push(ParseState {
|
||||
terminal_entries: HashMap::new(),
|
||||
nonterminal_entries: HashMap::new(),
|
||||
});
|
||||
self.parse_state_queue.push_back(ParseStateQueueEntry {
|
||||
state_id,
|
||||
preceding_symbols: preceding_symbols.clone(),
|
||||
preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(),
|
||||
});
|
||||
v.insert(state_id);
|
||||
state_id
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Processes queued parse states in FIFO order until the queue is empty.
///
/// For each state this prints its item set to stdout, computes the item
/// set's transitive closure, and hands the closure to `add_actions`
/// (which may queue further states). Stops at the first error.
fn process_part_state_queue(&mut self) -> Result<()> {
    while let Some(entry) = self.parse_state_queue.pop_front() {
        let state_id = entry.state_id;

        println!(
            "ITEM SET {}:\n{}",
            state_id,
            self.item_sets_by_state_id[state_id].display_with(
                &self.syntax_grammar,
                &self.lexical_grammar,
                &self.item_set_builder.inlines
            )
        );

        let closure = self
            .item_set_builder
            .transitive_closure(&self.item_sets_by_state_id[state_id], self.syntax_grammar);

        self.add_actions(
            entry.preceding_symbols,
            entry.preceding_auxiliary_symbols,
            closure,
            state_id,
        )?;
    }
    Ok(())
}
|
||||
|
||||
fn add_actions(
|
||||
&mut self,
|
||||
mut preceding_symbols: SymbolSequence,
|
||||
mut preceding_auxiliary_symbols: Vec<AuxiliarySymbolInfo>,
|
||||
item_set: ParseItemSet,
|
||||
state_id: ParseStateId,
|
||||
) -> Result<()> {
|
||||
let mut terminal_successors = HashMap::new();
|
||||
let mut non_terminal_successors = HashMap::new();
|
||||
let mut lookaheads_with_conflicts = HashSet::new();
|
||||
|
||||
for (item, lookaheads) in &item_set.entries {
|
||||
if let Some(next_symbol) =
|
||||
item.symbol(self.syntax_grammar, &self.item_set_builder.inlines)
|
||||
{
|
||||
let successor = item.successor();
|
||||
if next_symbol.is_non_terminal() {
|
||||
// Keep track of where auxiliary non-terminals (repeat symbols) are
|
||||
// used within visible symbols. This information may be needed later
|
||||
// for conflict resolution.
|
||||
if self.syntax_grammar.variables[next_symbol.index].is_auxiliary() {
|
||||
preceding_auxiliary_symbols
|
||||
.push(self.get_auxiliary_node_info(&item_set, next_symbol));
|
||||
}
|
||||
|
||||
non_terminal_successors
|
||||
.entry(next_symbol)
|
||||
.or_insert_with(|| ParseItemSet::default())
|
||||
.entries
|
||||
.entry(successor)
|
||||
.or_insert_with(|| LookaheadSet::new())
|
||||
.insert_all(lookaheads);
|
||||
} else {
|
||||
terminal_successors
|
||||
.entry(next_symbol)
|
||||
.or_insert_with(|| ParseItemSet::default())
|
||||
.entries
|
||||
.entry(successor)
|
||||
.or_insert_with(|| LookaheadSet::new())
|
||||
.insert_all(lookaheads);
|
||||
}
|
||||
} else {
|
||||
let action = if item.is_final() {
|
||||
ParseAction::Accept
|
||||
} else {
|
||||
let production =
|
||||
item.production(&self.syntax_grammar, &self.item_set_builder.inlines);
|
||||
ParseAction::Reduce {
|
||||
symbol: Symbol::non_terminal(item.variable_index() as usize),
|
||||
child_count: item.step_index(),
|
||||
precedence: production.last_precedence(),
|
||||
associativity: production.last_associativity(),
|
||||
dynamic_precedence: production.dynamic_precedence,
|
||||
alias_sequence_id: self.get_alias_sequence_id(item),
|
||||
}
|
||||
};
|
||||
|
||||
for lookahead in lookaheads.iter() {
|
||||
let entry = self.parse_table.states[state_id]
|
||||
.terminal_entries
|
||||
.entry(lookahead);
|
||||
let entry = entry.or_insert_with(|| ParseTableEntry::new());
|
||||
if entry.actions.is_empty() {
|
||||
entry.actions.push(action);
|
||||
} else if action.precedence() > entry.actions[0].precedence() {
|
||||
entry.actions.clear();
|
||||
entry.actions.push(action);
|
||||
lookaheads_with_conflicts.remove(&lookahead);
|
||||
} else if action.precedence() == entry.actions[0].precedence() {
|
||||
entry.actions.push(action);
|
||||
lookaheads_with_conflicts.insert(lookahead);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (symbol, next_item_set) in terminal_successors {
|
||||
preceding_symbols.push(symbol);
|
||||
let next_state_id = self.add_parse_state(
|
||||
&preceding_symbols,
|
||||
&preceding_auxiliary_symbols,
|
||||
next_item_set,
|
||||
);
|
||||
preceding_symbols.pop();
|
||||
|
||||
let entry = self.parse_table.states[state_id]
|
||||
.terminal_entries
|
||||
.entry(symbol);
|
||||
if let Entry::Occupied(e) = &entry {
|
||||
if !e.get().actions.is_empty() {
|
||||
lookaheads_with_conflicts.insert(symbol);
|
||||
}
|
||||
}
|
||||
|
||||
entry
|
||||
.or_insert_with(|| ParseTableEntry::new())
|
||||
.actions
|
||||
.push(ParseAction::Shift {
|
||||
state: next_state_id,
|
||||
is_repetition: false,
|
||||
});
|
||||
}
|
||||
|
||||
for (symbol, next_item_set) in non_terminal_successors {
|
||||
preceding_symbols.push(symbol);
|
||||
let next_state_id = self.add_parse_state(
|
||||
&preceding_symbols,
|
||||
&preceding_auxiliary_symbols,
|
||||
next_item_set,
|
||||
);
|
||||
preceding_symbols.pop();
|
||||
self.parse_table.states[state_id]
|
||||
.nonterminal_entries
|
||||
.insert(symbol, next_state_id);
|
||||
}
|
||||
|
||||
for symbol in lookaheads_with_conflicts {
|
||||
self.handle_conflict(
|
||||
&item_set,
|
||||
state_id,
|
||||
&preceding_symbols,
|
||||
&preceding_auxiliary_symbols,
|
||||
symbol,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn handle_conflict(
|
||||
&mut self,
|
||||
item_set: &ParseItemSet,
|
||||
state_id: ParseStateId,
|
||||
preceding_symbols: &SymbolSequence,
|
||||
preceding_auxiliary_symbols: &Vec<AuxiliarySymbolInfo>,
|
||||
conflicting_lookahead: Symbol,
|
||||
) -> Result<()> {
|
||||
let entry = self.parse_table.states[state_id]
|
||||
.terminal_entries
|
||||
.get_mut(&conflicting_lookahead)
|
||||
.unwrap();
|
||||
|
||||
// Determine which items in the set conflict with each other, and the
|
||||
// precedences associated with SHIFT vs REDUCE actions. There won't
|
||||
// be multiple REDUCE actions with different precedences; that is
|
||||
// sorted out ahead of time in `add_actions`. But there can still be
|
||||
// REDUCE-REDUCE conflicts where all actions have the *same*
|
||||
// precedence, and there can still be SHIFT/REDUCE conflicts.
|
||||
let reduce_precedence = entry.actions[0].precedence();
|
||||
let mut considered_associativity = false;
|
||||
let mut shift_precedence: Option<Range<i32>> = None;
|
||||
let mut conflicting_items = HashSet::new();
|
||||
for (item, lookaheads) in &item_set.entries {
|
||||
let production = item.production(&self.syntax_grammar, &self.item_set_builder.inlines);
|
||||
let step_index = item.step_index();
|
||||
if let Some(step) = production.steps.get(step_index) {
|
||||
if step_index > 0 {
|
||||
if self
|
||||
.item_set_builder
|
||||
.first_set(&step.symbol)
|
||||
.contains(&conflicting_lookahead)
|
||||
{
|
||||
conflicting_items.insert(item);
|
||||
let precedence = production.steps[step_index - 1].precedence;
|
||||
if let Some(range) = &mut shift_precedence {
|
||||
if precedence < range.start {
|
||||
range.start = precedence;
|
||||
} else if precedence > range.end {
|
||||
range.end = precedence;
|
||||
}
|
||||
} else {
|
||||
shift_precedence = Some(precedence..precedence);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if lookaheads.contains(&conflicting_lookahead) {
|
||||
conflicting_items.insert(item);
|
||||
}
|
||||
}
|
||||
|
||||
if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() {
|
||||
let shift_precedence = shift_precedence.unwrap_or(0..0);
|
||||
|
||||
// If all of the items in the conflict have the same parent symbol,
|
||||
// and that parent symbols is auxiliary, then this is just the intentional
|
||||
// ambiguity associated with a repeat rule. Resolve that class of ambiguity
|
||||
// by leaving it in the parse table, but marking the SHIFT action with
|
||||
// an `is_repetition` flag.
|
||||
let conflicting_variable_index =
|
||||
conflicting_items.iter().next().unwrap().variable_index();
|
||||
if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary() {
|
||||
if conflicting_items
|
||||
.iter()
|
||||
.all(|item| item.variable_index() == conflicting_variable_index)
|
||||
{
|
||||
*is_repetition = true;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
// If the SHIFT action has higher precedence, remove all the REDUCE actions.
|
||||
if shift_precedence.start > reduce_precedence
|
||||
|| (shift_precedence.start == reduce_precedence
|
||||
&& shift_precedence.end > reduce_precedence)
|
||||
{
|
||||
entry.actions.drain(0..entry.actions.len() - 1);
|
||||
}
|
||||
// If the REDUCE actions have higher precedence, remove the SHIFT action.
|
||||
else if shift_precedence.end < reduce_precedence
|
||||
|| (shift_precedence.end == reduce_precedence
|
||||
&& shift_precedence.start < reduce_precedence)
|
||||
{
|
||||
entry.actions.pop();
|
||||
conflicting_items.retain(|item| {
|
||||
item.step(&self.syntax_grammar, &self.item_set_builder.inlines)
|
||||
.is_none()
|
||||
});
|
||||
}
|
||||
// If the SHIFT and REDUCE actions have the same predence, consider
|
||||
// the REDUCE actions' associativity.
|
||||
else if shift_precedence == (reduce_precedence..reduce_precedence) {
|
||||
considered_associativity = true;
|
||||
let mut has_left = false;
|
||||
let mut has_right = false;
|
||||
let mut has_non = false;
|
||||
for action in &entry.actions {
|
||||
if let ParseAction::Reduce { associativity, .. } = action {
|
||||
match associativity {
|
||||
Some(Associativity::Left) => has_left = true,
|
||||
Some(Associativity::Right) => has_right = true,
|
||||
None => has_non = true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If all reduce actions are left associative, remove the SHIFT action.
|
||||
// If all reduce actions are right associative, remove the REDUCE actions.
|
||||
match (has_left, has_non, has_right) {
|
||||
(true, false, false) => {
|
||||
entry.actions.pop();
|
||||
conflicting_items.retain(|item| {
|
||||
item.step(&self.syntax_grammar, &self.item_set_builder.inlines)
|
||||
.is_none()
|
||||
});
|
||||
}
|
||||
(false, false, true) => {
|
||||
entry.actions.drain(0..entry.actions.len() - 1);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If all of the actions but one have been eliminated, then there's no problem.
|
||||
let entry = self.parse_table.states[state_id]
|
||||
.terminal_entries
|
||||
.get_mut(&conflicting_lookahead)
|
||||
.unwrap();
|
||||
if entry.actions.len() == 1 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Determine the set of parent symbols involved in this conflict.
|
||||
let mut actual_conflict = Vec::new();
|
||||
for item in &conflicting_items {
|
||||
let symbol = Symbol::non_terminal(item.variable_index() as usize);
|
||||
if self.syntax_grammar.variables[symbol.index].is_auxiliary() {
|
||||
actual_conflict.extend(
|
||||
preceding_auxiliary_symbols
|
||||
.iter()
|
||||
.rev()
|
||||
.find_map(|info| {
|
||||
if info.auxiliary_symbol == symbol {
|
||||
Some(&info.parent_symbols)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.unwrap()
|
||||
.iter(),
|
||||
);
|
||||
} else {
|
||||
actual_conflict.push(symbol);
|
||||
}
|
||||
}
|
||||
actual_conflict.sort_unstable();
|
||||
actual_conflict.dedup();
|
||||
|
||||
// If this set of symbols has been whitelisted, then there's no error.
|
||||
if self
|
||||
.syntax_grammar
|
||||
.expected_conflicts
|
||||
.contains(&actual_conflict)
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut msg = "Unresolved conflict for symbol sequence:\n\n".to_string();
|
||||
for symbol in preceding_symbols {
|
||||
write!(&mut msg, " {}", self.symbol_name(symbol)).unwrap();
|
||||
}
|
||||
|
||||
write!(
|
||||
&mut msg,
|
||||
" • {} …\n\n",
|
||||
self.symbol_name(&conflicting_lookahead)
|
||||
)
|
||||
.unwrap();
|
||||
write!(&mut msg, "Possible interpretations:\n").unwrap();
|
||||
for (i, item) in conflicting_items.iter().enumerate() {
|
||||
write!(&mut msg, "\n {}:", i).unwrap();
|
||||
|
||||
for preceding_symbol in preceding_symbols
|
||||
.iter()
|
||||
.take(preceding_symbols.len() - item.step_index())
|
||||
{
|
||||
write!(&mut msg, " {}", self.symbol_name(preceding_symbol)).unwrap();
|
||||
}
|
||||
|
||||
write!(
|
||||
&mut msg,
|
||||
" ({}",
|
||||
&self.syntax_grammar.variables[item.variable_index() as usize].name
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
for (j, step) in item
|
||||
.production(&self.syntax_grammar, &self.item_set_builder.inlines)
|
||||
.steps
|
||||
.iter()
|
||||
.enumerate()
|
||||
{
|
||||
if j == item.step_index() {
|
||||
write!(&mut msg, " •").unwrap();
|
||||
}
|
||||
write!(&mut msg, " {}", self.symbol_name(&step.symbol)).unwrap();
|
||||
}
|
||||
|
||||
write!(&mut msg, ")").unwrap();
|
||||
|
||||
if item
|
||||
.step(&self.syntax_grammar, &self.item_set_builder.inlines)
|
||||
.is_none()
|
||||
{
|
||||
write!(
|
||||
&mut msg,
|
||||
" • {}",
|
||||
self.symbol_name(&conflicting_lookahead)
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let precedence = item.precedence(&self.syntax_grammar, &self.item_set_builder.inlines);
|
||||
let associativity =
|
||||
item.associativity(&self.syntax_grammar, &self.item_set_builder.inlines);
|
||||
if precedence != 0 || associativity.is_some() {
|
||||
write!(
|
||||
&mut msg,
|
||||
"(precedence: {}, associativity: {:?})",
|
||||
precedence, associativity
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
// TODO - generate suggested resolutions
|
||||
|
||||
Err(Error::ConflictError(msg))
|
||||
}
|
||||
|
||||
fn get_auxiliary_node_info(
|
||||
&self,
|
||||
item_set: &ParseItemSet,
|
||||
symbol: Symbol,
|
||||
) -> AuxiliarySymbolInfo {
|
||||
let parent_symbols = item_set
|
||||
.entries
|
||||
.keys()
|
||||
.filter_map(|item| {
|
||||
if item.symbol(&self.syntax_grammar, &self.item_set_builder.inlines) == Some(symbol)
|
||||
{
|
||||
None
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
AuxiliarySymbolInfo {
|
||||
auxiliary_symbol: symbol,
|
||||
parent_symbols,
|
||||
}
|
||||
}
|
||||
|
||||
fn populate_used_symbols(&mut self) {
|
||||
let mut terminal_usages = vec![false; self.lexical_grammar.variables.len()];
|
||||
let mut non_terminal_usages = vec![false; self.syntax_grammar.variables.len()];
|
||||
let mut external_usages = vec![false; self.syntax_grammar.external_tokens.len()];
|
||||
for state in &self.parse_table.states {
|
||||
for symbol in state.terminal_entries.keys() {
|
||||
match symbol.kind {
|
||||
SymbolType::Terminal => terminal_usages[symbol.index] = true,
|
||||
SymbolType::External => external_usages[symbol.index] = true,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
for symbol in state.nonterminal_entries.keys() {
|
||||
non_terminal_usages[symbol.index] = true;
|
||||
}
|
||||
}
|
||||
for (i, value) in terminal_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
self.parse_table.symbols.push(Symbol::terminal(i));
|
||||
}
|
||||
}
|
||||
for (i, value) in non_terminal_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
self.parse_table.symbols.push(Symbol::non_terminal(i));
|
||||
}
|
||||
}
|
||||
for (i, value) in external_usages.into_iter().enumerate() {
|
||||
if value {
|
||||
self.parse_table.symbols.push(Symbol::external(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the index of the alias sequence of `item`'s production within
/// `parse_table.alias_sequences`, appending the sequence if it is not
/// already stored.
fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId {
    let alias_sequence: Vec<_> = item
        .production(&self.syntax_grammar, &self.item_set_builder.inlines)
        .steps
        .iter()
        .map(|step| step.alias.clone())
        .collect();

    let sequences = &mut self.parse_table.alias_sequences;
    match sequences
        .iter()
        .position(|existing| *existing == alias_sequence)
    {
        Some(existing_id) => existing_id,
        None => {
            sequences.push(alias_sequence);
            sequences.len() - 1
        }
    }
}
|
||||
|
||||
fn symbol_name(&self, symbol: &Symbol) -> String {
|
||||
match symbol.kind {
|
||||
SymbolType::End => "EOF".to_string(),
|
||||
SymbolType::External => self.syntax_grammar.external_tokens[symbol.index]
|
||||
.name
|
||||
.clone(),
|
||||
SymbolType::NonTerminal => self.syntax_grammar.variables[symbol.index].name.clone(),
|
||||
SymbolType::Terminal => {
|
||||
let variable = &self.lexical_grammar.variables[symbol.index];
|
||||
if variable.kind == VariableType::Named {
|
||||
variable.name.clone()
|
||||
} else {
|
||||
format!("\"{}\"", &variable.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn build_tables(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
simple_aliases: &AliasMap
|
||||
simple_aliases: &AliasMap,
|
||||
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
|
||||
unimplemented!();
|
||||
ParseTableBuilder {
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
simple_aliases,
|
||||
item_set_builder: ParseItemSetBuilder::new(syntax_grammar, lexical_grammar),
|
||||
state_ids_by_item_set: HashMap::new(),
|
||||
item_sets_by_state_id: Vec::new(),
|
||||
parse_state_queue: VecDeque::new(),
|
||||
parse_table: ParseTable {
|
||||
states: Vec::new(),
|
||||
alias_sequences: Vec::new(),
|
||||
symbols: Vec::new(),
|
||||
},
|
||||
}
|
||||
.build()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ pub enum Error {
|
|||
GrammarError(String),
|
||||
SymbolError(String),
|
||||
RegexError(String),
|
||||
ConflictError(String),
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ use crate::prepare_grammar::prepare_grammar;
|
|||
use crate::build_tables::build_tables;
|
||||
use crate::render::render_c_code;
|
||||
|
||||
pub fn generate_parser_for_grammar(input: String) -> Result<String> {
|
||||
let input_grammar = parse_grammar(&input)?;
|
||||
pub fn generate_parser_for_grammar(input: &str) -> Result<String> {
|
||||
let input_grammar = parse_grammar(input)?;
|
||||
let (syntax_grammar, lexical_grammar, simple_aliases) = prepare_grammar(&input_grammar)?;
|
||||
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
|
||||
&syntax_grammar,
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ pub(crate) struct LexicalVariable {
|
|||
pub start_state: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
pub(crate) struct LexicalGrammar {
|
||||
pub nfa: Nfa,
|
||||
pub variables: Vec<LexicalVariable>,
|
||||
|
|
@ -112,6 +112,14 @@ impl Production {
|
|||
pub fn first_symbol(&self) -> Option<Symbol> {
|
||||
self.steps.first().map(|s| s.symbol.clone())
|
||||
}
|
||||
|
||||
pub fn last_precedence(&self) -> i32 {
|
||||
self.steps.last().map(|s| s.precedence).unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Associativity of the final step, or `None` when the production has no
/// steps or the final step has no associativity.
pub fn last_associativity(&self) -> Option<Associativity> {
    self.steps.last().and_then(|step| step.associativity)
}
|
||||
}
|
||||
|
||||
impl Default for Production {
|
||||
|
|
@ -137,3 +145,9 @@ impl Variable {
|
|||
Self { name: name.to_string(), kind: VariableType::Anonymous, rule }
|
||||
}
|
||||
}
|
||||
|
||||
impl SyntaxVariable {
|
||||
pub fn is_auxiliary(&self) -> bool {
|
||||
self.kind == VariableType::Auxiliary
|
||||
}
|
||||
}
|
||||
|
|
|
|||
334
src/js/dsl.js
Normal file
334
src/js/dsl.js
Normal file
|
|
@ -0,0 +1,334 @@
|
|||
const UNICODE_ESCAPE_PATTERN = /\\u([0-9a-f]{4})/gi;
|
||||
const DELIMITER_ESCAPE_PATTERN = /\\\//g;
|
||||
|
||||
/**
 * Builds an ALIAS node wrapping `rule`.
 *
 * `value` may be a string (anonymous alias), a SYMBOL node, or the
 * ReferenceError produced for an undefined rule reference (both named
 * aliases). Anything else throws.
 */
function alias(rule, value) {
  // The rule is normalized first so that its errors take precedence.
  const content = normalize(rule);
  const build = (named, name) => ({
    type: "ALIAS",
    content,
    named,
    value: name
  });

  const kind = value.constructor;
  if (kind === String) {
    return build(false, value);
  }
  if (kind === ReferenceError) {
    return build(true, value.symbol.name);
  }
  if (kind === Object && value.type === 'SYMBOL') {
    return build(true, value.name);
  }

  throw new Error('Invalid alias value ' + value);
}
|
||||
|
||||
/** Returns a new BLANK node. */
function blank() {
  const node = {type: "BLANK"};
  return node;
}
|
||||
|
||||
/** Returns a CHOICE node whose members are the normalized arguments. */
function choice(...elements) {
  const members = Array.from(elements, element => normalize(element));
  return {type: "CHOICE", members};
}
|
||||
|
||||
/** Returns a choice between `value` and a blank node. */
function optional(value) {
  const nothing = blank();
  return choice(value, nothing);
}
|
||||
|
||||
/**
 * Returns a PREC node. When called with a single argument, that argument is
 * the rule and the precedence value is 0.
 */
function prec(number, rule) {
  const ruleOnly = rule == null;
  const value = ruleOnly ? 0 : number;
  const content = normalize(ruleOnly ? number : rule);
  return {type: "PREC", value, content};
}
|
||||
|
||||
/**
 * Returns a PREC_LEFT node. When called with a single argument, that
 * argument is the rule and the precedence value is 0.
 */
prec.left = function(number, rule) {
  const ruleOnly = rule == null;
  const value = ruleOnly ? 0 : number;
  const content = normalize(ruleOnly ? number : rule);
  return {type: "PREC_LEFT", value, content};
};
|
||||
|
||||
/**
 * Returns a PREC_RIGHT node. When called with a single argument, that
 * argument is the rule and the precedence value is 0.
 */
prec.right = function(number, rule) {
  const ruleOnly = rule == null;
  const value = ruleOnly ? 0 : number;
  const content = normalize(ruleOnly ? number : rule);
  return {type: "PREC_RIGHT", value, content};
};
|
||||
|
||||
/**
 * Returns a PREC_DYNAMIC node with the given value. Unlike the other `prec`
 * forms, both arguments are used as given.
 */
prec.dynamic = function(number, rule) {
  const content = normalize(rule);
  return {type: "PREC_DYNAMIC", value: number, content};
};
|
||||
|
||||
/** Returns a REPEAT node wrapping the normalized `rule`. */
function repeat(rule) {
  const content = normalize(rule);
  return {type: "REPEAT", content};
}
|
||||
|
||||
/** Returns a REPEAT1 node wrapping the normalized `rule`. */
function repeat1(rule) {
  const content = normalize(rule);
  return {type: "REPEAT1", content};
}
|
||||
|
||||
/** Returns a SEQ node whose members are the normalized arguments. */
function seq(...elements) {
  const members = Array.from(elements, element => normalize(element));
  return {type: "SEQ", members};
}
|
||||
|
||||
/** Returns a SYMBOL node referring to the rule called `name`. */
function sym(name) {
  return {type: "SYMBOL", name};
}
|
||||
|
||||
/** Returns a TOKEN node wrapping the normalized `value`. */
function token(value) {
  const content = normalize(value);
  return {type: "TOKEN", content};
}
|
||||
|
||||
/** Returns an IMMEDIATE_TOKEN node wrapping the normalized `value`. */
token.immediate = function(value) {
  const content = normalize(value);
  return {type: "IMMEDIATE_TOKEN", content};
};
|
||||
|
||||
/**
 * Converts a DSL value into a rule node.
 *
 * - strings become STRING nodes;
 * - regular expressions become PATTERN nodes built from their source, with
 *   escaped delimiters and `\uXXXX` escapes replaced;
 * - a ReferenceError (an undefined rule reference) is thrown as is;
 * - any other value with a string `type` is returned unchanged.
 *
 * Throws an Error for `undefined` and a TypeError for anything else.
 */
function normalize(value) {
  if (typeof value == "undefined") {
    throw new Error("Undefined symbol");
  }

  const kind = value.constructor;

  if (kind === String) {
    return {type: 'STRING', value};
  }

  if (kind === RegExp) {
    const decodeUnicodeEscape = (match, group) => String.fromCharCode(parseInt(group, 16));
    const source = value.source
      .replace(DELIMITER_ESCAPE_PATTERN, '/')
      .replace(UNICODE_ESCAPE_PATTERN, decodeUnicodeEscape);
    return {type: 'PATTERN', value: source};
  }

  if (kind === ReferenceError) {
    throw value;
  }

  if (typeof value.type === 'string') {
    return value;
  }

  throw new TypeError("Invalid rule: " + value.toString());
}
|
||||
|
||||
/**
 * Returns a proxy on which every property access yields a SYMBOL node for
 * that property name.
 *
 * When `ruleMap` is given, names that are not own properties of it yield a
 * ReferenceError instead (returned, not thrown), carrying the SYMBOL node in
 * its `symbol` property. When `ruleMap` is falsy every name is accepted.
 */
function RuleBuilder(ruleMap) {
  return new Proxy({}, {
    get(target, propertyName) {
      const symbol = {
        type: 'SYMBOL',
        name: propertyName
      };

      // Call through Object.prototype so that a rule which is itself named
      // `hasOwnProperty` cannot shadow the lookup.
      if (!ruleMap || Object.prototype.hasOwnProperty.call(ruleMap, propertyName)) {
        return symbol;
      }

      // String() keeps the message buildable for Symbol-keyed accesses, which
      // a template literal would reject with a TypeError.
      const error = new ReferenceError(`Undefined symbol '${String(propertyName)}'`);
      error.symbol = symbol;
      return error;
    }
  });
}
|
||||
|
||||
/**
 * Builds a grammar description from `options`, optionally extending
 * `baseGrammar`. When called with one argument, that argument is the options
 * object and an empty default base grammar is used.
 *
 * Returns `{name, word, rules, extras, conflicts, externals, inline}`.
 * Throws an Error when a property has the wrong shape, when the name is not
 * a valid identifier, or when the resulting grammar has no rules.
 */
function grammar(baseGrammar, options) {
  const requireFunction = (value, message) => {
    if (typeof value !== "function") {
      throw new Error(message);
    }
  };
  const requireArray = (value, message) => {
    if (!Array.isArray(value)) {
      throw new Error(message);
    }
  };

  if (!options) {
    options = baseGrammar;
    baseGrammar = {
      name: null,
      rules: {},
      extras: [normalize(/\s/)],
      conflicts: [],
      externals: [],
      inline: []
    };
  }

  // Externals are resolved first, with a builder that accepts any name, so
  // that their names can be referenced from the rules below.
  let externals = baseGrammar.externals;
  if (options.externals) {
    requireFunction(options.externals, "Grammar's 'externals' property must be a function.");

    const externalsRuleBuilder = RuleBuilder(null);
    const externalRules = options.externals.call(
      externalsRuleBuilder,
      externalsRuleBuilder,
      baseGrammar.externals
    );
    requireArray(externalRules, "Grammar's 'externals' property must return an array of rules.");

    externals = externalRules.map(normalize);
  }

  // Every name that rule functions are allowed to reference.
  const ruleMap = {};
  for (const ruleName in options.rules) {
    ruleMap[ruleName] = true;
  }
  for (const ruleName in baseGrammar.rules) {
    ruleMap[ruleName] = true;
  }
  for (const external of externals) {
    if (typeof external.name === 'string') {
      ruleMap[external.name] = true;
    }
  }

  const ruleBuilder = RuleBuilder(ruleMap);

  const name = options.name;
  if (typeof name !== "string") {
    throw new Error("Grammar's 'name' property must be a string.");
  }
  if (!/^[a-zA-Z_]\w*$/.test(name)) {
    throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
  }

  const rules = Object.assign({}, baseGrammar.rules);
  if (options.rules) {
    if (typeof options.rules !== "object") {
      throw new Error("Grammar's 'rules' property must be an object.");
    }

    for (const ruleName in options.rules) {
      const ruleFn = options.rules[ruleName];
      requireFunction(ruleFn, "Grammar rules must all be functions. '" + ruleName + "' rule is not.");
      const baseRule = baseGrammar.rules[ruleName];
      rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseRule));
    }
  }

  let extras = baseGrammar.extras.slice();
  if (options.extras) {
    requireFunction(options.extras, "Grammar's 'extras' property must be a function.");
    const extraRules = options.extras.call(ruleBuilder, ruleBuilder, baseGrammar.extras);
    extras = extraRules.map(normalize);
  }

  let word = baseGrammar.word;
  if (options.word) {
    word = options.word.call(ruleBuilder, ruleBuilder).name;
    if (typeof word != 'string') {
      throw new Error("Grammar's 'word' property must be a named rule.");
    }
  }

  let conflicts = baseGrammar.conflicts;
  if (options.conflicts) {
    requireFunction(options.conflicts, "Grammar's 'conflicts' property must be a function.");

    const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym));
    const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules);
    requireArray(conflictRules, "Grammar's conflicts must be an array of arrays of rules.");

    conflicts = conflictRules.map(conflictSet => {
      requireArray(conflictSet, "Grammar's conflicts must be an array of arrays of rules.");
      return conflictSet.map(symbol => symbol.name);
    });
  }

  let inline = baseGrammar.inline;
  if (options.inline) {
    requireFunction(options.inline, "Grammar's 'inline' property must be a function.");

    const baseInlineRules = baseGrammar.inline.map(sym);
    const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules);
    requireArray(inlineRules, "Grammar's inline must be an array of rules.");

    inline = inlineRules.map(symbol => symbol.name);
  }

  if (Object.keys(rules).length == 0) {
    throw new Error("Grammar must have at least one rule.");
  }

  return {name, word, rules, extras, conflicts, externals, inline};
}
|
||||
|
||||
// Expose the DSL functions as globals so grammar files can call them directly.
Object.assign(global, {
  alias,
  blank,
  choice,
  optional,
  prec,
  repeat,
  repeat1,
  seq,
  sym,
  token,
  grammar
});
|
||||
65
src/main.rs
65
src/main.rs
|
|
@ -1,8 +1,15 @@
|
|||
use clap::{App, Arg, SubCommand};
|
||||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
#[macro_use]
|
||||
extern crate serde_json;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
#[macro_use] extern crate serde_derive;
|
||||
#[macro_use] extern crate serde_json;
|
||||
#[macro_use] extern crate lazy_static;
|
||||
use std::path::PathBuf;
|
||||
use clap::{App, Arg, SubCommand};
|
||||
use std::env;
|
||||
use std::io::Write;
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
mod build_tables;
|
||||
mod error;
|
||||
|
|
@ -20,25 +27,59 @@ fn main() -> error::Result<()> {
|
|||
.version("0.1")
|
||||
.author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
|
||||
.about("Generates and tests parsers")
|
||||
.subcommand(SubCommand::with_name("generate").about("Generate a parser"))
|
||||
.subcommand(
|
||||
SubCommand::with_name("generate")
|
||||
.about("Generate a parser")
|
||||
).subcommand(
|
||||
SubCommand::with_name("parse")
|
||||
.about("Parse a file")
|
||||
.arg(Arg::with_name("path").index(1))
|
||||
).subcommand(
|
||||
.arg(Arg::with_name("path").index(1)),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("test")
|
||||
.about("Run a parser's tests")
|
||||
.arg(Arg::with_name("path").index(1).required(true))
|
||||
.arg(Arg::with_name("line").index(2).required(true))
|
||||
.arg(Arg::with_name("column").index(3).required(true))
|
||||
).get_matches();
|
||||
.arg(Arg::with_name("column").index(3).required(true)),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
if let Some(matches) = matches.subcommand_matches("generate") {
|
||||
let code = generate::generate_parser_for_grammar(String::new())?;
|
||||
let mut grammar_path = env::current_dir().expect("Failed to read CWD");
|
||||
grammar_path.push("grammar.js");
|
||||
let grammar_json = load_js_grammar_file(grammar_path);
|
||||
let code = generate::generate_parser_for_grammar(&grammar_json)?;
|
||||
println!("{}", code);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn load_js_grammar_file(grammar_path: PathBuf) -> String {
|
||||
let mut node_process = Command::new("node")
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.spawn()
|
||||
.expect("Failed to run `node`");
|
||||
|
||||
let js_prelude = include_str!("./js/dsl.js");
|
||||
let mut node_stdin = node_process
|
||||
.stdin
|
||||
.take()
|
||||
.expect("Failed to open stdin for node");
|
||||
write!(
|
||||
node_stdin,
|
||||
"{}\nconsole.log(JSON.stringify(require(\"{}\"), null, 2));\n",
|
||||
js_prelude,
|
||||
grammar_path.to_str().unwrap()
|
||||
).expect("Failed to write to node's stdin");
|
||||
drop(node_stdin);
|
||||
let output = node_process
|
||||
.wait_with_output()
|
||||
.expect("Failed to read output from node");
|
||||
match output.status.code() {
|
||||
None => panic!("Node process was killed"),
|
||||
Some(0) => {}
|
||||
Some(code) => panic!(format!("Node process exited with status {}", code)),
|
||||
}
|
||||
|
||||
String::from_utf8(output.stdout).expect("Got invalid UTF8 from node")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,12 @@ pub struct Nfa {
|
|||
pub states: Vec<NfaState>
|
||||
}
|
||||
|
||||
impl Default for Nfa {
|
||||
fn default() -> Self {
|
||||
Self { states: Vec::new() }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct NfaCursor<'a> {
|
||||
pub(crate) state_ids: Vec<u32>,
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ pub(super) fn extract_simple_aliases(
|
|||
Symbol { kind: SymbolType::External, index} => &mut external_status_list[index],
|
||||
Symbol { kind: SymbolType::NonTerminal, index} => &mut non_terminal_status_list[index],
|
||||
Symbol { kind: SymbolType::Terminal, index} => &mut terminal_status_list[index],
|
||||
Symbol { kind: SymbolType::End, .. } => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
if step.alias.is_none() {
|
||||
|
|
@ -49,6 +50,7 @@ pub(super) fn extract_simple_aliases(
|
|||
Symbol { kind: SymbolType::External, index} => &external_status_list[index],
|
||||
Symbol { kind: SymbolType::NonTerminal, index} => &non_terminal_status_list[index],
|
||||
Symbol { kind: SymbolType::Terminal, index} => &terminal_status_list[index],
|
||||
Symbol { kind: SymbolType::End, .. } => panic!("Unexpected end token"),
|
||||
};
|
||||
|
||||
if status.alias.is_some() {
|
||||
|
|
|
|||
|
|
@ -67,10 +67,13 @@ pub(super) fn extract_tokens(
|
|||
.expected_conflicts
|
||||
.into_iter()
|
||||
.map(|conflict| {
|
||||
conflict
|
||||
let mut result: Vec<_> = conflict
|
||||
.iter()
|
||||
.map(|symbol| symbol_replacer.replace_symbol(*symbol))
|
||||
.collect()
|
||||
.collect();
|
||||
result.sort_unstable();
|
||||
result.dedup();
|
||||
result
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,188 @@
|
|||
use crate::rules::{Symbol, AliasMap};
|
||||
use crate::grammars::{SyntaxGrammar, LexicalGrammar};
|
||||
use crate::tables::{ParseTable, LexTable};
|
||||
use crate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use crate::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
use crate::tables::{LexTable, ParseTable, ParseTableEntry};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt::Write;
|
||||
|
||||
/// Appends one formatted line to `$this.buffer`: one indent string per
/// `$this.indent_level`, then the formatted arguments, then a newline.
macro_rules! add_line {
    ($this: tt, $($arg: tt)*) => {
        for _ in 0..$this.indent_level {
            $this.buffer.push_str(" ");
        }
        writeln!(&mut $this.buffer, $($arg)*).unwrap();
    }
}
|
||||
|
||||
struct Generator {
|
||||
buffer: String,
|
||||
indent_level: usize,
|
||||
|
||||
language_name: String,
|
||||
parse_table: ParseTable,
|
||||
main_lex_table: LexTable,
|
||||
keyword_lex_table: LexTable,
|
||||
keyword_capture_token: Option<Symbol>,
|
||||
syntax_grammar: SyntaxGrammar,
|
||||
lexical_grammar: LexicalGrammar,
|
||||
simple_aliases: AliasMap,
|
||||
symbol_ids: HashMap<Symbol, String>,
|
||||
parse_table_entries: Vec<(usize, ParseTableEntry)>,
|
||||
next_parse_action_list_index: usize,
|
||||
unique_aliases: HashSet<Alias>,
|
||||
}
|
||||
|
||||
impl Generator {
|
||||
fn generate(mut self) -> String {
|
||||
self.add_includes();
|
||||
self.add_pragmas();
|
||||
self.add_stats();
|
||||
self.add_symbol_enum();
|
||||
self.add_symbol_names_list();
|
||||
self.buffer
|
||||
}
|
||||
|
||||
fn add_includes(&mut self) {
|
||||
add_line!(self, "#include <tree_sitter/parser.h>");
|
||||
add_line!(self, "");
|
||||
}
|
||||
|
||||
fn add_pragmas(&mut self) {
|
||||
add_line!(self, "#if defined(__GNUC__) || defined(__clang__)");
|
||||
add_line!(self, "#pragma GCC diagnostic push");
|
||||
add_line!(self, "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
|
||||
add_line!(self, "#endif");
|
||||
add_line!(self, "");
|
||||
|
||||
// Compiling large lexer functions can be very slow, especially when
|
||||
// using Visual Studio on Windows. Disabling optimizations is not
|
||||
// ideal, but only a very small fraction of overall parse time is
|
||||
// spent lexing, so the performance impact of this is pretty small.
|
||||
if self.main_lex_table.states.len() > 500 {
|
||||
add_line!(self, "#ifdef _MSC_VER");
|
||||
add_line!(self, "#pragma optimize(\"\", off)");
|
||||
add_line!(self, "#endif");
|
||||
add_line!(self, "");
|
||||
}
|
||||
}
|
||||
|
||||
fn add_stats(&mut self) {
|
||||
let mut token_count = 0;
|
||||
|
||||
for symbol in &self.parse_table.symbols {
|
||||
if symbol.is_terminal() {
|
||||
token_count += 1;
|
||||
} else if symbol.is_external() {
|
||||
let external_token = &self.syntax_grammar.external_tokens[symbol.index];
|
||||
if external_token.corresponding_internal_token.is_none() {
|
||||
token_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for alias_sequence in &self.parse_table.alias_sequences {
|
||||
for entry in alias_sequence {
|
||||
if let Some(alias) = entry {
|
||||
self.unique_aliases.insert(alias.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut symbol_id_values = HashSet::new();
|
||||
for i in 0..self.parse_table.symbols.len() {
|
||||
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_id_values);
|
||||
}
|
||||
|
||||
add_line!(self, "#define LANGUAGE_VERSION {}", 6);
|
||||
add_line!(self, "#define STATE_COUNT {}", self.parse_table.states.len());
|
||||
add_line!(self, "#define SYMBOL_COUNT {}", self.parse_table.symbols.len());
|
||||
add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len());
|
||||
add_line!(self, "#define TOKEN_COUNT {}", token_count);
|
||||
add_line!(self, "#define EXTERNAL_TOKEN_COUNT {}", self.syntax_grammar.external_tokens.len());
|
||||
// add_line!(self, "#define MAX_ALIAS_SEQUENCE_LENGTH {}\n", self.parse_table.max_alias_sequence_length);
|
||||
add_line!(self, "");
|
||||
}
|
||||
|
||||
fn add_symbol_enum(&mut self) {
|
||||
add_line!(self, "enum {{");
|
||||
self.indent();
|
||||
for i in 0..self.parse_table.symbols.len() {
|
||||
let symbol = self.parse_table.symbols[i];
|
||||
if symbol != Symbol::end() {
|
||||
add_line!(self, "{} = {}", self.symbol_ids[&symbol], i);
|
||||
}
|
||||
}
|
||||
self.dedent();
|
||||
add_line!(self, "}};");
|
||||
add_line!(self, "");
|
||||
}
|
||||
|
||||
fn add_symbol_names_list(&mut self) {
|
||||
add_line!(self, "static const char *ts_symbol_names[] = {{");
|
||||
self.indent();
|
||||
self.dedent();
|
||||
add_line!(self, "}};");
|
||||
add_line!(self, "");
|
||||
}
|
||||
|
||||
fn assign_symbol_id(&mut self, symbol: Symbol, used_ids: &mut HashSet<String>) {
|
||||
let mut id;
|
||||
if symbol == Symbol::end() {
|
||||
id = "ts_builtin_sym_end".to_string();
|
||||
} else {
|
||||
let (name, kind) = self.metadata_for_symbol(symbol);
|
||||
id = match kind {
|
||||
VariableType::Auxiliary => format!("aux_sym_{}", self.sanitize_name(name)),
|
||||
VariableType::Anonymous => format!("anon_sym_{}", self.sanitize_name(name)),
|
||||
VariableType::Hidden | VariableType::Named => {
|
||||
format!("sym_{}", self.sanitize_name(name))
|
||||
}
|
||||
};
|
||||
|
||||
let mut suffix_number = 1;
|
||||
let mut suffix = String::new();
|
||||
while used_ids.contains(&id) {
|
||||
id.drain(id.len() - suffix.len()..);
|
||||
suffix_number += 1;
|
||||
suffix = suffix_number.to_string();
|
||||
id += &suffix;
|
||||
}
|
||||
}
|
||||
|
||||
used_ids.insert(id.clone());
|
||||
self.symbol_ids.insert(symbol, id);
|
||||
}
|
||||
|
||||
fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) {
|
||||
match symbol.kind {
|
||||
SymbolType::End => ("end", VariableType::Auxiliary),
|
||||
SymbolType::NonTerminal => {
|
||||
let variable = &self.syntax_grammar.variables[symbol.index];
|
||||
(&variable.name, variable.kind)
|
||||
}
|
||||
SymbolType::Terminal => {
|
||||
let variable = &self.lexical_grammar.variables[symbol.index];
|
||||
(&variable.name, variable.kind)
|
||||
}
|
||||
SymbolType::External => {
|
||||
let token = &self.syntax_grammar.external_tokens[symbol.index];
|
||||
(&token.name, token.kind)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn sanitize_name(&self, name: &str) -> String {
|
||||
name.to_string()
|
||||
}
|
||||
|
||||
fn indent(&mut self) {
|
||||
self.indent_level += 1;
|
||||
}
|
||||
|
||||
fn dedent(&mut self) {
|
||||
self.indent_level -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn render_c_code(
|
||||
name: &str,
|
||||
|
|
@ -12,5 +194,21 @@ pub(crate) fn render_c_code(
|
|||
lexical_grammar: LexicalGrammar,
|
||||
simple_aliases: AliasMap,
|
||||
) -> String {
|
||||
unimplemented!();
|
||||
Generator {
|
||||
buffer: String::new(),
|
||||
indent_level: 0,
|
||||
language_name: name.to_string(),
|
||||
parse_table,
|
||||
main_lex_table,
|
||||
keyword_lex_table,
|
||||
keyword_capture_token,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
simple_aliases,
|
||||
symbol_ids: HashMap::new(),
|
||||
parse_table_entries: Vec::new(),
|
||||
next_parse_action_list_index: 0,
|
||||
unique_aliases: HashSet::new(),
|
||||
}
|
||||
.generate()
|
||||
}
|
||||
|
|
|
|||
23
src/rules.rs
23
src/rules.rs
|
|
@ -1,10 +1,11 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
/// The category a `Symbol` belongs to. Variants are ordered by declaration
/// order, as given by the derived `PartialOrd`/`Ord`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum SymbolType {
    External,
    Terminal,
    NonTerminal,
    End,
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
|
|
@ -33,7 +34,7 @@ pub(crate) struct MetadataParams {
|
|||
pub alias: Option<Alias>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub(crate) struct Symbol {
|
||||
pub kind: SymbolType,
|
||||
pub index: usize,
|
||||
|
|
@ -56,6 +57,15 @@ pub(crate) enum Rule {
|
|||
}
|
||||
|
||||
impl Rule {
|
||||
pub fn alias(content: Rule, value: String, is_named: bool) -> Self {
|
||||
add_metadata(content, move |params| {
|
||||
params.alias = Some(Alias {
|
||||
is_named,
|
||||
value
|
||||
});
|
||||
})
|
||||
}
|
||||
|
||||
pub fn token(content: Rule) -> Self {
|
||||
add_metadata(content, |params| {
|
||||
params.is_token = true;
|
||||
|
|
@ -169,6 +179,13 @@ impl Symbol {
|
|||
index,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn end() -> Self {
|
||||
Symbol {
|
||||
kind: SymbolType::End,
|
||||
index: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Symbol> for Rule {
|
||||
|
|
@ -177,7 +194,7 @@ impl From<Symbol> for Rule {
|
|||
}
|
||||
}
|
||||
|
||||
fn add_metadata<T: Fn(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
|
||||
fn add_metadata<T: FnOnce(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
|
||||
match input {
|
||||
Rule::Metadata { rule, mut params } => {
|
||||
f(&mut params);
|
||||
|
|
|
|||
|
|
@ -6,20 +6,13 @@ pub(crate) type AliasSequenceId = usize;
|
|||
pub(crate) type ParseStateId = usize;
|
||||
pub(crate) type LexStateId = usize;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum ParseActionType {
|
||||
Error,
|
||||
Shift,
|
||||
Reduce,
|
||||
Accept,
|
||||
Recover,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum ParseAction {
|
||||
Accept,
|
||||
Error,
|
||||
Shift(ParseStateId),
|
||||
Shift {
|
||||
state: ParseStateId,
|
||||
is_repetition: bool,
|
||||
},
|
||||
ShiftExtra,
|
||||
Recover,
|
||||
Reduce {
|
||||
|
|
@ -28,50 +21,69 @@ pub(crate) enum ParseAction {
|
|||
precedence: i32,
|
||||
dynamic_precedence: i32,
|
||||
associativity: Option<Associativity>,
|
||||
alias_sequence_id: Option<AliasSequenceId>,
|
||||
is_repetition: bool,
|
||||
alias_sequence_id: AliasSequenceId,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseTableEntry {
|
||||
actions: Vec<ParseAction>,
|
||||
reusable: bool,
|
||||
pub actions: Vec<ParseAction>,
|
||||
pub reusable: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseState {
|
||||
terminal_entries: HashMap<Symbol, ParseTableEntry>,
|
||||
nonterminal_entries: HashMap<Symbol, ParseStateId>
|
||||
pub terminal_entries: HashMap<Symbol, ParseTableEntry>,
|
||||
pub nonterminal_entries: HashMap<Symbol, ParseStateId>
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseTable {
|
||||
states: Vec<ParseState>,
|
||||
alias_sequences: Vec<Vec<Alias>>,
|
||||
pub states: Vec<ParseState>,
|
||||
pub symbols: Vec<Symbol>,
|
||||
pub alias_sequences: Vec<Vec<Option<Alias>>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct AdvanceAction {
|
||||
state: LexStateId,
|
||||
precedence: Range<i32>,
|
||||
in_main_token: bool,
|
||||
pub state: LexStateId,
|
||||
pub precedence: Range<i32>,
|
||||
pub in_main_token: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct AcceptTokenAction {
|
||||
symbol: Symbol,
|
||||
precedence: i32,
|
||||
implicit_precedence: i32,
|
||||
pub symbol: Symbol,
|
||||
pub precedence: i32,
|
||||
pub implicit_precedence: i32,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct LexState {
|
||||
advance_actions: HashMap<Symbol, AdvanceAction>,
|
||||
accept_action: Option<AcceptTokenAction>,
|
||||
pub advance_actions: HashMap<Symbol, AdvanceAction>,
|
||||
pub accept_action: Option<AcceptTokenAction>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(crate) struct LexTable {
|
||||
states: Vec<LexState>,
|
||||
pub states: Vec<LexState>,
|
||||
}
|
||||
|
||||
impl ParseTableEntry {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
reusable: true,
|
||||
actions: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ParseAction {
|
||||
pub fn precedence(&self) -> i32 {
|
||||
if let ParseAction::Reduce { precedence, .. } = self {
|
||||
*precedence
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue