Implement variable inlining

This commit is contained in:
Max Brunsfeld 2018-12-18 16:05:36 -08:00
parent 5fa586f7c9
commit 889f232b4c
9 changed files with 567 additions and 31 deletions

15
Cargo.lock generated
View file

@ -67,11 +67,6 @@ name = "bitflags"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bitvec"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "blake2-rfc"
version = "0.2.18"
@ -461,16 +456,17 @@ dependencies = [
name = "rust-tree-sitter-cli"
version = "0.1.0"
dependencies = [
"bitvec 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
"dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
"ignore 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rusqlite 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)",
"smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tree-sitter 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -548,6 +544,11 @@ dependencies = [
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "smallbitvec"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "smallvec"
version = "0.6.7"
@ -729,7 +730,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "89a47830402e9981c5c41223151efcced65a0510c13097c769cede7efb34782a"
"checksum backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c66d56ac8dabd07f6aacdaf633f4b8262f5b3601a810a0dcddffd5c22c69daa0"
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
"checksum bitvec 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e37e2176261200377c7cde4c6de020394174df556c356f965e4bc239f5ce1c5a"
"checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"
"checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16"
"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
@ -787,6 +787,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef"
"checksum serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "225de307c6302bec3898c51ca302fc94a7a1697ef0845fcee6448f33c032249c"
"checksum serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)" = "c37ccd6be3ed1fdf419ee848f7c758eb31b054d7cd3ae3600e3bae0adf569811"
"checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e"
"checksum smallvec 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)" = "b73ea3738b47563803ef814925e69be00799a8c07420be8b996f8e98fb2336db"
"checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8"
"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550"

View file

@ -5,7 +5,8 @@ authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
edition = "2018"
[dependencies]
bitvec = "0.8"
lazy_static = "1.2.0"
smallbitvec = "2.3.0"
clap = "2.32"
dirs = "1.0.2"
ignore = "0.4.4"

View file

@ -0,0 +1,318 @@
use super::item::ParseItem;
use crate::grammars::{Production, SyntaxGrammar};
use std::collections::HashMap;
/// Table of the productions obtained by substituting inlined variables into
/// the productions that reference them.
///
/// * `inlined_productions` holds every production created by inlining.
/// * `item_map` maps a parse item whose next step is an inlined variable to
///   the indices (into `inlined_productions`) of the productions that replace
///   it.
pub(crate) struct InlinedProductionMap {
    pub inlined_productions: Vec<Production>,
    item_map: HashMap<ParseItem, Vec<u32>>,
}

impl InlinedProductionMap {
    /// Builds the map for `grammar`.
    ///
    /// Every production of every variable is walked step by step. Whenever the
    /// step under the cursor refers to a symbol listed in
    /// `grammar.variables_to_inline`, the item is replaced in the work list by
    /// the `ParseItem::Inlined` items produced by `inline`, and walking
    /// continues through those.
    pub fn new(grammar: &SyntaxGrammar) -> Self {
        let mut result = Self {
            inlined_productions: Vec::new(),
            item_map: HashMap::new(),
        };

        let mut items_to_process = Vec::new();
        for (variable_index, variable) in grammar.variables.iter().enumerate() {
            for production_index in 0..variable.productions.len() {
                items_to_process.push(ParseItem::Normal {
                    variable_index: variable_index as u32,
                    production_index: production_index as u32,
                    step_index: 0,
                });

                // Advance every pending item until all of them have run off
                // the end of their production.
                while !items_to_process.is_empty() {
                    let mut i = 0;
                    while i < items_to_process.len() {
                        let item = &items_to_process[i];
                        if let Some(step) = item.step(grammar, &result) {
                            if grammar.variables_to_inline.contains(&step.symbol) {
                                // Replace this item with one item per inlined
                                // production, keeping the same step index.
                                let inlined_items = result
                                    .inline(*item, grammar)
                                    .into_iter()
                                    .map(|production_index| ParseItem::Inlined {
                                        variable_index: item.variable_index(),
                                        production_index: *production_index,
                                        step_index: item.step_index() as u32,
                                    })
                                    .collect::<Vec<_>>();
                                items_to_process.splice(i..i + 1, inlined_items);
                            } else {
                                items_to_process[i] = item.successor();
                                i += 1;
                            }
                        } else {
                            // No step left: this item is finished.
                            items_to_process.remove(i);
                        }
                    }
                }
            }
        }

        result
    }

    /// Returns the items that replace `item` when its next step is an inlined
    /// variable, or `None` if no inlining was recorded for `item`.
    ///
    /// The returned items are `ParseItem::Inlined` values that keep `item`'s
    /// variable index and step index.
    pub fn inlined_items<'a>(
        &'a self,
        item: ParseItem,
    ) -> Option<impl Iterator<Item = ParseItem> + 'a> {
        self.item_map.get(&item).map(|production_indices| {
            production_indices
                .iter()
                .cloned()
                .map(move |production_index| ParseItem::Inlined {
                    variable_index: item.variable_index(),
                    production_index,
                    step_index: item.step_index() as u32,
                })
        })
    }

    /// Expands the inlined variable at `item`'s current step and records the
    /// resulting production indices under `item` in `item_map`.
    ///
    /// Panics if `item` has no current step (the `unwrap` on `item.step`).
    fn inline(&mut self, item: ParseItem, grammar: &SyntaxGrammar) -> &Vec<u32> {
        let step_index = item.step_index();
        let mut productions_to_add = grammar.variables
            [item.step(grammar, self).unwrap().symbol.index]
            .productions
            .clone();

        // If a production being substituted itself starts with an inlined
        // variable, expand that variable in place as well. The index is not
        // advanced after an expansion so the replacements are re-examined.
        let mut i = 0;
        while i < productions_to_add.len() {
            if let Some(first_symbol) = productions_to_add[i].first_symbol() {
                if grammar.variables_to_inline.contains(&first_symbol) {
                    // Remove the production from the vector, replacing it with a placeholder.
                    let production = productions_to_add
                        .splice(i..i + 1, [Production::default()].iter().cloned())
                        .next()
                        .unwrap();

                    // Replace the placeholder with the inlined productions.
                    productions_to_add.splice(
                        i..i + 1,
                        grammar.variables[first_symbol.index]
                            .productions
                            .iter()
                            .map(|p| {
                                let mut p = p.clone();
                                p.steps.extend(production.steps[1..].iter().cloned());
                                p
                            }),
                    );
                    continue;
                }
            }
            i += 1;
        }

        let result = productions_to_add
            .into_iter()
            .map(|production_to_add| {
                let mut inlined_production = item.production(grammar, &self).clone();
                inlined_production.steps.splice(
                    step_index..step_index + 1,
                    production_to_add.steps.iter().cloned(),
                );

                // Reuse an identical production if one is already stored.
                // The lookup must finish before anything is pushed: pushing
                // unconditionally would store a duplicate on every call.
                let existing_index = self
                    .inlined_productions
                    .iter()
                    .position(|p| *p == inlined_production);
                let index = match existing_index {
                    Some(index) => index,
                    None => {
                        self.inlined_productions.push(inlined_production);
                        self.inlined_productions.len() - 1
                    }
                };
                index as u32
            })
            .collect();

        self.item_map.entry(item).or_insert(result)
    }
}
// Unit tests for `InlinedProductionMap`. Each test builds a small
// `SyntaxGrammar` by hand and compares the textual rendering of the inlined
// items (via `display_items`) with the expected strings.
#[cfg(test)]
mod tests {
use super::*;
use crate::grammars::{ProductionStep, SyntaxVariable, VariableType};
use crate::rules::Symbol;
// A single inlined variable (non-terminal 1) with two productions is
// referenced from the middle of var0's only production.
#[test]
fn test_basic_inlining() {
let grammar = SyntaxGrammar {
expected_conflicts: Vec::new(),
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
word_token: None,
variables_to_inline: vec![Symbol::non_terminal(1)],
variables: vec![
SyntaxVariable {
name: "var0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(1)), // inlined
ProductionStep::new(Symbol::terminal(11)),
],
}],
},
SyntaxVariable {
name: "var1".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(12)),
ProductionStep::new(Symbol::terminal(13)),
],
},
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(14))],
},
],
},
],
};
let inline_map = InlinedProductionMap::new(&grammar);
// Nothing to inline at step 0.
assert_eq!(
display_items(
inline_map.inlined_items(ParseItem::Normal {
variable_index: 0,
production_index: 0,
step_index: 0
}),
&grammar,
&inline_map
),
None
);
// Inlining variable 1 yields two productions.
assert_eq!(
display_items(
inline_map.inlined_items(ParseItem::Normal {
variable_index: 0,
production_index: 0,
step_index: 1
}),
&grammar,
&inline_map
),
Some(vec![
"terminal-10 • terminal-12 terminal-13 terminal-11".to_string(),
"terminal-10 • terminal-14 terminal-11".to_string(),
])
);
}
// var0 references two inlined variables (non-terminals 1 and 2), and var1's
// second production itself starts with another inlined variable
// (non-terminal 3), so the expansion at step 1 is nested.
#[test]
fn test_nested_inlining() {
let grammar = SyntaxGrammar {
variables: vec![
SyntaxVariable {
name: "var0".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::non_terminal(1)), // inlined
ProductionStep::new(Symbol::terminal(11)),
ProductionStep::new(Symbol::non_terminal(2)), // inlined
ProductionStep::new(Symbol::terminal(12)),
],
},
],
},
SyntaxVariable {
name: "var1".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(13))],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::non_terminal(3)), // inlined
ProductionStep::new(Symbol::terminal(14)),
],
},
],
},
SyntaxVariable {
name: "var2".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(15))],
}],
},
SyntaxVariable {
name: "var3".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(16))],
}],
},
],
variables_to_inline: vec![
Symbol::non_terminal(1),
Symbol::non_terminal(2),
Symbol::non_terminal(3),
],
expected_conflicts: Vec::new(),
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
word_token: None,
};
let inline_map = InlinedProductionMap::new(&grammar);
// Items produced by inlining non-terminal 1 at step 1 of var0's production.
let items = inline_map.inlined_items(ParseItem::Normal {
variable_index: 0,
production_index: 0,
step_index: 1
}).unwrap().collect::<Vec<_>>();
assert_eq!(
display_items(Some(items.iter().cloned()), &grammar, &inline_map),
Some(vec![
"terminal-10 • terminal-13 terminal-11 non-terminal-2 terminal-12".to_string(),
"terminal-10 • terminal-16 terminal-14 terminal-11 non-terminal-2 terminal-12".to_string()
])
);
// Advance the first inlined item by two steps so that its next step is
// non-terminal 2, the second inlined variable.
let item = items[0].successor().successor();
assert_eq!(
display_items(Some([item].iter().cloned()), &grammar, &inline_map),
Some(vec![
"terminal-10 terminal-13 terminal-11 • non-terminal-2 terminal-12".to_string(),
])
);
// Inlining from an already-inlined item substitutes var2's production.
assert_eq!(
display_items(inline_map.inlined_items(item), &grammar, &inline_map),
Some(vec![
"terminal-10 terminal-13 terminal-11 • terminal-15 terminal-12".to_string(),
])
);
}
// Renders each item with `ParseItem::with` and collects the strings,
// passing a `None` input through unchanged.
fn display_items(
items: Option<impl Iterator<Item = ParseItem>>,
grammar: &SyntaxGrammar,
inline_map: &InlinedProductionMap,
) -> Option<Vec<String>> {
items.map(|items| {
items
.map(|item| format!("{}", item.with(grammar, inline_map)))
.collect()
})
}
}

View file

@ -1,22 +1,209 @@
use crate::grammars::Production;
use super::inline_variables::InlinedProductionMap;
use crate::grammars::{Production, ProductionStep, SyntaxGrammar};
use crate::rules::{Symbol, SymbolType};
use smallbitvec::SmallBitVec;
use std::collections::HashMap;
use bitvec::BitVec;
use std::hash::{Hash, Hasher};
use std::fmt;
#[derive(Debug, PartialEq, Eq)]
pub(super) struct LookaheadSet {
terminal_bits: BitVec,
external_bits: BitVec,
// Synthetic production with a single step that refers to non-terminal 0, with
// precedence 0 and no associativity or alias. `ParseItem::production` returns
// it for `ParseItem::Start` items.
lazy_static! {
static ref START_PRODUCTION: Production = Production {
dynamic_precedence: 0,
steps: vec![ProductionStep {
symbol: Symbol {
index: 0,
kind: SymbolType::NonTerminal,
},
precedence: 0,
associativity: None,
alias: None,
}],
};
}
/// A set of lookahead symbols.
///
/// Terminal and external symbols are tracked in separate bit vectors, each
/// indexed by the symbol's `index`. `eof` is a separate flag.
// NOTE(review): `eof` presumably marks the end-of-input lookahead — confirm.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct LookaheadSet {
terminal_bits: SmallBitVec,
external_bits: SmallBitVec,
eof: bool,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub(super) struct ParseItem {
variable_index: u32,
production_index: u32,
step_index: u32,
pub(crate) enum ParseItem {
Start {
step_index: u32,
},
Normal {
variable_index: u32,
production_index: u32,
step_index: u32,
},
Inlined {
variable_index: u32,
production_index: u32,
step_index: u32,
},
}
#[derive(Debug, PartialEq, Eq)]
pub(super) struct ParseItemSet {
entries: HashMap<ParseItem, LookaheadSet>
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseItemSet {
pub entries: HashMap<ParseItem, LookaheadSet>,
}
impl LookaheadSet {
    /// Creates an empty set: both bit vectors are empty and `eof` is unset.
    pub fn new() -> Self {
        Self {
            terminal_bits: SmallBitVec::new(),
            external_bits: SmallBitVec::new(),
            eof: false,
        }
    }

    /// Adds the terminal or external symbol `other` to the set.
    ///
    /// # Panics
    ///
    /// Panics if `other` is a non-terminal.
    pub fn insert(&mut self, other: Symbol) {
        let bits = match other.kind {
            SymbolType::NonTerminal => panic!("Cannot store non-terminals in a LookaheadSet"),
            SymbolType::Terminal => &mut self.terminal_bits,
            SymbolType::External => &mut self.external_bits,
        };

        // `SmallBitVec::set` panics on an out-of-bounds index, and a new set
        // starts with empty vectors, so grow the vector to cover `other.index`
        // before setting the bit.
        if other.index >= bits.len() {
            bits.resize(other.index + 1, false);
        }
        bits.set(other.index, true);
    }

    /// Adds every symbol of `other` (and its `eof` flag) to this set.
    ///
    /// Returns `true` if this set gained at least one element it did not
    /// already contain.
    pub fn insert_all(&mut self, other: &LookaheadSet) -> bool {
        let mut result = false;

        // Make sure every index present in `other` is addressable here.
        if other.terminal_bits.len() > self.terminal_bits.len() {
            self.terminal_bits.resize(other.terminal_bits.len(), false);
        }
        if other.external_bits.len() > self.external_bits.len() {
            self.external_bits.resize(other.external_bits.len(), false);
        }

        for (i, element) in other.terminal_bits.iter().enumerate() {
            if element {
                result |= !self.terminal_bits[i];
                self.terminal_bits.set(i, element);
            }
        }
        for (i, element) in other.external_bits.iter().enumerate() {
            if element {
                result |= !self.external_bits[i];
                self.external_bits.set(i, element);
            }
        }
        if other.eof {
            result |= !self.eof;
            self.eof = true;
        }

        result
    }
}
impl ParseItem {
    /// Reports whether this is a kernel item: `Start` items always are, other
    /// items only once their step index has moved past zero.
    pub fn is_kernel(&self) -> bool {
        if let ParseItem::Start { .. } = self {
            true
        } else {
            self.step_index() > 0
        }
    }

    /// Looks up the production this item refers to.
    ///
    /// `Start` items use the shared start production, `Normal` items index
    /// into `grammar.variables`, and `Inlined` items index into
    /// `inlined_productions.inlined_productions`.
    pub fn production<'a>(
        &'a self,
        grammar: &'a SyntaxGrammar,
        inlined_productions: &'a InlinedProductionMap,
    ) -> &'a Production {
        match *self {
            ParseItem::Start { .. } => &START_PRODUCTION,
            ParseItem::Normal {
                variable_index,
                production_index,
                ..
            } => {
                let variable = &grammar.variables[variable_index as usize];
                &variable.productions[production_index as usize]
            }
            ParseItem::Inlined {
                production_index, ..
            } => &inlined_productions.inlined_productions[production_index as usize],
        }
    }

    /// Returns the step at the item's current position, or `None` once the
    /// position is past the last step of the production.
    pub fn step<'a>(
        &'a self,
        grammar: &'a SyntaxGrammar,
        inlined_productions: &'a InlinedProductionMap,
    ) -> Option<&'a ProductionStep> {
        let position = self.step_index();
        let production = self.production(grammar, inlined_productions);
        production.steps.get(position)
    }

    /// Returns the index of the variable this item belongs to.
    ///
    /// Panics for `Start` items, which have no variable index.
    pub fn variable_index(&self) -> u32 {
        match *self {
            ParseItem::Normal { variable_index, .. }
            | ParseItem::Inlined { variable_index, .. } => variable_index,
            ParseItem::Start { .. } => panic!("Start item doesn't have a variable index"),
        }
    }

    /// Returns the item's position within its production.
    pub fn step_index(&self) -> usize {
        let raw = match *self {
            ParseItem::Start { step_index }
            | ParseItem::Normal { step_index, .. }
            | ParseItem::Inlined { step_index, .. } => step_index,
        };
        raw as usize
    }

    /// Gives mutable access to the step index, whatever the variant.
    fn step_index_mut(&mut self) -> &mut u32 {
        match *self {
            ParseItem::Start { ref mut step_index }
            | ParseItem::Normal {
                ref mut step_index, ..
            }
            | ParseItem::Inlined {
                ref mut step_index, ..
            } => step_index,
        }
    }

    /// Bundles the item with the grammar and inline map so that it can be
    /// formatted with `Display`.
    pub fn with<'a>(
        &'a self,
        grammar: &'a SyntaxGrammar,
        inlines: &'a InlinedProductionMap,
    ) -> ParseItemDisplay<'a> {
        ParseItemDisplay(self, grammar, inlines)
    }

    /// Returns a copy of this item advanced by one step.
    pub fn successor(&self) -> ParseItem {
        let mut next = *self;
        let position = next.step_index_mut();
        *position += 1;
        next
    }
}
/// Display adapter pairing a parse item with the grammar and inline map needed
/// to resolve its production. Created by `ParseItem::with`.
pub struct ParseItemDisplay<'a>(&'a ParseItem, &'a SyntaxGrammar, &'a InlinedProductionMap);

impl<'a> fmt::Display for ParseItemDisplay<'a> {
    /// Writes the production's steps separated by single spaces, each as
    /// `<kind>-<index>` where kind is `terminal`, `external` or
    /// `non-terminal`. A `•` marker is written immediately before the step at
    /// the item's current position, e.g. `terminal-10 • terminal-12`.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        let step_index = self.0.step_index();
        let production = self.0.production(self.1, self.2);
        for (i, step) in production.steps.iter().enumerate() {
            if i > 0 {
                write!(f, " ")?;
            }

            // Mark the current position. Writing an empty string here would
            // make every position of the same production render identically.
            if i == step_index {
                write!(f, "• ")?;
            }

            let name = if step.symbol.is_terminal() {
                "terminal"
            } else if step.symbol.is_external() {
                "external"
            } else {
                "non-terminal"
            };
            write!(f, "{}-{}", name, step.symbol.index)?;
        }
        Ok(())
    }
}
impl Hash for ParseItemSet {
    /// Hashes the set independently of the order in which `entries` iterates.
    ///
    /// `ParseItemSet` compares equal whenever its maps hold the same entries,
    /// but two equal `HashMap`s may iterate in different orders. Feeding the
    /// entries to `hasher` one after another could therefore give equal sets
    /// different hashes. Instead each entry is hashed on its own with a
    /// fixed-key hasher and the results are combined with a commutative
    /// operation.
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        hasher.write_usize(self.entries.len());

        let mut combined: u64 = 0;
        for (item, lookaheads) in self.entries.iter() {
            // `DefaultHasher::new()` always starts from the same state, so the
            // per-entry hash depends only on the entry itself.
            let mut entry_hasher = std::collections::hash_map::DefaultHasher::new();
            item.hash(&mut entry_hasher);
            lookaheads.hash(&mut entry_hasher);
            combined = combined.wrapping_add(entry_hasher.finish());
        }
        hasher.write_u64(combined);
    }
}

View file

@ -1,4 +1,5 @@
mod item;
mod inline_variables;
use std::collections::{HashMap, VecDeque};
use crate::grammars::{SyntaxGrammar, LexicalGrammar};

View file

@ -108,6 +108,18 @@ impl ProductionStep {
}
}
impl Production {
    /// Returns a copy of the symbol of the first step, or `None` when the
    /// production has no steps.
    pub fn first_symbol(&self) -> Option<Symbol> {
        let leading_step = self.steps.first()?;
        Some(leading_step.symbol.clone())
    }
}
impl Default for Production {
    /// Builds an empty production: no steps and a dynamic precedence of zero.
    fn default() -> Self {
        let steps = Vec::new();
        Production {
            steps,
            dynamic_precedence: 0,
        }
    }
}
impl Variable {
pub fn named(name: &str, rule: Rule) -> Self {
Self { name: name.to_string(), kind: VariableType::Named, rule }

View file

@ -2,6 +2,7 @@ use clap::{App, Arg, SubCommand};
#[macro_use] extern crate serde_derive;
#[macro_use] extern crate serde_json;
#[macro_use] extern crate lazy_static;
mod build_tables;
mod error;

View file

@ -2,7 +2,6 @@ use serde_json::{Map, Value};
use crate::error::Result;
use crate::grammars::{InputGrammar, Variable, VariableType};
use crate::rules::Rule;
use std::collections::HashMap;
#[derive(Deserialize)]
#[serde(tag = "type")]

View file

@ -10,7 +10,7 @@ pub(crate) enum SymbolType {
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub(crate) enum Associativity {
Left,
Right
Right,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
@ -137,24 +137,37 @@ impl Rule {
}
impl Symbol {
pub fn is_terminal(&self) -> bool {
self.kind == SymbolType::Terminal
}
pub fn is_non_terminal(&self) -> bool {
return self.kind == SymbolType::NonTerminal
self.kind == SymbolType::NonTerminal
}
pub fn is_external(&self) -> bool {
return self.kind == SymbolType::External
self.kind == SymbolType::External
}
pub fn non_terminal(index: usize) -> Self {
Symbol { kind: SymbolType::NonTerminal, index }
Symbol {
kind: SymbolType::NonTerminal,
index,
}
}
pub fn terminal(index: usize) -> Self {
Symbol { kind: SymbolType::Terminal, index }
Symbol {
kind: SymbolType::Terminal,
index,
}
}
pub fn external(index: usize) -> Self {
Symbol { kind: SymbolType::External, index }
Symbol {
kind: SymbolType::External,
index,
}
}
}
@ -169,11 +182,14 @@ fn add_metadata<T: Fn(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
Rule::Metadata { rule, mut params } => {
f(&mut params);
Rule::Metadata { rule, params }
},
}
_ => {
let mut params = MetadataParams::default();
f(&mut params);
Rule::Metadata { rule: Box::new(input), params }
Rule::Metadata {
rule: Box::new(input),
params,
}
}
}
}
@ -184,7 +200,7 @@ fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
for element in elements {
choice_helper(result, element);
}
},
}
_ => {
if !result.contains(&rule) {
result.push(rule);