tree-sitter/cli/src/generate/rules.rs
2021-02-24 16:02:56 -08:00

468 lines
12 KiB
Rust

use super::grammars::VariableType;
use smallbitvec::SmallBitVec;
use std::iter::FromIterator;
use std::{collections::HashMap, fmt};
/// The category of a grammar [`Symbol`].
///
/// `PartialOrd`/`Ord` are derived, so variants compare by their position in
/// this declaration.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum SymbolType {
/// Kind produced by `Symbol::external`.
/// NOTE(review): presumably a token recognized by an external scanner — confirm.
External,
/// Kind produced by `Symbol::end`; `Symbol::is_eof` checks for it.
End,
/// Kind produced by `Symbol::end_of_nonterminal_extra`.
EndOfNonTerminalExtra,
/// Kind produced by `Symbol::terminal`.
Terminal,
/// Kind produced by `Symbol::non_terminal`. `TokenSet` panics when asked to
/// store or look up a symbol of this kind.
NonTerminal,
}
/// Associativity that can be recorded in a rule's [`MetadataParams`].
///
/// `Rule::prec_left` records `Left` and `Rule::prec_right` records `Right`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum Associativity {
/// Recorded by `Rule::prec_left`.
Left,
/// Recorded by `Rule::prec_right`.
Right,
}
/// An alias attached to a rule via `Rule::alias`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct Alias {
/// The alias text.
pub value: String,
/// Selects the result of `Alias::kind`: `VariableType::Named` when `true`,
/// `VariableType::Anonymous` when `false`.
pub is_named: bool,
}
/// A precedence value that can be attached to a rule.
///
/// The `Default` implementation yields `None`, and the `Display`
/// implementation renders the variants as `none`, the bare integer, and the
/// name wrapped in single quotes respectively.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Precedence {
/// No precedence; `Precedence::is_none` returns `true` only for this variant.
None,
/// A numeric precedence.
Integer(i32),
/// A precedence identified by name.
Name(String),
}
/// Lookup table from a [`Symbol`] to the [`Alias`] associated with it.
pub(crate) type AliasMap = HashMap<Symbol, Alias>;
/// The settings carried by a `Rule::Metadata` wrapper.
///
/// `Default` yields `Precedence::None`, `0`, `false` and `None` for the
/// respective fields; the `Rule` constructor helpers start from that default
/// (or from an existing wrapper's parameters) and overwrite individual fields.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub(crate) struct MetadataParams {
/// Set by `Rule::prec`, `Rule::prec_left` and `Rule::prec_right`.
pub precedence: Precedence,
/// Set by `Rule::prec_dynamic`.
pub dynamic_precedence: i32,
/// Set by `Rule::prec_left` / `Rule::prec_right`; `None` otherwise.
pub associativity: Option<Associativity>,
/// Set by `Rule::token` and `Rule::immediate_token`. `add_metadata` never
/// merges further parameters into a wrapper that has this flag set.
pub is_token: bool,
/// Not written by any constructor visible in this file.
/// NOTE(review): meaning must be confirmed against the code that sets it.
pub is_string: bool,
/// Not written by any constructor visible in this file.
/// NOTE(review): meaning must be confirmed against the code that sets it.
pub is_active: bool,
/// Set (together with `is_token`) by `Rule::immediate_token`.
pub is_main_token: bool,
/// Set by `Rule::alias`.
pub alias: Option<Alias>,
/// Set by `Rule::field`.
pub field_name: Option<String>,
}
/// A grammar symbol, identified by its category and an index.
///
/// `PartialOrd`/`Ord` are derived, so symbols compare by `kind` first and
/// then by `index`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct Symbol {
/// Which category the symbol belongs to.
pub kind: SymbolType,
/// Index of the symbol within its category. `Symbol::end` and
/// `Symbol::end_of_nonterminal_extra` always use `0`.
pub index: usize,
}
/// A node in a grammar rule tree.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) enum Rule {
/// A rule with no payload.
/// NOTE(review): presumably the empty rule — confirm.
Blank,
/// A rule holding a string payload (built by the test helper `Rule::string`).
/// NOTE(review): presumably literal text to match — confirm.
String(String),
/// A rule holding a pattern string (built by the test helper `Rule::pattern`).
/// NOTE(review): presumably a regular expression — confirm.
Pattern(String),
/// A reference to another symbol by name (built by the test helper `Rule::named`).
NamedSymbol(String),
/// A reference to a [`Symbol`]; `From<Symbol>` produces this variant.
Symbol(Symbol),
/// A list of alternatives. `Rule::choice` builds it with nested choices
/// flattened and duplicate alternatives dropped.
Choice(Vec<Rule>),
/// A rule wrapped together with its [`MetadataParams`]; built by the
/// metadata-setting constructors such as `Rule::prec` or `Rule::field`.
Metadata {
params: MetadataParams,
rule: Box<Rule>,
},
/// A wrapper around a single rule, built by `Rule::repeat`.
Repeat(Box<Rule>),
/// An ordered list of rules, built by `Rule::seq`.
Seq(Vec<Rule>),
}
/// A set of tokens.
///
/// Because tokens are represented as small (~400 max) unsigned integers,
/// sets of tokens can be efficiently represented as bit vectors with each
/// index corresponding to a token, and each value representing whether or not
/// the token is present in the set.
///
/// NOTE(review): `PartialEq`/`Eq`/`Hash` are derived from the fields, so they
/// presumably depend on the bit vectors' lengths as well as their set bits —
/// confirm against `SmallBitVec`'s own implementations.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct TokenSet {
// Bit `i` is set when `Symbol::terminal(i)` is in the set.
terminal_bits: SmallBitVec,
// Bit `i` is set when `Symbol::external(i)` is in the set.
external_bits: SmallBitVec,
// Whether the `SymbolType::End` symbol is in the set.
eof: bool,
// Whether the `SymbolType::EndOfNonTerminalExtra` symbol is in the set.
end_of_nonterminal_extra: bool,
}
impl Rule {
    /// Attaches `name` as the field name of `content`.
    ///
    /// Like every metadata constructor here, this goes through
    /// `add_metadata`: the setting is merged into `content` when it is already
    /// a non-token `Metadata` wrapper, otherwise a new wrapper is created.
    pub fn field(name: String, content: Rule) -> Self {
        add_metadata(content, move |metadata| {
            metadata.field_name = Some(name);
        })
    }

    /// Attaches an alias with the given text and named-ness to `content`.
    pub fn alias(content: Rule, value: String, is_named: bool) -> Self {
        add_metadata(content, move |metadata| {
            let alias = Alias { value, is_named };
            metadata.alias = Some(alias);
        })
    }

    /// Marks `content` as a token.
    pub fn token(content: Rule) -> Self {
        add_metadata(content, |metadata| {
            metadata.is_token = true;
        })
    }

    /// Marks `content` as a token and additionally as a main token.
    pub fn immediate_token(content: Rule) -> Self {
        add_metadata(content, |metadata| {
            metadata.is_main_token = true;
            metadata.is_token = true;
        })
    }

    /// Sets the precedence of `content` without touching its associativity.
    pub fn prec(value: Precedence, content: Rule) -> Self {
        add_metadata(content, |metadata| {
            metadata.precedence = value;
        })
    }

    /// Sets the precedence of `content` and makes it left-associative.
    pub fn prec_left(value: Precedence, content: Rule) -> Self {
        add_metadata(content, |metadata| {
            metadata.precedence = value;
            metadata.associativity = Some(Associativity::Left);
        })
    }

    /// Sets the precedence of `content` and makes it right-associative.
    pub fn prec_right(value: Precedence, content: Rule) -> Self {
        add_metadata(content, |metadata| {
            metadata.precedence = value;
            metadata.associativity = Some(Associativity::Right);
        })
    }

    /// Sets the dynamic precedence of `content`.
    pub fn prec_dynamic(value: i32, content: Rule) -> Self {
        add_metadata(content, |metadata| {
            metadata.dynamic_precedence = value;
        })
    }

    /// Wraps `rule` in a `Repeat` node.
    pub fn repeat(rule: Rule) -> Self {
        let inner = Box::new(rule);
        Self::Repeat(inner)
    }

    /// Builds a `Choice` node from `rules`, flattening nested choices and
    /// keeping only the first occurrence of each distinct alternative.
    pub fn choice(rules: Vec<Rule>) -> Self {
        let mut flattened = Vec::with_capacity(rules.len());
        rules
            .into_iter()
            .for_each(|rule| choice_helper(&mut flattened, rule));
        Self::Choice(flattened)
    }

    /// Builds a `Seq` node holding `rules` unchanged.
    pub fn seq(rules: Vec<Rule>) -> Self {
        Self::Seq(rules)
    }
}
impl Alias {
    /// Returns the variable type this alias maps to: `Named` when
    /// `is_named` is set, `Anonymous` otherwise.
    pub fn kind(&self) -> VariableType {
        if !self.is_named {
            return VariableType::Anonymous;
        }
        VariableType::Named
    }
}
impl Precedence {
pub fn is_none(&self) -> bool {
matches!(self, Precedence::None)
}
}
/// Shorthand constructors that are only compiled for tests.
#[cfg(test)]
impl Rule {
    /// A `Symbol` rule referring to the terminal at `index`.
    pub fn terminal(index: usize) -> Self {
        let symbol = Symbol::terminal(index);
        Self::Symbol(symbol)
    }

    /// A `Symbol` rule referring to the non-terminal at `index`.
    pub fn non_terminal(index: usize) -> Self {
        let symbol = Symbol::non_terminal(index);
        Self::Symbol(symbol)
    }

    /// A `Symbol` rule referring to the external symbol at `index`.
    pub fn external(index: usize) -> Self {
        let symbol = Symbol::external(index);
        Self::Symbol(symbol)
    }

    /// A `NamedSymbol` rule holding an owned copy of `name`.
    pub fn named(name: &'static str) -> Self {
        Self::NamedSymbol(String::from(name))
    }

    /// A `String` rule holding an owned copy of `value`.
    pub fn string(value: &'static str) -> Self {
        Self::String(String::from(value))
    }

    /// A `Pattern` rule holding an owned copy of `value`.
    pub fn pattern(value: &'static str) -> Self {
        Self::Pattern(String::from(value))
    }
}
impl Symbol {
    /// Whether this symbol's kind is `Terminal`.
    pub fn is_terminal(&self) -> bool {
        matches!(self.kind, SymbolType::Terminal)
    }

    /// Whether this symbol's kind is `NonTerminal`.
    pub fn is_non_terminal(&self) -> bool {
        matches!(self.kind, SymbolType::NonTerminal)
    }

    /// Whether this symbol's kind is `External`.
    pub fn is_external(&self) -> bool {
        matches!(self.kind, SymbolType::External)
    }

    /// Whether this symbol's kind is `End`.
    pub fn is_eof(&self) -> bool {
        matches!(self.kind, SymbolType::End)
    }

    /// The non-terminal symbol with the given index.
    pub fn non_terminal(index: usize) -> Self {
        let kind = SymbolType::NonTerminal;
        Self { kind, index }
    }

    /// The terminal symbol with the given index.
    pub fn terminal(index: usize) -> Self {
        let kind = SymbolType::Terminal;
        Self { kind, index }
    }

    /// The external symbol with the given index.
    pub fn external(index: usize) -> Self {
        let kind = SymbolType::External;
        Self { kind, index }
    }

    /// The end symbol; its index is always `0`.
    pub fn end() -> Self {
        let kind = SymbolType::End;
        Self { kind, index: 0 }
    }

    /// The end-of-nonterminal-extra symbol; its index is always `0`.
    pub fn end_of_nonterminal_extra() -> Self {
        let kind = SymbolType::EndOfNonTerminalExtra;
        Self { kind, index: 0 }
    }
}
impl From<Symbol> for Rule {
fn from(symbol: Symbol) -> Self {
Rule::Symbol(symbol)
}
}
impl TokenSet {
pub fn new() -> Self {
Self {
terminal_bits: SmallBitVec::new(),
external_bits: SmallBitVec::new(),
eof: false,
end_of_nonterminal_extra: false,
}
}
pub fn iter<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
self.terminal_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::terminal(i))
} else {
None
}
})
.chain(
self.external_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::external(i))
} else {
None
}
}),
)
.chain(if self.eof { Some(Symbol::end()) } else { None })
.chain(if self.end_of_nonterminal_extra {
Some(Symbol::end_of_nonterminal_extra())
} else {
None
})
}
pub fn terminals<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
self.terminal_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::terminal(i))
} else {
None
}
})
}
pub fn contains(&self, symbol: &Symbol) -> bool {
match symbol.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false),
SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false),
SymbolType::End => self.eof,
SymbolType::EndOfNonTerminalExtra => self.end_of_nonterminal_extra,
}
}
pub fn contains_terminal(&self, index: usize) -> bool {
self.terminal_bits.get(index).unwrap_or(false)
}
pub fn insert(&mut self, other: Symbol) {
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
self.eof = true;
return;
}
SymbolType::EndOfNonTerminalExtra => {
self.end_of_nonterminal_extra = true;
return;
}
};
if other.index >= vec.len() {
vec.resize(other.index + 1, false);
}
vec.set(other.index, true);
}
pub fn remove(&mut self, other: &Symbol) {
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
self.eof = false;
return;
}
SymbolType::EndOfNonTerminalExtra => {
self.end_of_nonterminal_extra = false;
return;
}
};
if other.index < vec.len() {
vec.set(other.index, false);
}
}
pub fn is_empty(&self) -> bool {
!self.eof
&& !self.end_of_nonterminal_extra
&& !self.terminal_bits.iter().any(|a| a)
&& !self.external_bits.iter().any(|a| a)
}
pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool {
let mut result = false;
if other.terminal_bits.len() > self.terminal_bits.len() {
self.terminal_bits.resize(other.terminal_bits.len(), false);
}
for (i, element) in other.terminal_bits.iter().enumerate() {
if element {
result |= !self.terminal_bits[i];
self.terminal_bits.set(i, element);
}
}
result
}
fn insert_all_externals(&mut self, other: &TokenSet) -> bool {
let mut result = false;
if other.external_bits.len() > self.external_bits.len() {
self.external_bits.resize(other.external_bits.len(), false);
}
for (i, element) in other.external_bits.iter().enumerate() {
if element {
result |= !self.external_bits[i];
self.external_bits.set(i, element);
}
}
result
}
pub fn insert_all(&mut self, other: &TokenSet) -> bool {
let mut result = false;
if other.eof {
result |= !self.eof;
self.eof = true;
}
if other.end_of_nonterminal_extra {
result |= !self.end_of_nonterminal_extra;
self.end_of_nonterminal_extra = true;
}
result |= self.insert_all_terminals(other);
result |= self.insert_all_externals(other);
result
}
}
/// Builds a `TokenSet` by inserting every symbol the iterator yields, in order,
/// into an initially empty set.
impl FromIterator<Symbol> for TokenSet {
    fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
        iter.into_iter().fold(Self::new(), |mut set, symbol| {
            set.insert(symbol);
            set
        })
    }
}
/// Applies `f` to the metadata parameters of `input` and returns the resulting
/// `Rule::Metadata`.
///
/// If `input` is already a `Metadata` wrapper that is not marked as a token,
/// its existing parameters are updated in place. Otherwise (including for
/// token wrappers) `input` is wrapped in a fresh `Metadata` node whose
/// parameters start from `MetadataParams::default()`.
fn add_metadata<T: FnOnce(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
    let (rule, mut params) = match input {
        Rule::Metadata { rule, params } if !params.is_token => (rule, params),
        other => (Box::new(other), MetadataParams::default()),
    };
    f(&mut params);
    Rule::Metadata { rule, params }
}
/// Appends `rule` to `result` as a choice alternative.
///
/// A nested `Rule::Choice` is flattened recursively, element by element; any
/// other rule is pushed only if an equal rule is not already in `result`.
fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
    match rule {
        Rule::Choice(alternatives) => alternatives
            .into_iter()
            .for_each(|alternative| choice_helper(result, alternative)),
        leaf => {
            let already_present = result.contains(&leaf);
            if !already_present {
                result.push(leaf);
            }
        }
    }
}
impl fmt::Display for Precedence {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Precedence::Integer(i) => write!(f, "{}", i),
Precedence::Name(s) => write!(f, "'{}'", s),
Precedence::None => write!(f, "none"),
}
}
}
impl Default for Precedence {
fn default() -> Self {
Precedence::None
}
}