tree-sitter/cli/generate/src/rules.rs

545 lines
14 KiB
Rust
Raw Normal View History

use std::{collections::HashMap, fmt};
2018-12-05 12:50:12 -08:00
use serde::Serialize;
2024-04-09 13:35:08 -04:00
use smallbitvec::SmallBitVec;
use super::grammars::VariableType;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
2024-02-04 01:30:33 -05:00
pub enum SymbolType {
2018-12-05 12:50:12 -08:00
External,
End,
EndOfNonTerminalExtra,
2018-12-05 12:50:12 -08:00
Terminal,
NonTerminal,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
2024-02-04 01:30:33 -05:00
pub enum Associativity {
2018-12-05 12:50:12 -08:00
Left,
2018-12-18 16:05:36 -08:00
Right,
2018-12-05 12:50:12 -08:00
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
2024-02-04 01:30:33 -05:00
pub struct Alias {
2018-12-06 22:11:52 -08:00
pub value: String,
pub is_named: bool,
2018-12-05 12:50:12 -08:00
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default, Serialize)]
pub enum Precedence {
2024-02-04 01:30:33 -05:00
#[default]
None,
Integer(i32),
Name(String),
}
2024-02-04 01:30:33 -05:00
/// Maps a [`Symbol`] to the [`Alias`] associated with it.
pub type AliasMap = HashMap<Symbol, Alias>;
2018-12-05 12:50:12 -08:00
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize)]
2024-02-04 01:30:33 -05:00
pub struct MetadataParams {
pub precedence: Precedence,
2018-12-06 22:11:52 -08:00
pub dynamic_precedence: i32,
pub associativity: Option<Associativity>,
pub is_token: bool,
pub is_main_token: bool,
pub alias: Option<Alias>,
2019-02-07 12:29:20 -08:00
pub field_name: Option<String>,
2018-12-05 12:50:12 -08:00
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
2024-02-04 01:30:33 -05:00
pub struct Symbol {
2018-12-06 22:11:52 -08:00
pub kind: SymbolType,
pub index: usize,
2018-12-05 12:50:12 -08:00
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)]
2024-02-04 01:30:33 -05:00
pub enum Rule {
2018-12-05 12:50:12 -08:00
Blank,
String(String),
Pattern(String, String),
2018-12-05 12:50:12 -08:00
NamedSymbol(String),
Symbol(Symbol),
2018-12-08 23:35:48 -08:00
Choice(Vec<Rule>),
2018-12-05 12:50:12 -08:00
Metadata {
params: MetadataParams,
2018-12-08 23:35:48 -08:00
rule: Box<Rule>,
2018-12-05 12:50:12 -08:00
},
2018-12-08 23:35:48 -08:00
Repeat(Box<Rule>),
Seq(Vec<Rule>),
Reserved {
rule: Box<Rule>,
context_name: String,
},
2018-12-05 12:50:12 -08:00
}
2019-08-29 15:25:45 -07:00
// Because tokens are represented as small (~400 max) unsigned integers,
// sets of tokens can be efficiently represented as bit vectors with each
2022-06-28 19:57:42 +08:00
// index corresponding to a token, and each value representing whether or not
2019-08-29 15:25:45 -07:00
// the token is present in the set.
#[derive(Default, Clone, PartialEq, Eq, Hash)]
2024-02-04 01:30:33 -05:00
pub struct TokenSet {
2019-08-29 15:25:45 -07:00
terminal_bits: SmallBitVec,
external_bits: SmallBitVec,
eof: bool,
end_of_nonterminal_extra: bool,
2019-08-29 15:25:45 -07:00
}
/// Formats the set as a debug list of the symbols it contains, in the order
/// produced by [`TokenSet::iter`].
impl fmt::Debug for TokenSet {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut list = f.debug_list();
        for symbol in self.iter() {
            list.entry(&symbol);
        }
        list.finish()
    }
}
/// Token sets are totally ordered, so this simply defers to [`Ord::cmp`].
impl PartialOrd for TokenSet {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}
/// Orders sets by comparing, in turn: the terminal bits, the external bits,
/// the end flag, and the end-of-non-terminal-extra flag.
impl Ord for TokenSet {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // Bit vectors are compared lexicographically, bit by bit.
        let by_terminals = self.terminal_bits.iter().cmp(other.terminal_bits.iter());
        if by_terminals.is_ne() {
            return by_terminals;
        }

        let by_externals = self.external_bits.iter().cmp(other.external_bits.iter());
        if by_externals.is_ne() {
            return by_externals;
        }

        // Tuples compare lexicographically, so `eof` takes priority.
        let own_flags = (self.eof, self.end_of_nonterminal_extra);
        let other_flags = (other.eof, other.end_of_nonterminal_extra);
        own_flags.cmp(&other_flags)
    }
}
2018-12-05 12:50:12 -08:00
impl Rule {
2024-02-04 01:30:33 -05:00
pub fn field(name: String, content: Self) -> Self {
add_metadata(content, move |params| {
2019-02-07 12:29:20 -08:00
params.field_name = Some(name);
})
}
2024-02-04 01:30:33 -05:00
pub fn alias(content: Self, value: String, is_named: bool) -> Self {
add_metadata(content, move |params| {
2024-02-04 01:30:33 -05:00
params.alias = Some(Alias { value, is_named });
})
}
2024-02-04 01:30:33 -05:00
pub fn token(content: Self) -> Self {
2018-12-05 12:50:12 -08:00
add_metadata(content, |params| {
params.is_token = true;
})
}
2024-02-04 01:30:33 -05:00
pub fn immediate_token(content: Self) -> Self {
2018-12-05 12:50:12 -08:00
add_metadata(content, |params| {
params.is_token = true;
params.is_main_token = true;
})
}
2024-02-04 01:30:33 -05:00
pub fn prec(value: Precedence, content: Self) -> Self {
2018-12-05 12:50:12 -08:00
add_metadata(content, |params| {
params.precedence = value;
2018-12-05 12:50:12 -08:00
})
}
2024-02-04 01:30:33 -05:00
pub fn prec_left(value: Precedence, content: Self) -> Self {
2018-12-05 12:50:12 -08:00
add_metadata(content, |params| {
params.associativity = Some(Associativity::Left);
params.precedence = value;
2018-12-05 12:50:12 -08:00
})
}
2024-02-04 01:30:33 -05:00
pub fn prec_right(value: Precedence, content: Self) -> Self {
2018-12-05 12:50:12 -08:00
add_metadata(content, |params| {
params.associativity = Some(Associativity::Right);
params.precedence = value;
2018-12-05 12:50:12 -08:00
})
}
2024-02-04 01:30:33 -05:00
pub fn prec_dynamic(value: i32, content: Self) -> Self {
2018-12-11 12:14:34 -08:00
add_metadata(content, |params| {
params.dynamic_precedence = value;
})
}
2024-02-04 01:30:33 -05:00
pub fn repeat(rule: Self) -> Self {
Self::Repeat(Box::new(rule))
2018-12-05 12:50:12 -08:00
}
2024-02-04 01:30:33 -05:00
pub fn choice(rules: Vec<Self>) -> Self {
2018-12-05 12:50:12 -08:00
let mut elements = Vec::with_capacity(rules.len());
for rule in rules {
choice_helper(&mut elements, rule);
}
2024-02-04 01:30:33 -05:00
Self::Choice(elements)
2018-12-05 12:50:12 -08:00
}
2024-09-07 20:13:58 -04:00
pub const fn seq(rules: Vec<Self>) -> Self {
2024-02-04 01:30:33 -05:00
Self::Seq(rules)
2018-12-05 12:50:12 -08:00
}
pub fn is_empty(&self) -> bool {
match self {
Self::Blank | Self::Pattern(..) | Self::NamedSymbol(_) | Self::Symbol(_) => false,
Self::String(string) => string.is_empty(),
Self::Metadata { rule, .. } | Self::Repeat(rule) | Self::Reserved { rule, .. } => {
rule.is_empty()
}
Self::Choice(rules) => rules.iter().any(Self::is_empty),
Self::Seq(rules) => rules.iter().all(Self::is_empty),
}
}
2019-01-02 12:34:40 -08:00
}
2018-12-05 12:50:12 -08:00
impl Alias {
2024-02-06 23:18:27 +01:00
#[must_use]
2024-02-04 01:30:33 -05:00
pub const fn kind(&self) -> VariableType {
if self.is_named {
VariableType::Named
} else {
VariableType::Anonymous
}
}
}
impl Precedence {
2024-02-06 23:18:27 +01:00
#[must_use]
2024-02-04 01:30:33 -05:00
pub const fn is_none(&self) -> bool {
matches!(self, Self::None)
}
}
2019-01-02 12:34:40 -08:00
/// Shorthand constructors that are only compiled for tests.
#[cfg(test)]
impl Rule {
    /// A [`Rule::Symbol`] referring to terminal `index`.
    #[must_use]
    pub const fn terminal(index: usize) -> Self {
        let symbol = Symbol::terminal(index);
        Self::Symbol(symbol)
    }

    /// A [`Rule::Symbol`] referring to non-terminal `index`.
    #[must_use]
    pub const fn non_terminal(index: usize) -> Self {
        let symbol = Symbol::non_terminal(index);
        Self::Symbol(symbol)
    }

    /// A [`Rule::Symbol`] referring to external symbol `index`.
    #[must_use]
    pub const fn external(index: usize) -> Self {
        let symbol = Symbol::external(index);
        Self::Symbol(symbol)
    }

    /// A [`Rule::NamedSymbol`] with the given name.
    #[must_use]
    pub fn named(name: &'static str) -> Self {
        Self::NamedSymbol(String::from(name))
    }

    /// A [`Rule::String`] with the given literal text.
    #[must_use]
    pub fn string(value: &'static str) -> Self {
        Self::String(String::from(value))
    }

    /// A [`Rule::Pattern`] with the given pattern text and flags.
    #[must_use]
    pub fn pattern(value: &'static str, flags: &'static str) -> Self {
        Self::Pattern(String::from(value), String::from(flags))
    }
}
impl Symbol {
2024-02-06 23:18:27 +01:00
#[must_use]
2018-12-18 16:05:36 -08:00
pub fn is_terminal(&self) -> bool {
self.kind == SymbolType::Terminal
}
2024-02-06 23:18:27 +01:00
#[must_use]
2018-12-06 22:11:52 -08:00
pub fn is_non_terminal(&self) -> bool {
2018-12-18 16:05:36 -08:00
self.kind == SymbolType::NonTerminal
2018-12-06 22:11:52 -08:00
}
2024-02-06 23:18:27 +01:00
#[must_use]
2018-12-06 22:11:52 -08:00
pub fn is_external(&self) -> bool {
2018-12-18 16:05:36 -08:00
self.kind == SymbolType::External
2018-12-06 22:11:52 -08:00
}
2024-02-06 23:18:27 +01:00
#[must_use]
2019-01-03 10:31:14 -08:00
pub fn is_eof(&self) -> bool {
self.kind == SymbolType::End
}
2024-02-06 23:18:27 +01:00
#[must_use]
2024-02-04 01:30:33 -05:00
pub const fn non_terminal(index: usize) -> Self {
Self {
2018-12-18 16:05:36 -08:00
kind: SymbolType::NonTerminal,
index,
}
2018-12-05 12:50:12 -08:00
}
2024-02-06 23:18:27 +01:00
#[must_use]
2024-02-04 01:30:33 -05:00
pub const fn terminal(index: usize) -> Self {
Self {
2018-12-18 16:05:36 -08:00
kind: SymbolType::Terminal,
index,
}
2018-12-05 12:50:12 -08:00
}
2024-02-06 23:18:27 +01:00
#[must_use]
2024-02-04 01:30:33 -05:00
pub const fn external(index: usize) -> Self {
Self {
2018-12-18 16:05:36 -08:00
kind: SymbolType::External,
index,
}
2018-12-05 12:50:12 -08:00
}
2024-02-06 23:18:27 +01:00
#[must_use]
2024-02-04 01:30:33 -05:00
pub const fn end() -> Self {
Self {
kind: SymbolType::End,
index: 0,
}
}
2024-02-06 23:18:27 +01:00
#[must_use]
2024-02-04 01:30:33 -05:00
pub const fn end_of_nonterminal_extra() -> Self {
Self {
kind: SymbolType::EndOfNonTerminalExtra,
index: 0,
}
}
2018-12-05 12:50:12 -08:00
}
/// Wraps a [`Symbol`] in [`Rule::Symbol`].
impl From<Symbol> for Rule {
    // No `#[must_use]` here: the attribute has no effect on a method inside
    // a trait implementation, so it was only noise.
    fn from(symbol: Symbol) -> Self {
        Self::Symbol(symbol)
    }
}
2019-08-29 15:25:45 -07:00
impl TokenSet {
2024-09-27 15:42:38 -04:00
#[must_use]
2024-09-07 20:13:58 -04:00
pub const fn new() -> Self {
2019-08-29 15:25:45 -07:00
Self {
terminal_bits: SmallBitVec::new(),
external_bits: SmallBitVec::new(),
eof: false,
end_of_nonterminal_extra: false,
2019-08-29 15:25:45 -07:00
}
}
2024-02-04 01:30:33 -05:00
pub fn iter(&self) -> impl Iterator<Item = Symbol> + '_ {
2019-08-29 15:25:45 -07:00
self.terminal_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::terminal(i))
} else {
None
}
})
.chain(
self.external_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::external(i))
} else {
None
}
}),
)
.chain(if self.eof { Some(Symbol::end()) } else { None })
.chain(if self.end_of_nonterminal_extra {
Some(Symbol::end_of_nonterminal_extra())
} else {
None
})
2019-08-29 15:25:45 -07:00
}
2024-02-04 01:30:33 -05:00
pub fn terminals(&self) -> impl Iterator<Item = Symbol> + '_ {
2019-08-29 15:25:45 -07:00
self.terminal_bits
.iter()
.enumerate()
.filter_map(|(i, value)| {
if value {
Some(Symbol::terminal(i))
} else {
None
}
})
}
pub fn contains(&self, symbol: &Symbol) -> bool {
match symbol.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false),
SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false),
SymbolType::End => self.eof,
SymbolType::EndOfNonTerminalExtra => self.end_of_nonterminal_extra,
2019-08-29 15:25:45 -07:00
}
}
pub fn contains_terminal(&self, index: usize) -> bool {
self.terminal_bits.get(index).unwrap_or(false)
}
pub fn insert(&mut self, other: Symbol) {
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
self.eof = true;
return;
}
SymbolType::EndOfNonTerminalExtra => {
self.end_of_nonterminal_extra = true;
return;
}
2019-08-29 15:25:45 -07:00
};
if other.index >= vec.len() {
vec.resize(other.index + 1, false);
}
vec.set(other.index, true);
}
pub fn remove(&mut self, other: &Symbol) -> bool {
2019-08-29 15:25:45 -07:00
let vec = match other.kind {
SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"),
SymbolType::Terminal => &mut self.terminal_bits,
SymbolType::External => &mut self.external_bits,
SymbolType::End => {
return if self.eof {
self.eof = false;
true
} else {
false
}
2019-08-29 15:25:45 -07:00
}
SymbolType::EndOfNonTerminalExtra => {
return if self.end_of_nonterminal_extra {
self.end_of_nonterminal_extra = false;
true
} else {
false
};
}
2019-08-29 15:25:45 -07:00
};
2024-02-04 01:30:33 -05:00
if other.index < vec.len() && vec[other.index] {
vec.set(other.index, false);
while vec.last() == Some(false) {
vec.pop();
}
2024-02-04 01:30:33 -05:00
return true;
2019-08-29 15:25:45 -07:00
}
false
2019-08-29 15:25:45 -07:00
}
pub fn is_empty(&self) -> bool {
!self.eof
&& !self.end_of_nonterminal_extra
&& !self.terminal_bits.iter().any(|a| a)
&& !self.external_bits.iter().any(|a| a)
2019-08-29 15:25:45 -07:00
}
pub fn len(&self) -> usize {
self.eof as usize
+ self.end_of_nonterminal_extra as usize
+ self.terminal_bits.iter().filter(|b| *b).count()
+ self.external_bits.iter().filter(|b| *b).count()
}
2024-02-04 01:30:33 -05:00
pub fn insert_all_terminals(&mut self, other: &Self) -> bool {
2019-08-29 15:25:45 -07:00
let mut result = false;
if other.terminal_bits.len() > self.terminal_bits.len() {
self.terminal_bits.resize(other.terminal_bits.len(), false);
}
for (i, element) in other.terminal_bits.iter().enumerate() {
if element {
result |= !self.terminal_bits[i];
self.terminal_bits.set(i, element);
}
}
result
}
2024-02-04 01:30:33 -05:00
fn insert_all_externals(&mut self, other: &Self) -> bool {
2019-08-29 15:25:45 -07:00
let mut result = false;
if other.external_bits.len() > self.external_bits.len() {
self.external_bits.resize(other.external_bits.len(), false);
}
for (i, element) in other.external_bits.iter().enumerate() {
if element {
result |= !self.external_bits[i];
self.external_bits.set(i, element);
}
}
result
}
2024-02-04 01:30:33 -05:00
pub fn insert_all(&mut self, other: &Self) -> bool {
2019-08-29 15:25:45 -07:00
let mut result = false;
if other.eof {
result |= !self.eof;
self.eof = true;
}
if other.end_of_nonterminal_extra {
result |= !self.end_of_nonterminal_extra;
self.end_of_nonterminal_extra = true;
}
2019-08-29 15:25:45 -07:00
result |= self.insert_all_terminals(other);
result |= self.insert_all_externals(other);
result
}
}
/// Builds a set by inserting each yielded symbol in turn; like
/// [`TokenSet::insert`], this panics if a non-terminal is encountered.
impl FromIterator<Symbol> for TokenSet {
    fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
        iter.into_iter().fold(Self::new(), |mut set, symbol| {
            set.insert(symbol);
            set
        })
    }
}
fn add_metadata<T: FnOnce(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
2018-12-05 12:50:12 -08:00
match input {
Rule::Metadata { rule, mut params } if !params.is_token => {
2018-12-05 12:50:12 -08:00
f(&mut params);
Rule::Metadata { rule, params }
2018-12-18 16:05:36 -08:00
}
2018-12-05 12:50:12 -08:00
_ => {
let mut params = MetadataParams::default();
f(&mut params);
2018-12-18 16:05:36 -08:00
Rule::Metadata {
rule: Box::new(input),
params,
}
2018-12-05 12:50:12 -08:00
}
}
}
fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
match rule {
2018-12-08 23:35:48 -08:00
Rule::Choice(elements) => {
2018-12-05 12:50:12 -08:00
for element in elements {
choice_helper(result, element);
}
2018-12-18 16:05:36 -08:00
}
2018-12-05 12:50:12 -08:00
_ => {
if !result.contains(&rule) {
result.push(rule);
}
}
}
}
impl fmt::Display for Precedence {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
2024-02-04 01:30:33 -05:00
Self::Integer(i) => write!(f, "{i}"),
Self::Name(s) => write!(f, "'{s}'"),
Self::None => write!(f, "none"),
}
}
}