fix: disallow tokens that match the empty string

This commit is contained in:
Amaan Qureshi 2024-11-02 00:53:21 -04:00
parent 8c802da174
commit 310a9f0704
5 changed files with 140 additions and 11 deletions

View file

@ -1,4 +1,5 @@
use anyhow::{anyhow, Context, Result};
use indoc::indoc;
use regex_syntax::{
hir::{Class, Hir, HirKind},
ParserBuilder,
@ -56,6 +57,17 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGram
let mut variables = Vec::new();
for (i, variable) in grammar.variables.into_iter().enumerate() {
if variable.rule.is_empty() {
return Err(anyhow!(
indoc! {"
The rule `{}` matches the empty string.
Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
"},
variable.name
));
}
let is_immediate_token = match &variable.rule {
Rule::Metadata { params, .. } => params.is_main_token,
_ => false,

View file

@ -1,4 +1,5 @@
use anyhow::{anyhow, Result};
use indoc::indoc;
use super::ExtractedSyntaxGrammar;
use crate::{
@ -197,11 +198,12 @@ pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxG
for production in &variable.productions {
if production.steps.is_empty() && symbol_is_used(&variables, symbol) {
return Err(anyhow!(
"The rule `{}` matches the empty string.
indoc! {"
The rule `{}` matches the empty string.
Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
",
Tree-sitter does not support syntactic rules that match the empty string
unless they are used only as the grammar's start rule.
"},
variable.name
));
}

View file

@ -149,6 +149,16 @@ impl Rule {
/// Builds a sequence rule by wrapping the given `rules` in the `Seq` variant.
///
/// Declared `const fn`; the vector is stored as-is, with no validation or
/// flattening performed here.
pub const fn seq(rules: Vec<Self>) -> Self {
Self::Seq(rules)
}
/// Reports whether this rule is treated as matching the empty string.
///
/// The check is purely structural and recurses through nested rules:
///
/// * `String` is empty exactly when its text is empty.
/// * `Blank`, `Pattern`, `NamedSymbol` and `Symbol` are never reported as empty.
/// * `Metadata` and `Repeat` defer to the rule they wrap.
/// * `Seq` is empty only when every member is empty (so a `Seq` with no
///   members counts as empty).
/// * `Choice` is empty as soon as one alternative is empty (so a `Choice`
///   with no alternatives is not empty).
///
/// NOTE(review): `Blank` and `Pattern` are reported as non-empty even though
/// they could conceivably match no input — presumably intentional; confirm
/// against the callers before relying on this for those variants.
pub fn is_empty(&self) -> bool {
    match self {
        // Leaf rules: only a literal string can be judged from its contents.
        Self::String(text) => text.is_empty(),
        Self::Blank | Self::Pattern(..) | Self::NamedSymbol(_) | Self::Symbol(_) => false,

        // Single-child wrappers inherit the answer from the wrapped rule.
        Self::Repeat(inner) | Self::Metadata { rule: inner, .. } => inner.is_empty(),

        // Composite rules combine the answers of their children.
        Self::Seq(members) => members.iter().all(|member| member.is_empty()),
        Self::Choice(alternatives) => alternatives.iter().any(|alternative| alternative.is_empty()),
    }
}
}
impl Alias {