fix: disallow tokens that match the empty string
This commit is contained in:
parent
8c802da174
commit
310a9f0704
5 changed files with 140 additions and 11 deletions
|
|
@ -1,4 +1,5 @@
|
|||
use anyhow::{anyhow, Context, Result};
|
||||
use indoc::indoc;
|
||||
use regex_syntax::{
|
||||
hir::{Class, Hir, HirKind},
|
||||
ParserBuilder,
|
||||
|
|
@ -56,6 +57,17 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGram
|
|||
|
||||
let mut variables = Vec::new();
|
||||
for (i, variable) in grammar.variables.into_iter().enumerate() {
|
||||
if variable.rule.is_empty() {
|
||||
return Err(anyhow!(
|
||||
indoc! {"
|
||||
The rule `{}` matches the empty string.
|
||||
Tree-sitter does not support syntactic rules that match the empty string
|
||||
unless they are used only as the grammar's start rule.
|
||||
"},
|
||||
variable.name
|
||||
));
|
||||
}
|
||||
|
||||
let is_immediate_token = match &variable.rule {
|
||||
Rule::Metadata { params, .. } => params.is_main_token,
|
||||
_ => false,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use anyhow::{anyhow, Result};
|
||||
use indoc::indoc;
|
||||
|
||||
use super::ExtractedSyntaxGrammar;
|
||||
use crate::{
|
||||
|
|
@ -197,11 +198,12 @@ pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxG
|
|||
for production in &variable.productions {
|
||||
if production.steps.is_empty() && symbol_is_used(&variables, symbol) {
|
||||
return Err(anyhow!(
|
||||
"The rule `{}` matches the empty string.
|
||||
indoc! {"
|
||||
The rule `{}` matches the empty string.
|
||||
|
||||
Tree-sitter does not support syntactic rules that match the empty string
|
||||
unless they are used only as the grammar's start rule.
|
||||
",
|
||||
Tree-sitter does not support syntactic rules that match the empty string
|
||||
unless they are used only as the grammar's start rule.
|
||||
"},
|
||||
variable.name
|
||||
));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -149,6 +149,16 @@ impl Rule {
|
|||
/// Wraps the given list of rules in the `Seq` variant.
pub const fn seq(rules: Vec<Self>) -> Self {
|
||||
Self::Seq(rules)
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
match self {
|
||||
Self::Blank | Self::Pattern(..) | Self::NamedSymbol(_) | Self::Symbol(_) => false,
|
||||
Self::String(string) => string.is_empty(),
|
||||
Self::Metadata { rule, .. } | Self::Repeat(rule) => rule.is_empty(),
|
||||
Self::Choice(rules) => rules.iter().any(Self::is_empty),
|
||||
Self::Seq(rules) => rules.iter().all(Self::is_empty),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Alias {
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@ fn test_node_in_fut() {
|
|||
let root_ref = &root;
|
||||
|
||||
let fut_val_fn = || async {
|
||||
// eprintln!("fut_val_fn: {}", root.child(0).unwrap().kind());
|
||||
yield_now().await;
|
||||
root.child(0).unwrap().kind()
|
||||
};
|
||||
|
|
@ -30,7 +29,6 @@ fn test_node_in_fut() {
|
|||
yield_now().await;
|
||||
|
||||
let fut_ref_fn = || async {
|
||||
// eprintln!("fut_ref_fn: {}", root_ref.child(0).unwrap().kind());
|
||||
yield_now().await;
|
||||
root_ref.child(0).unwrap().kind()
|
||||
};
|
||||
|
|
@ -40,13 +38,11 @@ fn test_node_in_fut() {
|
|||
assert_eq!(f1, f2);
|
||||
|
||||
let fut_val = async {
|
||||
// eprintln!("fut_val: {}", root.child(0).unwrap().kind());
|
||||
yield_now().await;
|
||||
root.child(0).unwrap().kind()
|
||||
};
|
||||
|
||||
let fut_ref = async {
|
||||
// eprintln!("fut_ref: {}", root_ref.child(0).unwrap().kind());
|
||||
yield_now().await;
|
||||
root_ref.child(0).unwrap().kind()
|
||||
};
|
||||
|
|
@ -58,7 +54,6 @@ fn test_node_in_fut() {
|
|||
f1
|
||||
})
|
||||
.join();
|
||||
// eprintln!("pended: {pended:?}");
|
||||
assert_eq!(ret, "comment");
|
||||
assert_eq!(pended, 5);
|
||||
}
|
||||
|
|
@ -215,7 +210,6 @@ where
|
|||
match future.as_mut().poll(&mut cx) {
|
||||
Poll::Pending => pending += 1,
|
||||
Poll::Ready(r) => {
|
||||
// eprintln!("ready, pended: {pending}");
|
||||
break r;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1679,7 +1679,6 @@ fn test_decode_utf32() {
|
|||
)
|
||||
}
|
||||
} else {
|
||||
println!("bad decode: {bytes:?}");
|
||||
(0, 0)
|
||||
}
|
||||
}
|
||||
|
|
@ -1816,6 +1815,118 @@ fn test_decode_utf24le() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Checks that parser generation returns an error for each of the grammars
/// listed below (named after the issues they were taken from).
///
/// NOTE(review): every grammar literal has a trailing comma after its
/// `"rules"` object. If the grammar loader parses strict JSON, these inputs
/// would be rejected for that reason alone — confirm that the assertions
/// exercise the empty-token check rather than a JSON parse failure.
#[test]
fn test_grammars_that_should_not_compile() {
    let rejected_grammars = [
        // A start rule that is the empty string literal.
        r#"
        {
            "name": "issue_1111",
            "rules": {
                "source_file": { "type": "STRING", "value": "" }
            },
        }
        "#,
        // A token wrapping a REPEAT of a pattern.
        r#"
        {
            "name": "issue_1271",
            "rules": {
                "source_file": { "type": "SYMBOL", "name": "identifier" },
                "identifier": {
                    "type": "TOKEN",
                    "content": {
                        "type": "REPEAT",
                        "content": { "type": "PATTERN", "value": "a" }
                    }
                }
            },
        }
        "#,
        // A token wrapping a REPEAT of a string.
        r#"
        {
            "name": "issue_1156_expl_1",
            "rules": {
                "source_file": {
                    "type": "TOKEN",
                    "content": {
                        "type": "REPEAT",
                        "content": { "type": "STRING", "value": "c" }
                    }
                }
            },
        }
        "#,
        // A token wrapping a CHOICE between a string and BLANK.
        r#"
        {
            "name": "issue_1156_expl_2",
            "rules": {
                "source_file": {
                    "type": "TOKEN",
                    "content": {
                        "type": "CHOICE",
                        "members": [
                            { "type": "STRING", "value": "e" },
                            { "type": "BLANK" }
                        ]
                    }
                }
            },
        }
        "#,
        // An immediate token wrapping a REPEAT of a string.
        r#"
        {
            "name": "issue_1156_expl_3",
            "rules": {
                "source_file": {
                    "type": "IMMEDIATE_TOKEN",
                    "content": {
                        "type": "REPEAT",
                        "content": { "type": "STRING", "value": "p" }
                    }
                }
            },
        }
        "#,
        // An immediate token wrapping a CHOICE between a string and BLANK.
        r#"
        {
            "name": "issue_1156_expl_4",
            "rules": {
                "source_file": {
                    "type": "IMMEDIATE_TOKEN",
                    "content": {
                        "type": "CHOICE",
                        "members": [
                            { "type": "STRING", "value": "r" },
                            { "type": "BLANK" }
                        ]
                    }
                }
            },
        }
        "#,
    ];

    for grammar_json in rejected_grammars {
        let outcome = generate_parser_for_grammar(grammar_json);
        assert!(outcome.is_err());
    }
}
|
||||
|
||||
const fn simple_range(start: usize, end: usize) -> Range {
|
||||
Range {
|
||||
start_byte: start,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue