From 310a9f0704aeb8d9b1e32ff2bf9b6bd03c8032eb Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Sat, 2 Nov 2024 00:53:21 -0400 Subject: [PATCH] fix: disallow tokens that match the empty string --- .../src/prepare_grammar/expand_tokens.rs | 12 ++ .../src/prepare_grammar/flatten_grammar.rs | 10 +- cli/generate/src/rules.rs | 10 ++ cli/src/tests/async_context_test.rs | 6 - cli/src/tests/parser_test.rs | 113 +++++++++++++++++- 5 files changed, 140 insertions(+), 11 deletions(-) diff --git a/cli/generate/src/prepare_grammar/expand_tokens.rs b/cli/generate/src/prepare_grammar/expand_tokens.rs index 0a8a6e5a..84d05981 100644 --- a/cli/generate/src/prepare_grammar/expand_tokens.rs +++ b/cli/generate/src/prepare_grammar/expand_tokens.rs @@ -1,4 +1,5 @@ use anyhow::{anyhow, Context, Result}; +use indoc::indoc; use regex_syntax::{ hir::{Class, Hir, HirKind}, ParserBuilder, @@ -56,6 +57,17 @@ pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result params.is_main_token, _ => false, diff --git a/cli/generate/src/prepare_grammar/flatten_grammar.rs b/cli/generate/src/prepare_grammar/flatten_grammar.rs index e01bc0b0..86eb0c73 100644 --- a/cli/generate/src/prepare_grammar/flatten_grammar.rs +++ b/cli/generate/src/prepare_grammar/flatten_grammar.rs @@ -1,4 +1,5 @@ use anyhow::{anyhow, Result}; +use indoc::indoc; use super::ExtractedSyntaxGrammar; use crate::{ @@ -197,11 +198,12 @@ pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result) -> Self { Self::Seq(rules) } + + pub fn is_empty(&self) -> bool { + match self { + Self::Blank | Self::Pattern(..) | Self::NamedSymbol(_) | Self::Symbol(_) => false, + Self::String(string) => string.is_empty(), + Self::Metadata { rule, .. } | Self::Repeat(rule) => rule.is_empty(), + Self::Choice(rules) => rules.iter().any(Self::is_empty), + Self::Seq(rules) => rules.iter().all(Self::is_empty), + } + } } impl Alias { diff --git a/cli/src/tests/async_context_test.rs b/cli/src/tests/async_context_test.rs index cb2345cc..edcd5e4c 100644 --- a/cli/src/tests/async_context_test.rs +++ b/cli/src/tests/async_context_test.rs @@ -22,7 +22,6 @@ fn test_node_in_fut() { let root_ref = &root; let fut_val_fn = || async { - // eprintln!("fut_val_fn: {}", root.child(0).unwrap().kind()); yield_now().await; root.child(0).unwrap().kind() }; @@ -30,7 +29,6 @@ fn test_node_in_fut() { yield_now().await; let fut_ref_fn = || async { - // eprintln!("fut_ref_fn: {}", root_ref.child(0).unwrap().kind()); yield_now().await; root_ref.child(0).unwrap().kind() }; @@ -40,13 +38,11 @@ fn test_node_in_fut() { assert_eq!(f1, f2); let fut_val = async { - // eprintln!("fut_val: {}", root.child(0).unwrap().kind()); yield_now().await; root.child(0).unwrap().kind() }; let fut_ref = async { - // eprintln!("fut_ref: {}", root_ref.child(0).unwrap().kind()); yield_now().await; root_ref.child(0).unwrap().kind() }; @@ -58,7 +54,6 @@ fn test_node_in_fut() { f1 }) .join(); - // eprintln!("pended: {pended:?}"); assert_eq!(ret, "comment"); assert_eq!(pended, 5); } @@ -215,7 +210,6 @@ where match future.as_mut().poll(&mut cx) { Poll::Pending => pending += 1, Poll::Ready(r) => { - // eprintln!("ready, pended: {pending}"); break r; } } diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 1f2bc6e7..e5dc15d9 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1679,7 +1679,6 @@ fn test_decode_utf32() { ) } } else { - println!("bad decode: {bytes:?}"); (0, 0) } } @@ -1816,6 +1815,118 @@ fn test_decode_utf24le() { ); } +#[test] +fn test_grammars_that_should_not_compile() { + assert!(generate_parser_for_grammar( + r#" + { + "name": "issue_1111", + "rules": { + "source_file": { "type": "STRING", "value": "" } + }, + } + "# + ) + .is_err()); + + assert!(generate_parser_for_grammar( + r#" + { + "name": "issue_1271", + "rules": { + "source_file": { "type": "SYMBOL", "name": "identifier" }, + "identifier": { + "type": "TOKEN", + "content": { + "type": "REPEAT", + "content": { "type": "PATTERN", "value": "a" } + } + } + }, + } + "#, + ) + .is_err()); + + assert!(generate_parser_for_grammar( + r#" + { + "name": "issue_1156_expl_1", + "rules": { + "source_file": { + "type": "TOKEN", + "content": { + "type": "REPEAT", + "content": { "type": "STRING", "value": "c" } + } + } + }, + } + "# + ) + .is_err()); + + assert!(generate_parser_for_grammar( + r#" + { + "name": "issue_1156_expl_2", + "rules": { + "source_file": { + "type": "TOKEN", + "content": { + "type": "CHOICE", + "members": [ + { "type": "STRING", "value": "e" }, + { "type": "BLANK" } + ] + } + } + }, + } + "# + ) + .is_err()); + + assert!(generate_parser_for_grammar( + r#" + { + "name": "issue_1156_expl_3", + "rules": { + "source_file": { + "type": "IMMEDIATE_TOKEN", + "content": { + "type": "REPEAT", + "content": { "type": "STRING", "value": "p" } + } + } + }, + } + "# + ) + .is_err()); + + assert!(generate_parser_for_grammar( + r#" + { + "name": "issue_1156_expl_4", + "rules": { + "source_file": { + "type": "IMMEDIATE_TOKEN", + "content": { + "type": "CHOICE", + "members": [ + { "type": "STRING", "value": "r" }, + { "type": "BLANK" } + ] + } + } + }, + } + "# + ) + .is_err()); +} + const fn simple_range(start: usize, end: usize) -> Range { Range { start_byte: start,