diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index cb9f6c72..f7e6c18e 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -879,14 +879,23 @@ impl Generator { add!(self, " ||{}", line_break); } if range.end == range.start { + if range.start == '\0' { + add!(self, "!eof && "); + } add!(self, "lookahead == "); self.add_character(range.start); } else if range.end as u32 == range.start as u32 + 1 { + if range.start == '\0' { + add!(self, "!eof && "); + } add!(self, "lookahead == "); self.add_character(range.start); add!(self, " ||{}lookahead == ", line_break); self.add_character(range.end); } else { + if range.start == '\0' { + add!(self, "!eof && "); + } add!(self, "("); self.add_character(range.start); add!(self, " <= lookahead && lookahead <= "); diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 69d2f4fc..d490b78f 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1303,6 +1303,85 @@ fn test_parsing_with_included_ranges_and_missing_tokens() { assert_eq!(root.child(3).unwrap().start_byte(), 4); } +#[test] +fn test_grammars_that_can_hang_on_eof() { /* Regression test for grammars that, per the test name, can hang at end of input. Three grammars are generated in turn; each source_file rule is a SEQ of a double-quote STRING, a repeated PATTERN over a character class that begins at the NUL character (only x00, then x00 to x01, then x00 to x7F), and a closing double-quote STRING, with whitespace as the only extra. Each generated language is loaded into the same Parser, which is then asked to parse an input holding a single double-quote, so the input ends right where the pattern could begin matching. The unwrap calls make the test panic if grammar generation, set_language, or parse fails. NOTE(review): the three classes presumably line up with the three range shapes that the render.rs hunk in this diff prefixes with the eof check (single character, two adjacent characters, wider range) - confirm against the generator. */ + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_single_null_char_regex", + "rules": { + "source_file": { + "type": "SEQ", + "members": [ + { "type": "STRING", "value": "\"" }, + { "type": "PATTERN", "value": "[\\x00]*" }, + { "type": "STRING", "value": "\"" } + ] + } + }, + "extras": [ { "type": "PATTERN", "value": "\\s" } ] + } + "#, + ) + .unwrap(); + + let mut parser = Parser::new(); + parser + .set_language(get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + parser.parse("\"", None).unwrap(); + + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_null_char_with_next_char_regex", + "rules": { + "source_file": { + "type": "SEQ", + "members": [ + { "type": "STRING", "value": "\"" }, + { "type": "PATTERN", "value": 
"[\\x00-\\x01]*" }, + { "type": "STRING", "value": "\"" } + ] + } + }, + "extras": [ { "type": "PATTERN", "value": "\\s" } ] + } + "#, + ) + .unwrap(); + + parser + .set_language(get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + parser.parse("\"", None).unwrap(); + + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_null_char_with_range_regex", + "rules": { + "source_file": { + "type": "SEQ", + "members": [ + { "type": "STRING", "value": "\"" }, + { "type": "PATTERN", "value": "[\\x00-\\x7F]*" }, + { "type": "STRING", "value": "\"" } + ] + } + }, + "extras": [ { "type": "PATTERN", "value": "\\s" } ] + } + "#, + ) + .unwrap(); + + parser + .set_language(get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + parser.parse("\"", None).unwrap(); +} + fn simple_range(start: usize, end: usize) -> Range { Range { start_byte: start,