Merge pull request #2445 from amaanq/eof-loop-token-rep

fix: do not allow eof to advance states if the new state is the same state
This commit is contained in:
Amaan Qureshi 2023-08-02 07:49:18 -04:00 committed by GitHub
commit ca16a2b86e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 88 additions and 0 deletions

View file

@ -879,14 +879,23 @@ impl Generator {
add!(self, " ||{}", line_break);
}
if range.end == range.start {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "lookahead == ");
self.add_character(range.start);
} else if range.end as u32 == range.start as u32 + 1 {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "lookahead == ");
self.add_character(range.start);
add!(self, " ||{}lookahead == ", line_break);
self.add_character(range.end);
} else {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "(");
self.add_character(range.start);
add!(self, " <= lookahead && lookahead <= ");

View file

@ -1303,6 +1303,85 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
assert_eq!(root.child(3).unwrap().start_byte(), 4);
}
#[test]
fn test_grammars_that_can_hang_on_eof() {
    // Each grammar is `"` <repeated character class starting at NUL> `"`.
    // The three variants differ only in how wide that character class is:
    // a single character, two adjacent characters, and a longer range.
    //
    // NOTE: the raw string contents are intentionally left unindented so the
    // grammar text handed to the generator stays exactly as written.
    let grammars = [
        // Character class containing only NUL.
        r#"
{
"name": "test_single_null_char_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
        // Character class containing NUL and the character right after it.
        r#"
{
"name": "test_null_char_with_next_char_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00-\\x01]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
        // Character class spanning NUL through 0x7F.
        r#"
{
"name": "test_null_char_with_range_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00-\\x7F]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
    ];

    // One parser is reused for every generated language.
    let mut parser = Parser::new();

    for &grammar_json in &grammars {
        let (parser_name, parser_code) = generate_parser_for_grammar(grammar_json).unwrap();
        let language = get_test_language(&parser_name, &parser_code, None);
        parser.set_language(language).unwrap();

        // The input is a lone opening quote, so the end of input is reached
        // right after it. The test only requires that parsing returns a tree
        // (presumably it would previously never return -- see the test name).
        parser.parse("\"", None).unwrap();
    }
}
fn simple_range(start: usize, end: usize) -> Range {
Range {
start_byte: start,