Remove infinite loop on certain lex errors

This commit is contained in:
Max Brunsfeld 2014-06-01 17:36:40 -07:00
parent c7266f791e
commit 868a09b0b0
3 changed files with 13 additions and 5 deletions

View file

@@ -13,6 +13,7 @@ typedef struct {
size_t position_in_chunk;
size_t token_end_position;
size_t token_start_position;
int reached_end;
} ts_lexer;
static ts_lexer ts_lexer_make() {
@@ -23,6 +24,7 @@ static ts_lexer ts_lexer_make() {
.position_in_chunk = 0,
.token_start_position = 0,
.token_end_position = 0,
.reached_end = 0,
};
return result;
}
@@ -35,7 +37,7 @@ static char ts_lexer_lookahead_char(const ts_lexer *lexer) {
return lexer->chunk[lexer->position_in_chunk];
}
static void ts_lexer_advance(ts_lexer *lexer) {
static int ts_lexer_advance(ts_lexer *lexer) {
static const char empty_chunk[1] = "";
if (lexer->position_in_chunk + 1 < lexer->chunk_size) {
lexer->position_in_chunk++;
@@ -43,11 +45,15 @@ static void ts_lexer_advance(ts_lexer *lexer) {
lexer->chunk_start += lexer->chunk_size;
lexer->chunk = lexer->input.read_fn(lexer->input.data, &lexer->chunk_size);
if (lexer->chunk_size == 0) {
if (lexer->reached_end)
return 0;
lexer->chunk = empty_chunk;
lexer->chunk_size = 1;
lexer->reached_end = 1;
}
lexer->position_in_chunk = 0;
}
return 1;
}
static void ts_lexer_start_token(ts_lexer *lexer) {

View file

@@ -157,10 +157,12 @@ ts_lr_parser_handle_error(ts_lr_parser *parser) {
ts_tree_release(parser->lookahead);
size_t position = ts_lexer_position(&parser->lexer);
parser->lookahead = parser->config.lex_fn(&parser->lexer, ts_lex_state_error);
if (ts_lexer_position(&parser->lexer) == position)
ts_lexer_advance(&parser->lexer);
if (ts_tree_symbol(parser->lookahead) == ts_builtin_sym_end) {
int at_end = 0;
if (ts_lexer_position(&parser->lexer) == position)
at_end = !ts_lexer_advance(&parser->lexer);
if (at_end || ts_tree_symbol(parser->lookahead) == ts_builtin_sym_end) {
ts_stack_push(&parser->stack, 0, error);
return 0;
}

View file

@@ -12,7 +12,7 @@ describe("parsing regex patterns", []() {
string pattern;
rule_ptr rule;
};
vector<ValidInputRow> valid_inputs = {
{
"character sets",