From c16b6b2059eb1f66c8eb9ab73acb19022b5c77b9 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 5 Dec 2016 11:50:24 -0800 Subject: [PATCH] Run external scanners during error recovery --- .../build_tables/build_parse_table.cc | 4 ++ src/runtime/parser.c | 44 ++++++++++++------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 819ce345..4c3ba8c6 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -120,6 +120,10 @@ class ParseTableBuilder { } } + for (size_t i = 0; i < grammar.external_tokens.size(); i++) { + add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::External)); + } + for (size_t i = 0; i < grammar.variables.size(); i++) { add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::NonTerminal)); } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 6a358448..a3f38730 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -208,44 +208,54 @@ static bool parser__condense_stack(Parser *self) { return result; } -static Tree *parser__lex(Parser *self, TSStateId parse_state) { +static bool parser__try_lex(Parser *self, TSLexMode lex_mode) { Length start_position = self->lexer.current_position; ts_lexer_start(&self->lexer); - TSLexMode lex_mode = self->language->lex_modes[parse_state]; if (lex_mode.external_tokens) { - const bool *external_tokens = ts_language_enabled_external_tokens(self->language, lex_mode.external_tokens); + const bool *external_tokens = ts_language_enabled_external_tokens( + self->language, + lex_mode.external_tokens + ); + + LOG("lex external:%d, pos:%u", + lex_mode.external_tokens, + self->lexer.current_position.chars + ); + if (self->language->external_scanner.scan( self->external_scanner_payload, &self->lexer.data, external_tokens )) { - TSSymbol symbol = self->language->external_token_symbol_map[self->lexer.data.result_symbol]; - Length padding = length_sub(self->lexer.token_start_position, start_position); - Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position); - TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol); - Tree *result = ts_tree_make_leaf(symbol, padding, size, metadata); - result->parse_state = parse_state; - return result; + self->lexer.data.result_symbol = self->language->external_token_symbol_map[self->lexer.data.result_symbol]; + return true; } else { ts_lexer_reset(&self->lexer, start_position); + ts_lexer_start(&self->lexer); } } - TSStateId start_state = self->language->lex_modes[parse_state].lex_state; - TSStateId current_state = start_state; - LOG("lex state:%d", start_state); + LOG("lex state:%d, pos:%u", lex_mode.lex_state, self->lexer.current_position.chars); + return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); +} +static Tree *parser__lex(Parser *self, TSStateId parse_state) { + TSLexMode lex_mode = self->language->lex_modes[parse_state]; + TSStateId start_state = lex_mode.lex_state; + Length start_position = self->lexer.current_position; + + bool found_error = false; bool skipped_error = false; int32_t first_error_character = 0; Length error_start_position, error_end_position; - while (!self->language->lex_fn(&self->lexer.data, current_state)) { - if (current_state != ERROR_STATE) { + while (!parser__try_lex(self, lex_mode)) { + if (!found_error) { LOG("retry_in_error_mode"); - current_state = ERROR_STATE; + found_error = true; + lex_mode = self->language->lex_modes[ERROR_STATE]; ts_lexer_reset(&self->lexer, start_position); - ts_lexer_start(&self->lexer); continue; }