From d5ce3a9b5a7ee9775588aa81be3f9b16ffafd042 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 12 Jun 2015 13:13:43 -0700 Subject: [PATCH] lexer: in error mode, continue until token is found --- include/tree_sitter/parser.h | 20 +++++++++++++++++--- spec/runtime/languages/language_specs.cc | 1 + src/runtime/lexer.c | 2 +- src/runtime/parser.c | 16 +++++++--------- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 6dc557a0..c612ffbc 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -74,6 +74,7 @@ struct TSLanguage { */ #define START_LEXER() \ + const bool error_mode = (lex_state == ts_lex_state_error); \ lexer->start_fn(lexer, lex_state); \ int32_t lookahead; \ next_state: \ @@ -81,18 +82,31 @@ struct TSLanguage { #define START_TOKEN() lexer->start_token_fn(lexer); +#define GO_TO_STATE(state_index) \ + { \ + lex_state = state_index; \ + goto next_state; \ + } + #define ADVANCE(state_index) \ { \ lexer->advance_fn(lexer, state_index); \ - lex_state = state_index; \ - goto next_state; \ + GO_TO_STATE(state_index); \ } #define ACCEPT_TOKEN(symbol) \ return lexer->accept_fn(lexer, symbol, ts_hidden_symbol_flags[symbol], \ ts_symbol_names[symbol]); -#define LEX_ERROR() ACCEPT_TOKEN(ts_builtin_sym_error); +#define LEX_ERROR() \ + if (error_mode) { \ + if (lex_state == ts_lex_state_error) \ + ADVANCE(ts_lex_state_error) \ + else \ + GO_TO_STATE(ts_lex_state_error) \ + } else { \ + ACCEPT_TOKEN(ts_builtin_sym_error) \ + } /* * Parse Table Macros diff --git a/spec/runtime/languages/language_specs.cc b/spec/runtime/languages/language_specs.cc index 68416c4a..a479e64f 100644 --- a/spec/runtime/languages/language_specs.cc +++ b/spec/runtime/languages/language_specs.cc @@ -26,6 +26,7 @@ describe("Languages", [&]() { describe(language_name.c_str(), [&]() { before_each([&]() { ts_document_set_language(doc, language); + // ts_document_set_debugger(doc, log_debugger_make()); }); for (auto &entry : test_entries_for_language(language_name)) { diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 7f7581e5..3ad9f31c 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -42,6 +42,7 @@ static void read_lookahead(TSLexer *lexer) { static void start(TSLexer *lexer, TSStateId lex_state) { DEBUG("start_lex state:%d", lex_state); + DEBUG_LOOKAHEAD(); } static void start_token(TSLexer *lexer) { @@ -76,7 +77,6 @@ static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden, lexer->token_end_position = lexer->current_position; if (symbol == ts_builtin_sym_error) { - DEBUG_LOOKAHEAD(); DEBUG("error_char"); return ts_tree_make_error(size, padding, lexer->lookahead); } else { diff --git a/src/runtime/parser.c b/src/runtime/parser.c index feadb41b..52b43052 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -269,7 +269,6 @@ static int handle_error(TSParser *parser) { * were consumed, advance the lexer to the next character. */ DEBUG("skip_token"); - TSLength prev_position = parser->lexer.current_position; if (parser->lookahead) ts_tree_release(parser->lookahead); parser->lookahead = get_next_node(parser, ts_lex_state_error); @@ -278,15 +277,14 @@ static int handle_error(TSParser *parser) { * If the current lookahead character cannot be the start of any token, * just skip it. If the end of input is reached, exit. */ - if (ts_length_eq(parser->lexer.current_position, prev_position)) - if (!parser->lexer.advance_fn(&parser->lexer, 0)) { - DEBUG("fail_to_recover"); + if (parser->lookahead->symbol == ts_builtin_sym_end) { + DEBUG("fail_to_recover"); - resize_error(parser, error); - ts_stack_push(&parser->stack, 0, error); - ts_tree_release(error); - return 0; - } + resize_error(parser, error); + ts_stack_push(&parser->stack, 0, error); + ts_tree_release(error); + return 0; + } } }