diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 7b59e14f..fb9a8c58 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -94,7 +94,7 @@ struct TSLanguage { const unsigned short *parse_table; const TSParseActionEntry *parse_actions; const TSStateId *lex_states; - bool (*lex_fn)(TSLexer *, TSStateId, bool); + bool (*lex_fn)(TSLexer *, TSStateId); }; /* @@ -106,22 +106,18 @@ struct TSLanguage { next_state: \ lookahead = lexer->lookahead; -#define GO_TO_STATE(state_value) \ - { \ - state = state_value; \ - goto next_state; \ - } - #define ADVANCE(state_value) \ { \ lexer->advance(lexer, state_value, TSTransitionTypeMain); \ - GO_TO_STATE(state_value); \ + state = state_value; \ + goto next_state; \ } #define SKIP(state_value) \ { \ lexer->advance(lexer, state_value, TSTransitionTypeSeparator); \ - GO_TO_STATE(state_value); \ + state = state_value; \ + goto next_state; \ } #define ACCEPT_TOKEN(symbol_value) \ @@ -130,14 +126,7 @@ struct TSLanguage { return true; \ } -#define LEX_ERROR() \ - if (error_mode) { \ - if (state == TS_STATE_ERROR) \ - lexer->advance(lexer, state, TSTransitionTypeError); \ - GO_TO_STATE(TS_STATE_ERROR); \ - } else { \ - return false; \ - } +#define LEX_ERROR() return false /* * Parse Table Macros diff --git a/spec/runtime/parser_spec.cc b/spec/runtime/parser_spec.cc index ece011f3..837f3f4e 100644 --- a/spec/runtime/parser_spec.cc +++ b/spec/runtime/parser_spec.cc @@ -162,6 +162,15 @@ describe("Parser", [&]() { AssertThat(get_node_text(last), Equals("true")); }); }); + + describe("when there is an unterminated error", [&]() { + it("maintains a consistent tree", [&]() { + ts_document_set_language(doc, get_test_language("javascript")); + set_text("a; /* b"); + assert_root_node( + "(ERROR (program (expression_statement (identifier))) (UNEXPECTED EOF))"); + }); + }); }); describe("handling extra tokens", [&]() { diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index f0cfa129..7ee66d5a 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -184,7 +184,7 @@ class CCodeGenerator { void add_lex_function() { line( - "static bool ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {"); + "static bool ts_lex(TSLexer *lexer, TSStateId state) {"); indent([&]() { line("START_LEXER();"); _switch("state", [&]() { diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index dd58cfc9..57ce1cc2 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -56,7 +56,6 @@ static void ts_lexer__advance(TSLexer *self, TSStateId state, if (self->lookahead_size) { self->current_position.bytes += self->lookahead_size; self->current_position.chars++; - if (self->lookahead == '\n') { self->current_position.rows++; self->current_position.columns = 0; @@ -132,7 +131,6 @@ void ts_lexer_reset(TSLexer *self, TSLength position) { void ts_lexer_start(TSLexer *self, TSStateId lex_state) { LOG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars); - LOG_LOOKAHEAD(); self->starting_state = lex_state; self->token_start_position = self->current_position; diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 3a33b481..ede3c121 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -242,17 +242,33 @@ static bool parser__condense_stack(Parser *self) { return result; } -static TSTree *parser__lex(Parser *self, TSStateId parse_state, bool error_mode) { +static TSTree *parser__lex(Parser *self, TSStateId parse_state) { TSStateId state = self->language->lex_states[parse_state]; LOG("lex state:%d", state); + TSStateId current_state = state; TSLength position = self->lexer.current_position; - ts_lexer_start(&self->lexer, state); - if (!self->language->lex_fn(&self->lexer, state, error_mode)) { - ts_lexer_reset(&self->lexer, position); - ts_lexer_start(&self->lexer, state); - assert(self->language->lex_fn(&self->lexer, TS_STATE_ERROR, true)); + + while (!self->language->lex_fn(&self->lexer, current_state)) { + if (current_state != TS_STATE_ERROR) { + LOG("retry_in_error_mode"); + ts_lexer_reset(&self->lexer, position); + ts_lexer_start(&self->lexer, state); + current_state = TS_STATE_ERROR; + continue; + } + + if (self->lexer.lookahead == 0) { + self->lexer.result_symbol = ts_builtin_sym_error; + break; + } + + if (self->lexer.current_position.chars == position.chars) { + self->lexer.advance(&self->lexer, TS_STATE_ERROR, TSTransitionTypeError); + } + + position = self->lexer.current_position; } TSLexerResult lex_result; @@ -333,8 +349,7 @@ static TSTree *parser__get_lookahead(Parser *self, StackVersion version, ts_lexer_reset(&self->lexer, position); TSStateId parse_state = ts_stack_top_state(self->stack, version); - bool error_mode = parse_state == TS_STATE_ERROR; - return parser__lex(self, parse_state, error_mode); + return parser__lex(self, parse_state); error: return NULL;