From bd145d2c6ae5b4b0a7a388881862fa2b9ffbf5a3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 26 Aug 2014 23:22:18 -0700 Subject: [PATCH] Preserve the initial error node in handle_error function --- include/tree_sitter/parser.h | 12 +++++++--- src/runtime/lexer.c | 6 ++--- src/runtime/parser.c | 46 +++++++++++++++++------------------- 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 419f4a1a..7ce0d3ca 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -21,7 +21,6 @@ typedef struct TSLexer { size_t position_in_chunk; size_t token_end_position; size_t token_start_position; - int reached_end; TSTree *(*accept_fn)(struct TSLexer *, TSSymbol, int); int (*advance_fn)(struct TSLexer *); @@ -48,6 +47,10 @@ static inline TSTree *ts_lexer_accept(TSLexer *lexer, TSSymbol symbol, return lexer->accept_fn(lexer, symbol, is_hidden); } +static inline int ts_lexer_is_done(const TSLexer *lexer) { + return lexer->chunk_size == 0 && lexer->position_in_chunk > 0; +} + typedef unsigned short TSStateId; typedef enum { @@ -108,8 +111,11 @@ struct TSLanguage { #define ADVANCE(state_index) \ { \ DEBUG_LEX("ADVANCE %d", state_index); \ - if (!ts_lexer_advance(lexer)) \ - ACCEPT_TOKEN(ts_builtin_sym_end); \ + if (ts_lexer_is_done(lexer)) { \ + DEBUG_LEX("END_OF_INPUT"); \ + return NULL; \ + } \ + ts_lexer_advance(lexer); \ lex_state = state_index; \ goto next_state; \ } diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index bff7f0ab..d69fb29b 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -2,18 +2,17 @@ #include "runtime/tree.h" static int advance(TSLexer *lexer) { - static const char *empty_chunk = ""; + static const char empty_chunk[2] = { '\0', '\0' }; if (lexer->position_in_chunk + 1 < lexer->chunk_size) { lexer->position_in_chunk++; } else { - if (lexer->reached_end) + if (lexer->chunk == empty_chunk) return 0; lexer->chunk_start += lexer->chunk_size; lexer->chunk = lexer->input.read_fn(lexer->input.data, &lexer->chunk_size); lexer->position_in_chunk = 0; if (lexer->chunk_size == 0) { - lexer->reached_end = 1; lexer->chunk = empty_chunk; } } @@ -36,7 +35,6 @@ TSLexer ts_lexer_make() { .position_in_chunk = 0, .token_start_position = 0, .token_end_position = 0, - .reached_end = 0, .advance_fn = advance, .accept_fn = accept, }; } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 1e70f811..06b39f84 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -51,7 +51,8 @@ static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) { static TSTree *build_error_node(TSParser *parser) { TSStateId state = ts_stack_top_state(&parser->stack); unsigned char lookahead = ts_lexer_lookahead_char(&parser->lexer); - TSSymbol *expected_symbols = malloc(parser->language->symbol_count * sizeof(TSSymbol *)); + TSSymbol *expected_symbols = + malloc(parser->language->symbol_count * sizeof(TSSymbol *)); size_t count = 0; const TSParseAction *actions = actions_for_state(parser->language, state); @@ -114,37 +115,19 @@ static int reduce_extra(TSParser *parser, TSSymbol symbol) { } static void lex(TSParser *parser, TSStateId lex_state) { - parser->lookahead = parser->language->lex_fn( - &parser->lexer, lex_state); + if (parser->lookahead) + ts_tree_release(parser->lookahead); + parser->lookahead = parser->language->lex_fn(&parser->lexer, lex_state); if (!parser->lookahead) { parser->lookahead = build_error_node(parser); } } static int handle_error(TSParser *parser) { - TSTree *error = build_error_node(parser); + TSTree *error = parser->lookahead; + ts_tree_retain(error); - /* - * Run the lexer until it produces a token that allows for - * error recovery. - */ for (;;) { - ts_tree_release(parser->lookahead); - size_t prev_position = ts_lexer_position(&parser->lexer); - lex(parser, ts_lex_state_error); - - /* - * If no characters are consumed, advance the lexer to the next - * character. - */ - int at_end = 0; - if (ts_lexer_position(&parser->lexer) == prev_position) - at_end = !ts_lexer_advance(&parser->lexer); - - if (at_end || parser->lookahead->symbol == ts_builtin_sym_end) { - ts_stack_push(&parser->stack, 0, error); - return 0; - } /* * Unwind the parse stack until a state is found in which an error is @@ -163,10 +146,25 @@ static int handle_error(TSParser *parser) { if (action_after_error.type != TSParseActionTypeError) { ts_stack_shrink(&parser->stack, i + 1); ts_stack_push(&parser->stack, state_after_error, error); + ts_tree_release(error); return 1; } } } + + /* + * If there is no state in the stack for which we can recover with the + * current lookahead token, advance to the next token. If no characters + * were consumed, advance the lexer to the next character. + */ + size_t prev_position = ts_lexer_position(&parser->lexer); + lex(parser, ts_lex_state_error); + if (ts_lexer_position(&parser->lexer) == prev_position) + if (!ts_lexer_advance(&parser->lexer)) { + ts_stack_push(&parser->stack, 0, error); + ts_tree_release(error); + return 0; + } } }