From 77941c85fff539a3710fb1e1283bd5097417a22b Mon Sep 17 00:00:00 2001
From: Max Brunsfeld
Date: Mon, 25 Aug 2014 23:35:00 -0700
Subject: [PATCH] Avoid building incomplete error nodes during lexing

The lexer doesn't know the expected symbols, so it doesn't have enough
information to construct error nodes. Now, when it encounters an invalid
character, it returns NULL and the parser builds a correct error node.
---
 include/tree_sitter/parser.h           |  8 ++++----
 spec/runtime/languages/json/errors.txt |  4 ++--
 src/runtime/parser.c                   | 16 +++++++++++-----
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h
index 8872dbbd..e3cefe4b 100644
--- a/include/tree_sitter/parser.h
+++ b/include/tree_sitter/parser.h
@@ -119,10 +119,10 @@ struct TSLanguage {
     return ts_lexer_accept(lexer, symbol, ts_hidden_symbol_flags[symbol]); \
   }
 
-#define LEX_ERROR()                                         \
-  {                                                         \
-    DEBUG_LEX("ERROR");                                     \
-    return ts_lexer_accept(lexer, ts_builtin_sym_error, 0); \
+#define LEX_ERROR()     \
+  {                     \
+    DEBUG_LEX("ERROR"); \
+    return NULL;        \
   }
 
 #define LEX_PANIC() \
diff --git a/spec/runtime/languages/json/errors.txt b/spec/runtime/languages/json/errors.txt
index 96a19a18..2acf95bd 100644
--- a/spec/runtime/languages/json/errors.txt
+++ b/spec/runtime/languages/json/errors.txt
@@ -3,7 +3,7 @@ recovers from top-level errors
 ==========================================
 [}
 ---
-(ERROR ) (ERROR '}')
+(ERROR '}') (ERROR '}')
 
 ==========================================
 recovers from unexpected tokens
@@ -19,7 +19,7 @@ recovers from errors inside arrays
 ---
 (array
   (number)
-  (ERROR )
+  (ERROR ',')
   (number))
 
 ==========================================
diff --git a/src/runtime/parser.c b/src/runtime/parser.c
index 7cedd5d4..5083395b 100644
--- a/src/runtime/parser.c
+++ b/src/runtime/parser.c
@@ -117,6 +117,14 @@ static int reduce_extra(TSParser *parser, TSSymbol symbol) {
   }
 }
 
+static void lex(TSParser *parser, TSStateId lex_state) {
+  parser->lookahead = parser->language->lex_fn(
+    &parser->lexer, lex_state);
+  if (!parser->lookahead) {
+    parser->lookahead = build_error_node(parser);
+  }
+}
+
 static int handle_error(TSParser *parser) {
   TSTree *error = build_error_node(parser);
 
@@ -127,8 +135,7 @@ static int handle_error(TSParser *parser) {
   for (;;) {
     ts_tree_release(parser->lookahead);
     size_t prev_position = ts_lexer_position(&parser->lexer);
-    parser->lookahead =
-        parser->language->lex_fn(&parser->lexer, ts_lex_state_error);
+    lex(parser, ts_lex_state_error);
 
     /*
      * If no characters are consumed, advance the lexer to the next
@@ -169,7 +176,7 @@ static int handle_error(TSParser *parser) {
 
 static TSTree *get_root(TSParser *parser) {
   if (parser->stack.size == 0)
-    return NULL;
+    ts_stack_push(&parser->stack, 0, build_error_node(parser));
 
   reduce(parser, ts_builtin_sym_document, parser->stack.size);
   parser->lookahead->options = 0;
@@ -180,8 +187,7 @@ static TSTree *get_root(TSParser *parser) {
 static TSParseAction next_action(TSParser *parser) {
   TSStateId state = ts_stack_top_state(&parser->stack);
   if (!parser->lookahead)
-    parser->lookahead = parser->language->lex_fn(
-        &parser->lexer, parser->language->lex_states[state]);
+    lex(parser, parser->language->lex_states[state]);
   return actions_for_state(parser->language, state)[parser->lookahead->symbol];
 }
 