lexer: in error mode, continue until token is found

This commit is contained in:
Max Brunsfeld 2015-06-12 13:13:43 -07:00
parent f7e4445358
commit d5ce3a9b5a
4 changed files with 26 additions and 13 deletions

View file

@ -74,6 +74,7 @@ struct TSLanguage {
*/
#define START_LEXER() \
const bool error_mode = (lex_state == ts_lex_state_error); \
lexer->start_fn(lexer, lex_state); \
int32_t lookahead; \
next_state: \
@ -81,18 +82,31 @@ struct TSLanguage {
/*
 * Invoke the lexer's start_token_fn callback, passing the lexer itself.
 * The expansion already ends in a semicolon.
 */
#define START_TOKEN() lexer->start_token_fn(lexer);
/*
 * Switch the lexer state machine to `state_index` without calling
 * advance_fn: assign `state_index` to `lex_state`, then jump to the
 * `next_state` label that START_LEXER declares.
 *
 * Expands to a braced block, so it can be used as the body of an `if`
 * or `else` without a trailing semicolon.
 */
#define GO_TO_STATE(state_index) \
{ \
lex_state = state_index; \
goto next_state; \
}
/*
 * Call lexer->advance_fn(lexer, state_index), then move the state machine
 * to `state_index` and jump to the `next_state` label.
 *
 * Expands to a braced block, so it can be used as the body of an `if`
 * or `else` without a trailing semicolon.
 *
 * NOTE(review): the explicit `lex_state = ...; goto next_state;` pair and
 * the GO_TO_STATE(state_index) call that follows it perform the same
 * transition, and the latter is unreachable after the `goto`. This looks
 * like the old and new sides of a diff shown together -- confirm which
 * form the real header contains.
 */
#define ADVANCE(state_index) \
{ \
lexer->advance_fn(lexer, state_index); \
lex_state = state_index; \
goto next_state; \
GO_TO_STATE(state_index); \
}
/*
 * Return from the enclosing function with the result of lexer->accept_fn,
 * called with the lexer, `symbol`, the symbol's entry in
 * ts_hidden_symbol_flags, and the symbol's entry in ts_symbol_names.
 *
 * `symbol` appears three times in the expansion, so an argument with side
 * effects would be evaluated more than once. The expansion already ends
 * in a semicolon.
 */
#define ACCEPT_TOKEN(symbol) \
return lexer->accept_fn(lexer, symbol, ts_hidden_symbol_flags[symbol], \
ts_symbol_names[symbol]);
#define LEX_ERROR() ACCEPT_TOKEN(ts_builtin_sym_error);
/*
 * Lexing-failure handler; its behavior depends on `error_mode`, which
 * START_LEXER sets to true when lexing began in ts_lex_state_error.
 *
 *  - In error mode, already in ts_lex_state_error: ADVANCE and stay in the
 *    error state (calls advance_fn, then re-enters the error state).
 *  - In error mode, in any other state: GO_TO_STATE(ts_lex_state_error),
 *    i.e. fall back to the error state without calling advance_fn.
 *  - Not in error mode: accept ts_builtin_sym_error, which returns from
 *    the enclosing function.
 *
 * ADVANCE and GO_TO_STATE are deliberately written without trailing
 * semicolons here: both expand to braced blocks, and a semicolon after
 * the first one would detach the `else`.
 *
 * The expansion is a bare if/else statement rather than a do/while(0)
 * wrapper, so avoid using it as the unbraced body of another `if`.
 *
 * NOTE(review): a one-line LEX_ERROR definition also appears immediately
 * before this one in this listing; the two would conflict if both were
 * present in the real header -- presumably they are the old and new sides
 * of the diff.
 */
#define LEX_ERROR() \
if (error_mode) { \
if (lex_state == ts_lex_state_error) \
ADVANCE(ts_lex_state_error) \
else \
GO_TO_STATE(ts_lex_state_error) \
} else { \
ACCEPT_TOKEN(ts_builtin_sym_error) \
}
/*
* Parse Table Macros

View file

@ -26,6 +26,7 @@ describe("Languages", [&]() {
describe(language_name.c_str(), [&]() {
before_each([&]() {
ts_document_set_language(doc, language);
// ts_document_set_debugger(doc, log_debugger_make());
});
for (auto &entry : test_entries_for_language(language_name)) {

View file

@ -42,6 +42,7 @@ static void read_lookahead(TSLexer *lexer) {
/*
 * Log the lex state that lexing starts in, then log the lookahead via
 * DEBUG_LOOKAHEAD(). No other work is done in the visible body.
 *
 * NOTE(review): presumably installed as the lexer's start_fn callback
 * (the signature matches the START_LEXER call site) -- confirm where the
 * callback is assigned. `lexer` is not referenced directly here; the
 * DEBUG macros may use it.
 */
static void start(TSLexer *lexer, TSStateId lex_state) {
DEBUG("start_lex state:%d", lex_state);
DEBUG_LOOKAHEAD();
}
static void start_token(TSLexer *lexer) {
@ -76,7 +77,6 @@ static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden,
lexer->token_end_position = lexer->current_position;
if (symbol == ts_builtin_sym_error) {
DEBUG_LOOKAHEAD();
DEBUG("error_char");
return ts_tree_make_error(size, padding, lexer->lookahead);
} else {

View file

@ -269,7 +269,6 @@ static int handle_error(TSParser *parser) {
* were consumed, advance the lexer to the next character.
*/
DEBUG("skip_token");
TSLength prev_position = parser->lexer.current_position;
if (parser->lookahead)
ts_tree_release(parser->lookahead);
parser->lookahead = get_next_node(parser, ts_lex_state_error);
@ -278,15 +277,14 @@ static int handle_error(TSParser *parser) {
* If the current lookahead character cannot be the start of any token,
* just skip it. If the end of input is reached, exit.
*/
if (ts_length_eq(parser->lexer.current_position, prev_position))
if (!parser->lexer.advance_fn(&parser->lexer, 0)) {
DEBUG("fail_to_recover");
if (parser->lookahead->symbol == ts_builtin_sym_end) {
DEBUG("fail_to_recover");
resize_error(parser, error);
ts_stack_push(&parser->stack, 0, error);
ts_tree_release(error);
return 0;
}
resize_error(parser, error);
ts_stack_push(&parser->stack, 0, error);
ts_tree_release(error);
return 0;
}
}
}