diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h
index fb9a8c58..5d89efc2 100644
--- a/include/tree_sitter/parser.h
+++ b/include/tree_sitter/parser.h
@@ -28,19 +28,11 @@ typedef struct {
   bool structural : 1;
 } TSSymbolMetadata;
 
-typedef enum {
-  TSTransitionTypeMain,
-  TSTransitionTypeSeparator,
-  TSTransitionTypeError,
-} TSTransitionType;
-
 typedef struct TSLexer {
-  void (*advance)(struct TSLexer *, TSStateId, TSTransitionType);
+  void (*advance)(struct TSLexer *, TSStateId, bool);
 
   TSLength current_position;
-  TSLength token_end_position;
   TSLength token_start_position;
-  TSLength error_end_position;
 
   const char *chunk;
   size_t chunk_start;
@@ -48,10 +40,7 @@ typedef struct TSLexer {
   size_t lookahead_size;
   int32_t lookahead;
 
-  TSStateId starting_state;
   TSSymbol result_symbol;
-  bool result_follows_error;
-  int32_t first_unexpected_character;
 
   TSInput input;
   TSDebugger debugger;
@@ -108,14 +97,14 @@ struct TSLanguage {
 
 #define ADVANCE(state_value)                                  \
   {                                                           \
-    lexer->advance(lexer, state_value, TSTransitionTypeMain); \
+    lexer->advance(lexer, state_value, false);                \
     state = state_value;                                      \
     goto next_state;                                          \
   }
 
 #define SKIP(state_value)                                          \
   {                                                                \
-    lexer->advance(lexer, state_value, TSTransitionTypeSeparator); \
+    lexer->advance(lexer, state_value, true);                      \
     state = state_value;                                           \
     goto next_state;                                               \
   }
diff --git a/src/runtime/length.h b/src/runtime/length.h
index fbf2313b..3c16a7d8 100644
--- a/src/runtime/length.h
+++ b/src/runtime/length.h
@@ -14,6 +14,10 @@ static inline void ts_length_set_unknown(TSLength *self) {
   self->columns = 0;
 }
 
+static inline TSLength ts_length_min(TSLength len1, TSLength len2) {
+  return (len1.chars < len2.chars) ? len1 : len2;
+}
+
 static inline TSLength ts_length_add(TSLength len1, TSLength len2) {
   TSLength result;
   result.chars = len1.chars + len2.chars;
diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c
index 57ce1cc2..85b207b6 100644
--- a/src/runtime/lexer.c
+++ b/src/runtime/lexer.c
@@ -47,8 +47,7 @@ static void ts_lexer__get_lookahead(TSLexer *self) {
   LOG_LOOKAHEAD();
 }
 
-static void ts_lexer__advance(TSLexer *self, TSStateId state,
-                              TSTransitionType transition_type) {
+static void ts_lexer__advance(TSLexer *self, TSStateId state, bool skip) {
   if (self->chunk == empty_chunk)
     return;
 
@@ -64,25 +63,11 @@ static void ts_lexer__advance(TSLexer *self, TSStateId state,
     }
   }
 
-  switch (transition_type) {
-    case TSTransitionTypeSeparator:
-      if (self->result_follows_error) {
-        LOG("skip_error state:%d", state);
-      } else {
-        LOG("skip_separator state:%d", state);
-        self->token_start_position = self->current_position;
-      }
-      break;
-    case TSTransitionTypeError:
-      LOG("skip_error state:%d", state);
-      self->result_follows_error = true;
-      self->error_end_position = self->current_position;
-      if (!self->first_unexpected_character)
-        self->first_unexpected_character = self->lookahead;
-      break;
-    default:
-      LOG("advance state:%d", state);
-      break;
+  if (skip) {
+    LOG("skip_separator state:%d", state);
+    self->token_start_position = self->current_position;
+  } else {
+    LOG("advance state:%d", state);
   }
 
   if (self->current_position.bytes >= self->chunk_start + self->chunk_size)
@@ -108,7 +93,6 @@ void ts_lexer_init(TSLexer *self) {
 
 static inline void ts_lexer__reset(TSLexer *self, TSLength position) {
   self->token_start_position = position;
-  self->token_end_position = position;
   self->current_position = position;
 
   self->chunk = 0;
@@ -132,32 +116,11 @@ void ts_lexer_reset(TSLexer *self, TSLength position) {
 void ts_lexer_start(TSLexer *self, TSStateId lex_state) {
   LOG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars);
 
-  self->starting_state = lex_state;
   self->token_start_position = self->current_position;
-  self->result_follows_error = false;
   self->result_symbol = 0;
-  self->first_unexpected_character = 0;
 
   if (!self->chunk)
     ts_lexer__get_chunk(self);
   if (!self->lookahead_size)
     ts_lexer__get_lookahead(self);
 }
-
-void ts_lexer_finish(TSLexer *self, TSLexerResult *result) {
-  result->padding =
-    ts_length_sub(self->token_start_position, self->token_end_position);
-
-  if (self->result_follows_error) {
-    result->symbol = ts_builtin_sym_error;
-    result->size =
-      ts_length_sub(self->error_end_position, self->token_start_position);
-    result->first_unexpected_character = self->first_unexpected_character;
-    ts_lexer_reset(self, self->error_end_position);
-  } else {
-    result->symbol = self->result_symbol;
-    result->size =
-      ts_length_sub(self->current_position, self->token_start_position);
-    self->token_end_position = self->current_position;
-  }
-}
diff --git a/src/runtime/lexer.h b/src/runtime/lexer.h
index 75a03762..afecb19c 100644
--- a/src/runtime/lexer.h
+++ b/src/runtime/lexer.h
@@ -7,18 +7,10 @@ extern "C" {
 
 #include "tree_sitter/parser.h"
 
-typedef struct {
-  TSSymbol symbol;
-  TSLength padding;
-  TSLength size;
-  int32_t first_unexpected_character;
-} TSLexerResult;
-
 void ts_lexer_init(TSLexer *);
 void ts_lexer_set_input(TSLexer *, TSInput);
 void ts_lexer_reset(TSLexer *, TSLength);
 void ts_lexer_start(TSLexer *, TSStateId);
-void ts_lexer_finish(TSLexer *, TSLexerResult *);
 
 #ifdef __cplusplus
 }
diff --git a/src/runtime/parser.c b/src/runtime/parser.c
index ede3c121..2df15c77 100644
--- a/src/runtime/parser.c
+++ b/src/runtime/parser.c
@@ -243,18 +243,23 @@ static bool parser__condense_stack(Parser *self) {
 }
 
 static TSTree *parser__lex(Parser *self, TSStateId parse_state) {
-  TSStateId state = self->language->lex_states[parse_state];
-  LOG("lex state:%d", state);
+  TSStateId start_state = self->language->lex_states[parse_state];
+  TSStateId current_state = start_state;
+  TSLength start_position = self->lexer.current_position;
+  TSLength position = start_position;
+  LOG("lex state:%d", start_state);
 
-  TSStateId current_state = state;
-  TSLength position = self->lexer.current_position;
-  ts_lexer_start(&self->lexer, state);
+  bool skipped_error = false;
+  int32_t first_error_character = 0;
+  TSLength error_start_position, error_end_position;
+
+  ts_lexer_start(&self->lexer, start_state);
 
   while (!self->language->lex_fn(&self->lexer, current_state)) {
     if (current_state != TS_STATE_ERROR) {
       LOG("retry_in_error_mode");
       ts_lexer_reset(&self->lexer, position);
-      ts_lexer_start(&self->lexer, state);
+      ts_lexer_start(&self->lexer, start_state);
       current_state = TS_STATE_ERROR;
       continue;
     }
@@ -265,30 +270,39 @@ static TSTree *parser__lex(Parser *self, TSStateId parse_state) {
     }
 
     if (self->lexer.current_position.chars == position.chars) {
-      self->lexer.advance(&self->lexer, TS_STATE_ERROR, TSTransitionTypeError);
+      if (!skipped_error) {
+        error_start_position = self->lexer.current_position;
+        first_error_character = self->lexer.lookahead;
+      }
+      skipped_error = true;
+      self->lexer.advance(&self->lexer, TS_STATE_ERROR, false);
+      error_end_position = self->lexer.current_position;
     }
 
     position = self->lexer.current_position;
   }
 
-  TSLexerResult lex_result;
-  ts_lexer_finish(&self->lexer, &lex_result);
-
   TSTree *result;
-  if (lex_result.symbol == ts_builtin_sym_error) {
-    result = ts_tree_make_error(lex_result.size, lex_result.padding,
-                                lex_result.first_unexpected_character);
+
+  if (skipped_error) {
+    error_start_position = ts_length_min(error_start_position, self->lexer.token_start_position);
+    TSLength padding = ts_length_sub(error_start_position, start_position);
+    TSLength size = ts_length_sub(error_end_position, error_start_position);
+    ts_lexer_reset(&self->lexer, error_end_position);
+    result = ts_tree_make_error(size, padding, first_error_character);
   } else {
-    result = ts_tree_make_leaf(
-      lex_result.symbol, lex_result.padding, lex_result.size,
-      ts_language_symbol_metadata(self->language, lex_result.symbol));
-    if (!result)
-      return NULL;
-    result->parse_state = parse_state;
+    TSSymbol symbol = self->lexer.result_symbol;
+    TSLength padding = ts_length_sub(self->lexer.token_start_position, start_position);
+    TSLength size = ts_length_sub(self->lexer.current_position, self->lexer.token_start_position);
+    result = ts_tree_make_leaf(symbol, padding, size,
+                               ts_language_symbol_metadata(self->language, symbol));
   }
 
-  result->first_leaf.lex_state = state;
+  if (!result)
+    return NULL;
+  result->parse_state = parse_state;
+  result->first_leaf.lex_state = start_state;
 
   return result;
 }
 