Move logic for lexical error handling outside of lexer functions

This way, less logic needs to be exposed in parser.h.
This commit is contained in:
Max Brunsfeld 2016-09-03 23:40:57 -07:00
parent 1c52c30111
commit 4f0c83ba01
5 changed files with 47 additions and 85 deletions

View file

@ -28,19 +28,11 @@ typedef struct {
bool structural : 1;
} TSSymbolMetadata;
/* Kind of transition the lexer is making when its `advance` callback runs. */
typedef enum {
  /* Ordinary advance within a token; this is what the ADVANCE macro passes. */
  TSTransitionTypeMain = 0,
  /* Advance over separator content; this is what the SKIP macro passes. */
  TSTransitionTypeSeparator = 1,
  /* Advance over a character that is being skipped as a lexical error. */
  TSTransitionTypeError = 2,
} TSTransitionType;
typedef struct TSLexer {
void (*advance)(struct TSLexer *, TSStateId, TSTransitionType);
void (*advance)(struct TSLexer *, TSStateId, bool);
TSLength current_position;
TSLength token_end_position;
TSLength token_start_position;
TSLength error_end_position;
const char *chunk;
size_t chunk_start;
@ -48,10 +40,7 @@ typedef struct TSLexer {
size_t lookahead_size;
int32_t lookahead;
TSStateId starting_state;
TSSymbol result_symbol;
bool result_follows_error;
int32_t first_unexpected_character;
TSInput input;
TSDebugger debugger;
@ -108,14 +97,14 @@ struct TSLanguage {
#define ADVANCE(state_value) \
{ \
lexer->advance(lexer, state_value, TSTransitionTypeMain); \
lexer->advance(lexer, state_value, false); \
state = state_value; \
goto next_state; \
}
#define SKIP(state_value) \
{ \
lexer->advance(lexer, state_value, TSTransitionTypeSeparator); \
lexer->advance(lexer, state_value, true); \
state = state_value; \
goto next_state; \
}

View file

@ -14,6 +14,10 @@ static inline void ts_length_set_unknown(TSLength *self) {
self->columns = 0;
}
/*
 * Returns whichever of the two lengths has the smaller `chars` count.
 * Only the `chars` field is compared; when the counts are equal, `len2`
 * is returned. The chosen length is returned whole, not field by field.
 */
static inline TSLength ts_length_min(TSLength len1, TSLength len2) {
  if (len1.chars < len2.chars) {
    return len1;
  }
  return len2;
}
static inline TSLength ts_length_add(TSLength len1, TSLength len2) {
TSLength result;
result.chars = len1.chars + len2.chars;

View file

@ -47,8 +47,7 @@ static void ts_lexer__get_lookahead(TSLexer *self) {
LOG_LOOKAHEAD();
}
static void ts_lexer__advance(TSLexer *self, TSStateId state,
TSTransitionType transition_type) {
static void ts_lexer__advance(TSLexer *self, TSStateId state, bool skip) {
if (self->chunk == empty_chunk)
return;
@ -64,25 +63,11 @@ static void ts_lexer__advance(TSLexer *self, TSStateId state,
}
}
switch (transition_type) {
case TSTransitionTypeSeparator:
if (self->result_follows_error) {
LOG("skip_error state:%d", state);
} else {
LOG("skip_separator state:%d", state);
self->token_start_position = self->current_position;
}
break;
case TSTransitionTypeError:
LOG("skip_error state:%d", state);
self->result_follows_error = true;
self->error_end_position = self->current_position;
if (!self->first_unexpected_character)
self->first_unexpected_character = self->lookahead;
break;
default:
LOG("advance state:%d", state);
break;
if (skip) {
LOG("skip_separator state:%d", state);
self->token_start_position = self->current_position;
} else {
LOG("advance state:%d", state);
}
if (self->current_position.bytes >= self->chunk_start + self->chunk_size)
@ -108,7 +93,6 @@ void ts_lexer_init(TSLexer *self) {
static inline void ts_lexer__reset(TSLexer *self, TSLength position) {
self->token_start_position = position;
self->token_end_position = position;
self->current_position = position;
self->chunk = 0;
@ -132,32 +116,11 @@ void ts_lexer_reset(TSLexer *self, TSLength position) {
/*
 * Prepares the lexer to scan a new token beginning at its current position.
 *
 * self      - lexer to prepare.
 * lex_state - lex state the scan starts in; stored in `starting_state`.
 *
 * Clears the per-token result fields and ensures that an input chunk and a
 * lookahead character are loaded before lexing begins.
 */
void ts_lexer_start(TSLexer *self, TSStateId lex_state) {
LOG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars);
self->starting_state = lex_state;
/* The token provisionally starts where the lexer currently is. */
self->token_start_position = self->current_position;
/* Reset the per-token result state. */
self->result_follows_error = false;
self->result_symbol = 0;
self->first_unexpected_character = 0;
/* Fetch a chunk only if none is currently loaded. */
if (!self->chunk)
ts_lexer__get_chunk(self);
/* A zero lookahead_size means no lookahead has been read yet. */
if (!self->lookahead_size)
ts_lexer__get_lookahead(self);
}
/*
 * Writes the outcome of the lex that just completed into `result`.
 *
 * self   - lexer whose recorded positions and result state are read.
 * result - output record; `symbol`, `padding` and `size` are always written,
 *          `first_unexpected_character` only when the lex followed an error.
 *
 * For a normal token the lexer's token end position is moved up to its
 * current position. For an error result the lexer is reset to the position
 * where the error ended.
 */
void ts_lexer_finish(TSLexer *self, TSLexerResult *result) {
  TSLength token_start = self->token_start_position;

  /* Padding spans from the recorded token end to the start of this token. */
  result->padding = ts_length_sub(token_start, self->token_end_position);

  if (!self->result_follows_error) {
    result->symbol = self->result_symbol;
    result->size = ts_length_sub(self->current_position, token_start);
    self->token_end_position = self->current_position;
    return;
  }

  /* Error result: its size runs from the token start to the end of the error. */
  TSLength error_end = self->error_end_position;
  result->symbol = ts_builtin_sym_error;
  result->size = ts_length_sub(error_end, token_start);
  result->first_unexpected_character = self->first_unexpected_character;
  ts_lexer_reset(self, error_end);
}

View file

@ -7,18 +7,10 @@ extern "C" {
#include "tree_sitter/parser.h"
/* Outcome of a single lex, as filled in by ts_lexer_finish. */
typedef struct {
/* Symbol of the lexed token; ts_builtin_sym_error when the lex followed an error. */
TSSymbol symbol;
/* Distance from the lexer's recorded token end position to this token's start. */
TSLength padding;
/* Extent measured from the token's start: up to the lexer's current position,
 * or up to the end of the error for an error result. */
TSLength size;
/* Copied from the lexer's first_unexpected_character; written by
 * ts_lexer_finish only for error results. */
int32_t first_unexpected_character;
} TSLexerResult;
void ts_lexer_init(TSLexer *);
void ts_lexer_set_input(TSLexer *, TSInput);
void ts_lexer_reset(TSLexer *, TSLength);
void ts_lexer_start(TSLexer *, TSStateId);
void ts_lexer_finish(TSLexer *, TSLexerResult *);
#ifdef __cplusplus
}

View file

@ -243,18 +243,23 @@ static bool parser__condense_stack(Parser *self) {
}
static TSTree *parser__lex(Parser *self, TSStateId parse_state) {
TSStateId state = self->language->lex_states[parse_state];
LOG("lex state:%d", state);
TSStateId start_state = self->language->lex_states[parse_state];
TSStateId current_state = start_state;
TSLength start_position = self->lexer.current_position;
TSLength position = start_position;
LOG("lex state:%d", start_state);
TSStateId current_state = state;
TSLength position = self->lexer.current_position;
ts_lexer_start(&self->lexer, state);
bool skipped_error = false;
int32_t first_error_character = 0;
TSLength error_start_position, error_end_position;
ts_lexer_start(&self->lexer, start_state);
while (!self->language->lex_fn(&self->lexer, current_state)) {
if (current_state != TS_STATE_ERROR) {
LOG("retry_in_error_mode");
ts_lexer_reset(&self->lexer, position);
ts_lexer_start(&self->lexer, state);
ts_lexer_start(&self->lexer, start_state);
current_state = TS_STATE_ERROR;
continue;
}
@ -265,30 +270,39 @@ static TSTree *parser__lex(Parser *self, TSStateId parse_state) {
}
if (self->lexer.current_position.chars == position.chars) {
self->lexer.advance(&self->lexer, TS_STATE_ERROR, TSTransitionTypeError);
if (!skipped_error) {
error_start_position = self->lexer.current_position;
first_error_character = self->lexer.lookahead;
}
skipped_error = true;
self->lexer.advance(&self->lexer, TS_STATE_ERROR, false);
error_end_position = self->lexer.current_position;
}
position = self->lexer.current_position;
}
TSLexerResult lex_result;
ts_lexer_finish(&self->lexer, &lex_result);
TSTree *result;
if (lex_result.symbol == ts_builtin_sym_error) {
result = ts_tree_make_error(lex_result.size, lex_result.padding,
lex_result.first_unexpected_character);
if (skipped_error) {
error_start_position = ts_length_min(error_start_position, self->lexer.token_start_position);
TSLength padding = ts_length_sub(error_start_position, start_position);
TSLength size = ts_length_sub(error_end_position, error_start_position);
ts_lexer_reset(&self->lexer, error_end_position);
result = ts_tree_make_error(size, padding, first_error_character);
} else {
result = ts_tree_make_leaf(
lex_result.symbol, lex_result.padding, lex_result.size,
ts_language_symbol_metadata(self->language, lex_result.symbol));
if (!result)
return NULL;
result->parse_state = parse_state;
TSSymbol symbol = self->lexer.result_symbol;
TSLength padding = ts_length_sub(self->lexer.token_start_position, start_position);
TSLength size = ts_length_sub(self->lexer.current_position, self->lexer.token_start_position);
result = ts_tree_make_leaf(symbol, padding, size,
ts_language_symbol_metadata(self->language, symbol));
}
result->first_leaf.lex_state = state;
if (!result)
return NULL;
result->parse_state = parse_state;
result->first_leaf.lex_state = start_state;
return result;
}