Create separate lexer function for keywords

This commit is contained in:
Max Brunsfeld 2018-03-07 11:56:59 -08:00
parent 16cdd2ffbe
commit c0cc35ff07
11 changed files with 231 additions and 114 deletions

View file

@ -358,9 +358,6 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta
);
ts_lexer_start(&self->lexer);
if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) {
if (length_is_undefined(self->lexer.token_end_position)) {
self->lexer.token_end_position = self->lexer.current_position;
}
break;
}
@ -398,23 +395,39 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta
error_end_position = self->lexer.current_position;
}
if (self->lexer.current_position.bytes > last_byte_scanned) {
last_byte_scanned = self->lexer.current_position.bytes;
}
Tree *result;
if (skipped_error) {
Length padding = length_sub(error_start_position, start_position);
Length size = length_sub(error_end_position, error_start_position);
result = ts_tree_make_error(&self->tree_pool, size, padding, first_error_character, self->language);
} else {
TSSymbol symbol = self->lexer.data.result_symbol;
if (found_external_token) {
symbol = self->language->external_scanner.symbol_map[symbol];
}
if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) {
self->lexer.token_start_position = self->lexer.token_end_position;
}
TSSymbol symbol = self->lexer.data.result_symbol;
Length padding = length_sub(self->lexer.token_start_position, start_position);
Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position);
if (found_external_token) {
symbol = self->language->external_scanner.symbol_map[symbol];
} else if (symbol == self->language->keyword_capture_token && symbol != 0) {
uint32_t end_byte = self->lexer.token_end_position.bytes;
ts_lexer_reset(&self->lexer, self->lexer.token_start_position);
ts_lexer_start(&self->lexer);
if (
self->language->keyword_lex_fn(&self->lexer.data, 0) &&
self->lexer.token_end_position.bytes == end_byte &&
ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol)
) {
symbol = self->lexer.data.result_symbol;
}
}
result = ts_tree_make_leaf(&self->tree_pool, symbol, padding, size, self->language);
if (found_external_token) {
@ -427,9 +440,6 @@ static Tree *parser__lex(Parser *self, StackVersion version, TSStateId parse_sta
}
}
if (self->lexer.current_position.bytes > last_byte_scanned) {
last_byte_scanned = self->lexer.current_position.bytes;
}
result->bytes_scanned = last_byte_scanned - start_position.bytes + 1;
result->parse_state = parse_state;
result->first_leaf.lex_mode = lex_mode;
@ -466,7 +476,9 @@ static bool parser__can_reuse_first_leaf(Parser *self, TSStateId state, Tree *tr
TSLexMode current_lex_mode = self->language->lex_modes[state];
return
(tree->first_leaf.lex_mode.lex_state == current_lex_mode.lex_state &&
tree->first_leaf.lex_mode.external_lex_state == current_lex_mode.external_lex_state) ||
tree->first_leaf.lex_mode.external_lex_state == current_lex_mode.external_lex_state &&
(tree->first_leaf.symbol != self->language->keyword_capture_token ||
tree->parse_state == state)) ||
(current_lex_mode.external_lex_state == 0 &&
tree->size.bytes > 0 &&
table_entry->is_reusable &&