Allow lexer to accept tokens that ended at previous positions

* Track lookahead in each tree
* Add 'mark_end' API that external scanners can use
This commit is contained in:
Max Brunsfeld 2017-03-13 17:03:47 -07:00
parent 12d2a9d93f
commit d222dbb9fd
12 changed files with 96 additions and 71 deletions

View file

@ -16,6 +16,8 @@
static const char empty_chunk[2] = { 0, 0 };
static Length unknown_length = {UINT32_MAX, 0, {0, 0}};
static void ts_lexer__get_chunk(Lexer *self) {
TSInput input = self->input;
if (!self->chunk ||
@ -70,6 +72,11 @@ static void ts_lexer__advance(void *payload, bool skip) {
ts_lexer__get_lookahead(self);
}
/*
 * Record the lexer's current position as the end position of the token
 * being scanned. `payload` is the Lexer, passed as an untyped pointer.
 */
static void ts_lexer__mark_end(void *payload) {
  Lexer *lexer = payload;
  lexer->token_end_position = lexer->current_position;
}
/*
* The lexer's advance and mark_end methods are stored as struct fields so that
* generated parsers and external scanners can call them without needing to be
* linked against this library.
@ -79,6 +86,7 @@ void ts_lexer_init(Lexer *self) {
*self = (Lexer){
.data = {
.advance = ts_lexer__advance,
.mark_end = ts_lexer__mark_end,
.lookahead = 0,
.result_symbol = 0,
},
@ -95,6 +103,7 @@ void ts_lexer_init(Lexer *self) {
static inline void ts_lexer__reset(Lexer *self, Length position) {
self->token_start_position = position;
self->token_end_position = unknown_length;
self->current_position = position;
if (self->chunk && (position.bytes < self->chunk_start ||
@ -122,6 +131,7 @@ void ts_lexer_reset(Lexer *self, Length position) {
void ts_lexer_start(Lexer *self) {
self->token_start_position = self->current_position;
self->token_end_position = unknown_length;
self->data.result_symbol = 0;
if (!self->chunk)

View file

@ -15,11 +15,11 @@ typedef struct {
TSLexer data;
Length current_position;
Length token_start_position;
Length token_end_position;
const char *chunk;
uint32_t chunk_start;
uint32_t chunk_size;
uint32_t lookahead_size;
TSInput input;

View file

@ -279,7 +279,6 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
if (skipped_error) {
Length padding = length_sub(error_start_position, start_position);
Length size = length_sub(error_end_position, error_start_position);
ts_lexer_reset(&self->lexer, error_end_position);
result = ts_tree_make_error(size, padding, first_error_character);
} else {
TSSymbol symbol = self->lexer.data.result_symbol;
@ -287,8 +286,11 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
symbol = self->language->external_scanner.symbol_map[symbol];
}
if (length_has_unknown_chars(self->lexer.token_end_position)) {
self->lexer.token_end_position = self->lexer.current_position;
}
Length padding = length_sub(self->lexer.token_start_position, start_position);
Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position);
Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position);
TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol);
result = ts_tree_make_leaf(symbol, padding, size, metadata);
@ -301,6 +303,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
}
}
result->bytes_scanned = self->lexer.current_position.bytes - start_position.bytes + 1;
result->parse_state = parse_state;
result->first_leaf.lex_mode = lex_mode;

View file

@ -155,7 +155,10 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
if (i == 0) {
self->padding = child->padding;
self->size = child->size;
self->bytes_scanned = child->bytes_scanned;
} else {
uint32_t bytes_scanned = ts_tree_total_bytes(self) + child->bytes_scanned;
if (bytes_scanned > self->bytes_scanned) self->bytes_scanned = bytes_scanned;
self->size = length_add(self->size, ts_tree_total_size(child));
}
@ -344,6 +347,21 @@ static inline long min(long a, long b) {
return a <= b ? a : b;
}
/*
 * Flag this tree, and recursively its children, as changed when an edit at
 * `edit_byte_offset` (a byte offset measured from the start of `self`) lies
 * within the tree's `bytes_scanned` range.
 *
 * Returns false and leaves the tree untouched when the offset is at or
 * beyond `self->bytes_scanned`; otherwise sets `has_changes` on `self`,
 * visits the children, and returns true.
 */
bool ts_tree_invalidate_lookahead(Tree *self, uint32_t edit_byte_offset) {
  if (self->bytes_scanned <= edit_byte_offset) {
    return false;
  }

  self->has_changes = true;

  /*
   * Visit children left to right, translating the offset into each child's
   * own coordinates. Stop at the first child that starts after the edit.
   */
  uint32_t offset_of_child = 0;
  uint32_t index = 0;
  while (index < self->child_count && offset_of_child <= edit_byte_offset) {
    Tree *node = self->children[index];
    ts_tree_invalidate_lookahead(node, edit_byte_offset - offset_of_child);
    offset_of_child += ts_tree_total_bytes(node);
    index++;
  }

  return true;
}
void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
uint32_t old_end_byte = edit->start_byte + edit->bytes_removed;
@ -390,29 +408,27 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
for (uint32_t i = 0; i < self->child_count; i++) {
Tree *child = self->children[i];
child_left = child_right;
child_right = length_add(child_left, ts_tree_total_size(child));
if (!found_first_child) {
child_right = length_add(child_left, ts_tree_total_size(child));
if (child_right.bytes >= edit->start_byte) {
found_first_child = true;
TSInputEdit child_edit = {
.start_byte = edit->start_byte - child_left.bytes,
.bytes_added = edit->bytes_added,
.bytes_removed = edit->bytes_removed,
.start_point = point_sub(edit->start_point, child_left.extent),
.extent_added = edit->extent_added,
.extent_removed = edit->extent_removed,
};
if (!found_first_child && child_right.bytes >= edit->start_byte) {
found_first_child = true;
TSInputEdit child_edit = {
.start_byte = edit->start_byte - child_left.bytes,
.bytes_added = edit->bytes_added,
.bytes_removed = edit->bytes_removed,
.start_point = point_sub(edit->start_point, child_left.extent),
.extent_added = edit->extent_added,
.extent_removed = edit->extent_removed,
};
if (old_end_byte > child_right.bytes) {
child_edit.bytes_removed = child_right.bytes - edit->start_byte;
child_edit.extent_removed = point_sub(child_right.extent, edit->start_point);
remaining_bytes_to_delete = old_end_byte - child_right.bytes;
remaining_extent_to_delete = point_sub(old_end_point, child_right.extent);
}
ts_tree_edit(child, &child_edit);
if (old_end_byte > child_right.bytes) {
child_edit.bytes_removed = child_right.bytes - edit->start_byte;
child_edit.extent_removed = point_sub(child_right.extent, edit->start_point);
remaining_bytes_to_delete = old_end_byte - child_right.bytes;
remaining_extent_to_delete = point_sub(old_end_point, child_right.extent);
}
ts_tree_edit(child, &child_edit);
} else if (remaining_bytes_to_delete > 0) {
TSInputEdit child_edit = {
.start_byte = 0,
@ -425,6 +441,8 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
remaining_bytes_to_delete -= child_edit.bytes_removed;
remaining_extent_to_delete = point_sub(remaining_extent_to_delete, child_edit.extent_removed);
ts_tree_edit(child, &child_edit);
} else {
ts_tree_invalidate_lookahead(child, edit->start_byte - child_left.bytes);
}
child_right = length_add(child_left, ts_tree_total_size(child));

View file

@ -34,6 +34,7 @@ typedef struct Tree {
Length padding;
Length size;
uint32_t bytes_scanned;
TSSymbol symbol;
TSStateId parse_state;