Allow lexer to accept tokens that ended at previous positions
* Track lookahead in each tree
* Add 'mark_end' API that external scanners can use
This commit is contained in:
parent
12d2a9d93f
commit
d222dbb9fd
12 changed files with 96 additions and 71 deletions
|
|
@ -16,6 +16,8 @@
|
|||
|
||||
static const char empty_chunk[2] = { 0, 0 };
|
||||
|
||||
static Length unknown_length = {UINT32_MAX, 0, {0, 0}};
|
||||
|
||||
static void ts_lexer__get_chunk(Lexer *self) {
|
||||
TSInput input = self->input;
|
||||
if (!self->chunk ||
|
||||
|
|
@ -70,6 +72,11 @@ static void ts_lexer__advance(void *payload, bool skip) {
|
|||
ts_lexer__get_lookahead(self);
|
||||
}
|
||||
|
||||
static void ts_lexer__mark_end(void *payload) {
|
||||
Lexer *self = (Lexer *)payload;
|
||||
self->token_end_position = self->current_position;
|
||||
}
|
||||
|
||||
/*
|
||||
* The lexer's advance method is stored as a struct field so that generated
|
||||
* parsers can call it without needing to be linked against this library.
|
||||
|
|
@ -79,6 +86,7 @@ void ts_lexer_init(Lexer *self) {
|
|||
*self = (Lexer){
|
||||
.data = {
|
||||
.advance = ts_lexer__advance,
|
||||
.mark_end = ts_lexer__mark_end,
|
||||
.lookahead = 0,
|
||||
.result_symbol = 0,
|
||||
},
|
||||
|
|
@ -95,6 +103,7 @@ void ts_lexer_init(Lexer *self) {
|
|||
|
||||
static inline void ts_lexer__reset(Lexer *self, Length position) {
|
||||
self->token_start_position = position;
|
||||
self->token_end_position = unknown_length;
|
||||
self->current_position = position;
|
||||
|
||||
if (self->chunk && (position.bytes < self->chunk_start ||
|
||||
|
|
@ -122,6 +131,7 @@ void ts_lexer_reset(Lexer *self, Length position) {
|
|||
|
||||
void ts_lexer_start(Lexer *self) {
|
||||
self->token_start_position = self->current_position;
|
||||
self->token_end_position = unknown_length;
|
||||
self->data.result_symbol = 0;
|
||||
|
||||
if (!self->chunk)
|
||||
|
|
|
|||
|
|
@ -15,11 +15,11 @@ typedef struct {
|
|||
TSLexer data;
|
||||
Length current_position;
|
||||
Length token_start_position;
|
||||
Length token_end_position;
|
||||
|
||||
const char *chunk;
|
||||
uint32_t chunk_start;
|
||||
uint32_t chunk_size;
|
||||
|
||||
uint32_t lookahead_size;
|
||||
|
||||
TSInput input;
|
||||
|
|
|
|||
|
|
@ -279,7 +279,6 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
|
|||
if (skipped_error) {
|
||||
Length padding = length_sub(error_start_position, start_position);
|
||||
Length size = length_sub(error_end_position, error_start_position);
|
||||
ts_lexer_reset(&self->lexer, error_end_position);
|
||||
result = ts_tree_make_error(size, padding, first_error_character);
|
||||
} else {
|
||||
TSSymbol symbol = self->lexer.data.result_symbol;
|
||||
|
|
@ -287,8 +286,11 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
|
|||
symbol = self->language->external_scanner.symbol_map[symbol];
|
||||
}
|
||||
|
||||
if (length_has_unknown_chars(self->lexer.token_end_position)) {
|
||||
self->lexer.token_end_position = self->lexer.current_position;
|
||||
}
|
||||
Length padding = length_sub(self->lexer.token_start_position, start_position);
|
||||
Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position);
|
||||
Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position);
|
||||
TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol);
|
||||
result = ts_tree_make_leaf(symbol, padding, size, metadata);
|
||||
|
||||
|
|
@ -301,6 +303,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
|
|||
}
|
||||
}
|
||||
|
||||
result->bytes_scanned = self->lexer.current_position.bytes - start_position.bytes + 1;
|
||||
result->parse_state = parse_state;
|
||||
result->first_leaf.lex_mode = lex_mode;
|
||||
|
||||
|
|
|
|||
|
|
@ -155,7 +155,10 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
|
|||
if (i == 0) {
|
||||
self->padding = child->padding;
|
||||
self->size = child->size;
|
||||
self->bytes_scanned = child->bytes_scanned;
|
||||
} else {
|
||||
uint32_t bytes_scanned = ts_tree_total_bytes(self) + child->bytes_scanned;
|
||||
if (bytes_scanned > self->bytes_scanned) self->bytes_scanned = bytes_scanned;
|
||||
self->size = length_add(self->size, ts_tree_total_size(child));
|
||||
}
|
||||
|
||||
|
|
@ -344,6 +347,21 @@ static inline long min(long a, long b) {
|
|||
return a <= b ? a : b;
|
||||
}
|
||||
|
||||
/*
 * Flag this tree as changed when `edit_byte_offset` falls below the tree's
 * `bytes_scanned` count, then repeat the check on each child that starts at
 * or before the edit. The offset handed to a child is rebased by the total
 * bytes of the children preceding it.
 *
 * Returns true if this tree was flagged, false if the offset is at or past
 * `bytes_scanned` (in which case nothing is modified).
 */
bool ts_tree_invalidate_lookahead(Tree *self, uint32_t edit_byte_offset) {
  if (self->bytes_scanned <= edit_byte_offset) return false;

  self->has_changes = true;

  /* Children that begin after the edit cannot have scanned it, so stop there. */
  uint32_t child_offset = 0;
  for (uint32_t i = 0; i < self->child_count && child_offset <= edit_byte_offset; i++) {
    Tree *child = self->children[i];
    ts_tree_invalidate_lookahead(child, edit_byte_offset - child_offset);
    child_offset += ts_tree_total_bytes(child);
  }

  return true;
}
|
||||
|
||||
|
||||
void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
|
||||
uint32_t old_end_byte = edit->start_byte + edit->bytes_removed;
|
||||
|
|
@ -390,29 +408,27 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
|
|||
for (uint32_t i = 0; i < self->child_count; i++) {
|
||||
Tree *child = self->children[i];
|
||||
child_left = child_right;
|
||||
child_right = length_add(child_left, ts_tree_total_size(child));
|
||||
|
||||
if (!found_first_child) {
|
||||
child_right = length_add(child_left, ts_tree_total_size(child));
|
||||
if (child_right.bytes >= edit->start_byte) {
|
||||
found_first_child = true;
|
||||
TSInputEdit child_edit = {
|
||||
.start_byte = edit->start_byte - child_left.bytes,
|
||||
.bytes_added = edit->bytes_added,
|
||||
.bytes_removed = edit->bytes_removed,
|
||||
.start_point = point_sub(edit->start_point, child_left.extent),
|
||||
.extent_added = edit->extent_added,
|
||||
.extent_removed = edit->extent_removed,
|
||||
};
|
||||
if (!found_first_child && child_right.bytes >= edit->start_byte) {
|
||||
found_first_child = true;
|
||||
TSInputEdit child_edit = {
|
||||
.start_byte = edit->start_byte - child_left.bytes,
|
||||
.bytes_added = edit->bytes_added,
|
||||
.bytes_removed = edit->bytes_removed,
|
||||
.start_point = point_sub(edit->start_point, child_left.extent),
|
||||
.extent_added = edit->extent_added,
|
||||
.extent_removed = edit->extent_removed,
|
||||
};
|
||||
|
||||
if (old_end_byte > child_right.bytes) {
|
||||
child_edit.bytes_removed = child_right.bytes - edit->start_byte;
|
||||
child_edit.extent_removed = point_sub(child_right.extent, edit->start_point);
|
||||
remaining_bytes_to_delete = old_end_byte - child_right.bytes;
|
||||
remaining_extent_to_delete = point_sub(old_end_point, child_right.extent);
|
||||
}
|
||||
|
||||
ts_tree_edit(child, &child_edit);
|
||||
if (old_end_byte > child_right.bytes) {
|
||||
child_edit.bytes_removed = child_right.bytes - edit->start_byte;
|
||||
child_edit.extent_removed = point_sub(child_right.extent, edit->start_point);
|
||||
remaining_bytes_to_delete = old_end_byte - child_right.bytes;
|
||||
remaining_extent_to_delete = point_sub(old_end_point, child_right.extent);
|
||||
}
|
||||
|
||||
ts_tree_edit(child, &child_edit);
|
||||
} else if (remaining_bytes_to_delete > 0) {
|
||||
TSInputEdit child_edit = {
|
||||
.start_byte = 0,
|
||||
|
|
@ -425,6 +441,8 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
|
|||
remaining_bytes_to_delete -= child_edit.bytes_removed;
|
||||
remaining_extent_to_delete = point_sub(remaining_extent_to_delete, child_edit.extent_removed);
|
||||
ts_tree_edit(child, &child_edit);
|
||||
} else {
|
||||
ts_tree_invalidate_lookahead(child, edit->start_byte - child_left.bytes);
|
||||
}
|
||||
|
||||
child_right = length_add(child_left, ts_tree_total_size(child));
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ typedef struct Tree {
|
|||
|
||||
Length padding;
|
||||
Length size;
|
||||
uint32_t bytes_scanned;
|
||||
|
||||
TSSymbol symbol;
|
||||
TSStateId parse_state;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue