feat: implement a cache for get_column
This commit is contained in:
parent
a767f89967
commit
aaba7cd2f9
3 changed files with 84 additions and 22 deletions
|
|
@ -37,6 +37,35 @@ static const TSRange DEFAULT_RANGE = {
|
|||
.end_byte = UINT32_MAX
|
||||
};
|
||||
|
||||
/**
|
||||
* Sets the column data to the given value and marks it valid.
|
||||
* @param self The lexer state.
|
||||
* @param val The new value of the column data.
|
||||
*/
|
||||
static void ts_lexer__set_column_data(Lexer *self, uint32_t val) {
|
||||
self->column_data.valid = true;
|
||||
self->column_data.value = val;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the value of the column data; no-op if invalid.
|
||||
* @param self The lexer state.
|
||||
*/
|
||||
static void ts_lexer__increment_column_data(Lexer *self) {
|
||||
if (self->column_data.valid) {
|
||||
self->column_data.value++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Marks the column data as invalid.
|
||||
* @param self The lexer state.
|
||||
*/
|
||||
static void ts_lexer__invalidate_column_data(Lexer *self) {
|
||||
self->column_data.valid = false;
|
||||
self->column_data.value = 0;
|
||||
}
|
||||
|
||||
// Check if the lexer has reached EOF. This state is stored
|
||||
// by setting the lexer's `current_included_range_index` such that
|
||||
// it has consumed all of its available ranges.
|
||||
|
|
@ -104,6 +133,10 @@ static void ts_lexer__get_lookahead(Lexer *self) {
|
|||
}
|
||||
|
||||
static void ts_lexer_goto(Lexer *self, Length position) {
|
||||
if (position.bytes != self->current_position.bytes) {
|
||||
ts_lexer__invalidate_column_data(self);
|
||||
}
|
||||
|
||||
self->current_position = position;
|
||||
|
||||
// Move to the first valid position at or after the given position.
|
||||
|
|
@ -156,16 +189,24 @@ static void ts_lexer_goto(Lexer *self, Length position) {
|
|||
}
|
||||
}
|
||||
|
||||
// Intended to be called only from functions that control logging.
|
||||
/**
|
||||
* Actually advances the lexer. Does not log anything.
|
||||
* @param self The lexer state.
|
||||
* @param skip Whether to mark the consumed codepoint as whitespace.
|
||||
*/
|
||||
static void ts_lexer__do_advance(Lexer *self, bool skip) {
|
||||
if (self->lookahead_size) {
|
||||
self->current_position.bytes += self->lookahead_size;
|
||||
if (self->data.lookahead == '\n') {
|
||||
self->current_position.extent.row++;
|
||||
self->current_position.extent.column = 0;
|
||||
ts_lexer__set_column_data(self, 0);
|
||||
} else {
|
||||
bool is_bom = self->current_position.bytes == 0 &&
|
||||
self->data.lookahead == BYTE_ORDER_MARK;
|
||||
if (!is_bom) ts_lexer__increment_column_data(self);
|
||||
self->current_position.extent.column += self->lookahead_size;
|
||||
}
|
||||
self->current_position.bytes += self->lookahead_size;
|
||||
}
|
||||
|
||||
const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
|
||||
|
|
@ -249,27 +290,33 @@ static void ts_lexer__mark_end(TSLexer *_self) {
|
|||
static uint32_t ts_lexer__get_column(TSLexer *_self) {
|
||||
Lexer *self = (Lexer *)_self;
|
||||
|
||||
uint32_t goal_byte = self->current_position.bytes;
|
||||
|
||||
self->did_get_column = true;
|
||||
Length start_of_col = {
|
||||
self->current_position.bytes - self->current_position.extent.column,
|
||||
{self->current_position.extent.row, 0},
|
||||
};
|
||||
ts_lexer_goto(self, start_of_col);
|
||||
ts_lexer__get_chunk(self);
|
||||
|
||||
uint32_t result = 0;
|
||||
if (!ts_lexer__eof(_self)) {
|
||||
ts_lexer__get_lookahead(self);
|
||||
while (self->current_position.bytes < goal_byte && self->chunk) {
|
||||
result++;
|
||||
ts_lexer__do_advance(self, false);
|
||||
if (ts_lexer__eof(_self)) break;
|
||||
if (!self->column_data.valid) {
|
||||
// Record current position
|
||||
uint32_t goal_byte = self->current_position.bytes;
|
||||
|
||||
// Back up to the beginning of the line
|
||||
Length start_of_col = {
|
||||
self->current_position.bytes - self->current_position.extent.column,
|
||||
{self->current_position.extent.row, 0},
|
||||
};
|
||||
ts_lexer_goto(self, start_of_col);
|
||||
ts_lexer__set_column_data(self, 0);
|
||||
ts_lexer__get_chunk(self);
|
||||
|
||||
if (!ts_lexer__eof(_self)) {
|
||||
ts_lexer__get_lookahead(self);
|
||||
|
||||
// Advance to the recorded position
|
||||
while (self->current_position.bytes < goal_byte && !ts_lexer__eof(_self) && self->chunk) {
|
||||
ts_lexer__do_advance(self, false);
|
||||
if (ts_lexer__eof(_self)) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
return self->column_data.value;
|
||||
}
|
||||
|
||||
// Is the lexer at a boundary between two disjoint included ranges of
|
||||
|
|
@ -322,6 +369,11 @@ void ts_lexer_init(Lexer *self) {
|
|||
.included_ranges = NULL,
|
||||
.included_range_count = 0,
|
||||
.current_included_range_index = 0,
|
||||
.did_get_column = false,
|
||||
.column_data = {
|
||||
.valid = false,
|
||||
.value = 0
|
||||
}
|
||||
};
|
||||
ts_lexer_set_included_ranges(self, NULL, 0);
|
||||
}
|
||||
|
|
@ -352,10 +404,12 @@ void ts_lexer_start(Lexer *self) {
|
|||
if (!ts_lexer__eof(&self->data)) {
|
||||
if (!self->chunk_size) ts_lexer__get_chunk(self);
|
||||
if (!self->lookahead_size) ts_lexer__get_lookahead(self);
|
||||
if (
|
||||
self->current_position.bytes == 0 &&
|
||||
self->data.lookahead == BYTE_ORDER_MARK
|
||||
) ts_lexer__advance(&self->data, true);
|
||||
if (self->current_position.bytes == 0) {
|
||||
if (self->data.lookahead == BYTE_ORDER_MARK) {
|
||||
ts_lexer__advance(&self->data, true);
|
||||
}
|
||||
ts_lexer__set_column_data(self, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,11 @@ extern "C" {
|
|||
#include "tree_sitter/api.h"
|
||||
#include "./parser.h"
|
||||
|
||||
typedef struct {
|
||||
uint32_t value;
|
||||
bool valid;
|
||||
} ColumnData;
|
||||
|
||||
typedef struct {
|
||||
TSLexer data;
|
||||
Length current_position;
|
||||
|
|
@ -27,6 +32,7 @@ typedef struct {
|
|||
uint32_t chunk_size;
|
||||
uint32_t lookahead_size;
|
||||
bool did_get_column;
|
||||
ColumnData column_data;
|
||||
|
||||
char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
|
||||
} Lexer;
|
||||
|
|
|
|||
|
|
@ -529,6 +529,7 @@ static Subtree ts_parser__lex(
|
|||
for (;;) {
|
||||
bool found_token = false;
|
||||
Length current_position = self->lexer.current_position;
|
||||
ColumnData column_data = self->lexer.column_data;
|
||||
|
||||
if (lex_mode.external_lex_state != 0) {
|
||||
LOG(
|
||||
|
|
@ -582,6 +583,7 @@ static Subtree ts_parser__lex(
|
|||
}
|
||||
|
||||
ts_lexer_reset(&self->lexer, current_position);
|
||||
self->lexer.column_data = column_data;
|
||||
}
|
||||
|
||||
LOG(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue