Represent byte, char and tree counts as 32 bit numbers

The parser spends the majority of its time allocating and freeing trees and stack nodes.
Also, the memory footprint of the AST is a significant concern when using tree-sitter
with large files. This library is already unlikely to work very well with source files
larger than 4GB, so representing rows, columns, byte lengths and child indices as
unsigned 32 bit integers seems like the right choice.
This commit is contained in:
Max Brunsfeld 2016-11-14 12:15:24 -08:00
parent 11e767bd81
commit 535879a2bd
25 changed files with 268 additions and 263 deletions

View file

@ -33,9 +33,9 @@ static void ts_lexer__get_chunk(Lexer *self) {
}
static void ts_lexer__get_lookahead(Lexer *self) {
size_t position_in_chunk = self->current_position.bytes - self->chunk_start;
uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
size_t size = self->chunk_size - position_in_chunk + 1;
uint32_t size = self->chunk_size - position_in_chunk + 1;
if (self->input.encoding == TSInputEncodingUTF8)
self->lookahead_size =
@ -120,7 +120,7 @@ void ts_lexer_reset(Lexer *self, Length position) {
}
void ts_lexer_start(Lexer *self, TSStateId lex_state) {
LOG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars);
LOG("start_lex state:%d, pos:%u", lex_state, self->current_position.chars);
self->token_start_position = self->current_position;
self->data.result_symbol = 0;