Represent byte, char and tree counts as 32-bit numbers

The parser spends the majority of its time allocating and freeing trees and stack nodes.
Also, the memory footprint of the AST is a significant concern when using tree-sitter
with large files. This library is already unlikely to work very well with source files
larger than 4 GB (the most a 32-bit byte offset can address), so representing rows,
columns, byte lengths and child indices as unsigned 32-bit integers seems like the right choice.
This commit is contained in:
Max Brunsfeld 2016-11-14 12:15:24 -08:00
parent 11e767bd81
commit 535879a2bd
25 changed files with 268 additions and 263 deletions

View file

@ -58,8 +58,8 @@ typedef union {
} TSParseActionEntry;
typedef struct TSLanguage {
size_t symbol_count;
size_t token_count;
uint32_t symbol_count;
uint32_t token_count;
const char **symbol_names;
const TSSymbolMetadata *symbol_metadata;
const unsigned short *parse_table;

View file

@ -6,6 +6,7 @@ extern "C" {
#endif
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
typedef unsigned short TSSymbol;
@ -19,8 +20,8 @@ typedef enum {
typedef struct {
void *payload;
const char *(*read)(void *payload, size_t *bytes_read);
int (*seek)(void *payload, size_t character_index, size_t byte_index);
const char *(*read)(void *payload, uint32_t *bytes_read);
int (*seek)(void *payload, uint32_t character_index, uint32_t byte_index);
TSInputEncoding encoding;
} TSInput;
@ -35,14 +36,14 @@ typedef struct {
} TSLogger;
typedef struct {
size_t row;
size_t column;
uint32_t row;
uint32_t column;
} TSPoint;
typedef struct {
size_t start_byte;
size_t bytes_removed;
size_t bytes_added;
uint32_t start_byte;
uint32_t bytes_removed;
uint32_t bytes_added;
TSPoint start_point;
TSPoint extent_removed;
TSPoint extent_added;
@ -55,7 +56,7 @@ typedef struct {
typedef struct {
const void *data;
size_t offset[3];
uint32_t offset[3];
} TSNode;
typedef struct {
@ -64,11 +65,11 @@ typedef struct {
void *data;
} TSSymbolIterator;
size_t ts_node_start_char(TSNode);
size_t ts_node_start_byte(TSNode);
uint32_t ts_node_start_char(TSNode);
uint32_t ts_node_start_byte(TSNode);
TSPoint ts_node_start_point(TSNode);
size_t ts_node_end_char(TSNode);
size_t ts_node_end_byte(TSNode);
uint32_t ts_node_end_char(TSNode);
uint32_t ts_node_end_byte(TSNode);
TSPoint ts_node_end_point(TSNode);
TSSymbol ts_node_symbol(TSNode);
TSSymbolIterator ts_node_symbols(TSNode);
@ -79,18 +80,18 @@ bool ts_node_eq(TSNode, TSNode);
bool ts_node_is_named(TSNode);
bool ts_node_has_changes(TSNode);
TSNode ts_node_parent(TSNode);
TSNode ts_node_child(TSNode, size_t);
TSNode ts_node_named_child(TSNode, size_t);
size_t ts_node_child_count(TSNode);
size_t ts_node_named_child_count(TSNode);
TSNode ts_node_child(TSNode, uint32_t);
TSNode ts_node_named_child(TSNode, uint32_t);
uint32_t ts_node_child_count(TSNode);
uint32_t ts_node_named_child_count(TSNode);
TSNode ts_node_next_sibling(TSNode);
TSNode ts_node_next_named_sibling(TSNode);
TSNode ts_node_prev_sibling(TSNode);
TSNode ts_node_prev_named_sibling(TSNode);
TSNode ts_node_descendant_for_char_range(TSNode, size_t, size_t);
TSNode ts_node_named_descendant_for_char_range(TSNode, size_t, size_t);
TSNode ts_node_descendant_for_byte_range(TSNode, size_t, size_t);
TSNode ts_node_named_descendant_for_byte_range(TSNode, size_t, size_t);
TSNode ts_node_descendant_for_char_range(TSNode, uint32_t, uint32_t);
TSNode ts_node_named_descendant_for_char_range(TSNode, uint32_t, uint32_t);
TSNode ts_node_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
TSNode ts_node_named_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
TSNode ts_node_descendant_for_point_range(TSNode, TSPoint, TSPoint);
TSNode ts_node_named_descendant_for_point_range(TSNode, TSPoint, TSPoint);
@ -106,12 +107,12 @@ void ts_document_set_logger(TSDocument *, TSLogger);
void ts_document_print_debugging_graphs(TSDocument *, bool);
void ts_document_edit(TSDocument *, TSInputEdit);
void ts_document_parse(TSDocument *);
void ts_document_parse_and_get_changed_ranges(TSDocument *, TSRange **, size_t *);
void ts_document_parse_and_get_changed_ranges(TSDocument *, TSRange **, uint32_t *);
void ts_document_invalidate(TSDocument *);
TSNode ts_document_root_node(const TSDocument *);
size_t ts_document_parse_count(const TSDocument *);
uint32_t ts_document_parse_count(const TSDocument *);
size_t ts_language_symbol_count(const TSLanguage *);
uint32_t ts_language_symbol_count(const TSLanguage *);
const char *ts_language_symbol_name(const TSLanguage *, TSSymbol);
#ifdef __cplusplus