Remove length restriction in external scanner serialization API

This commit is contained in:
Max Brunsfeld 2017-07-17 17:12:36 -07:00
parent e355929a30
commit 9a04231ab1
12 changed files with 85 additions and 56 deletions

View file

@ -143,7 +143,8 @@ void ts_document_parse_with_options(TSDocument *self, TSParseOptions options) {
tree_path_init(&self->parser.tree_path1, old_tree);
tree_path_init(&self->parser.tree_path2, tree);
tree_path_get_changes(&self->parser.tree_path1, &self->parser.tree_path2,
options.changed_ranges, options.changed_range_count);
options.changed_ranges, options.changed_range_count,
self->parser.language);
}
ts_tree_release(old_tree);

View file

@ -5,9 +5,9 @@
#include "runtime/utf16.h"
#include "utf8proc.h"
#define LOG(...) \
if (self->logger.log) { \
snprintf(self->debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \
#define LOG(...) \
if (self->logger.log) { \
snprintf(self->debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
self->logger.log(self->logger.payload, TSLogTypeLex, self->debug_buffer); \
}

View file

@ -10,8 +10,6 @@ extern "C" {
#include "runtime/length.h"
#include "runtime/tree.h"
#define TS_DEBUG_BUFFER_SIZE 512
typedef struct {
TSLexer data;
Length current_position;
@ -25,7 +23,7 @@ typedef struct {
TSInput input;
TSLogger logger;
char debug_buffer[TS_DEBUG_BUFFER_SIZE];
char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
Tree *last_external_token;
} Lexer;

View file

@ -13,16 +13,16 @@
#include "runtime/reduce_action.h"
#include "runtime/error_costs.h"
#define LOG(...) \
if (self->lexer.logger.log) { \
snprintf(self->lexer.debug_buffer, TS_DEBUG_BUFFER_SIZE, __VA_ARGS__); \
self->lexer.logger.log(self->lexer.logger.payload, TSLogTypeParse, \
self->lexer.debug_buffer); \
} \
if (self->print_debugging_graphs) { \
fprintf(stderr, "graph {\nlabel=\""); \
fprintf(stderr, __VA_ARGS__); \
fprintf(stderr, "\"\n}\n\n"); \
#define LOG(...) \
if (self->lexer.logger.log) { \
snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
self->lexer.logger.log(self->lexer.logger.payload, TSLogTypeParse, \
self->lexer.debug_buffer); \
} \
if (self->print_debugging_graphs) { \
fprintf(stderr, "graph {\nlabel=\""); \
fprintf(stderr, __VA_ARGS__); \
fprintf(stderr, "\"\n}\n\n"); \
}
#define LOG_STACK() \
@ -233,10 +233,11 @@ static void parser__restore_external_scanner(Parser *self, Tree *external_token)
if (external_token) {
self->language->external_scanner.deserialize(
self->external_scanner_payload,
external_token->external_token_state
ts_external_token_state_data(&external_token->external_token_state),
external_token->external_token_state.length
);
} else {
self->language->external_scanner.reset(self->external_scanner_payload);
self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
}
}
@ -351,11 +352,11 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
if (found_external_token) {
result->has_external_tokens = true;
memset(result->external_token_state, 0, sizeof(TSExternalTokenState));
self->language->external_scanner.serialize(
unsigned length = self->language->external_scanner.serialize(
self->external_scanner_payload,
result->external_token_state
self->lexer.debug_buffer
);
ts_external_token_state_init(&result->external_token_state, self->lexer.debug_buffer, length);
ts_lexer_set_last_external_token(&self->lexer, result);
}
}
@ -876,8 +877,8 @@ static void parser__start(Parser *self, TSInput input, Tree *previous_tree) {
LOG("new_parse");
}
if (self->language->external_scanner.reset) {
self->language->external_scanner.reset(self->external_scanner_payload);
if (self->language->external_scanner.deserialize) {
self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
}
ts_lexer_set_input(&self->lexer, input);

View file

@ -612,13 +612,10 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
i, head->node, i, head->push_count, head->depth);
if (head->last_external_token) {
const TSExternalTokenState *s = &head->last_external_token->external_token_state;
fprintf(f,
"\nexternal_token_state: "
"%2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X",
(*s)[0], (*s)[1], (*s)[2], (*s)[3], (*s)[4], (*s)[5], (*s)[6], (*s)[7],
(*s)[8], (*s)[9], (*s)[10], (*s)[11], (*s)[12], (*s)[13], (*s)[14], (*s)[15]
);
TSExternalTokenState *state = &head->last_external_token->external_token_state;
const char *data = ts_external_token_state_data(state);
fprintf(f, "\nexternal_token_state:");
for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]);
}
fprintf(f, "\"]\n");

View file

@ -12,6 +12,36 @@
TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
/*
 * Initialize `self` with a copy of the `length` bytes starting at `content`.
 *
 * Payloads that fit in the inline `short_data` array are copied there; larger
 * payloads are copied into a block obtained from ts_malloc, which is then
 * recorded in `long_data`.
 */
void ts_external_token_state_init(TSExternalTokenState *self, const char *content, unsigned length) {
  /* Default to the inline buffer; switch to a heap block only when needed. */
  char *destination = self->short_data;

  self->length = length;
  if (length > sizeof(self->short_data)) {
    destination = ts_malloc(length);
    self->long_data = destination;
  }

  memcpy(destination, content, length);
}
/*
 * Release the storage owned by `self`.
 *
 * Only states longer than the inline `short_data` array hold a separately
 * allocated `long_data` block; shorter states own nothing and are left as-is.
 */
void ts_external_token_state_delete(TSExternalTokenState *self) {
  if (self->length <= sizeof(self->short_data)) return;
  ts_free(self->long_data);
}
/*
 * Return a pointer to the bytes stored in `self`: the `long_data` block when
 * the length exceeds the inline capacity, otherwise the inline `short_data`
 * array.
 */
const char *ts_external_token_state_data(const TSExternalTokenState *self) {
  return (self->length > sizeof(self->short_data))
    ? self->long_data
    : self->short_data;
}
/*
 * Report whether two states are equal: either they are the same object, or
 * they have the same length and identical stored bytes.
 */
bool ts_external_token_state_eq(const TSExternalTokenState *a, const TSExternalTokenState *b) {
  if (a == b) return true;
  if (a->length != b->length) return false;

  const char *a_bytes = ts_external_token_state_data(a);
  const char *b_bytes = ts_external_token_state_data(b);
  return memcmp(a_bytes, b_bytes, a->length) == 0;
}
Tree *ts_tree_make_leaf(TSSymbol sym, Length padding, Length size,
TSSymbolMetadata metadata) {
Tree *result = ts_malloc(sizeof(Tree));
@ -258,9 +288,10 @@ recur:
Tree *last_child = self->children[self->child_count - 1];
ts_free(self->children);
ts_free(self);
self = last_child;
goto recur;
} else if (self->has_external_tokens) {
ts_external_token_state_delete(&self->external_token_state);
}
ts_free(self);
@ -553,19 +584,12 @@ void ts_tree_print_dot_graph(const Tree *self, const TSLanguage *language,
fprintf(f, "}\n");
}
TSExternalTokenState empty_state = {
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
};
/*
 * Zero-length state used as the stand-in for a tree that is NULL or carries no
 * external token state. The inline buffer is initialized with `{0}` because an
 * empty brace initializer (`{}`) is not valid ISO C before C23.
 */
TSExternalTokenState empty_state = {.length = 0, .short_data = {0}};
/*
 * Compare the external token states of two trees.
 *
 * A tree that is NULL or has `has_external_tokens` unset is treated as having
 * the shared `empty_state`, so two such trees compare equal.
 */
bool ts_tree_external_token_state_eq(const Tree *self, const Tree *other) {
const TSExternalTokenState *state1 = &empty_state;
const TSExternalTokenState *state2 = &empty_state;
if (self && self->has_external_tokens) state1 = &self->external_token_state;
if (other && other->has_external_tokens) state2 = &other->external_token_state;
/*
 * NOTE(review): two return statements are present in this listing. The first
 * compares the raw struct bytes with memcmp (which includes the `long_data`
 * pointer value); the second delegates to ts_external_token_state_eq, which
 * compares lengths and stored bytes. Presumably only the second is meant to
 * remain - confirm against the applied change.
 */
return
state1 == state2 ||
memcmp(state1, state2, sizeof(TSExternalTokenState)) == 0;
return ts_external_token_state_eq(state1, state2);
}

View file

@ -14,6 +14,17 @@ extern "C" {
extern TSStateId TS_TREE_STATE_NONE;
/*
 * Serialized external scanner state stored on a tree.
 *
 * `length` is the number of stored bytes and also selects the active union
 * member: when `length` is greater than sizeof(short_data) the bytes live in
 * the block pointed to by `long_data`; otherwise they are stored inline in
 * `short_data`. Use ts_external_token_state_data() to obtain the bytes rather
 * than reading a union member directly.
 */
typedef struct {
union {
/* Separately allocated storage, used only when the bytes do not fit inline. */
char *long_data;
/* Inline storage for payloads of at most sizeof(short_data) bytes. */
char short_data[sizeof(char *) + sizeof(unsigned)];
};
/* Number of valid bytes in whichever storage is active. */
unsigned length;
} TSExternalTokenState;
/*
 * Initialize a state with a copy of the given bytes (pointer to the bytes,
 * then their count).
 */
void ts_external_token_state_init(TSExternalTokenState *, const char *, unsigned);
/* Return a pointer to the bytes held by the state (inline or separately allocated). */
const char *ts_external_token_state_data(const TSExternalTokenState *);
typedef struct Tree {
struct {
struct Tree *parent;
@ -25,10 +36,10 @@ typedef struct Tree {
uint32_t child_count;
union {
struct {
struct Tree **children;
uint32_t visible_child_count;
uint32_t named_child_count;
unsigned short rename_sequence_id;
struct Tree **children;
};
TSExternalTokenState external_token_state;
int32_t lookahead_char;