Rework API completely

This commit is contained in:
Max Brunsfeld 2018-05-10 22:22:37 -07:00
parent 33f7643040
commit e75ecd1bb1
31 changed files with 841 additions and 1075 deletions

View file

@ -5,6 +5,7 @@
extern "C" {
#endif
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
@ -13,7 +14,8 @@ extern "C" {
typedef unsigned short TSSymbol;
typedef struct TSLanguage TSLanguage;
typedef struct TSDocument TSDocument;
typedef struct TSParser TSParser;
typedef struct TSTree TSTree;
typedef struct TSTreeCursor TSTreeCursor;
typedef enum {
@ -65,17 +67,29 @@ typedef struct {
typedef struct {
const void *subtree;
const TSDocument *document;
const TSTree *tree;
TSPoint position;
uint32_t byte;
TSSymbol alias_symbol;
} TSNode;
// Options accepted by ts_document_parse_with_options.
typedef struct {
// Optional out-parameter: receives the array of ranges that differ between
// the previous tree and the new one. Only used when changed_range_count is
// also non-NULL; it is reset to NULL before parsing starts.
TSRange **changed_ranges;
// Optional out-parameter: receives the number of entries written to
// *changed_ranges. It is reset to 0 before parsing starts.
uint32_t *changed_range_count;
// Passed through to the parser as its halt_on_error argument.
bool halt_on_error;
} TSParseOptions;
TSParser *ts_parser_new();
void ts_parser_delete(TSParser *);
const TSLanguage *ts_parser_language(const TSParser *);
bool ts_parser_set_language(TSParser *, const TSLanguage *);
TSLogger ts_parser_logger(const TSParser *);
void ts_parser_set_logger(TSParser *, TSLogger);
void ts_parser_print_debugging_graphs(TSParser *, bool);
void ts_parser_halt_on_error(TSParser *, bool);
TSTree *ts_parser_parse(TSParser *, const TSTree *, TSInput);
TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_t);
TSTree *ts_tree_copy(const TSTree *);
void ts_tree_delete(const TSTree *);
TSNode ts_tree_root_node(const TSTree *);
void ts_tree_edit(TSTree *, const TSInputEdit *);
TSRange *ts_tree_get_changed_ranges(const TSTree *, const TSTree *, uint32_t *);
void ts_tree_print_dot_graph(const TSTree *, FILE *);
uint32_t ts_node_start_byte(TSNode);
TSPoint ts_node_start_point(TSNode);
@ -105,26 +119,7 @@ TSNode ts_node_named_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
TSNode ts_node_descendant_for_point_range(TSNode, TSPoint, TSPoint);
TSNode ts_node_named_descendant_for_point_range(TSNode, TSPoint, TSPoint);
TSDocument *ts_document_new();
void ts_document_free(TSDocument *);
const TSLanguage *ts_document_language(TSDocument *);
void ts_document_set_language(TSDocument *, const TSLanguage *);
TSInput ts_document_input(TSDocument *);
void ts_document_set_input(TSDocument *, TSInput);
void ts_document_set_input_string(TSDocument *, const char *);
void ts_document_set_input_string_with_length(TSDocument *, const char *, uint32_t);
TSLogger ts_document_logger(const TSDocument *);
void ts_document_set_logger(TSDocument *, TSLogger);
void ts_document_print_debugging_graphs(TSDocument *, bool);
void ts_document_edit(TSDocument *, TSInputEdit);
void ts_document_parse(TSDocument *);
void ts_document_parse_and_get_changed_ranges(TSDocument *, TSRange **, uint32_t *);
void ts_document_parse_with_options(TSDocument *, TSParseOptions);
void ts_document_invalidate(TSDocument *);
TSNode ts_document_root_node(const TSDocument *);
TSTreeCursor *ts_document_tree_cursor(const TSDocument *);
uint32_t ts_document_parse_count(const TSDocument *);
TSTreeCursor *ts_tree_cursor_new(const TSTree *);
void ts_tree_cursor_delete(TSTreeCursor *);
bool ts_tree_cursor_goto_first_child(TSTreeCursor *);
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *);

View file

@ -87,7 +87,6 @@
'externals/utf8proc',
],
'sources': [
'src/runtime/document.c',
'src/runtime/get_changed_ranges.c',
'src/runtime/language.c',
'src/runtime/lexer.c',
@ -96,6 +95,7 @@
'src/runtime/parser.c',
'src/runtime/string_input.c',
'src/runtime/subtree.c',
'src/runtime/tree.c',
'src/runtime/tree_cursor.c',
'src/runtime/utf16.c',
'externals/utf8proc/utf8proc.c',

View file

@ -1,188 +0,0 @@
#include "runtime/alloc.h"
#include "runtime/subtree.h"
#include "runtime/parser.h"
#include "runtime/string_input.h"
#include "runtime/document.h"
#include "runtime/tree_cursor.h"
#include "runtime/get_changed_ranges.h"
// Formats a message into the lexer's debug buffer and hands it to the
// document's logger as a TSLogTypeLex entry. Requires a `TSDocument *self` in
// scope. The macro does not check that the logger callback is set; callers
// must do that themselves. Wrapped in do/while(0) so `LOG(...);` behaves as a
// single statement, and deliberately has no trailing line continuation.
#define LOG(...)                                                              \
  do {                                                                        \
    snprintf(                                                                 \
      self->parser.lexer.debug_buffer,                                        \
      TREE_SITTER_SERIALIZATION_BUFFER_SIZE,                                  \
      __VA_ARGS__                                                             \
    );                                                                        \
    self->parser.lexer.logger.log(                                            \
      self->parser.lexer.logger.payload,                                      \
      TSLogTypeLex,                                                           \
      self->parser.lexer.debug_buffer                                         \
    );                                                                        \
  } while (0)
TSDocument *ts_document_new() {
TSDocument *self = ts_calloc(1, sizeof(TSDocument));
ts_parser_init(&self->parser);
array_init(&self->cursor1.stack);
array_init(&self->cursor2.stack);
return self;
}
// Releases everything the document holds: its syntax tree, both scratch
// cursor stacks, the embedded parser, the input payload (when owned), and
// finally the document itself.
void ts_document_free(TSDocument *self) {
  // The tree is released through the parser's pool, so this has to happen
  // before the parser is destroyed.
  if (self->tree) {
    ts_subtree_release(&self->parser.tree_pool, self->tree);
  }
  if (self->cursor1.stack.contents) {
    array_delete(&self->cursor1.stack);
  }
  if (self->cursor2.stack.contents) {
    array_delete(&self->cursor2.stack);
  }
  ts_parser_destroy(&self->parser);

  // Installing an empty input frees the previous payload if the document
  // owns it.
  TSInput empty_input = {NULL, NULL, NULL, TSInputEncodingUTF8};
  ts_document_set_input(self, empty_input);

  ts_free(self);
}
// Returns the language currently assigned to the document's parser.
const TSLanguage *ts_document_language(TSDocument *self) {
  const TSParser *parser = &self->parser;
  return parser->language;
}
// Assigns a language to the document. Languages whose version differs from
// TREE_SITTER_LANGUAGE_VERSION are ignored. On success the document is
// invalidated and any existing tree is released.
void ts_document_set_language(TSDocument *self, const TSLanguage *language) {
  if (language->version == TREE_SITTER_LANGUAGE_VERSION) {
    ts_document_invalidate(self);
    ts_parser_set_language(&self->parser, language);

    Subtree *stale_tree = self->tree;
    if (stale_tree) {
      ts_subtree_release(&self->parser.tree_pool, stale_tree);
      self->tree = NULL;
    }
  }
}
TSLogger ts_document_logger(const TSDocument *self) {
return self->parser.lexer.logger;
}
// Installs `logger` on the document's lexer, replacing the previous one.
void ts_document_set_logger(TSDocument *self, TSLogger logger) {
  TSParser *parser = &self->parser;
  parser->lexer.logger = logger;
}
// Turns the parser's debugging-graph output on or off.
void ts_document_print_debugging_graphs(TSDocument *self, bool should_print) {
  TSParser *parser = &self->parser;
  parser->print_debugging_graphs = should_print;
}
// Returns a copy of the document's current input descriptor.
TSInput ts_document_input(TSDocument *self) {
  TSInput current_input = self->input;
  return current_input;
}
// Replaces the document's input. If the document owned the previous input's
// payload, that payload is freed first. The new input is treated as
// caller-owned.
void ts_document_set_input(TSDocument *self, TSInput input) {
  if (self->owns_input) {
    ts_free(self->input.payload);
  }
  self->owns_input = false;
  self->input = input;
}
// Invalidates the document and makes it read from `text` through a string
// input created by ts_string_input_make. The document takes ownership of the
// created payload when one was allocated.
void ts_document_set_input_string(TSDocument *self, const char *text) {
  ts_document_invalidate(self);

  TSInput string_input = ts_string_input_make(text);
  ts_document_set_input(self, string_input);

  // ts_document_set_input has just cleared owns_input, so a plain assignment
  // has the same effect as setting it only when a payload exists.
  self->owns_input = string_input.payload != NULL;
}
// Same as ts_document_set_input_string, but reads exactly `length` bytes of
// `text` instead of relying on a terminating NUL.
void ts_document_set_input_string_with_length(TSDocument *self, const char *text, uint32_t length) {
  ts_document_invalidate(self);

  TSInput string_input = ts_string_input_make_with_length(text, length);
  ts_document_set_input(self, string_input);

  // ts_document_set_input has just cleared owns_input, so a plain assignment
  // has the same effect as setting it only when a payload exists.
  self->owns_input = string_input.payload != NULL;
}
// Applies an edit to the document's tree so the next parse can reuse it.
// Does nothing when there is no tree or the edit starts past the end of the
// tree. The removed byte count is clamped so it never extends past the end.
void ts_document_edit(TSDocument *self, TSInputEdit edit) {
  if (!self->tree) {
    return;
  }

  uint32_t total_bytes = ts_subtree_total_bytes(self->tree);
  if (edit.start_byte > total_bytes) {
    return;
  }
  if (edit.bytes_removed > total_bytes - edit.start_byte) {
    edit.bytes_removed = total_bytes - edit.start_byte;
  }

  self->tree = ts_subtree_edit(self->tree, &edit, &self->parser.tree_pool);

  if (self->parser.print_debugging_graphs) {
    ts_subtree_print_dot_graph(self->tree, self->parser.language, stderr);
  }
}
// Parses the document with default options: no halting on error and no
// changed-range reporting.
void ts_document_parse(TSDocument *self) {
  TSParseOptions default_options = {
    .halt_on_error = false,
    .changed_ranges = NULL,
    .changed_range_count = NULL,
  };
  ts_document_parse_with_options(self, default_options);
}
// Parses the document without halting on error and reports, through `ranges`
// and `range_count`, which ranges differ from the previous tree.
void ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges,
                                              uint32_t *range_count) {
  TSParseOptions range_options = {
    .halt_on_error = false,
    .changed_ranges = ranges,
    .changed_range_count = range_count,
  };
  ts_document_parse_with_options(self, range_options);
}
// Parses the document's input with the given options.
//
// The changed-range outputs (when both are provided) are reset first, so they
// are well-defined even when no parse happens. Nothing is parsed when the
// document has no input reader or no language, or when it holds a valid tree
// without pending changes. Otherwise the previous tree is reused if the
// document is still valid, and is released after the changed ranges (if
// requested) have been computed against the new tree.
void ts_document_parse_with_options(TSDocument *self, TSParseOptions options) {
  const bool wants_ranges = options.changed_ranges && options.changed_range_count;
  if (wants_ranges) {
    *options.changed_ranges = NULL;
    *options.changed_range_count = 0;
  }

  if (!self->input.read || !self->parser.language) {
    return;
  }

  Subtree *reusable_tree = NULL;
  if (self->valid) {
    reusable_tree = self->tree;
  }
  if (reusable_tree && !reusable_tree->has_changes) {
    return;
  }

  Subtree *new_tree = ts_parser_parse(
    &self->parser, self->input, reusable_tree, options.halt_on_error
  );

  Subtree *previous_tree = self->tree;
  self->tree = new_tree;

  if (previous_tree) {
    if (wants_ranges) {
      *options.changed_range_count = ts_subtree_get_changed_ranges(
        previous_tree, new_tree, &self->cursor1, &self->cursor2,
        self->parser.language, options.changed_ranges
      );

      if (self->parser.lexer.logger.log) {
        for (unsigned i = 0; i < *options.changed_range_count; i++) {
          TSRange range = (*options.changed_ranges)[i];
          LOG(
            "changed_range start:[%u %u], end:[%u %u]",
            range.start.row, range.start.column,
            range.end.row, range.end.column
          );
        }
      }
    }
    ts_subtree_release(&self->parser.tree_pool, previous_tree);
  }

  self->parse_count++;
  self->valid = true;
}
// Marks the document's current tree as not reusable, so the next parse
// starts without a previous tree.
void ts_document_invalidate(TSDocument *self) {
  bool *is_valid = &self->valid;
  *is_valid = false;
}
TSNode ts_document_root_node(const TSDocument *self) {
return (TSNode) {
.subtree = self->tree,
.document = self,
.position = {0, 0},
.byte = 0,
};
}
// Returns how many parses the document has completed, narrowed to 32 bits.
uint32_t ts_document_parse_count(const TSDocument *self) {
  uint32_t completed_parses = self->parse_count;
  return completed_parses;
}
// Creates a new tree cursor for this document; the result comes straight
// from ts_tree_cursor_new.
TSTreeCursor *ts_document_tree_cursor(const TSDocument *self) {
  TSTreeCursor *cursor = ts_tree_cursor_new(self);
  return cursor;
}

View file

@ -1,20 +0,0 @@
#ifndef RUNTIME_DOCUMENT_H_
#define RUNTIME_DOCUMENT_H_
#include "runtime/parser.h"
#include "runtime/subtree.h"
#include "runtime/tree_cursor.h"
#include <stdbool.h>
// A document bundles a parser, the input it reads from, and the most recent
// syntax tree produced for that input.
struct TSDocument {
// Embedded parser; also owns the subtree pool used to release `tree`.
TSParser parser;
// Input the next parse reads from.
TSInput input;
// Root of the most recent parse, or NULL when there is none.
Subtree *tree;
// Scratch cursors handed to ts_subtree_get_changed_ranges.
TSTreeCursor cursor1;
TSTreeCursor cursor2;
// Incremented each time a parse completes.
size_t parse_count;
// False after ts_document_invalidate; when false, the next parse does not
// reuse `tree`.
bool valid;
// True when `input.payload` must be freed by the document when the input is
// replaced.
bool owns_input;
};
#endif

View file

@ -34,7 +34,7 @@ typedef struct {
static Iterator iterator_new(TSTreeCursor *cursor, Subtree *tree, const TSLanguage *language) {
array_clear(&cursor->stack);
array_push(&cursor->stack, ((TreeCursorEntry){
.tree = tree,
.subtree = tree,
.position = length_zero(),
.child_index = 0,
.structural_child_index = 0,
@ -56,25 +56,25 @@ Length iterator_start_position(Iterator *self) {
if (self->in_padding) {
return entry.position;
} else {
return length_add(entry.position, entry.tree->padding);
return length_add(entry.position, entry.subtree->padding);
}
}
Length iterator_end_position(Iterator *self) {
TreeCursorEntry entry = *array_back(&self->cursor.stack);
Length result = length_add(entry.position, entry.tree->padding);
Length result = length_add(entry.position, entry.subtree->padding);
if (self->in_padding) {
return result;
} else {
return length_add(result, entry.tree->size);
return length_add(result, entry.subtree->size);
}
}
static bool iterator_tree_is_visible(const Iterator *self) {
TreeCursorEntry entry = *array_back(&self->cursor.stack);
if (entry.tree->visible) return true;
if (entry.subtree->visible) return true;
if (self->cursor.stack.size > 1) {
Subtree *parent = self->cursor.stack.contents[self->cursor.stack.size - 2].tree;
Subtree *parent = self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
const TSSymbol *alias_sequence = ts_language_alias_sequence(self->language, parent->alias_sequence_id);
return alias_sequence && alias_sequence[entry.structural_child_index] != 0;
}
@ -94,7 +94,7 @@ static void iterator_get_visible_state(const Iterator *self, Subtree **tree,
TreeCursorEntry entry = self->cursor.stack.contents[i];
if (i > 0) {
Subtree *parent = self->cursor.stack.contents[i - 1].tree;
Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->language,
parent->alias_sequence_id
@ -104,8 +104,8 @@ static void iterator_get_visible_state(const Iterator *self, Subtree **tree,
}
}
if (entry.tree->visible || *alias_symbol) {
*tree = entry.tree;
if (entry.subtree->visible || *alias_symbol) {
*tree = entry.subtree;
*start_byte = entry.position.bytes;
break;
}
@ -128,14 +128,14 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) {
TreeCursorEntry entry = *array_back(&self->cursor.stack);
Length position = entry.position;
uint32_t structural_child_index = 0;
for (uint32_t i = 0; i < entry.tree->children.size; i++) {
Subtree *child = entry.tree->children.contents[i];
for (uint32_t i = 0; i < entry.subtree->children.size; i++) {
Subtree *child = entry.subtree->children.contents[i];
Length child_left = length_add(position, child->padding);
Length child_right = length_add(child_left, child->size);
if (child_right.bytes > goal_position) {
array_push(&self->cursor.stack, ((TreeCursorEntry){
.tree = child,
.subtree = child,
.position = position,
.child_index = i,
.structural_child_index = structural_child_index,
@ -178,16 +178,16 @@ static void iterator_advance(Iterator *self) {
TreeCursorEntry entry = array_pop(&self->cursor.stack);
if (iterator_done(self)) return;
Subtree *parent = array_back(&self->cursor.stack)->tree;
Subtree *parent = array_back(&self->cursor.stack)->subtree;
uint32_t child_index = entry.child_index + 1;
if (parent->children.size > child_index) {
Length position = length_add(entry.position, ts_subtree_total_size(entry.tree));
Length position = length_add(entry.position, ts_subtree_total_size(entry.subtree));
uint32_t structural_child_index = entry.structural_child_index;
if (!entry.tree->extra) structural_child_index++;
if (!entry.subtree->extra) structural_child_index++;
Subtree *next_child = parent->children.contents[child_index];
array_push(&self->cursor.stack, ((TreeCursorEntry){
.tree = next_child,
.subtree = next_child,
.position = position,
.child_index = child_index,
.structural_child_index = structural_child_index,
@ -250,7 +250,7 @@ static inline void iterator_print_state(Iterator *self) {
TreeCursorEntry entry = *array_back(&self->cursor.stack);
TSPoint start = iterator_start_position(self).extent;
TSPoint end = iterator_end_position(self).extent;
const char *name = ts_language_symbol_name(self->language, entry.tree->symbol);
const char *name = ts_language_symbol_name(self->language, entry.subtree->symbol);
printf(
"(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
name, self->in_padding ? "(p)" : " ",

View file

@ -1,13 +1,13 @@
#include <stdbool.h>
#include "runtime/subtree.h"
#include "runtime/document.h"
#include "runtime/tree.h"
#include "runtime/language.h"
// NodeChildIterator
typedef struct {
const Subtree *parent;
const TSDocument *document;
const TSTree *tree;
Length position;
uint32_t child_index;
uint32_t structural_child_index;
@ -19,7 +19,7 @@ typedef struct {
static inline TSNode ts_node__null() {
return (TSNode) {
.subtree = NULL,
.document = NULL,
.tree = NULL,
.position = {0, 0},
.byte = 0,
};
@ -32,12 +32,12 @@ static inline const Subtree *ts_node__tree(TSNode self) {
static inline NodeChildIterator ts_node_child_iterator_begin(const TSNode *node) {
const Subtree *tree = ts_node__tree(*node);
const TSSymbol *alias_sequence = ts_language_alias_sequence(
node->document->parser.language,
node->tree->language,
tree->alias_sequence_id
);
return (NodeChildIterator) {
.parent = tree,
.document = node->document,
.tree = node->tree,
.position = {node->byte, node->position},
.child_index = 0,
.structural_child_index = 0,
@ -57,7 +57,7 @@ static inline bool ts_node_child_iterator_next(NodeChildIterator *self, TSNode *
}
*result = (TSNode) {
.subtree = child,
.document = self->document,
.tree = self->tree,
.position = self->position.extent,
.byte = self->position.bytes,
.alias_symbol = alias_symbol,
@ -77,7 +77,7 @@ static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
(
self.alias_symbol &&
ts_language_symbol_metadata(
self.document->parser.language,
self.tree->language,
self.alias_symbol
).named
)
@ -343,11 +343,11 @@ TSSymbol ts_node_symbol(TSNode self) {
}
const char *ts_node_type(TSNode self) {
return ts_language_symbol_name(self.document->parser.language, ts_node_symbol(self));
return ts_language_symbol_name(self.tree->language, ts_node_symbol(self));
}
char *ts_node_string(TSNode self) {
return ts_subtree_string(ts_node__tree(self), self.document->parser.language, false);
return ts_subtree_string(ts_node__tree(self), self.tree->language, false);
}
bool ts_node_eq(TSNode self, TSNode other) {
@ -360,7 +360,7 @@ bool ts_node_eq(TSNode self, TSNode other) {
bool ts_node_is_named(TSNode self) {
const Subtree *tree = ts_node__tree(self);
return self.alias_symbol
? ts_language_symbol_metadata(self.document->parser.language, self.alias_symbol).named
? ts_language_symbol_metadata(self.tree->language, self.alias_symbol).named
: tree->named;
}
@ -378,7 +378,7 @@ bool ts_node_has_error(TSNode self) {
}
TSNode ts_node_parent(TSNode self) {
TSNode node = ts_document_root_node(self.document);
TSNode node = ts_tree_root_node(self.tree);
uint32_t end_byte = ts_node_end_byte(self);
if (node.subtree == self.subtree) return ts_node__null();

View file

@ -1,4 +1,3 @@
#include "runtime/parser.h"
#include <assert.h>
#include <stdio.h>
#include <limits.h>
@ -10,8 +9,12 @@
#include "runtime/array.h"
#include "runtime/language.h"
#include "runtime/alloc.h"
#include "runtime/stack.h"
#include "runtime/reusable_node.h"
#include "runtime/reduce_action.h"
#include "runtime/error_costs.h"
#include "runtime/string_input.h"
#include "runtime/tree.h"
#define LOG(...) \
if (self->lexer.logger.log || self->print_debugging_graphs) { \
@ -37,6 +40,29 @@ static const unsigned MAX_VERSION_COUNT = 6;
static const unsigned MAX_SUMMARY_DEPTH = 16;
static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
// Cache slot reset through ts_parser__set_cached_token.
// NOTE(review): presumably holds the most recently lexed token together with
// the external token that preceded it and the byte offset it was lexed at;
// confirm against the cache lookup code.
typedef struct {
Subtree *token;
Subtree *last_external_token;
uint32_t byte_index;
} TokenCache;
// Full definition of the parser type that the public API exposes only as an
// opaque TSParser pointer.
struct TSParser {
// Lexer state; also holds the logger exposed by ts_parser_logger.
Lexer lexer;
// Parse stack, created by ts_stack_new in ts_parser_new.
Stack *stack;
// Subtree pool shared with the stack.
SubtreePool tree_pool;
// Language assigned by ts_parser_set_language; NULL until one is set.
const TSLanguage *language;
// Reduce-action set; ts_parser_new reserves room for 4 entries up front.
ReduceActionSet reduce_actions;
// Subtree that ts_parser_parse wraps in the TSTree it returns.
Subtree *finished_tree;
Subtree scratch_tree;
// Reset by ts_parser__set_cached_token.
TokenCache token_cache;
ReusableNode reusable_node;
// Value returned by the language's external scanner `create` callback, or
// NULL when the language has none.
void *external_scanner_payload;
bool in_ambiguity;
// Toggled by ts_parser_print_debugging_graphs.
bool print_debugging_graphs;
// Toggled by ts_parser_halt_on_error; when set, parsing is halted once the
// minimum error cost reported while condensing the stack is non-zero.
bool halt_on_error;
unsigned accept_count;
};
typedef struct {
unsigned cost;
unsigned node_count;
@ -52,6 +78,8 @@ typedef enum {
ErrorComparisonTakeRight,
} ErrorComparison;
// Parser - Private
static void ts_parser__log(TSParser *self) {
if (self->lexer.logger.log) {
self->lexer.logger.log(
@ -670,7 +698,7 @@ static StackSliceArray ts_parser__reduce(TSParser *self, StackVersion version, T
return pop;
}
static void ts_parser__start(TSParser *self, TSInput input, Subtree *previous_tree) {
static void ts_parser__start(TSParser *self, TSInput input, const Subtree *previous_tree) {
if (previous_tree) {
LOG("parse_after_edit");
} else {
@ -1258,42 +1286,76 @@ static unsigned ts_parser__condense_stack(TSParser *self) {
return min_error_cost;
}
bool ts_parser_init(TSParser *self) {
// Parser - Public
TSParser *ts_parser_new() {
TSParser *self = ts_calloc(1, sizeof(TSParser));
ts_lexer_init(&self->lexer);
array_init(&self->reduce_actions);
array_reserve(&self->reduce_actions, 4);
ts_subtree_pool_init(&self->tree_pool);
self->tree_pool = ts_subtree_pool_new(32);
self->stack = ts_stack_new(&self->tree_pool);
self->finished_tree = NULL;
self->reusable_node = reusable_node_new();
self->print_debugging_graphs = false;
self->halt_on_error = false;
ts_parser__set_cached_token(self, 0, NULL, NULL);
return true;
return self;
}
// Switches the parser to `language` (which may be NULL). The external
// scanner payload of the previous language is destroyed first, then a new
// payload is created if the new language provides a `create` callback.
void ts_parser_set_language(TSParser *self, const TSLanguage *language) {
  if (self->external_scanner_payload && self->language->external_scanner.destroy) {
    self->language->external_scanner.destroy(self->external_scanner_payload);
  }

  self->external_scanner_payload =
    (language && language->external_scanner.create)
      ? language->external_scanner.create()
      : NULL;

  self->language = language;
}
void ts_parser_destroy(TSParser *self) {
if (self->stack)
void ts_parser_delete(TSParser *self) {
if (self->stack) {
ts_stack_delete(self->stack);
if (self->reduce_actions.contents)
}
if (self->reduce_actions.contents) {
array_delete(&self->reduce_actions);
}
ts_subtree_pool_delete(&self->tree_pool);
reusable_node_delete(&self->reusable_node);
ts_parser_set_language(self, NULL);
ts_free(self);
}
Subtree *ts_parser_parse(TSParser *self, TSInput input, Subtree *old_tree, bool halt_on_error) {
ts_parser__start(self, input, old_tree);
// Returns the language currently assigned to the parser (NULL if none).
const TSLanguage *ts_parser_language(const TSParser *self) {
  const TSLanguage *current_language = self->language;
  return current_language;
}
// Assigns `language` to the parser. A non-NULL language whose version differs
// from TREE_SITTER_LANGUAGE_VERSION is rejected: the function returns false
// and changes nothing. Otherwise the previous language's external scanner
// payload is destroyed, a new payload is created when the new language
// provides a `create` callback, and true is returned. Passing NULL clears the
// language.
bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
  if (language != NULL && language->version != TREE_SITTER_LANGUAGE_VERSION) {
    return false;
  }

  void *old_payload = self->external_scanner_payload;
  if (old_payload != NULL && self->language->external_scanner.destroy != NULL) {
    self->language->external_scanner.destroy(old_payload);
  }

  void *new_payload = NULL;
  if (language != NULL && language->external_scanner.create != NULL) {
    new_payload = language->external_scanner.create();
  }

  self->external_scanner_payload = new_payload;
  self->language = language;
  return true;
}
// Returns the logger currently installed on the parser's lexer.
TSLogger ts_parser_logger(const TSParser *self) {
  TSLogger current_logger = self->lexer.logger;
  return current_logger;
}
// Installs `logger` on the parser's lexer, replacing the previous one.
void ts_parser_set_logger(TSParser *self, TSLogger logger) {
  TSLogger *slot = &self->lexer.logger;
  *slot = logger;
}
// Turns the parser's debugging-graph output on or off.
void ts_parser_print_debugging_graphs(TSParser *self, bool should_print) {
  bool *flag = &self->print_debugging_graphs;
  *flag = should_print;
}
// Controls whether subsequent parses are halted as soon as an error is
// detected.
void ts_parser_halt_on_error(TSParser *self, bool should_halt_on_error) {
  bool *flag = &self->halt_on_error;
  *flag = should_halt_on_error;
}
TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
if (!self->language) return NULL;
ts_parser__start(self, input, old_tree ? old_tree->root : NULL);
StackVersion version = STACK_VERSION_NONE;
uint32_t position = 0, last_position = 0;
@ -1327,7 +1389,7 @@ Subtree *ts_parser_parse(TSParser *self, TSInput input, Subtree *old_tree, bool
unsigned min_error_cost = ts_parser__condense_stack(self);
if (self->finished_tree && self->finished_tree->error_cost < min_error_cost) {
break;
} else if (halt_on_error && min_error_cost > 0) {
} else if (self->halt_on_error && min_error_cost > 0) {
ts_parser__halt_parse(self);
break;
}
@ -1342,5 +1404,13 @@ Subtree *ts_parser_parse(TSParser *self, TSInput input, Subtree *old_tree, bool
LOG("done");
LOG_TREE();
return self->finished_tree;
return ts_tree_new(self->finished_tree, self->language);
}
// Convenience wrapper around ts_parser_parse that reads `length` bytes from
// an in-memory string.
//
// NOTE(review): the string input lives on this function's stack, which
// assumes the parser no longer reads from the input once ts_parser_parse has
// returned; confirm.
TSTree *ts_parser_parse_string(TSParser *self, const TSTree *old_tree,
                               const char *string, uint32_t length) {
  TSStringInput string_input;
  ts_string_input_init(&string_input, string, length);

  TSInput input = string_input.input;
  return ts_parser_parse(self, old_tree, input);
}

View file

@ -1,46 +0,0 @@
#ifndef RUNTIME_PARSER_H_
#define RUNTIME_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "runtime/stack.h"
#include "runtime/array.h"
#include "runtime/lexer.h"
#include "runtime/reusable_node.h"
#include "runtime/reduce_action.h"
#include "runtime/subtree.h"
// Token cache slot embedded in the parser.
// NOTE(review): presumably holds the most recently lexed token together with
// the external token that preceded it and the byte offset it was lexed at;
// confirm against the parser implementation.
typedef struct {
Subtree *token;
Subtree *last_external_token;
uint32_t byte_index;
} TokenCache;
// Parser state, initialized in place by ts_parser_init and torn down by
// ts_parser_destroy.
typedef struct {
// Lexer state, including the logger.
Lexer lexer;
// Parse stack.
Stack *stack;
// Pool that subtrees are released to.
SubtreePool tree_pool;
// Language assigned by ts_parser_set_language; may be NULL.
const TSLanguage *language;
ReduceActionSet reduce_actions;
// Subtree returned by ts_parser_parse.
Subtree *finished_tree;
Subtree scratch_tree;
TokenCache token_cache;
ReusableNode reusable_node;
// Value returned by the language's external scanner `create` callback, or
// NULL when the language has none.
void *external_scanner_payload;
bool in_ambiguity;
// When true, debugging graphs are printed.
bool print_debugging_graphs;
unsigned accept_count;
} TSParser;
bool ts_parser_init(TSParser *);
void ts_parser_destroy(TSParser *);
Subtree *ts_parser_parse(TSParser *, TSInput, Subtree *, bool halt_on_error);
void ts_parser_set_language(TSParser *, const TSLanguage *);
#ifdef __cplusplus
}
#endif
#endif // RUNTIME_PARSER_H_

View file

@ -24,7 +24,7 @@ static inline void reusable_node_reset(ReusableNode *self, Subtree *tree) {
}));
}
static inline Subtree *reusable_node_tree(ReusableNode *self) {
static inline const Subtree *reusable_node_tree(ReusableNode *self) {
return self->stack.size > 0
? self->stack.contents[self->stack.size - 1].tree
: NULL;

View file

@ -1,13 +1,7 @@
#include "tree_sitter/runtime.h"
#include "runtime/string_input.h"
#include "runtime/alloc.h"
#include <string.h>
// Payload for a TSInput that reads from an in-memory string.
typedef struct {
// Text being read; only the pointer is stored, the text is not copied.
const char *string;
// Current read offset; the read callback compares it against `length`.
uint32_t position;
// Number of bytes available in `string`.
uint32_t length;
} TSStringInput;
static const char *ts_string_input__read(void *payload, uint32_t *bytes_read) {
TSStringInput *input = (TSStringInput *)payload;
if (input->position >= input->length) {
@ -26,17 +20,12 @@ static int ts_string_input__seek(void *payload, uint32_t byte, TSPoint _) {
return (byte < input->length);
}
// Creates a string input for a NUL-terminated string, measuring it with
// strlen and delegating to ts_string_input_make_with_length.
TSInput ts_string_input_make(const char *string) {
  uint32_t length = strlen(string);
  return ts_string_input_make_with_length(string, length);
}
TSInput ts_string_input_make_with_length(const char *string, uint32_t length) {
TSStringInput *input = ts_malloc(sizeof(TSStringInput));
input->string = string;
input->position = 0;
input->length = length;
return (TSInput){
.payload = input,
void ts_string_input_init(TSStringInput *self, const char *string, uint32_t length) {
self->string = string;
self->position = 0;
self->length = length;
self->input = (TSInput) {
.payload = self,
.read = ts_string_input__read,
.seek = ts_string_input__seek,
.encoding = TSInputEncodingUTF8,

View file

@ -7,8 +7,14 @@ extern "C" {
#include "tree_sitter/runtime.h"
TSInput ts_string_input_make(const char *);
TSInput ts_string_input_make_with_length(const char *, uint32_t);
// State for reading from an in-memory string, together with the TSInput that
// exposes it to the parser. Filled in by ts_string_input_init.
typedef struct {
// Text being read; only the pointer is stored, the text is not copied.
const char *string;
// Current read offset; starts at 0.
uint32_t position;
// Number of bytes available in `string`.
uint32_t length;
// Input descriptor whose payload points back at this struct, so the struct
// must stay alive for as long as `input` is in use.
TSInput input;
} TSStringInput;
void ts_string_input_init(TSStringInput *, const char *, uint32_t);
#ifdef __cplusplus
}

View file

@ -19,6 +19,10 @@ typedef struct {
TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
static const uint32_t MAX_TREE_POOL_SIZE = 1024;
static const TSExternalTokenState empty_state = {.length = 0, .short_data = {0}};
// ExternalTokenState
void ts_external_token_state_init(TSExternalTokenState *self, const char *content, unsigned length) {
@ -102,11 +106,10 @@ void ts_subtree_array_reverse(SubtreeArray *self) {
// SubtreePool
static const uint32_t MAX_TREE_POOL_SIZE = 1024;
void ts_subtree_pool_init(SubtreePool *self) {
array_init(&self->free_trees);
array_init(&self->tree_stack);
SubtreePool ts_subtree_pool_new(uint32_t capacity) {
SubtreePool self = {array_new(), array_new()};
array_reserve(&self.free_trees, capacity);
return self;
}
void ts_subtree_pool_delete(SubtreePool *self) {
@ -128,7 +131,7 @@ Subtree *ts_subtree_pool_allocate(SubtreePool *self) {
}
void ts_subtree_pool_free(SubtreePool *self, Subtree *tree) {
if (self->free_trees.size < MAX_TREE_POOL_SIZE) {
if (self->free_trees.capacity > 0 && self->free_trees.size < MAX_TREE_POOL_SIZE) {
array_push(&self->free_trees, tree);
} else {
ts_free(tree);
@ -691,8 +694,6 @@ void ts_subtree_print_dot_graph(const Subtree *self, const TSLanguage *language,
fprintf(f, "}\n");
}
static const TSExternalTokenState empty_state = {.length = 0, .short_data = {0}};
bool ts_subtree_external_token_state_eq(const Subtree *self, const Subtree *other) {
const TSExternalTokenState *state1 = &empty_state;
const TSExternalTokenState *state2 = &empty_state;

View file

@ -1,5 +1,5 @@
#ifndef RUNTIME_TREE_H_
#define RUNTIME_TREE_H_
#ifndef RUNTIME_SUBTREE_H_
#define RUNTIME_SUBTREE_H_
#ifdef __cplusplus
extern "C" {
@ -83,7 +83,7 @@ void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *);
void ts_subtree_array_reverse(SubtreeArray *);
void ts_subtree_pool_init(SubtreePool *);
SubtreePool ts_subtree_pool_new(uint32_t capacity);
void ts_subtree_pool_delete(SubtreePool *);
Subtree *ts_subtree_pool_allocate(SubtreePool *);
void ts_subtree_pool_free(SubtreePool *, Subtree *);
@ -122,4 +122,4 @@ static inline Length ts_subtree_total_size(const Subtree *self) {
}
#endif
#endif // RUNTIME_TREE_H_
#endif // RUNTIME_SUBTREE_H_

59
src/runtime/tree.c Normal file
View file

@ -0,0 +1,59 @@
#include "tree_sitter/runtime.h"
#include "runtime/array.h"
#include "runtime/get_changed_ranges.h"
#include "runtime/subtree.h"
#include "runtime/tree_cursor.h"
#include "runtime/tree.h"
TSTree *ts_tree_new(const Subtree *root, const TSLanguage *language) {
TSTree *result = ts_malloc(sizeof(TSTree));
result->root = root;
result->language = language;
return result;
}
TSTree *ts_tree_copy(const TSTree *self) {
ts_subtree_retain(self->root);
return ts_tree_new(self->root, self->language);
}
void ts_tree_delete(const TSTree *self) {
SubtreePool pool = ts_subtree_pool_new(0);
ts_subtree_release(&pool, self->root);
ts_subtree_pool_delete(&pool);
ts_free(self);
}
TSNode ts_tree_root_node(const TSTree *self) {
return (TSNode) {
.subtree = self->root,
.tree = self,
.position = {0, 0},
.byte = 0,
.alias_symbol = 0,
};
}
// Applies `edit` to the tree by replacing its root with the edited subtree
// returned by ts_subtree_edit.
void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
  SubtreePool pool = ts_subtree_pool_new(0);
  self->root = ts_subtree_edit(self->root, edit, &pool);

  // Editing is expected not to allocate anything in the temporary pool.
  assert(pool.tree_stack.capacity == 0 && pool.free_trees.capacity == 0);

  // The assert disappears under NDEBUG, so delete the pool anyway: if it did
  // allocate, the memory is reclaimed instead of leaked.
  ts_subtree_pool_delete(&pool);
}
// Computes the ranges that differ between `self` and `other`.
//
// Returns the array produced by ts_subtree_get_changed_ranges and stores its
// length in `*count`. Symbols are interpreted using `self`'s language.
TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uint32_t *count) {
  // Defined up front so the return value is never indeterminate.
  TSRange *result = NULL;

  // One cursor per tree: cursor1 walks `self`, cursor2 walks `other`.
  TSTreeCursor cursor1, cursor2;
  ts_tree_cursor_init(&cursor1, self);
  ts_tree_cursor_init(&cursor2, other);

  *count = ts_subtree_get_changed_ranges(
    self->root, other->root, &cursor1, &cursor2,
    self->language, &result
  );

  array_delete(&cursor1.stack);
  array_delete(&cursor2.stack);
  return result;
}
void ts_tree_print_dot_graph(const TSTree *self, FILE *file) {
ts_subtree_print_dot_graph(self->root, self->language, file);
}

19
src/runtime/tree.h Normal file
View file

@ -0,0 +1,19 @@
#ifndef RUNTIME_TREE_H_
#define RUNTIME_TREE_H_
#ifdef __cplusplus
extern "C" {
#endif
// A syntax tree handle: a root subtree plus the language it was parsed with.
struct TSTree {
// Root subtree; ts_tree_copy retains it and ts_tree_delete releases it.
const Subtree *root;
// Language used to interpret the tree's symbols.
const TSLanguage *language;
};
TSTree *ts_tree_new(const Subtree *root, const TSLanguage *language);
#ifdef __cplusplus
}
#endif
#endif // RUNTIME_TREE_H_

View file

@ -1,20 +1,24 @@
#include "tree_sitter/runtime.h"
#include "runtime/alloc.h"
#include "runtime/tree_cursor.h"
#include "runtime/document.h"
#include "runtime/language.h"
#include "runtime/tree.h"
TSTreeCursor *ts_tree_cursor_new(const TSDocument *document) {
TSTreeCursor *ts_tree_cursor_new(const TSTree *tree) {
TSTreeCursor *self = ts_malloc(sizeof(TSTreeCursor));
self->document = document;
ts_tree_cursor_init(self, tree);
return self;
}
void ts_tree_cursor_init(TSTreeCursor *self, const TSTree *tree) {
self->tree = tree;
array_init(&self->stack);
array_push(&self->stack, ((TreeCursorEntry) {
.tree = document->tree,
.subtree = tree->root,
.position = length_zero(),
.child_index = 0,
.structural_child_index = 0,
}));
return self;
}
void ts_tree_cursor_delete(TSTreeCursor *self) {
@ -24,7 +28,7 @@ void ts_tree_cursor_delete(TSTreeCursor *self) {
bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) {
TreeCursorEntry *last_entry = array_back(&self->stack);
Subtree *tree = last_entry->tree;
Subtree *tree = last_entry->subtree;
Length position = last_entry->position;
bool did_descend;
@ -36,7 +40,7 @@ bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) {
Subtree *child = tree->children.contents[i];
if (child->visible || child->visible_child_count > 0) {
array_push(&self->stack, ((TreeCursorEntry) {
.tree = child,
.subtree = child,
.child_index = i,
.structural_child_index = structural_child_index,
.position = position,
@ -64,7 +68,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) {
for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) {
TreeCursorEntry *parent_entry = &self->stack.contents[i];
Subtree *parent = parent_entry->tree;
Subtree *parent = parent_entry->subtree;
uint32_t child_index = child_entry->child_index;
uint32_t structural_child_index = child_entry->structural_child_index;
Length position = child_entry->position;
@ -77,7 +81,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) {
if (child->visible || child->visible_child_count > 0) {
self->stack.contents[i + 1] = (TreeCursorEntry) {
.tree = child,
.subtree = child,
.child_index = child_index,
.structural_child_index = structural_child_index,
.position = position,
@ -103,7 +107,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) {
bool ts_tree_cursor_goto_parent(TSTreeCursor *self) {
for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) {
TreeCursorEntry *entry = &self->stack.contents[i];
if (entry->tree->visible) {
if (entry->subtree->visible) {
self->stack.size = i + 1;
return true;
}
@ -117,16 +121,16 @@ TSNode ts_tree_cursor_current_node(TSTreeCursor *self) {
if (self->stack.size > 1) {
TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2];
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->document->parser.language,
parent_entry->tree->alias_sequence_id
self->tree->language,
parent_entry->subtree->alias_sequence_id
);
if (alias_sequence) {
alias_symbol = alias_sequence[last_entry->structural_child_index];
}
}
return (TSNode) {
.document = self->document,
.subtree = last_entry->tree,
.tree = self->tree,
.subtree = last_entry->subtree,
.position = last_entry->position.extent,
.byte = last_entry->position.bytes,
.alias_symbol = alias_symbol,

View file

@ -4,17 +4,17 @@
#include "runtime/subtree.h"
typedef struct {
Subtree *tree;
Subtree *subtree;
Length position;
uint32_t child_index;
uint32_t structural_child_index;
} TreeCursorEntry;
struct TSTreeCursor {
const TSDocument *document;
const TSTree *tree;
Array(TreeCursorEntry) stack;
};
TSTreeCursor *ts_tree_cursor_new(const TSDocument *);
void ts_tree_cursor_init(TSTreeCursor *, const TSTree *);
#endif // RUNTIME_TREE_CURSOR_H_

View file

@ -43,12 +43,12 @@ int main(int argc, char *arg[]) {
vector<size_t> error_speeds;
vector<size_t> non_error_speeds;
auto document = ts_document_new();
TSParser *parser = ts_parser_new();
if (getenv("TREE_SITTER_BENCHMARK_SVG")) {
ts_document_print_debugging_graphs(document, true);
ts_parser_print_debugging_graphs(parser, true);
} else if (getenv("TREE_SITTER_BENCHMARK_LOG")) {
ts_document_set_logger(document, stderr_logger_new(false));
ts_parser_set_logger(parser, stderr_logger_new(false));
}
auto language_filter = getenv("TREE_SITTER_BENCHMARK_LANGUAGE");
@ -61,7 +61,7 @@ int main(int argc, char *arg[]) {
for (auto &language_name : language_names) {
if (language_filter && language_name != language_filter) continue;
ts_document_set_language(document, load_real_language(language_name));
ts_parser_set_language(parser, load_real_language(language_name));
printf("%s\n", language_name.c_str());
@ -69,20 +69,16 @@ int main(int argc, char *arg[]) {
if (file_name_filter && example.file_name != file_name_filter) continue;
if (example.input.size() < 256) continue;
ts_document_invalidate(document);
ts_document_set_input_string(document, "");
ts_document_parse(document);
ts_document_invalidate(document);
ts_document_set_input_string(document, example.input.c_str());
clock_t start_time = clock();
ts_document_parse(document);
TSTree *tree = ts_parser_parse_string(parser, nullptr, example.input.c_str(), example.input.size());
clock_t end_time = clock();
unsigned duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC;
assert(!ts_node_has_error(ts_document_root_node(document)));
assert(!ts_node_has_error(ts_tree_root_node(tree)));
ts_tree_delete(tree);
size_t duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC;
size_t speed = static_cast<double>(example.input.size()) / duration;
printf(" %-30s\t%u ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed);
printf(" %-30s\t%lu ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed);
if (speed != 0) non_error_speeds.push_back(speed);
}
@ -93,15 +89,15 @@ int main(int argc, char *arg[]) {
if (file_name_filter && example.file_name != file_name_filter) continue;
if (example.input.size() < 256) continue;
ts_document_invalidate(document);
ts_document_set_input_string(document, example.input.c_str());
clock_t start_time = clock();
ts_document_parse(document);
TSTree *tree = ts_parser_parse_string(parser, nullptr, example.input.c_str(), example.input.size());
clock_t end_time = clock();
unsigned duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC;
ts_tree_delete(tree);
size_t duration = (end_time - start_time) * 1000 / CLOCKS_PER_SEC;
size_t speed = static_cast<double>(example.input.size()) / duration;
printf(" %-30s\t%u ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed);
printf(" %-30s\t%lu ms\t\t%lu bytes/ms\n", example.file_name.c_str(), duration, speed);
if (speed != 0) error_speeds.push_back(speed);
}
}
@ -118,5 +114,6 @@ int main(int argc, char *arg[]) {
printf(" %-30s\t%lu bytes/ms\n", "average speed", mean(error_speeds));
printf(" %-30s\t%lu bytes/ms\n", "worst speed", min(error_speeds));
ts_parser_delete(parser);
return 0;
}

View file

@ -21,8 +21,7 @@ static void append_text_to_scope_sequence(ScopeSequence *sequence,
static void append_to_scope_sequence(ScopeSequence *sequence,
ScopeStack *current_scopes,
TSNode node, TSDocument *document,
const std::string &text) {
TSNode node, const std::string &text) {
append_text_to_scope_sequence(
sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size()
);
@ -31,7 +30,7 @@ static void append_to_scope_sequence(ScopeSequence *sequence,
for (size_t i = 0, n = ts_node_child_count(node); i < n; i++) {
TSNode child = ts_node_child(node, i);
append_to_scope_sequence(sequence, current_scopes, child, document, text);
append_to_scope_sequence(sequence, current_scopes, child, text);
}
append_text_to_scope_sequence(
@ -41,11 +40,11 @@ static void append_to_scope_sequence(ScopeSequence *sequence,
current_scopes->pop_back();
}
ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text) {
ScopeSequence build_scope_sequence(TSTree *tree, const std::string &text) {
ScopeSequence sequence;
ScopeStack current_scopes;
TSNode node = ts_document_root_node(document);
append_to_scope_sequence(&sequence, &current_scopes, node, document, text);
TSNode node = ts_tree_root_node(tree);
append_to_scope_sequence(&sequence, &current_scopes, node, text);
return sequence;
}

View file

@ -9,7 +9,7 @@ typedef std::string Scope;
typedef std::vector<Scope> ScopeStack;
typedef std::vector<ScopeStack> ScopeSequence;
ScopeSequence build_scope_sequence(TSDocument *document, const std::string &text);
ScopeSequence build_scope_sequence(TSTree *tree, const std::string &text);
void verify_changed_ranges(const ScopeSequence &old, const ScopeSequence &new_sequence, const std::string &text, TSRange *ranges, size_t range_count);

View file

@ -29,14 +29,14 @@ describe("examples found via fuzzing", [&]() {
for (unsigned i = 0, n = examples.size(); i < n; i++) {
it(("parses example number " + to_string(i)).c_str(), [&]() {
TSDocument *document = ts_document_new();
TSParser *parser = ts_parser_new();
if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) {
ts_document_print_debugging_graphs(document, true);
ts_parser_print_debugging_graphs(parser, true);
}
const string &language_name = examples[i].first;
ts_document_set_language(document, load_real_language(language_name));
ts_parser_set_language(parser, load_real_language(language_name));
string input;
const string &base64_input = examples[i].second;
@ -47,18 +47,12 @@ describe("examples found via fuzzing", [&]() {
base64_input.size()
));
ts_document_set_input_string_with_length(
document,
input.c_str(),
input.size()
);
ts_document_parse(document);
TSNode node = ts_document_root_node(document);
TSTree *tree = ts_parser_parse_string(parser, nullptr, input.c_str(), input.size());
TSNode node = ts_tree_root_node(tree);
assert_consistent_tree_sizes(node);
ts_document_free(document);
ts_tree_delete(tree);
ts_parser_delete(parser);
});
}

View file

@ -12,8 +12,8 @@
#include "helpers/tree_helpers.h"
#include <set>
static void assert_correct_tree_size(TSDocument *document, string content) {
TSNode root_node = ts_document_root_node(document);
static void assert_correct_tree_size(TSTree *tree, string content) {
TSNode root_node = ts_tree_root_node(tree);
AssertThat(ts_node_end_byte(root_node), Equals(content.size()));
assert_consistent_tree_sizes(root_node);
}
@ -33,48 +33,43 @@ vector<string> test_languages({
for (auto &language_name : test_languages) {
describe(("the " + language_name + " language").c_str(), [&]() {
TSDocument *document;
TSParser *parser;
const bool debug_graphs_enabled = getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS");
before_each([&]() {
record_alloc::start();
document = ts_document_new();
ts_document_set_language(document, load_real_language(language_name));
parser = ts_parser_new();
ts_parser_set_language(parser, load_real_language(language_name));
// ts_document_set_logger(document, stderr_logger_new(true));
// ts_parser_set_logger(parser, stderr_logger_new(true));
if (debug_graphs_enabled) {
ts_document_print_debugging_graphs(document, true);
ts_parser_print_debugging_graphs(parser, true);
}
});
after_each([&]() {
ts_document_free(document);
ts_parser_delete(parser);
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
for (auto &entry : read_real_language_corpus(language_name)) {
SpyInput *input;
auto it_handles_edit_sequence = [&](string name, std::function<void()> edit_sequence){
it(("parses " + entry.description + ": " + name).c_str(), [&]() {
input = new SpyInput(entry.input, 3);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
ts_document_set_input(document, input->input());
edit_sequence();
it(("parses " + entry.description + ": initial parse").c_str(), [&]() {
input = new SpyInput(entry.input, 3);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
TSNode root_node = ts_document_root_node(document);
const char *node_string = ts_node_string(root_node);
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(entry.tree_string));
TSTree *tree = ts_parser_parse(parser, nullptr, input->input());
assert_correct_tree_size(tree, input->content);
assert_correct_tree_size(document, input->content);
delete input;
});
};
TSNode root_node = ts_tree_root_node(tree);
const char *node_string = ts_node_string(root_node);
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(entry.tree_string));
it_handles_edit_sequence("initial parse", [&]() {
ts_document_parse(document);
ts_tree_delete(tree);
delete input;
});
set<pair<size_t, size_t>> deletions;
@ -86,54 +81,88 @@ for (auto &language_name : test_languages) {
string inserted_text = random_words(random_unsigned(4) + 1);
if (insertions.insert({edit_position, inserted_text}).second) {
string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
ts_document_edit(document, input->replace(edit_position, 0, inserted_text));
ts_document_parse(document);
assert_correct_tree_size(document, input->content);
it(("parses " + entry.description +
": repairing an insertion of \"" + inserted_text + "\"" +
" at " + to_string(edit_position)).c_str(), [&]() {
input = new SpyInput(entry.input, 3);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
ts_document_edit(document, input->undo());
assert_correct_tree_size(document, input->content);
input->replace(edit_position, 0, inserted_text);
TSTree *tree = ts_parser_parse(parser, nullptr, input->input());
assert_correct_tree_size(tree, input->content);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
TSRange *ranges;
TSInputEdit edit = input->undo();
ts_tree_edit(tree, &edit);
assert_correct_tree_size(tree, input->content);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
TSTree *new_tree = ts_parser_parse(parser, tree, input->input());
assert_correct_tree_size(new_tree, input->content);
uint32_t range_count;
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
assert_correct_tree_size(document, input->content);
TSRange *ranges = ts_tree_get_changed_ranges(tree, new_tree, &range_count);
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count);
ScopeSequence old_scope_sequence = build_scope_sequence(tree, input->content);
ScopeSequence new_scope_sequence = build_scope_sequence(new_tree, input->content);
verify_changed_ranges(
old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count
);
ts_free(ranges);
TSNode root_node = ts_tree_root_node(new_tree);
const char *node_string = ts_node_string(root_node);
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(entry.tree_string));
ts_tree_delete(tree);
ts_tree_delete(new_tree);
delete input;
});
}
if (deletions.insert({edit_position, deletion_size}).second) {
string desription = to_string(edit_position) + "-" + to_string(edit_position + deletion_size);
it_handles_edit_sequence("repairing a deletion of " + desription, [&]() {
ts_document_edit(document, input->replace(edit_position, deletion_size, ""));
ts_document_parse(document);
assert_correct_tree_size(document, input->content);
it(("parses " + entry.description +
": repairing a deletion of " +
to_string(edit_position) + "-" + to_string(edit_position + deletion_size)).c_str(), [&]() {
input = new SpyInput(entry.input, 3);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
ts_document_edit(document, input->undo());
assert_correct_tree_size(document, input->content);
input->replace(edit_position, deletion_size, "");
TSTree *tree = ts_parser_parse(parser, nullptr, input->input());
assert_correct_tree_size(tree, input->content);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
TSRange *ranges;
TSInputEdit edit = input->undo();
ts_tree_edit(tree, &edit);
assert_correct_tree_size(tree, input->content);
if (debug_graphs_enabled) printf("%s\n\n", input->content.c_str());
TSTree *new_tree = ts_parser_parse(parser, tree, input->input());
assert_correct_tree_size(new_tree, input->content);
uint32_t range_count;
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
assert_correct_tree_size(document, input->content);
TSRange *ranges = ts_tree_get_changed_ranges(tree, new_tree, &range_count);
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count);
ScopeSequence old_scope_sequence = build_scope_sequence(tree, input->content);
ScopeSequence new_scope_sequence = build_scope_sequence(new_tree, input->content);
verify_changed_ranges(
old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count
);
ts_free(ranges);
TSNode root_node = ts_tree_root_node(new_tree);
const char *node_string = ts_node_string(root_node);
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(entry.tree_string));
ts_tree_delete(tree);
ts_tree_delete(new_tree);
delete input;
});
}
}

View file

@ -52,26 +52,26 @@ for (auto &language_name : test_languages) {
);
}
TSDocument *document = ts_document_new();
ts_document_set_language(document, language);
ts_document_set_input_string_with_length(document, entry.input.c_str(), entry.input.size());
TSParser *parser = ts_parser_new();
ts_parser_set_language(parser, language);
// ts_document_print_debugging_graphs(document, true);
if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) {
ts_document_print_debugging_graphs(document, true);
ts_parser_print_debugging_graphs(parser, true);
}
ts_document_parse(document);
TSTree *tree = ts_parser_parse_string(parser, nullptr, entry.input.c_str(), entry.input.size());
TSNode root_node = ts_document_root_node(document);
TSNode root_node = ts_tree_root_node(tree);
AssertThat(ts_node_end_byte(root_node), Equals(entry.input.size()));
assert_consistent_tree_sizes(root_node);
const char *node_string = ts_node_string(root_node);
string result(node_string);
ts_free((void *)node_string);
ts_document_free(document);
AssertThat(result, Equals(entry.tree_string));
ts_tree_delete(tree);
ts_parser_delete(parser);
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
}

View file

@ -1,490 +0,0 @@
#include "test_helper.h"
#include "runtime/alloc.h"
#include "helpers/record_alloc.h"
#include "helpers/stream_methods.h"
#include "helpers/tree_helpers.h"
#include "helpers/point_helpers.h"
#include "helpers/spy_logger.h"
#include "helpers/stderr_logger.h"
#include "helpers/spy_input.h"
#include "helpers/load_language.h"
// Test helper: builds a TSPoint from a (row, column) pair given as size_t,
// explicitly narrowing each coordinate to uint32_t.
TSPoint point(size_t row, size_t column) {
  const uint32_t narrowed_row = static_cast<uint32_t>(row);
  const uint32_t narrowed_column = static_cast<uint32_t>(column);
  return TSPoint{narrowed_row, narrowed_column};
}
START_TEST
describe("Document", [&]() {
// Shared fixture state for every spec below: the document under test and a
// scratch node that individual specs assign after parsing.
TSDocument *document;
TSNode root;
// Before each spec: begin allocation recording and create a fresh document.
// Debugging graphs are enabled only when TREE_SITTER_ENABLE_DEBUG_GRAPHS is
// present in the environment.
before_each([&]() {
record_alloc::start();
document = ts_document_new();
if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) {
ts_document_print_debugging_graphs(document, true);
}
});
// After each spec: free the document, stop recording, and assert that no
// recorded allocation is still outstanding.
after_each([&]() {
ts_document_free(document);
record_alloc::stop();
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
// Renders `node` via ts_node_string and compares the text with `expected`.
// The C string is copied into a `string` and released with ts_free *before*
// the assertion runs, so the buffer is freed whatever the comparison yields.
auto assert_node_string_equals = [&](TSNode node, const string &expected) {
char *str = ts_node_string(node);
string actual(str);
ts_free(str);
AssertThat(actual, Equals(expected));
};
// Specs for replacing a document's input source after an initial parse.
describe("set_input(input)", [&]() {
SpyInput *spy_input;
// Parse `{"key": [1, 2]}` from a plain string first and remember its root.
// `spy_input` holds different text (`null` instead of `1`) and is only
// attached to the document by the individual specs.
// NOTE(review): the second SpyInput constructor argument (3) is presumably
// the number of characters served per read — confirm against SpyInput.
before_each([&]() {
spy_input = new SpyInput("{\"key\": [null, 2]}", 3);
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, "{\"key\": [1, 2]}");
ts_document_parse(document);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(value (object (pair (string) (array (number) (number)))))");
});
after_each([&]() {
delete spy_input;
});
// Feeds the raw bytes of a char16_t literal through the spy input with the
// UTF16 encoding flag set, then re-parses from scratch.
it("handles both UTF8 and UTF16 encodings", [&]() {
const char16_t content[] = u"[true, false]";
// sizeof(content) counts the terminating NUL code unit as well, so the
// copied buffer contains 14 char16_t units, not just the 13 visible ones.
spy_input->content = string((const char *)content, sizeof(content));
spy_input->encoding = TSInputEncodingUTF16;
ts_document_set_input(document, spy_input->input());
ts_document_invalidate(document);
ts_document_parse(document);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(value (array (true) (false)))");
});
// A single byte cannot form a complete UTF-16 code unit. The spec contains
// no assertion of its own: it only requires that the parse call returns
// (the shared after_each still checks for leaked allocations).
it("handles truncated UTF16 data", [&]() {
const char content[1] = { '\0' };
spy_input->content = string(content, sizeof(content));
spy_input->encoding = TSInputEncodingUTF16;
ts_document_set_input(document, spy_input->input());
ts_document_invalidate(document);
ts_document_parse(document);
});
// Same UTF-16 buffer as above; the expected end column (28) equals
// sizeof(content) in bytes with a 2-byte char16_t, i.e. columns are counted
// in bytes rather than in characters.
it("measures columns in bytes", [&]() {
const char16_t content[] = u"[true, false]";
spy_input->content = string((const char *)content, sizeof(content));
spy_input->encoding = TSInputEncodingUTF16;
TSInput input = spy_input->input();
ts_document_set_input(document, input);
ts_document_invalidate(document);
ts_document_parse(document);
root = ts_document_root_node(document);
AssertThat(ts_node_end_point(root), Equals<TSPoint>({0, 28}));
});
// ts_document_input must hand back the same payload and callbacks that were
// passed to ts_document_set_input.
it("allows the input to be retrieved later", [&]() {
ts_document_set_input(document, spy_input->input());
AssertThat(ts_document_input(document).payload, Equals<void *>(spy_input));
AssertThat(ts_document_input(document).read, Equals(spy_input->input().read));
AssertThat(ts_document_input(document).seek, Equals(spy_input->input().seek));
});
// Swapping the input alone must leave the existing tree intact: same root
// node, no change flag, and nothing read from the new input yet.
it("does not assume that the document's text has changed", [&]() {
ts_document_set_input(document, spy_input->input());
AssertThat(ts_document_root_node(document), Equals<TSNode>(root));
AssertThat(ts_node_has_changes(root), IsFalse());
AssertThat(spy_input->strings_read(), IsEmpty());
});
// After an explicit edit is recorded, the next parse reads from the new
// input, and only the text recorded by the spy (" [null, 2") is requested.
it("reads text from the new input for future parses", [&]() {
ts_document_set_input(document, spy_input->input());
// Insert 'null', delete '1'.
TSInputEdit edit = {};
// The edit starts right after `{"key": [` on row 0, so the byte offset and
// the column are the same value.
edit.start_point.column = edit.start_byte = strlen("{\"key\": [");
edit.extent_added.column = edit.bytes_added = 4;
edit.extent_removed.column = edit.bytes_removed = 1;
ts_document_edit(document, edit);
ts_document_parse(document);
TSNode new_root = ts_document_root_node(document);
assert_node_string_equals(
new_root,
"(value (object (pair (string) (array (null) (number)))))");
AssertThat(spy_input->strings_read(), Equals(vector<string>({" [null, 2" })));
});
// `content` is a one-element array with no NUL terminator, so the explicit
// length (1) is what delimits the input.
it("allows setting input string with length", [&]() {
const char content[] = { '1' };
ts_document_set_input_string_with_length(document, content, 1);
ts_document_parse(document);
TSNode new_root = ts_document_root_node(document);
AssertThat(ts_node_end_byte(new_root), Equals<size_t>(1));
assert_node_string_equals(
new_root,
"(value (number))");
});
// First parse an empty string (expected to produce a zero-length ERROR
// root), then switch to "1" and check the new text is parsed correctly.
it("reads from the new input correctly when the old input was blank", [&]() {
ts_document_set_input_string(document, "");
ts_document_parse(document);
TSNode new_root = ts_document_root_node(document);
AssertThat(ts_node_end_byte(new_root), Equals<size_t>(0));
assert_node_string_equals(
new_root,
"(ERROR)");
ts_document_set_input_string(document, "1");
ts_document_parse(document);
new_root = ts_document_root_node(document);
AssertThat(ts_node_end_byte(new_root), Equals<size_t>(1));
assert_node_string_equals(
new_root,
"(value (number))");
});
});
// Specs for assigning (and re-assigning) the language used by a document.
describe("set_language(language)", [&]() {
// The input text is installed before any language is set; each spec then
// chooses the language itself.
before_each([&]() {
ts_document_set_input_string(document, "{\"key\": [1, 2]}\n");
});
it("uses the given language for future parses", [&]() {
ts_document_set_language(document, load_real_language("json"));
ts_document_parse(document);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(value (object (pair (string) (array (number) (number)))))");
});
// Switching from json to javascript must drop the existing tree (the root
// node's subtree pointer becomes null) until the next parse, which then
// yields a javascript tree for the same text.
it("clears out any previous tree", [&]() {
ts_document_set_language(document, load_real_language("json"));
ts_document_parse(document);
ts_document_set_language(document, load_real_language("javascript"));
AssertThat(ts_document_root_node(document).subtree, Equals<void *>(nullptr));
ts_document_parse(document);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(program (expression_statement "
"(object (pair (string) (array (number) (number))))))");
});
// Works on a local copy of the json language struct so the version field can
// be bumped without touching the loaded language; the document must then
// refuse it and report no language.
it("does not allow setting a language with a different version number", [&]() {
TSLanguage language = *load_real_language("json");
AssertThat(ts_language_version(&language), Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));
language.version++;
AssertThat(ts_language_version(&language), !Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));
ts_document_set_language(document, &language);
AssertThat(ts_document_language(document), Equals<const TSLanguage *>(nullptr));
});
});
// Specs for attaching, retrieving and detaching a parse logger.
describe("set_logger(TSLogger)", [&]() {
SpyLogger *logger;
// Each spec gets a fresh SpyLogger and a json document holding "[1, 2]";
// the logger is not attached here, the specs do that themselves.
before_each([&]() {
logger = new SpyLogger();
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, "[1, 2]");
});
after_each([&]() {
delete logger;
});
// Checks that a sample of expected messages appears among those collected
// in logger->messages during one parse.
it("calls the debugger with a message for each parse action", [&]() {
ts_document_set_logger(document, logger->logger());
ts_document_parse(document);
AssertThat(logger->messages, Contains("new_parse"));
AssertThat(logger->messages, Contains("skip character:' '"));
AssertThat(logger->messages, Contains("consume character:'['"));
AssertThat(logger->messages, Contains("consume character:'1'"));
AssertThat(logger->messages, Contains("reduce sym:array, child_count:4"));
AssertThat(logger->messages, Contains("accept"));
});
// The payload returned by ts_document_logger must be the SpyLogger itself.
it("allows the debugger to be retrieved later", [&]() {
ts_document_set_logger(document, logger->logger());
AssertThat(ts_document_logger(document).payload, Equals(logger));
});
describe("disabling debugging", [&]() {
// Attach the logger and then immediately replace it with an all-NULL
// TSLogger, which is how logging is switched off in this spec.
before_each([&]() {
ts_document_set_logger(document, logger->logger());
ts_document_set_logger(document, {NULL, NULL});
});
it("does not call the debugger any more", [&]() {
ts_document_parse(document);
AssertThat(logger->messages, IsEmpty());
});
});
});
// Specs for the changed ranges reported when re-parsing after an edit.
describe("parse_and_get_changed_ranges()", [&]() {
SpyInput *input;
// Every spec starts from a parsed javascript document containing
// `{a: null};` followed by a newline, served through a SpyInput.
before_each([&]() {
ts_document_set_language(document, load_real_language("javascript"));
input = new SpyInput("{a: null};\n", 3);
ts_document_set_input(document, input->input());
ts_document_parse(document);
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object (pair (property_identifier) (null)))))");
});
after_each([&]() {
delete input;
});
// Helper: `callback` mutates the SpyInput and returns the matching
// TSInputEdit; the edit is applied to the document, the document is
// re-parsed, and the reported ranges are copied into a vector so the
// C array can be released with ts_free before returning.
// NOTE(review): `ranges` is passed uninitialized and later handed to
// ts_free — this assumes the parse call always assigns it; confirm.
auto get_invalidated_ranges_for_edit = [&](std::function<TSInputEdit()> callback) -> vector<TSRange> {
TSInputEdit edit = callback();
ts_document_edit(document, edit);
TSRange *ranges;
uint32_t range_count = 0;
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
vector<TSRange> result;
for (size_t i = 0; i < range_count; i++) {
result.push_back(ranges[i]);
}
ts_free(ranges);
return result;
};
it("reports changes when one token has been updated", [&]() {
// Replace `null` with `nothing`
// NOTE(review): if replace() takes (position, removed_length, text), as its
// other uses in this file suggest, removing one character at "ull" leaves
// `nothingll` rather than `nothing` — confirm the intended text.
auto ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(input->content.find("ull"), 1, "othing");
});
// All content is on row 0, so string offsets double as column numbers.
AssertThat(ranges, Equals(vector<TSRange>({
TSRange{
point(0, input->content.find("nothing")),
point(0, input->content.find("}"))
},
})));
// Replace `nothing` with `null` again
ranges = get_invalidated_ranges_for_edit([&]() {
return input->undo();
});
AssertThat(ranges, Equals(vector<TSRange>({
TSRange{
point(0, input->content.find("null")),
point(0, input->content.find("}"))
},
})));
});
it("reports no changes when leading whitespace has changed (regression)", [&]() {
// Raise the spy input's chunk size for this spec (the whole text is far
// shorter than 80 characters).
input->chars_per_chunk = 80;
// Insert leading whitespace
auto ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(0, 0, "\n");
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object (pair (property_identifier) (null)))))");
AssertThat(ranges, Equals(vector<TSRange>({})));
// Remove leading whitespace
ranges = get_invalidated_ranges_for_edit([&]() {
return input->undo();
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object (pair (property_identifier) (null)))))");
AssertThat(ranges, Equals(vector<TSRange>({})));
// Insert leading whitespace again
ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(0, 0, "\n");
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object (pair (property_identifier) (null)))))");
AssertThat(ranges, Equals(vector<TSRange>({})));
});
it("reports changes when tokens have been appended", [&]() {
// Add a second key-value pair
auto ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(input->content.find("}"), 0, ", b: false");
});
AssertThat(ranges, Equals(vector<TSRange>({
TSRange{
point(0, input->content.find(",")),
point(0, input->content.find("}"))
},
})));
// Add a third key-value pair in between the first two
ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(input->content.find(", b"), 0, ", c: 1");
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object "
"(pair (property_identifier) (null)) "
"(pair (property_identifier) (number)) "
"(pair (property_identifier) (false)))))");
AssertThat(ranges, Equals(vector<TSRange>({
TSRange{
point(0, input->content.find(", c")),
point(0, input->content.find(", b"))
},
})));
// Delete the middle pair.
// Both deletions below are expected to report no changed ranges at all.
ranges = get_invalidated_ranges_for_edit([&]() {
return input->undo();
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object "
"(pair (property_identifier) (null)) "
"(pair (property_identifier) (false)))))");
AssertThat(ranges, IsEmpty());
// Delete the second pair.
ranges = get_invalidated_ranges_for_edit([&]() {
return input->undo();
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object "
"(pair (property_identifier) (null)))))");
AssertThat(ranges, IsEmpty());
});
it("reports changes when trees have been wrapped", [&]() {
// Wrap the object in an assignment expression.
// (The inserted text is `b === `, and the expected tree below shows the
// pair's value becoming a binary_expression.)
auto ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(input->content.find("null"), 0, "b === ");
});
assert_node_string_equals(
ts_document_root_node(document),
"(program (expression_statement (object "
"(pair (property_identifier) (binary_expression (identifier) (null))))))");
AssertThat(ranges, Equals(vector<TSRange>({
TSRange{
point(0, input->content.find("b ===")),
point(0, input->content.find("}"))
},
})));
});
});
// Specs for the halt_on_error flag of TSParseOptions. Each spec parses the
// same json text with the flag off and/or on and compares the resulting trees.
describe("parse_with_options(options)", [&]() {
it("halts as soon as an error is found if the halt_on_error flag is set", [&]() {
string input_string = "[1, null, error, 3]";
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, input_string.c_str());
// Baseline: without halt_on_error the bad token becomes an ERROR node
// nested inside an otherwise complete array.
TSParseOptions options = {};
options.changed_ranges = nullptr;
options.halt_on_error = false;
ts_document_parse_with_options(document, options);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(value (array (number) (null) (ERROR (UNEXPECTED 'e')) (number)))");
// Invalidate so the second parse starts from scratch; with the flag set the
// root itself is an ERROR holding only the nodes seen before the failure,
// yet it must still span the whole input.
ts_document_invalidate(document);
options.halt_on_error = true;
ts_document_parse_with_options(document, options);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(ERROR (number) (null))");
AssertThat(ts_node_end_byte(root), Equals(input_string.size()));
});
it("does not insert missing tokens if the halt_on_error flag is set", [&]() {
// The closing bracket is absent from this input.
string input_string = "[1, null, 3";
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, input_string.c_str());
// Baseline: without the flag the tree contains a MISSING node.
TSParseOptions options = {};
options.changed_ranges = nullptr;
options.halt_on_error = false;
ts_document_parse_with_options(document, options);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(value (array (number) (null) (number) (MISSING)))");
// With the flag set, no MISSING node appears; the root is an ERROR that
// still covers the full input.
ts_document_invalidate(document);
options.halt_on_error = true;
ts_document_parse_with_options(document, options);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(ERROR (number) (null) (number))");
AssertThat(ts_node_end_byte(root), Equals(input_string.size()));
});
// Valid input must produce the ordinary tree even when the flag is set.
it("can parse valid code with the halt_on_error flag set", [&]() {
string input_string = "[1, null, 3]";
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, input_string.c_str());
TSParseOptions options = {};
options.changed_ranges = nullptr;
options.halt_on_error = true;
ts_document_parse_with_options(document, options);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(value (array (number) (null) (number)))");
});
});
});
END_TEST

View file

@ -28,13 +28,12 @@ describe("Language", []() {
}
})JSON");
TSDocument *document = ts_document_new();
TSParser *parser = ts_parser_new();
const TSLanguage *language = load_test_language("aliased_rules", compile_result);
ts_document_set_language(document, language);
ts_document_set_input_string(document, "b");
ts_document_parse(document);
ts_parser_set_language(parser, language);
TSTree *tree = ts_parser_parse_string(parser, nullptr, "b", 1);
TSNode root_node = ts_document_root_node(document);
TSNode root_node = ts_tree_root_node(tree);
char *string = ts_node_string(root_node);
AssertThat(string, Equals("(a (c))"));
@ -47,7 +46,8 @@ describe("Language", []() {
AssertThat(ts_language_symbol_type(language, aliased_symbol), Equals(TSSymbolTypeRegular));
ts_free(string);
ts_document_free(document);
ts_parser_delete(parser);
ts_tree_delete(tree);
});
});
});

View file

@ -62,21 +62,22 @@ string grammar_with_aliases_and_extras = R"JSON({
})JSON";
describe("Node", [&]() {
TSDocument *document;
TSParser *parser;
TSTree *tree;
TSNode root_node;
before_each([&]() {
record_alloc::start();
document = ts_document_new();
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, json_string.c_str());
ts_document_parse(document);
root_node = ts_node_child(ts_document_root_node(document), 0);
parser = ts_parser_new();
ts_parser_set_language(parser, load_real_language("json"));
tree = ts_parser_parse_string(parser, nullptr, json_string.c_str(), json_string.size());
root_node = ts_node_child(ts_tree_root_node(tree), 0);
});
after_each([&]() {
ts_document_free(document);
ts_parser_delete(parser);
ts_tree_delete(tree);
record_alloc::stop();
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
@ -157,16 +158,17 @@ describe("Node", [&]() {
AssertThat(ts_node_parent(number_node), Equals(root_node));
AssertThat(ts_node_parent(false_node), Equals(root_node));
AssertThat(ts_node_parent(object_node), Equals(root_node));
AssertThat(ts_node_parent(ts_document_root_node(document)).subtree, Equals<void *>(nullptr));
AssertThat(ts_node_parent(ts_tree_root_node(tree)).subtree, Equals<void *>(nullptr));
});
it("works correctly when the node contains aliased children and extras", [&]() {
TSCompileResult compile_result = ts_compile_grammar(grammar_with_aliases_and_extras.c_str());
const TSLanguage *language = load_test_language("aliases_and_extras", compile_result);
ts_document_set_language(document, language);
ts_document_set_input_string(document, "b ... b ... b");
ts_document_parse(document);
root_node = ts_document_root_node(document);
ts_parser_set_language(parser, language);
ts_tree_delete(tree);
tree = ts_parser_parse_string(parser, nullptr, "b ... b ... b", 13);
root_node = ts_tree_root_node(tree);
char *node_string = ts_node_string(root_node);
AssertThat(node_string, Equals("(a (b) (comment) (B) (comment) (b))"));
@ -179,7 +181,10 @@ describe("Node", [&]() {
AssertThat(ts_node_type(ts_node_named_child(root_node, 3)), Equals("comment"));
AssertThat(ts_node_type(ts_node_named_child(root_node, 4)), Equals("b"));
AssertThat(ts_node_symbol(ts_node_named_child(root_node, 0)), !Equals(ts_node_symbol(ts_node_named_child(root_node, 2))));
AssertThat(
ts_node_symbol(ts_node_named_child(root_node, 0)),
!Equals(ts_node_symbol(ts_node_named_child(root_node, 2)))
);
});
});
@ -323,7 +328,7 @@ describe("Node", [&]() {
AssertThat(ts_node_parent(child5), Equals(root_node));
AssertThat(ts_node_parent(child6), Equals(root_node));
AssertThat(ts_node_parent(child7), Equals(root_node));
AssertThat(ts_node_parent(ts_document_root_node(document)).subtree, Equals<void *>(nullptr));
AssertThat(ts_node_parent(ts_tree_root_node(tree)).subtree, Equals<void *>(nullptr));
});
});
@ -483,9 +488,10 @@ describe("Node", [&]() {
it("works in the presence of multi-byte characters", [&]() {
string input_string = "[\"αβγδ\", \"αβγδ\"]";
ts_document_set_input_string(document, input_string.c_str());
ts_document_parse(document);
TSNode root_node = ts_document_root_node(document);
ts_tree_delete(tree);
tree = ts_parser_parse_string(parser, nullptr, input_string.c_str(), input_string.size());
TSNode root_node = ts_tree_root_node(tree);
uint32_t comma_position = input_string.find(",");
TSNode node1 = ts_node_descendant_for_byte_range(root_node, comma_position, comma_position);
@ -518,23 +524,23 @@ describe("Node", [&]() {
});
describe("TreeCursor", [&]() {
TSDocument *document;
TSParser *parser;
TSTree *tree;
TSTreeCursor *cursor;
before_each([&]() {
record_alloc::start();
document = ts_document_new();
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, json_string.c_str());
ts_document_parse(document);
cursor = ts_document_tree_cursor(document);
parser = ts_parser_new();
ts_parser_set_language(parser, load_real_language("json"));
tree = ts_parser_parse_string(parser, nullptr, json_string.c_str(), json_string.size());
cursor = ts_tree_cursor_new(tree);
});
after_each([&]() {
ts_tree_delete(tree);
ts_tree_cursor_delete(cursor);
ts_document_free(document);
ts_parser_delete(parser);
record_alloc::stop();
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());

View file

@ -1,17 +1,20 @@
#include "test_helper.h"
#include "runtime/alloc.h"
#include "runtime/language.h"
#include "helpers/record_alloc.h"
#include "helpers/spy_input.h"
#include "helpers/load_language.h"
#include "helpers/record_alloc.h"
#include "helpers/point_helpers.h"
#include "helpers/spy_logger.h"
#include "helpers/stderr_logger.h"
#include "helpers/dedent.h"
START_TEST
describe("Parser", [&]() {
TSDocument *document;
TSParser *parser;
TSTree *tree;
SpyInput *input;
TSNode root;
size_t chunk_size;
@ -21,14 +24,16 @@ describe("Parser", [&]() {
chunk_size = 3;
input = nullptr;
document = ts_document_new();
tree = nullptr;
parser = ts_parser_new();
if (getenv("TREE_SITTER_ENABLE_DEBUG_GRAPHS")) {
ts_document_print_debugging_graphs(document, true);
ts_parser_print_debugging_graphs(parser, true);
}
});
after_each([&]() {
if (document) ts_document_free(document);
if (parser) ts_parser_delete(parser);
if (tree) ts_tree_delete(tree);
if (input) delete input;
record_alloc::stop();
@ -37,10 +42,8 @@ describe("Parser", [&]() {
auto set_text = [&](string text) {
input = new SpyInput(text, chunk_size);
ts_document_set_input(document, input->input());
ts_document_parse(document);
root = ts_document_root_node(document);
tree = ts_parser_parse(parser, nullptr, input->input());
root = ts_tree_root_node(tree);
AssertThat(ts_node_end_byte(root), Equals(text.size()));
input->clear();
};
@ -48,10 +51,13 @@ describe("Parser", [&]() {
auto replace_text = [&](size_t position, size_t length, string new_text) {
size_t prev_size = ts_node_end_byte(root);
ts_document_edit(document, input->replace(position, length, new_text));
ts_document_parse(document);
TSInputEdit edit = input->replace(position, length, new_text);
ts_tree_edit(tree, &edit);
TSTree *new_tree = ts_parser_parse(parser, tree, input->input());
ts_tree_delete(tree);
tree = new_tree;
root = ts_document_root_node(document);
root = ts_tree_root_node(tree);
size_t new_size = ts_node_end_byte(root);
AssertThat(new_size, Equals(prev_size - length + new_text.size()));
};
@ -65,12 +71,15 @@ describe("Parser", [&]() {
};
auto undo = [&]() {
ts_document_edit(document, input->undo());
ts_document_parse(document);
TSInputEdit edit = input->undo();
ts_tree_edit(tree, &edit);
TSTree *new_tree = ts_parser_parse(parser, tree, input->input());
ts_tree_delete(tree);
tree = new_tree;
};
auto assert_root_node = [&](const string &expected) {
TSNode node = ts_document_root_node(document);
TSNode node = ts_tree_root_node(tree);
char *node_string = ts_node_string(node);
string actual(node_string);
ts_free(node_string);
@ -86,11 +95,9 @@ describe("Parser", [&]() {
describe("handling errors", [&]() {
describe("when there is an invalid substring right before a valid token", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, load_real_language("json"));
ts_parser_set_language(parser, load_real_language("json"));
set_text(" [123, @@@@@, true]");
assert_root_node(
"(value (array (number) (ERROR (UNEXPECTED '@')) (true)))");
assert_root_node("(value (array (number) (ERROR (UNEXPECTED '@')) (true)))");
TSNode error = ts_node_named_child(ts_node_child(root, 0), 1);
AssertThat(ts_node_type(error), Equals("ERROR"));
@ -111,7 +118,7 @@ describe("Parser", [&]() {
describe("when there is an unexpected string in the middle of a token", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, load_real_language("json"));
ts_parser_set_language(parser, load_real_language("json"));
set_text(" [123, faaaaalse, true]");
assert_root_node(
@ -138,11 +145,10 @@ describe("Parser", [&]() {
describe("when there is one unexpected token between two valid tokens", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, load_real_language("json"));
ts_parser_set_language(parser, load_real_language("json"));
set_text(" [123, true false, true]");
assert_root_node(
"(value (array (number) (true) (ERROR (false)) (true)))");
assert_root_node("(value (array (number) (true) (ERROR (false)) (true)))");
TSNode error = ts_node_named_child(ts_node_child(root, 0), 2);
AssertThat(ts_node_type(error), Equals("ERROR"));
@ -157,26 +163,23 @@ describe("Parser", [&]() {
describe("when there is an unexpected string at the end of a token", [&]() {
it("computes the error's size and position correctly", [&]() {
ts_document_set_language(document, load_real_language("json"));
ts_parser_set_language(parser, load_real_language("json"));
set_text(" [123, \"hi\n, true]");
assert_root_node(
"(value (array (number) (ERROR (UNEXPECTED '\\n')) (true)))");
assert_root_node("(value (array (number) (ERROR (UNEXPECTED '\\n')) (true)))");
});
});
describe("when there is an unterminated error", [&]() {
it("maintains a consistent tree", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("a; ' this string never ends");
assert_root_node(
"(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))");
assert_root_node("(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))");
});
});
describe("when there are extra tokens at the end of the viable prefix", [&]() {
it("does not include them in the error node", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text(
"var x;\n"
"\n"
@ -196,20 +199,64 @@ describe("Parser", [&]() {
char *string = (char *)malloc(1);
string[0] = '\xdf';
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string_with_length(document, string, 1);
ts_document_parse(document);
ts_parser_set_language(parser, load_real_language("json"));
tree = ts_parser_parse_string(parser, nullptr, string, 1);
free(string);
assert_root_node("(ERROR (UNEXPECTED INVALID))");
});
describe("when halt_on_error is set to true", [&]() {
  // Parses `source` with no previous tree, using whatever language and flags
  // the parser currently has, and points `root` at the resulting tree's root.
  auto parse_from_scratch = [&](const string &source) {
    tree = ts_parser_parse_string(parser, nullptr, source.c_str(), source.size());
    root = ts_tree_root_node(tree);
  };

  it("halts as soon as an error is found if the halt_on_error flag is set", [&]() {
    const string source = "[1, null, error, 3]";
    ts_parser_set_language(parser, load_real_language("json"));

    // Flag off: the error is isolated and the trailing number is still parsed.
    parse_from_scratch(source);
    assert_root_node("(value (array (number) (null) (ERROR (UNEXPECTED 'e')) (number)))");

    // Flag on: the root becomes a single ERROR node that still covers the
    // whole input.
    ts_parser_halt_on_error(parser, true);
    ts_tree_delete(tree);
    parse_from_scratch(source);
    assert_root_node("(ERROR (number) (null))");
    AssertThat(ts_node_end_byte(root), Equals(source.size()));
  });

  it("does not insert missing tokens if the halt_on_error flag is set", [&]() {
    const string source = "[1, null, 3";
    ts_parser_set_language(parser, load_real_language("json"));

    // Flag off: the tree contains a MISSING node after the last number.
    parse_from_scratch(source);
    assert_root_node("(value (array (number) (null) (number) (MISSING)))");

    // Flag on: no MISSING node appears; the root is an ERROR spanning the input.
    ts_parser_halt_on_error(parser, true);
    ts_tree_delete(tree);
    parse_from_scratch(source);
    assert_root_node("(ERROR (number) (null) (number))");
    AssertThat(ts_node_end_byte(root), Equals(source.size()));
  });

  it("can parse valid code with the halt_on_error flag set", [&]() {
    const string source = "[1, null, 3]";
    ts_parser_set_language(parser, load_real_language("json"));
    ts_parser_halt_on_error(parser, true);

    parse_from_scratch(source);
    assert_root_node("(value (array (number) (null) (number)))");
  });
});
});
describe("editing", [&]() {
describe("creating new tokens near the end of the input", [&]() {
it("updates the parse tree and re-reads only the changed portion of the text", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("x * (100 + abc);");
assert_root_node(
@ -242,7 +289,7 @@ describe("Parser", [&]() {
it("updates the parse tree and re-reads only the changed portion of the input", [&]() {
chunk_size = 2;
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("123 + 456 * (10 + x);");
assert_root_node(
@ -268,7 +315,7 @@ describe("Parser", [&]() {
describe("introducing an error", [&]() {
it("gives the error the right size", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("var x = y;");
assert_root_node(
@ -291,7 +338,7 @@ describe("Parser", [&]() {
describe("into the middle of an existing token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("abc * 123;");
assert_root_node(
@ -310,7 +357,7 @@ describe("Parser", [&]() {
describe("at the end of an existing token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("abc * 123;");
assert_root_node(
@ -329,7 +376,7 @@ describe("Parser", [&]() {
describe("inserting text into a node containing a extra token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("123 *\n"
"// a-comment\n"
"abc;");
@ -356,7 +403,7 @@ describe("Parser", [&]() {
describe("when a critical token is removed", [&]() {
it("updates the parse tree, creating an error", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("123 * 456; 789 * 123;");
assert_root_node(
@ -376,7 +423,7 @@ describe("Parser", [&]() {
describe("with external tokens", [&]() {
it("maintains the external scanner's state during incremental parsing", [&]() {
ts_document_set_language(document, load_real_language("python"));
ts_parser_set_language(parser, load_real_language("python"));
string text = dedent(R"PYTHON(
if a:
print b
@ -404,7 +451,7 @@ describe("Parser", [&]() {
});
it("does not try to reuse nodes that are within the edited region", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("{ x: (b.c) };");
assert_root_node(
@ -417,23 +464,12 @@ describe("Parser", [&]() {
"(program (expression_statement (object (pair "
"(property_identifier) (member_expression (identifier) (property_identifier))))))");
});
it("updates the document's parse count", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
AssertThat(ts_document_parse_count(document), Equals<size_t>(0));
set_text("{ x: (b.c) };");
AssertThat(ts_document_parse_count(document), Equals<size_t>(1));
insert_text(strlen("{ x"), "yz");
AssertThat(ts_document_parse_count(document), Equals<size_t>(2));
});
});
describe("lexing", [&]() {
describe("handling tokens containing wildcard patterns (e.g. comments)", [&]() {
it("terminates them at the end of the document", [&]() {
ts_document_set_language(document, load_real_language("javascript"));
it("terminates them at the end of the string", [&]() {
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("x; // this is a comment");
assert_root_node(
@ -448,7 +484,7 @@ describe("Parser", [&]() {
it("recognizes UTF8 characters as single characters", [&]() {
// 'ΩΩΩ — ΔΔ';
ts_document_set_language(document, load_real_language("javascript"));
ts_parser_set_language(parser, load_real_language("javascript"));
set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';");
assert_root_node(
@ -460,14 +496,120 @@ describe("Parser", [&]() {
it("handles non-UTF8 characters", [&]() {
const char *string = "cons\xeb\x00e=ls\x83l6hi');\x0a";
ts_document_set_language(document, load_real_language("javascript"));
ts_document_set_input_string(document, string);
ts_document_parse(document);
TSNode root = ts_document_root_node(document);
ts_parser_set_language(parser, load_real_language("javascript"));
tree = ts_parser_parse_string(parser, nullptr, string, strlen(string));
TSNode root = ts_tree_root_node(tree);
AssertThat(ts_node_end_byte(root), Equals(strlen(string)));
});
});
describe("handling TSInputs", [&]() {
  SpyInput *spy_input;

  before_each([&]() {
    spy_input = new SpyInput("{\"key\": [null, 2]}", 3);
    ts_parser_set_language(parser, load_real_language("json"));
  });

  after_each([&]() {
    delete spy_input;
  });

  // Swaps the spy input's content for the given raw bytes, marks them as
  // UTF16, and parses them with no previous tree.
  auto parse_utf16_bytes = [&](const char *bytes, size_t byte_count) {
    spy_input->content = string(bytes, byte_count);
    spy_input->encoding = TSInputEncodingUTF16;
    tree = ts_parser_parse(parser, nullptr, spy_input->input());
  };

  it("handles UTF16 encodings", [&]() {
    const char16_t content[] = u"[true, false]";
    parse_utf16_bytes((const char *)content, sizeof(content));
    root = ts_tree_root_node(tree);
    assert_root_node("(value (array (true) (false)))");
  });

  it("handles truncated UTF16 data", [&]() {
    // One byte is only half of a UTF16 code unit. Nothing is asserted about
    // the tree; the spec just runs the parse.
    const char content[1] = { '\0' };
    parse_utf16_bytes(content, sizeof(content));
  });

  it("measures columns in bytes", [&]() {
    const char16_t content[] = u"[true, false]";
    parse_utf16_bytes((const char *)content, sizeof(content));
    root = ts_tree_root_node(tree);

    // sizeof(content) covers the 13 characters plus the literal's terminator,
    // two bytes each, so 28 bytes are handed to the parser.
    AssertThat(ts_node_end_point(root), Equals<TSPoint>({0, 28}));
  });
});
describe("set_language(language)", [&]() {
  string source_text = "{\"key\": [1, 2]}\n";

  it("uses the given language for future parses", [&]() {
    ts_parser_set_language(parser, load_real_language("json"));

    tree = ts_parser_parse_string(parser, nullptr, source_text.c_str(), source_text.size());
    root = ts_tree_root_node(tree);

    assert_root_node("(value (object (pair (string) (array (number) (number)))))");
  });

  it("does not allow setting a language with a different version number", [&]() {
    // Work on a copy so that bumping the version does not touch the loaded
    // language object itself.
    TSLanguage mismatched = *load_real_language("json");
    AssertThat(ts_language_version(&mismatched), Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));

    mismatched.version++;
    AssertThat(ts_language_version(&mismatched), !Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));

    // The parser must reject the language and remain without one.
    const bool accepted = ts_parser_set_language(parser, &mismatched);
    AssertThat(accepted, IsFalse());
    AssertThat(ts_parser_language(parser), Equals<const TSLanguage *>(nullptr));
  });
});
describe("set_logger(TSLogger)", [&]() {
  SpyLogger *logger;

  before_each([&]() {
    logger = new SpyLogger();
    ts_parser_set_language(parser, load_real_language("json"));
  });

  after_each([&]() {
    delete logger;
  });

  it("calls the debugger with a message for each parse action", [&]() {
    const string source = "[ 1, 2, 3 ]";
    ts_parser_set_logger(parser, logger->logger());
    tree = ts_parser_parse_string(parser, nullptr, source.c_str(), source.size());

    // One representative message per kind of action the spec checks, in the
    // same order as they are asserted.
    const vector<string> expected_messages = {
      "new_parse",
      "skip character:' '",
      "consume character:'['",
      "consume character:'1'",
      "reduce sym:array, child_count:4",
      "accept",
    };
    for (const string &message : expected_messages) {
      AssertThat(logger->messages, Contains(message));
    }
  });

  it("allows the debugger to be retrieved later", [&]() {
    ts_parser_set_logger(parser, logger->logger());
    AssertThat(ts_parser_logger(parser).payload, Equals(logger));
  });

  describe("disabling debugging", [&]() {
    // Install the spy logger, then immediately replace it with an empty one.
    before_each([&]() {
      ts_parser_set_logger(parser, logger->logger());
      ts_parser_set_logger(parser, {NULL, NULL});
    });

    it("does not call the debugger any more", [&]() {
      const string source = "{}";
      tree = ts_parser_parse_string(parser, nullptr, source.c_str(), source.size());
      AssertThat(logger->messages, IsEmpty());
    });
  });
});
});
END_TEST

View file

@ -76,7 +76,7 @@ describe("Stack", [&]() {
before_each([&]() {
record_alloc::start();
ts_subtree_pool_init(&pool);
pool = ts_subtree_pool_new(10);
stack = ts_stack_new(&pool);
TSLanguage dummy_language;

View file

@ -41,7 +41,7 @@ describe("Subtree", []() {
SubtreePool pool;
before_each([&]() {
ts_subtree_pool_init(&pool);
pool = ts_subtree_pool_new(10);
});
after_each([&]() {

200
test/runtime/tree_test.cc Normal file
View file

@ -0,0 +1,200 @@
#include "test_helper.h"
#include "runtime/alloc.h"
#include "helpers/record_alloc.h"
#include "helpers/stream_methods.h"
#include "helpers/tree_helpers.h"
#include "helpers/point_helpers.h"
#include "helpers/spy_logger.h"
#include "helpers/stderr_logger.h"
#include "helpers/spy_input.h"
#include "helpers/load_language.h"
// Builds a TSPoint from a row and a column so specs can write positions inline.
TSPoint point(uint32_t row, uint32_t column) {
  return TSPoint{row, column};
}
START_TEST
describe("Tree", [&]() {
TSParser *parser;
SpyInput *input;
TSTree *tree;

// Every spec gets its own parser. `input` and `tree` are not set here; the
// nested suites create and destroy them.
before_each([&]() { parser = ts_parser_new(); });

after_each([&]() { ts_parser_delete(parser); });

// Renders the root node of the current `tree` as a string and compares it
// with `expected`. The C string from ts_node_string is copied and freed
// before the comparison runs.
auto assert_root_node = [&](const string &expected) {
  char *rendered = ts_node_string(ts_tree_root_node(tree));
  string actual = rendered;
  ts_free(rendered);
  AssertThat(actual, Equals(expected));
};
describe("get_changed_ranges()", [&]() {
// Each spec starts from a freshly parsed one-line JavaScript program whose
// text is served by a SpyInput.
before_each([&]() {
  const string initial_source = "{a: null};\n";
  const size_t chars_per_chunk = 3;

  ts_parser_set_language(parser, load_real_language("javascript"));
  input = new SpyInput(initial_source, chars_per_chunk);
  tree = ts_parser_parse(parser, nullptr, input->input());

  assert_root_node("(program (expression_statement (object (pair (property_identifier) (null)))))");
});

// Releases the tree and the input created above.
after_each([&]() {
  ts_tree_delete(tree);
  delete input;
});
// Applies the edit returned by `fn` to the current tree, reparses the input
// with the edited tree as the previous tree, and returns the ranges that
// ts_tree_get_changed_ranges reports between the two. The old tree is
// deleted and `tree` is left pointing at the new one.
auto get_changed_ranges_for_edit = [&](function<TSInputEdit()> fn) -> vector<TSRange> {
  TSInputEdit edit = fn();
  ts_tree_edit(tree, &edit);

  TSTree *previous_tree = tree;
  TSTree *reparsed_tree = ts_parser_parse(parser, previous_tree, input->input());

  uint32_t changed_count = 0;
  TSRange *changed = ts_tree_get_changed_ranges(previous_tree, reparsed_tree, &changed_count);

  ts_tree_delete(previous_tree);
  tree = reparsed_tree;

  // Copy the C array into a vector so the raw buffer can be freed right away.
  vector<TSRange> result(changed, changed + changed_count);
  ts_free(changed);
  return result;
};
it("reports changes when one token has been updated", [&]() {
  // Replace `null` with `nothing`: keep the leading `n` and swap the remaining
  // three bytes (`ull`) for `othing`. Removing only one byte here would leave
  // `nothingll` in the text, which is not the edit this spec describes.
  auto ranges = get_changed_ranges_for_edit([&]() {
    return input->replace(input->content.find("ull"), 3, "othing");
  });

  // The changed range runs from the start of the new token to the closing brace.
  AssertThat(ranges, Equals(vector<TSRange>({
    TSRange{
      point(0, input->content.find("nothing")),
      point(0, input->content.find("}"))
    },
  })));

  // Replace `nothing` with `null` again by undoing the edit.
  ranges = get_changed_ranges_for_edit([&]() {
    return input->undo();
  });

  AssertThat(ranges, Equals(vector<TSRange>({
    TSRange{
      point(0, input->content.find("null")),
      point(0, input->content.find("}"))
    },
  })));
});
it("reports no changes when leading whitespace has changed (regression)", [&]() {
  // The tree must look the same after every edit in this spec.
  const string unchanged_tree =
    "(program (expression_statement (object (pair (property_identifier) (null)))))";

  auto insert_leading_newline = [&]() { return input->replace(0, 0, "\n"); };
  auto undo_last_edit = [&]() { return input->undo(); };

  input->chars_per_chunk = 80;

  // Insert leading whitespace
  auto ranges = get_changed_ranges_for_edit(insert_leading_newline);
  assert_root_node(unchanged_tree);
  AssertThat(ranges, IsEmpty());

  // Remove leading whitespace
  ranges = get_changed_ranges_for_edit(undo_last_edit);
  assert_root_node(unchanged_tree);
  AssertThat(ranges, IsEmpty());

  // Insert leading whitespace again
  ranges = get_changed_ranges_for_edit(insert_leading_newline);
  assert_root_node(unchanged_tree);
  AssertThat(ranges, IsEmpty());
});
it("reports changes when tokens have been appended", [&]() {
  auto undo_last_edit = [&]() { return input->undo(); };

  // Add a second key-value pair just before the closing brace.
  auto ranges = get_changed_ranges_for_edit([&]() {
    const size_t closing_brace = input->content.find("}");
    return input->replace(closing_brace, 0, ", b: false");
  });
  const vector<TSRange> after_second_pair = {
    TSRange{point(0, input->content.find(",")), point(0, input->content.find("}"))},
  };
  AssertThat(ranges, Equals(after_second_pair));

  // Add a third key-value pair in between the first two.
  ranges = get_changed_ranges_for_edit([&]() {
    const size_t second_pair = input->content.find(", b");
    return input->replace(second_pair, 0, ", c: 1");
  });
  assert_root_node(
    "(program (expression_statement (object "
      "(pair (property_identifier) (null)) "
      "(pair (property_identifier) (number)) "
      "(pair (property_identifier) (false)))))"
  );
  const vector<TSRange> after_third_pair = {
    TSRange{point(0, input->content.find(", c")), point(0, input->content.find(", b"))},
  };
  AssertThat(ranges, Equals(after_third_pair));

  // Delete the middle pair; no changed ranges are expected for a pure removal.
  ranges = get_changed_ranges_for_edit(undo_last_edit);
  assert_root_node(
    "(program (expression_statement (object "
      "(pair (property_identifier) (null)) "
      "(pair (property_identifier) (false)))))"
  );
  AssertThat(ranges, IsEmpty());

  // Delete the second pair, returning to the original text.
  ranges = get_changed_ranges_for_edit(undo_last_edit);
  assert_root_node(
    "(program (expression_statement (object "
      "(pair (property_identifier) (null)))))"
  );
  AssertThat(ranges, IsEmpty());
});
it("reports changes when trees have been wrapped", [&]() {
  // Insert `b === ` in front of `null`, so the pair's value becomes a binary
  // expression that wraps the original `null` node.
  auto ranges = get_changed_ranges_for_edit([&]() {
    const size_t null_start = input->content.find("null");
    return input->replace(null_start, 0, "b === ");
  });

  assert_root_node(
    "(program (expression_statement (object "
      "(pair (property_identifier) (binary_expression (identifier) (null))))))"
  );

  // The changed range covers the new expression up to the closing brace.
  const vector<TSRange> expected_ranges = {
    TSRange{point(0, input->content.find("b ===")), point(0, input->content.find("}"))},
  };
  AssertThat(ranges, Equals(expected_ranges));
});
});
});
END_TEST

View file

@ -66,12 +66,12 @@
'test/integration/fuzzing-examples.cc',
'test/integration/real_grammars.cc',
'test/integration/test_grammars.cc',
'test/runtime/document_test.cc',
'test/runtime/language_test.cc',
'test/runtime/node_test.cc',
'test/runtime/parser_test.cc',
'test/runtime/stack_test.cc',
'test/runtime/subtree_test.cc',
'test/runtime/tree_test.cc',
'test/tests.cc',
],
'cflags': [