Revert "Remove the separator characters construct"
This reverts commit 5cd07648fd.
The separator characters construct is useful as an optimization. It turns out that
constructing a node for every chunk of whitespace in a document causes a
significant performance regression.
Conflicts:
src/compiler/build_tables/build_lex_table.cc
src/compiler/grammar.cc
src/runtime/parser.c
This commit is contained in:
parent
e941f8c175
commit
545e575508
43 changed files with 9065 additions and 11203 deletions
|
|
@ -23,11 +23,12 @@ static int advance(TSLexer *lexer) {
|
|||
|
||||
static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
|
||||
size_t current_position = ts_lexer_position(lexer);
|
||||
size_t size = current_position - lexer->token_end_position;
|
||||
size_t size = current_position - lexer->token_start_position;
|
||||
size_t padding = lexer->token_start_position - lexer->token_end_position;
|
||||
lexer->token_end_position = current_position;
|
||||
return (symbol == ts_builtin_sym_error)
|
||||
? ts_tree_make_error(size, ts_lexer_lookahead_char(lexer))
|
||||
: ts_tree_make_leaf(symbol, size, is_hidden);
|
||||
? ts_tree_make_error(size, padding, ts_lexer_lookahead_char(lexer))
|
||||
: ts_tree_make_leaf(symbol, size, padding, is_hidden);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -41,6 +42,7 @@ TSLexer ts_lexer_make() {
|
|||
.chunk_start = 0,
|
||||
.chunk_size = 0,
|
||||
.position_in_chunk = 0,
|
||||
.token_start_position = 0,
|
||||
.token_end_position = 0,
|
||||
.advance_fn = advance,
|
||||
.accept_fn = accept, };
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ TSNode *ts_node_make(const TSTree *tree, TSNode *parent, size_t index,
|
|||
}
|
||||
|
||||
TSNode *ts_node_make_root(const TSTree *tree, const char **names) {
|
||||
return ts_node_make(tree, NULL, 0, 0, names);
|
||||
return ts_node_make(tree, NULL, 0, tree->padding, names);
|
||||
}
|
||||
|
||||
void ts_node_retain(TSNode *node) { node->ref_count++; }
|
||||
|
|
|
|||
|
|
@ -36,24 +36,16 @@ static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) {
|
|||
break;
|
||||
|
||||
stack->size--;
|
||||
position -= node->size;
|
||||
|
||||
DEBUG_PARSE("BREAKDOWN %s %u", parser->language->symbol_names[node->symbol],
|
||||
ts_stack_top_state(stack));
|
||||
position -= ts_tree_total_size(node);
|
||||
|
||||
for (size_t i = 0; i < child_count && position < edit->position; i++) {
|
||||
TSTree *child = children[i];
|
||||
TSStateId state = ts_stack_top_state(stack);
|
||||
TSParseAction action = action_for(parser->language, state, child->symbol);
|
||||
TSStateId next_state = (action.type == TSParseActionTypeShift)
|
||||
? action.data.to_state
|
||||
: state;
|
||||
TSStateId next_state =
|
||||
action_for(parser->language, state, child->symbol).data.to_state;
|
||||
ts_stack_push(stack, next_state, child);
|
||||
ts_tree_retain(child);
|
||||
position += child->size;
|
||||
|
||||
DEBUG_PARSE("PUT_BACK %s %u",
|
||||
parser->language->symbol_names[child->symbol], next_state);
|
||||
position += ts_tree_total_size(child);
|
||||
}
|
||||
|
||||
ts_tree_release(node);
|
||||
|
|
@ -84,7 +76,9 @@ static void reduce(TSParser *parser, TSSymbol symbol, size_t child_count) {
|
|||
* The child node count is known ahead of time, but some children
|
||||
* may be ubiquitous tokens, which don't count.
|
||||
*/
|
||||
for (size_t i = 0; i < child_count && child_count < stack->size; i++) {
|
||||
for (size_t i = 0; i < child_count; i++) {
|
||||
if (child_count == stack->size)
|
||||
break;
|
||||
TSTree *child = stack->entries[stack->size - 1 - i].node;
|
||||
if (ts_tree_is_extra(child))
|
||||
child_count++;
|
||||
|
|
@ -127,30 +121,15 @@ static void lex(TSParser *parser, TSStateId lex_state) {
|
|||
static int handle_error(TSParser *parser) {
|
||||
TSTree *error = parser->lookahead;
|
||||
ts_tree_retain(error);
|
||||
size_t last_token_end = parser->lexer.token_end_position;
|
||||
|
||||
for (;;) {
|
||||
|
||||
/*
|
||||
* If there is no state in the stack for which we can recover with the
|
||||
* current lookahead token, advance to the next token. If no characters
|
||||
* were consumed, advance the lexer to the next character.
|
||||
*/
|
||||
size_t prev_position = ts_lexer_position(&parser->lexer);
|
||||
lex(parser, ts_lex_state_error);
|
||||
if (ts_lexer_position(&parser->lexer) == prev_position) {
|
||||
parser->lexer.token_end_position++;
|
||||
if (!ts_lexer_advance(&parser->lexer)) {
|
||||
DEBUG_PARSE("FAIL TO RECOVER");
|
||||
ts_stack_push(&parser->stack, 0, error);
|
||||
ts_tree_release(error);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Unwind the parse stack until a state is found in which an error is
|
||||
* expected and the current lookahead token is expected afterwards.
|
||||
*/
|
||||
size_t error_start = last_token_end;
|
||||
TS_STACK_FROM_TOP(parser->stack, entry, i) {
|
||||
TSParseAction action_on_error =
|
||||
action_for(parser->language, entry->state, ts_builtin_sym_error);
|
||||
|
|
@ -160,25 +139,41 @@ static int handle_error(TSParser *parser) {
|
|||
TSParseAction action_after_error = action_for(
|
||||
parser->language, state_after_error, parser->lookahead->symbol);
|
||||
|
||||
if (action_after_error.type == TSParseActionTypeShift ||
|
||||
action_after_error.type == TSParseActionTypeReduce) {
|
||||
if (action_after_error.type != TSParseActionTypeError) {
|
||||
DEBUG_PARSE("RECOVER %u", state_after_error);
|
||||
error->size += ts_lexer_position(&parser->lexer) - 1 - error_start;
|
||||
ts_stack_shrink(&parser->stack, i + 1);
|
||||
error->size = ts_lexer_position(&parser->lexer) -
|
||||
parser->lookahead->size -
|
||||
ts_stack_right_position(&parser->stack);
|
||||
ts_stack_push(&parser->stack, state_after_error, error);
|
||||
ts_tree_release(error);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
TSTree *removed_tree = entry->node;
|
||||
error_start -= ts_tree_total_size(removed_tree);
|
||||
}
|
||||
|
||||
/*
|
||||
* If there is no state in the stack for which we can recover with the
|
||||
* current lookahead token, advance to the next token. If no characters
|
||||
* were consumed, advance the lexer to the next character.
|
||||
*/
|
||||
size_t prev_position = ts_lexer_position(&parser->lexer);
|
||||
lex(parser, ts_lex_state_error);
|
||||
parser->lookahead->padding = 0;
|
||||
if (ts_lexer_position(&parser->lexer) == prev_position)
|
||||
if (!ts_lexer_advance(&parser->lexer)) {
|
||||
DEBUG_PARSE("FAIL TO RECOVER");
|
||||
ts_stack_push(&parser->stack, 0, error);
|
||||
ts_tree_release(error);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static TSTree *get_root(TSParser *parser) {
|
||||
if (parser->stack.size == 0)
|
||||
ts_stack_push(&parser->stack, 0, ts_tree_make_error(0, 0));
|
||||
ts_stack_push(&parser->stack, 0, ts_tree_make_error(0, 0, 0));
|
||||
|
||||
reduce(parser, ts_builtin_sym_document, parser->stack.size);
|
||||
parser->lookahead->options = 0;
|
||||
|
|
|
|||
|
|
@ -50,26 +50,7 @@ size_t ts_stack_right_position(const TSStack *stack) {
|
|||
size_t result = 0;
|
||||
for (size_t i = 0; i < stack->size; i++) {
|
||||
TSTree *node = stack->entries[i].node;
|
||||
result += node->size;
|
||||
result += ts_tree_total_size(node);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
TSTree **ts_stack_pop_extras(TSStack *stack, size_t *count) {
|
||||
size_t first = stack->size;
|
||||
while (first > 0 && ts_tree_is_extra(stack->entries[first - 1].node))
|
||||
first--;
|
||||
|
||||
*count = (stack->size - first);
|
||||
if (*count == 0)
|
||||
return NULL;
|
||||
|
||||
TSTree **result = malloc(*count * sizeof(TSTree *));
|
||||
for (size_t i = 0; i < *count; i++) {
|
||||
result[i] = stack->entries[first + i].node;
|
||||
ts_tree_retain(result[i]);
|
||||
}
|
||||
|
||||
ts_stack_shrink(stack, first - 1);
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@ void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node);
|
|||
TSStateId ts_stack_top_state(const TSStack *stack);
|
||||
TSTree *ts_stack_top_node(const TSStack *stack);
|
||||
size_t ts_stack_right_position(const TSStack *stack);
|
||||
TSTree **ts_stack_pop_extras(TSStack *, size_t *);
|
||||
|
||||
#define TS_STACK_FROM_TOP(stack, entry, index) \
|
||||
size_t index = stack.size - 1; \
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include "tree_sitter/parser.h"
|
||||
#include "runtime/tree.h"
|
||||
|
||||
TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, bool is_hidden) {
|
||||
TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, size_t padding, bool is_hidden) {
|
||||
TSTree *result = malloc(sizeof(TSTree));
|
||||
*result = (TSTree) { .ref_count = 1,
|
||||
.symbol = sym,
|
||||
|
|
@ -12,12 +12,13 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, bool is_hidden) {
|
|||
.child_count = 0,
|
||||
.children = NULL,
|
||||
.lookahead_char = 0,
|
||||
.padding = padding,
|
||||
.options = is_hidden ? TSTreeOptionsHidden : 0, };
|
||||
return result;
|
||||
}
|
||||
|
||||
TSTree *ts_tree_make_error(size_t size, char lookahead_char) {
|
||||
TSTree *result = ts_tree_make_leaf(ts_builtin_sym_error, size, false);
|
||||
TSTree *ts_tree_make_error(size_t size, size_t padding, char lookahead_char) {
|
||||
TSTree *result = ts_tree_make_leaf(ts_builtin_sym_error, size, padding, false);
|
||||
result->lookahead_char = lookahead_char;
|
||||
return result;
|
||||
}
|
||||
|
|
@ -26,14 +27,20 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
|
|||
TSTree **children, bool is_hidden) {
|
||||
|
||||
/*
|
||||
* Determine the new node's size and visible child count based on
|
||||
* Determine the new node's size, padding and visible child count based on
|
||||
* the given child nodes.
|
||||
*/
|
||||
size_t size = 0, visible_child_count = 0;
|
||||
size_t size = 0, padding = 0, visible_child_count = 0;
|
||||
for (size_t i = 0; i < child_count; i++) {
|
||||
TSTree *child = children[i];
|
||||
ts_tree_retain(child);
|
||||
size += child->size;
|
||||
|
||||
if (i == 0) {
|
||||
padding = child->padding;
|
||||
size = child->size;
|
||||
} else {
|
||||
size += child->padding + child->size;
|
||||
}
|
||||
|
||||
if (ts_tree_is_visible(child))
|
||||
visible_child_count++;
|
||||
|
|
@ -63,6 +70,7 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
|
|||
.child_count = child_count,
|
||||
.visible_child_count = visible_child_count,
|
||||
.size = size,
|
||||
.padding = padding,
|
||||
.options = options };
|
||||
|
||||
/*
|
||||
|
|
@ -73,6 +81,9 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
|
|||
for (size_t i = 0, vis_i = 0, offset = 0; i < child_count; i++) {
|
||||
TSTree *child = children[i];
|
||||
|
||||
if (i > 0)
|
||||
offset += child->padding;
|
||||
|
||||
if (ts_tree_is_visible(child)) {
|
||||
visible_children[vis_i].tree = child;
|
||||
visible_children[vis_i].offset = offset;
|
||||
|
|
@ -107,6 +118,10 @@ void ts_tree_release(TSTree *tree) {
|
|||
}
|
||||
}
|
||||
|
||||
size_t ts_tree_total_size(const TSTree *tree) {
|
||||
return tree->padding + tree->size;
|
||||
}
|
||||
|
||||
int ts_tree_equals(const TSTree *node1, const TSTree *node2) {
|
||||
if (node1->symbol != node2->symbol)
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ struct TSTree {
|
|||
TSSymbol symbol;
|
||||
TSTreeOptions options;
|
||||
size_t ref_count;
|
||||
size_t padding;
|
||||
size_t size;
|
||||
char lookahead_char;
|
||||
size_t child_count;
|
||||
|
|
@ -46,9 +47,9 @@ static inline int ts_tree_is_wrapper(const TSTree *tree) {
|
|||
return (tree->options & TSTreeOptionsWrapper);
|
||||
}
|
||||
|
||||
TSTree *ts_tree_make_leaf(TSSymbol, size_t, bool);
|
||||
TSTree *ts_tree_make_leaf(TSSymbol, size_t, size_t, bool);
|
||||
TSTree *ts_tree_make_node(TSSymbol, size_t, TSTree **, bool);
|
||||
TSTree *ts_tree_make_error(size_t, char);
|
||||
TSTree *ts_tree_make_error(size_t size, size_t padding, char lookahead_char);
|
||||
void ts_tree_retain(TSTree *tree);
|
||||
void ts_tree_release(TSTree *tree);
|
||||
int ts_tree_equals(const TSTree *tree1, const TSTree *tree2);
|
||||
|
|
@ -56,6 +57,7 @@ char *ts_tree_string(const TSTree *tree, const char **names);
|
|||
char *ts_tree_error_string(const TSTree *tree, const char **names);
|
||||
TSTree **ts_tree_children(const TSTree *tree, size_t *count);
|
||||
TSTreeChild *ts_tree_visible_children(const TSTree *tree, size_t *count);
|
||||
size_t ts_tree_total_size(const TSTree *tree);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue