Revert "Remove the separator characters construct"

This reverts commit 5cd07648fd.

The separators construct is useful as an optimization. It turns out that
constructing a node for every chunk of whitespace in a document causes a
significant performance regression.

Conflicts:
	src/compiler/build_tables/build_lex_table.cc
	src/compiler/grammar.cc
	src/runtime/parser.c
This commit is contained in:
Max Brunsfeld 2014-09-02 07:41:29 -07:00
parent e941f8c175
commit 545e575508
43 changed files with 9065 additions and 11203 deletions

View file

@ -23,11 +23,12 @@ static int advance(TSLexer *lexer) {
static TSTree *accept(TSLexer *lexer, TSSymbol symbol, int is_hidden) {
size_t current_position = ts_lexer_position(lexer);
size_t size = current_position - lexer->token_end_position;
size_t size = current_position - lexer->token_start_position;
size_t padding = lexer->token_start_position - lexer->token_end_position;
lexer->token_end_position = current_position;
return (symbol == ts_builtin_sym_error)
? ts_tree_make_error(size, ts_lexer_lookahead_char(lexer))
: ts_tree_make_leaf(symbol, size, is_hidden);
? ts_tree_make_error(size, padding, ts_lexer_lookahead_char(lexer))
: ts_tree_make_leaf(symbol, size, padding, is_hidden);
}
/*
@ -41,6 +42,7 @@ TSLexer ts_lexer_make() {
.chunk_start = 0,
.chunk_size = 0,
.position_in_chunk = 0,
.token_start_position = 0,
.token_end_position = 0,
.advance_fn = advance,
.accept_fn = accept, };

View file

@ -16,7 +16,7 @@ TSNode *ts_node_make(const TSTree *tree, TSNode *parent, size_t index,
}
TSNode *ts_node_make_root(const TSTree *tree, const char **names) {
return ts_node_make(tree, NULL, 0, 0, names);
return ts_node_make(tree, NULL, 0, tree->padding, names);
}
/* Take one more reference on `node` by incrementing its reference count. */
void ts_node_retain(TSNode *node) {
  node->ref_count += 1;
}

View file

@ -36,24 +36,16 @@ static size_t breakdown_stack(TSParser *parser, TSInputEdit *edit) {
break;
stack->size--;
position -= node->size;
DEBUG_PARSE("BREAKDOWN %s %u", parser->language->symbol_names[node->symbol],
ts_stack_top_state(stack));
position -= ts_tree_total_size(node);
for (size_t i = 0; i < child_count && position < edit->position; i++) {
TSTree *child = children[i];
TSStateId state = ts_stack_top_state(stack);
TSParseAction action = action_for(parser->language, state, child->symbol);
TSStateId next_state = (action.type == TSParseActionTypeShift)
? action.data.to_state
: state;
TSStateId next_state =
action_for(parser->language, state, child->symbol).data.to_state;
ts_stack_push(stack, next_state, child);
ts_tree_retain(child);
position += child->size;
DEBUG_PARSE("PUT_BACK %s %u",
parser->language->symbol_names[child->symbol], next_state);
position += ts_tree_total_size(child);
}
ts_tree_release(node);
@ -84,7 +76,9 @@ static void reduce(TSParser *parser, TSSymbol symbol, size_t child_count) {
* The child node count is known ahead of time, but some children
* may be ubiquitous tokens, which don't count.
*/
for (size_t i = 0; i < child_count && child_count < stack->size; i++) {
for (size_t i = 0; i < child_count; i++) {
if (child_count == stack->size)
break;
TSTree *child = stack->entries[stack->size - 1 - i].node;
if (ts_tree_is_extra(child))
child_count++;
@ -127,30 +121,15 @@ static void lex(TSParser *parser, TSStateId lex_state) {
static int handle_error(TSParser *parser) {
TSTree *error = parser->lookahead;
ts_tree_retain(error);
size_t last_token_end = parser->lexer.token_end_position;
for (;;) {
/*
* If there is no state in the stack for which we can recover with the
* current lookahead token, advance to the next token. If no characters
* were consumed, advance the lexer to the next character.
*/
size_t prev_position = ts_lexer_position(&parser->lexer);
lex(parser, ts_lex_state_error);
if (ts_lexer_position(&parser->lexer) == prev_position) {
parser->lexer.token_end_position++;
if (!ts_lexer_advance(&parser->lexer)) {
DEBUG_PARSE("FAIL TO RECOVER");
ts_stack_push(&parser->stack, 0, error);
ts_tree_release(error);
return 0;
}
}
/*
* Unwind the parse stack until a state is found in which an error is
* expected and the current lookahead token is expected afterwards.
*/
size_t error_start = last_token_end;
TS_STACK_FROM_TOP(parser->stack, entry, i) {
TSParseAction action_on_error =
action_for(parser->language, entry->state, ts_builtin_sym_error);
@ -160,25 +139,41 @@ static int handle_error(TSParser *parser) {
TSParseAction action_after_error = action_for(
parser->language, state_after_error, parser->lookahead->symbol);
if (action_after_error.type == TSParseActionTypeShift ||
action_after_error.type == TSParseActionTypeReduce) {
if (action_after_error.type != TSParseActionTypeError) {
DEBUG_PARSE("RECOVER %u", state_after_error);
error->size += ts_lexer_position(&parser->lexer) - 1 - error_start;
ts_stack_shrink(&parser->stack, i + 1);
error->size = ts_lexer_position(&parser->lexer) -
parser->lookahead->size -
ts_stack_right_position(&parser->stack);
ts_stack_push(&parser->stack, state_after_error, error);
ts_tree_release(error);
return 1;
}
}
TSTree *removed_tree = entry->node;
error_start -= ts_tree_total_size(removed_tree);
}
/*
* If there is no state in the stack for which we can recover with the
* current lookahead token, advance to the next token. If no characters
* were consumed, advance the lexer to the next character.
*/
size_t prev_position = ts_lexer_position(&parser->lexer);
lex(parser, ts_lex_state_error);
parser->lookahead->padding = 0;
if (ts_lexer_position(&parser->lexer) == prev_position)
if (!ts_lexer_advance(&parser->lexer)) {
DEBUG_PARSE("FAIL TO RECOVER");
ts_stack_push(&parser->stack, 0, error);
ts_tree_release(error);
return 0;
}
}
}
static TSTree *get_root(TSParser *parser) {
if (parser->stack.size == 0)
ts_stack_push(&parser->stack, 0, ts_tree_make_error(0, 0));
ts_stack_push(&parser->stack, 0, ts_tree_make_error(0, 0, 0));
reduce(parser, ts_builtin_sym_document, parser->stack.size);
parser->lookahead->options = 0;

View file

@ -50,26 +50,7 @@ size_t ts_stack_right_position(const TSStack *stack) {
size_t result = 0;
for (size_t i = 0; i < stack->size; i++) {
TSTree *node = stack->entries[i].node;
result += node->size;
result += ts_tree_total_size(node);
}
return result;
}
/*
 * Gather the consecutive "extra" trees found at the top of the stack.
 *
 * On return, *count holds how many such trees were found. When there are
 * none, NULL is returned and the stack is left untouched. Otherwise a newly
 * malloc'd array of *count tree pointers is returned, ordered from the
 * lowest stack entry to the topmost one; each tree is retained once on
 * behalf of the caller, and the stack is shrunk.
 *
 * The result of malloc is used without a NULL check.
 */
TSTree **ts_stack_pop_extras(TSStack *stack, size_t *count) {
  /* Walk down from the top until a non-extra entry (or the bottom) is hit. */
  size_t extras_start = stack->size;
  for (; extras_start > 0; extras_start--) {
    if (!ts_tree_is_extra(stack->entries[extras_start - 1].node))
      break;
  }

  size_t extra_count = stack->size - extras_start;
  *count = extra_count;
  if (extra_count == 0)
    return NULL;

  TSTree **extras = malloc(extra_count * sizeof(TSTree *));
  for (size_t n = 0; n < extra_count; n++) {
    TSTree *tree = stack->entries[extras_start + n].node;
    ts_tree_retain(tree);
    extras[n] = tree;
  }

  /*
   * NOTE(review): handle_error calls ts_stack_shrink with `index + 1`,
   * which suggests the argument is the new stack size. If so, passing
   * `extras_start - 1` also drops the entry just below the extras, and
   * wraps around when the whole stack consists of extras
   * (extras_start == 0) -- confirm the intent.
   */
  ts_stack_shrink(stack, extras_start - 1);
  return extras;
}

View file

@ -25,7 +25,6 @@ void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node);
TSStateId ts_stack_top_state(const TSStack *stack);
TSTree *ts_stack_top_node(const TSStack *stack);
size_t ts_stack_right_position(const TSStack *stack);
TSTree **ts_stack_pop_extras(TSStack *, size_t *);
#define TS_STACK_FROM_TOP(stack, entry, index) \
size_t index = stack.size - 1; \

View file

@ -4,7 +4,7 @@
#include "tree_sitter/parser.h"
#include "runtime/tree.h"
TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, bool is_hidden) {
TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, size_t padding, bool is_hidden) {
TSTree *result = malloc(sizeof(TSTree));
*result = (TSTree) { .ref_count = 1,
.symbol = sym,
@ -12,12 +12,13 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, size_t size, bool is_hidden) {
.child_count = 0,
.children = NULL,
.lookahead_char = 0,
.padding = padding,
.options = is_hidden ? TSTreeOptionsHidden : 0, };
return result;
}
TSTree *ts_tree_make_error(size_t size, char lookahead_char) {
TSTree *result = ts_tree_make_leaf(ts_builtin_sym_error, size, false);
TSTree *ts_tree_make_error(size_t size, size_t padding, char lookahead_char) {
TSTree *result = ts_tree_make_leaf(ts_builtin_sym_error, size, padding, false);
result->lookahead_char = lookahead_char;
return result;
}
@ -26,14 +27,20 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
TSTree **children, bool is_hidden) {
/*
* Determine the new node's size and visible child count based on
* Determine the new node's size, padding and visible child count based on
* the given child nodes.
*/
size_t size = 0, visible_child_count = 0;
size_t size = 0, padding = 0, visible_child_count = 0;
for (size_t i = 0; i < child_count; i++) {
TSTree *child = children[i];
ts_tree_retain(child);
size += child->size;
if (i == 0) {
padding = child->padding;
size = child->size;
} else {
size += child->padding + child->size;
}
if (ts_tree_is_visible(child))
visible_child_count++;
@ -63,6 +70,7 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
.child_count = child_count,
.visible_child_count = visible_child_count,
.size = size,
.padding = padding,
.options = options };
/*
@ -73,6 +81,9 @@ TSTree *ts_tree_make_node(TSSymbol symbol, size_t child_count,
for (size_t i = 0, vis_i = 0, offset = 0; i < child_count; i++) {
TSTree *child = children[i];
if (i > 0)
offset += child->padding;
if (ts_tree_is_visible(child)) {
visible_children[vis_i].tree = child;
visible_children[vis_i].offset = offset;
@ -107,6 +118,10 @@ void ts_tree_release(TSTree *tree) {
}
}
/* Return the combined extent of a tree: its padding plus its own size. */
size_t ts_tree_total_size(const TSTree *tree) {
  size_t total = tree->padding;
  total += tree->size;
  return total;
}
int ts_tree_equals(const TSTree *node1, const TSTree *node2) {
if (node1->symbol != node2->symbol)
return 0;

View file

@ -18,6 +18,7 @@ struct TSTree {
TSSymbol symbol;
TSTreeOptions options;
size_t ref_count;
size_t padding;
size_t size;
char lookahead_char;
size_t child_count;
@ -46,9 +47,9 @@ static inline int ts_tree_is_wrapper(const TSTree *tree) {
return (tree->options & TSTreeOptionsWrapper);
}
TSTree *ts_tree_make_leaf(TSSymbol, size_t, bool);
TSTree *ts_tree_make_leaf(TSSymbol, size_t, size_t, bool);
TSTree *ts_tree_make_node(TSSymbol, size_t, TSTree **, bool);
TSTree *ts_tree_make_error(size_t, char);
TSTree *ts_tree_make_error(size_t size, size_t padding, char lookahead_char);
void ts_tree_retain(TSTree *tree);
void ts_tree_release(TSTree *tree);
int ts_tree_equals(const TSTree *tree1, const TSTree *tree2);
@ -56,6 +57,7 @@ char *ts_tree_string(const TSTree *tree, const char **names);
char *ts_tree_error_string(const TSTree *tree, const char **names);
TSTree **ts_tree_children(const TSTree *tree, size_t *count);
TSTreeChild *ts_tree_visible_children(const TSTree *tree, size_t *count);
size_t ts_tree_total_size(const TSTree *tree);
#ifdef __cplusplus
}