Start work on re-using right side of parse tree

This commit is contained in:
Max Brunsfeld 2014-10-09 14:02:03 -07:00
parent 80b8a0a9fb
commit 4dcc712a8c
5 changed files with 119 additions and 27 deletions

View file

@ -6,6 +6,15 @@
static const char *empty_chunk = "";
/*
 * Loads the chunk of input that begins at the lexer's current position.
 *
 * The input is first sought to `lexer->current_position`, and the byte
 * offset of that position is recorded as `chunk_start`. The chunk returned
 * by the input's read function is stored along with its size. When the
 * reported size is zero, `chunk` is pointed at the shared empty string
 * instead of whatever the read function returned.
 */
static void ts_lexer_read_next_chunk(TSLexer *lexer) {
  TSInput source = lexer->input;

  source.seek_fn(source.data, lexer->current_position);
  lexer->chunk_start = lexer->current_position.bytes;

  lexer->chunk = source.read_fn(source.data, &lexer->chunk_size);
  if (lexer->chunk_size == 0) {
    lexer->chunk = empty_chunk;
  }
}
static bool advance(TSLexer *lexer) {
/*
@ -27,10 +36,7 @@ static bool advance(TSLexer *lexer) {
* the end of the current chunk.
*/
if (lexer->current_position.bytes >= lexer->chunk_start + lexer->chunk_size) {
lexer->chunk_start += lexer->chunk_size;
lexer->chunk = lexer->input.read_fn(lexer->input.data, &lexer->chunk_size);
if (!lexer->chunk_size)
lexer->chunk = empty_chunk;
ts_lexer_read_next_chunk(lexer);
}
/*
@ -76,13 +82,12 @@ TSLexer ts_lexer_make() {
}
/*
 * Repositions the lexer at `position`: the current and token-end positions
 * are set to `position`, a chunk is loaded from the input, and
 * ts_lexer_advance is called once before returning.
 *
 * NOTE(review): as listed here the body assigns current_position and
 * token_end_position twice and loads a chunk twice (once directly through
 * read_fn, once through ts_lexer_read_next_chunk). This looks like the old
 * and new sides of a change shown together -- confirm against the real
 * file which statements are live before relying on this listing.
 */
void ts_lexer_reset(TSLexer *lexer, TSLength position) {
lexer->input.seek_fn(lexer->input.data, position);
lexer->current_position = position;
lexer->token_end_position = position;
lexer->lookahead = 0;
lexer->lookahead_size = 0;
lexer->chunk_start = position.bytes;
lexer->chunk_size = 0;
lexer->chunk = lexer->input.read_fn(lexer->input.data, &lexer->chunk_size);
lexer->token_end_position = position;
lexer->current_position = position;
/* Seeks the input to current_position and loads the chunk starting there. */
ts_lexer_read_next_chunk(lexer);
ts_lexer_advance(lexer);
}

View file

@ -1,4 +1,5 @@
#include <stdio.h>
#include <stdbool.h>
#include "tree_sitter/runtime.h"
#include "tree_sitter/parser.h"
#include "runtime/tree.h"
@ -22,11 +23,53 @@ static TSParseAction action_for(const TSLanguage *lang, TSStateId state,
return (lang->parse_table + (state * lang->symbol_count))[sym];
}
static void lex(TSParser *parser, TSStateId lex_state) {
parser->lookahead = parser->language->lex_fn(&parser->lexer, lex_state);
/*
 * Breaks down nodes on the parser's right stack until its top node either
 * starts after `cur_position`, or starts exactly at `cur_position` and has a
 * non-error parse action in `state`.
 *
 * The running offset starts at `parser->total_chars` minus the total size of
 * the trees currently on the right stack. Each popped node adds its size to
 * the offset; each child pushed back subtracts its own size again.
 *
 * parser       - parser whose right stack is broken down (also used by
 *                DEBUG_PARSE).
 * cur_position - position being compared against; only `.chars` is read.
 * state        - parse state used to look up the action for the top node.
 *
 * Returns the running character offset when the loop stops.
 */
static size_t breakdown_right_stack(TSParser *parser, TSLength cur_position,
                                    TSStateId state) {
  TSStack *right = &parser->right_stack;
  size_t offset = parser->total_chars - ts_stack_total_tree_size(right).chars;

  TSTree *top;
  while ((top = ts_stack_top_node(right)) != NULL &&
         offset <= cur_position.chars) {
    const bool reusable =
        action_for(parser->language, state, top->symbol).type !=
        TSParseActionTypeError;
    if (reusable && offset == cur_position.chars) {
      break;
    }

    size_t kid_count;
    TSTree **kids = ts_tree_children(top, &kid_count);
    DEBUG_PARSE("POP RIGHT %s", parser->language->symbol_names[top->symbol]);

    /* Drop the top entry by hand; the node itself is released below. */
    right->size--;
    offset += ts_tree_total_size(top).chars;

    /*
     * Walk the children from last to first, pushing them back while the
     * offset is still past the target position.
     */
    for (size_t remaining = kid_count; remaining > 0; remaining--) {
      TSTree *kid = kids[remaining - 1];
      if (offset <= cur_position.chars) {
        break;
      }
      DEBUG_PARSE("PUSH RIGHT %s",
                  parser->language->symbol_names[kid->symbol]);
      ts_stack_push(right, 0, kid);
      offset -= ts_tree_total_size(kid).chars;
    }

    ts_tree_release(top);
  }

  return offset;
}
static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) {
ts_stack_shrink(&parser->right_stack, 0);
if (!edit) {
ts_stack_shrink(&parser->stack, 0);
return ts_length_zero();
@ -34,6 +77,8 @@ static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) {
TSStack *stack = &parser->stack;
TSLength position = ts_stack_total_tree_size(stack);
parser->total_chars =
position.chars + edit->chars_inserted - edit->chars_removed;
for (;;) {
TSTree *node = ts_stack_top_node(stack);
@ -45,29 +90,54 @@ static TSLength breakdown_stack(TSParser *parser, TSInputEdit *edit) {
if (position.chars < edit->position && !children)
break;
DEBUG_PARSE("POP %s", parser->language->symbol_names[node->symbol]);
DEBUG_PARSE("POP LEFT %s", parser->language->symbol_names[node->symbol]);
stack->size--;
position = ts_length_sub(position, ts_tree_total_size(node));
for (size_t i = 0; i < child_count && position.chars < edit->position; i++) {
size_t i = 0;
for (; i < child_count && position.chars < edit->position; i++) {
TSTree *child = children[i];
TSStateId state = ts_stack_top_state(stack);
TSParseAction action = action_for(parser->language, state, child->symbol);
TSStateId next_state =
action.type == TSParseActionTypeShift ? action.data.to_state : state;
DEBUG_PARSE("PUT BACK %s", parser->language->symbol_names[child->symbol]);
DEBUG_PARSE("PUSH LEFT %s", parser->language->symbol_names[child->symbol]);
ts_stack_push(stack, next_state, child);
position = ts_length_add(position, ts_tree_total_size(child));
}
for (size_t j = child_count - 1; j + 1 > i + 1; j--) {
TSTree *child = children[j];
DEBUG_PARSE("PUSH RIGHT %s",
parser->language->symbol_names[child->symbol]);
ts_stack_push(&parser->right_stack, 0, child);
}
ts_tree_release(node);
}
DEBUG_PARSE("RESUME %lu", position.chars);
DEBUG_PARSE("RESUME LEFT %lu", position.chars);
return position;
}
/*
 * Produces the parser's next lookahead.
 *
 * The right stack is first broken down against the lexer's current position
 * and the state on top of the main stack. If a node is then on top of the
 * right stack and the offset returned by breakdown_right_stack equals the
 * lexer's current character position, that node is taken off the right
 * stack and becomes the lookahead, and the lexer's current position is
 * advanced by the node's total size. Otherwise the language's lex function
 * is called with `lex_state`.
 *
 * NOTE(review): on the reuse path only `current_position` is updated; the
 * lexer's chunk and lookahead fields are left as they were. Confirm that
 * later lexing resynchronizes with the new position.
 */
static void lex(TSParser *parser, TSStateId lex_state) {
  TSLexer *lexer = &parser->lexer;
  TSStack *right = &parser->right_stack;

  TSStateId top_state = ts_stack_top_state(&parser->stack);
  size_t reuse_offset =
      breakdown_right_stack(parser, lexer->current_position, top_state);
  TSTree *candidate = ts_stack_top_node(right);

  if (!candidate || reuse_offset != lexer->current_position.chars) {
    parser->lookahead = parser->language->lex_fn(lexer, lex_state);
    return;
  }

  DEBUG_PARSE("REUSE %s", parser->language->symbol_names[candidate->symbol]);
  ts_stack_shrink(right, right->size - 1);
  parser->lookahead = candidate;
  lexer->current_position =
      ts_length_add(lexer->current_position, ts_tree_total_size(candidate));
}
static void resize_error(TSParser *parser, TSTree *error) {
error->size =
ts_length_sub(ts_length_sub(parser->lexer.token_start_position,
@ -199,6 +269,7 @@ static TSTree *get_root(TSParser *parser) {
TSParser ts_parser_make(const TSLanguage *language) {
return (TSParser) { .lexer = ts_lexer_make(),
.stack = ts_stack_make(),
.right_stack = ts_stack_make(),
.debug = 0,
.language = language, };
}
@ -224,7 +295,8 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input,
TSStateId state = ts_stack_top_state(&parser->stack);
if (!parser->lookahead)
lex(parser, parser->language->lex_states[state]);
TSParseAction action = action_for(parser->language, state, parser->lookahead->symbol);
TSParseAction action =
action_for(parser->language, state, parser->lookahead->symbol);
DEBUG_PARSE("LOOKAHEAD %s",
parser->language->symbol_names[parser->lookahead->symbol]);

View file

@ -10,6 +10,8 @@ extern "C" {
typedef struct {
TSLexer lexer;
TSStack stack;
TSStack right_stack;
size_t total_chars;
int debug;
TSTree *lookahead;
TSTree *next_lookahead;