Merge pull request #57 from tree-sitter/fix-error-recovery-bugs

Fix error recovery bug when error parent node contains extra tokens
This commit is contained in:
Max Brunsfeld 2017-02-07 21:11:16 -08:00 committed by GitHub
commit 819b63e78d
5 changed files with 66 additions and 24 deletions

View file

@ -2,19 +2,13 @@
GRAMMARS_DIR=$(dirname $0)/../spec/fixtures/grammars
GRAMMARS=(
javascript
json
c
cpp
python
)
fetch_grammar() {
local grammar=$1
local ref=$2
local grammar_dir=${GRAMMARS_DIR}/${grammar}
local grammar_url=https://github.com/tree-sitter/tree-sitter-${grammar}
for grammar in ${GRAMMARS[@]}; do
echo "Fetching ${grammar} grammar..."
grammar_dir=${GRAMMARS_DIR}/${grammar}
grammar_url=https://github.com/tree-sitter/tree-sitter-${grammar}
echo "Updating ${grammar} grammar..."
if [ ! -d $grammar_dir ]; then
git clone $grammar_url $grammar_dir
@ -23,6 +17,12 @@ for grammar in ${GRAMMARS[@]}; do
(
cd $grammar_dir;
git fetch origin
git reset --hard origin/master;
git reset --hard $ref;
)
done
}
fetch_grammar 'javascript' '76cd7dd5eb793db21640c725e58301bde83781f7'
fetch_grammar 'json' 'origin/master'
fetch_grammar 'c' 'origin/master'
fetch_grammar 'cpp' 'origin/master'
fetch_grammar 'python' 'origin/master'

View file

@ -14,8 +14,10 @@ e f;
(ERROR (identifier))
(identifier)
(statement_block
(expression_statement (ERROR (identifier)) (identifier))))
(expression_statement (ERROR (identifier)) (identifier)))
(ERROR (identifier))
(expression_statement (identifier))))
(ERROR (identifier))
(expression_statement (identifier)))
=======================================================
multiple invalid tokens right after the viable prefix
@ -33,7 +35,8 @@ h i j k;
(ERROR (identifier) (identifier))
(identifier)
(statement_block
(expression_statement (ERROR (identifier) (identifier) (identifier)) (identifier))))
(ERROR (identifier) (identifier) (identifier))
(expression_statement (identifier))))
(expression_statement
(ERROR (identifier) (identifier) (identifier))
(identifier)))
@ -76,6 +79,24 @@ a.b =
(member_access (identifier) (identifier)))
(ERROR))
=================================================================
An invalid token at the end of a construct with extra line breaks
=================================================================
a(
b,
c,,
);
---
(program
(expression_statement
(function_call (identifier) (arguments
(identifier)
(identifier)
(ERROR)))))
===================================================
Multi-line chained expressions in var declarations
===================================================

View file

@ -260,6 +260,7 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
LOG("skip_unrecognized_character");
skipped_error = true;
error_start_position = self->lexer.token_start_position;
error_end_position = self->lexer.token_start_position;
first_error_character = self->lexer.data.lookahead;
}
@ -596,6 +597,7 @@ static inline const TSParseAction *parser__reductions_after_sequence(
if (child_count == tree_count_below)
break;
Tree *tree = trees_below->contents[trees_below->size - 1 - i];
if (tree->extra) continue;
TSStateId next_state = ts_language_next_state(self->language, state, tree->symbol);
if (next_state == ERROR_STATE)
return NULL;
@ -607,6 +609,7 @@ static inline const TSParseAction *parser__reductions_after_sequence(
for (uint32_t i = 0; i < trees_above->size; i++) {
Tree *tree = trees_above->contents[i];
if (tree->extra) continue;
TSStateId next_state = ts_language_next_state(self->language, state, tree->symbol);
if (next_state == ERROR_STATE)
return NULL;
@ -738,7 +741,6 @@ static bool parser__repair_error(Parser *self, StackSlice slice,
ReduceAction repair = session.best_repair;
TSStateId next_state = session.best_repair_next_state;
uint32_t skip_count = session.best_repair_skip_count;
uint32_t count_below = repair.count - session.tree_count_above_error;
TSSymbol symbol = repair.symbol;
StackSlice new_slice = array_pop(&pop.slices);
@ -752,13 +754,8 @@ static bool parser__repair_error(Parser *self, StackSlice slice,
ts_stack_remove_version(self->stack, other_slice.version);
}
TreeArray skipped_children = array_new();
array_grow(&skipped_children, skip_count);
for (uint32_t i = count_below; i < children.size; i++)
array_push(&skipped_children, children.contents[i]);
TreeArray skipped_children = ts_tree_array_remove_last_n(&children, skip_count);
Tree *error = ts_tree_make_error_node(&skipped_children);
children.size = count_below;
array_push(&children, error);
for (uint32_t i = 0; i < slice.trees.size; i++)

View file

@ -61,6 +61,29 @@ uint32_t ts_tree_array_essential_count(const TreeArray *self) {
return result;
}
/*
 * Split off the tail of `self` that contains its last `remove_count`
 * trees whose `extra` flag is not set, and return that tail as a new array.
 *
 * Trees with `extra` set are not counted toward `remove_count`, but any that
 * sit at or after the split point are moved into the result together with
 * the counted trees.
 *
 * The tree pointers are moved as-is: `self` is truncated to the elements
 * before the split point and the returned array holds the rest, in their
 * original order. No per-tree calls are made here.
 *
 * Returns an empty array when `self` is empty or `remove_count` is 0.
 * If `self` holds fewer than `remove_count` non-extra trees, every element
 * is moved into the result and `self` becomes empty.
 */
TreeArray ts_tree_array_remove_last_n(TreeArray *self, uint32_t remove_count) {
  TreeArray result = array_new();
  if (self->size == 0 || remove_count == 0) return result;

  // Walk backward from the end until `remove_count` non-extra trees have
  // been seen. Decrementing inside the loop body keeps `split_index` from
  // wrapping below zero: if the scan runs out of trees it simply stops at 0,
  // which means "remove everything".
  uint32_t count = 0;
  uint32_t split_index = self->size;
  while (split_index > 0) {
    split_index--;
    Tree *tree = self->contents[split_index];
    if (!tree->extra) {
      count++;
      if (count == remove_count) break;
    }
  }

  // Move everything from the split point onward into the result.
  array_grow(&result, self->size - split_index);
  for (uint32_t i = split_index; i < self->size; i++) {
    array_push(&result, self->contents[i]);
  }

  // Truncate the source array; the moved pointers now belong to `result`.
  self->size = split_index;
  return result;
}
Tree *ts_tree_make_error(Length size, Length padding, char lookahead_char) {
Tree *result = ts_tree_make_leaf(ts_builtin_sym_error, padding, size,
(TSSymbolMetadata){

View file

@ -68,6 +68,7 @@ typedef Array(TreePathEntry) TreePath;
bool ts_tree_array_copy(TreeArray, TreeArray *);
void ts_tree_array_delete(TreeArray *);
uint32_t ts_tree_array_essential_count(const TreeArray *);
TreeArray ts_tree_array_remove_last_n(TreeArray *, uint32_t);
Tree *ts_tree_make_leaf(TSSymbol, Length, Length, TSSymbolMetadata);
Tree *ts_tree_make_node(TSSymbol, uint32_t, Tree **, TSSymbolMetadata);