Handle multiple ubiquitous tokens in a row

This commit is contained in:
Max Brunsfeld 2014-08-31 12:10:31 -07:00
parent a75686b017
commit 85d8c9df5c
4 changed files with 144 additions and 39 deletions

View file

@ -2,6 +2,7 @@
#include "runtime/helpers/spy_reader.h"
extern "C" const TSLanguage * ts_language_json();
extern "C" const TSLanguage * ts_language_javascript();
START_TEST
@ -116,13 +117,13 @@ describe("Document", [&]() {
describe("parsing", [&]() {
TSNode *root;
describe("error handling", [&]() {
describe("handling errors", [&]() {
before_each([&]() {
ts_document_set_language(doc, ts_language_json());
});
describe("when the error occurs at the beginning of a token", [&]() {
it("computes the error node's size and position correctly 1", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_input_string(doc, " [123, @@@@@, true]");
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
"(DOCUMENT (array (number) (ERROR '@') (true)))"));
@ -130,37 +131,47 @@ describe("Document", [&]() {
root = ts_document_root_node(doc);
TSNode *array = ts_node_child(root, 0);
TSNode *error = ts_node_child(array, 1);
TSNode *last = ts_node_child(array, 2);
AssertThat(ts_node_name(error), Equals("error"));
AssertThat(ts_node_pos(error), Equals(string(" [123,").length()))
AssertThat(ts_node_size(error), Equals(string(" @@@@@").length()))
AssertThat(ts_node_name(last), Equals("true"));
AssertThat(ts_node_pos(last), Equals(string(" [123, @@@@@, ").length()))
ts_node_release(last);
ts_node_release(error);
ts_node_release(array);
});
});
describe("when the error occurs in the middle of a token", [&]() {
it("computes the error node's size and position correctly 2", [&]() {
ts_document_set_input_string(doc, " [123, total nonsense, true]");
it("computes the error node's size and position correctly", [&]() {
ts_document_set_input_string(doc, " [123, faaaaalse, true]");
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
"(DOCUMENT (array (number) (ERROR 'o') (true)))"));
"(DOCUMENT (array (number) (ERROR 'a') (true)))"));
root = ts_document_root_node(doc);
TSNode *array = ts_node_child(root, 0);
TSNode *error = ts_node_child(array, 1);
TSNode *last = ts_node_child(array, 2);
AssertThat(ts_node_name(error), Equals("error"));
AssertThat(ts_node_pos(error), Equals(string(" [123,").length()))
AssertThat(ts_node_size(error), Equals(string(" total nonsense").length()))
AssertThat(ts_node_size(error), Equals(string(" faaaaalse").length()))
AssertThat(ts_node_name(last), Equals("true"));
AssertThat(ts_node_pos(last), Equals(string(" [123, faaaaalse, ").length()))
ts_node_release(last);
ts_node_release(error);
ts_node_release(array);
});
});
describe("when the error occurs after one or more tokens", [&]() {
it("computes the error node's size and position correctly 3", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_input_string(doc, " [123, true false, true]");
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
"(DOCUMENT (array (number) (ERROR 'f') (true)))"));
@ -168,15 +179,89 @@ describe("Document", [&]() {
root = ts_document_root_node(doc);
TSNode *array = ts_node_child(root, 0);
TSNode *error = ts_node_child(array, 1);
TSNode *last = ts_node_child(array, 2);
AssertThat(ts_node_name(error), Equals("error"));
AssertThat(ts_node_pos(error), Equals(string(" [123,").length()))
AssertThat(ts_node_size(error), Equals(string(" true false").length()))
AssertThat(ts_node_name(last), Equals("true"));
AssertThat(ts_node_pos(last), Equals(string(" [123, true false, ").length()))
ts_node_release(last);
ts_node_release(error);
ts_node_release(array);
});
});
describe("when the error is an empty string", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_input_string(doc, " [123, , true]");
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
"(DOCUMENT (array (number) (ERROR ',') (true)))"));
root = ts_document_root_node(doc);
TSNode *array = ts_node_child(root, 0);
TSNode *error = ts_node_child(array, 1);
TSNode *last = ts_node_child(array, 2);
AssertThat(ts_node_name(error), Equals("error"));
AssertThat(ts_node_pos(error), Equals(string(" [123,").length()))
AssertThat(ts_node_size(error), Equals(string(" ").length()))
AssertThat(ts_node_name(last), Equals("true"));
AssertThat(ts_node_pos(last), Equals(string(" [123, , ").length()))
ts_node_release(last);
ts_node_release(error);
ts_node_release(array);
});
});
});
describe("handling ubiquitous tokens", [&]() {
// In the javascript example grammar, automatic semicolon insertion (ASI)
// works by using newlines both as statement terminators and as ubiquitous
// tokens.
before_each([&]() {
ts_document_set_language(doc, ts_language_javascript());
});
describe("when the token appears as part of a grammar rule", [&]() {
it("is incorporated into the tree", [&]() {
ts_document_set_input_string(doc, "fn()\n");
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
"(DOCUMENT (program (expression_statement (function_call (identifier)))))"));
});
});
describe("when the token appears somewhere else", [&]() {
it("is incorporated into the tree", [&]() {
ts_document_set_input_string(doc,
"fn()\n"
" .otherFn();");
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
"(DOCUMENT (program "
"(expression_statement (function_call "
"(property_access (function_call (identifier)) (identifier))))))"));
});
describe("when several ubiquitous tokens appear in a row", [&]() {
it("is incorporated into the tree", [&]() {
ts_document_set_input_string(doc,
"fn()\n\n"
"// This is a comment"
"\n\n"
".otherFn();");
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
"(DOCUMENT (program "
"(expression_statement (function_call "
"(property_access (function_call (identifier)) "
"(comment) "
"(identifier))))))"));
});
});
});
});
});
});

View file

@ -110,8 +110,15 @@ static void reduce(TSParser *parser, TSSymbol symbol, size_t child_count) {
}
static int reduce_extra(TSParser *parser, TSSymbol symbol) {
TSTree *top_node = ts_stack_top_node(&parser->stack);
if (top_node->symbol == symbol && !ts_tree_is_extra(top_node)) {
TSTree *last_node = NULL;
TS_STACK_FROM_TOP(parser->stack, entry, i) {
if (!ts_tree_is_extra(entry->node)) {
last_node = entry->node;
break;
}
}
if (last_node && last_node->symbol == symbol) {
reduce(parser, symbol, 1);
ts_tree_set_extra(parser->lookahead);
return 1;
@ -136,28 +143,13 @@ static int handle_error(TSParser *parser) {
for (;;) {
/*
* If there is no state in the stack for which we can recover with the
* current lookahead token, advance to the next token. If no characters
* were consumed, advance the lexer to the next character.
*/
size_t prev_position = ts_lexer_position(&parser->lexer);
lex(parser, ts_lex_state_error);
if (ts_lexer_position(&parser->lexer) == prev_position)
if (!ts_lexer_advance(&parser->lexer)) {
DEBUG_PARSE("FAIL TO RECOVER");
ts_stack_push(&parser->stack, 0, error);
ts_tree_release(error);
return 0;
}
/*
* Unwind the parse stack until a state is found in which an error is
* expected and the current lookahead token is expected afterwards.
*/
size_t error_start_pos = last_token_end;
for (size_t i = parser->stack.size - 1; i + 1 > 0; i--) {
TSStateId state = parser->stack.entries[i].state;
TS_STACK_FROM_TOP(parser->stack, entry, i) {
TSStateId state = entry->state;
TSParseAction action_on_error =
actions_for_state(parser->language, state)[ts_builtin_sym_error];
@ -168,7 +160,8 @@ static int handle_error(TSParser *parser) {
if (action_after_error.type != TSParseActionTypeError) {
DEBUG_PARSE("RECOVER %u", state_after_error);
error->size = ts_lexer_position(&parser->lexer) - error_start_pos - 1;
size_t current_position = ts_lexer_position(&parser->lexer);
error->size = current_position - 1 - error_start_pos;
ts_stack_shrink(&parser->stack, i + 1);
ts_stack_push(&parser->stack, state_after_error, error);
ts_tree_release(error);
@ -176,9 +169,25 @@ static int handle_error(TSParser *parser) {
}
}
TSTree *removed_tree = parser->stack.entries[i].node;
TSTree *removed_tree = entry->node;
error_start_pos -= ts_tree_total_size(removed_tree);
}
/*
* If there is no state in the stack for which we can recover with the
* current lookahead token, advance to the next token. If no characters
* were consumed, advance the lexer to the next character.
*/
size_t prev_position = ts_lexer_position(&parser->lexer);
lex(parser, ts_lex_state_error);
parser->lookahead->padding = 0;
if (ts_lexer_position(&parser->lexer) == prev_position)
if (!ts_lexer_advance(&parser->lexer)) {
DEBUG_PARSE("FAIL TO RECOVER");
ts_stack_push(&parser->stack, 0, error);
ts_tree_release(error);
return 0;
}
}
}
@ -256,8 +265,12 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input,
break;
case TSParseActionTypeReduceExtra:
if (!reduce_extra(parser, action.data.symbol)) {
DEBUG_PARSE("ERROR");
if (!handle_error(parser))
return get_root(parser);
}
DEBUG_PARSE("REDUCE EXTRA");
reduce_extra(parser, action.data.symbol);
break;
case TSParseActionTypeAccept:

View file

@ -6,10 +6,10 @@ static size_t INITIAL_STACK_SIZE = 100;
static TSStateId INITIAL_STATE = 0;
TSStack ts_stack_make() {
TSStack result = {
.entries = calloc(INITIAL_STACK_SIZE, sizeof(*result.entries)), .size = 0,
.capacity = INITIAL_STACK_SIZE,
};
TSStack result = { .size = 0,
.capacity = INITIAL_STACK_SIZE,
.entries =
calloc(INITIAL_STACK_SIZE, sizeof(*result.entries)), };
return result;
}
@ -33,7 +33,8 @@ TSTree *ts_stack_top_node(const TSStack *stack) {
void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node) {
if (stack->size == stack->capacity) {
stack->capacity *= 2;
stack->entries = realloc(stack->entries, stack->capacity * sizeof(*stack->entries));
stack->entries =
realloc(stack->entries, stack->capacity * sizeof(*stack->entries));
}
stack->entries[stack->size].state = state;
stack->entries[stack->size].node = node;

View file

@ -7,14 +7,15 @@ extern "C" {
#include "tree_sitter/parser.h"
typedef struct {
TSTree *node;
TSStateId state;
} TSStackEntry;
typedef struct {
size_t size;
size_t capacity;
struct {
TSTree *node;
TSStateId state;
int is_extra;
} *entries;
TSStackEntry *entries;
} TSStack;
TSStack ts_stack_make();
@ -25,6 +26,11 @@ TSStateId ts_stack_top_state(const TSStack *stack);
TSTree *ts_stack_top_node(const TSStack *stack);
size_t ts_stack_right_position(const TSStack *stack);
/*
 * Iterate over the entries of `stack` from the top (most recently pushed)
 * entry down to the bottom one. The macro must be followed by the loop body.
 *
 *   stack - a TSStack lvalue (not a pointer); it is evaluated more than once.
 *   entry - name of the `TSStackEntry *` loop variable, scoped to the loop.
 *   index - name of a `size_t` declared in the ENCLOSING scope that holds the
 *           array index of the current entry.
 *
 * An empty stack produces no iterations. Termination is decided by `index`,
 * and `entry` is never moved before the start of the entries array, so no
 * out-of-bounds pointer is ever formed. When the loop runs to completion
 * `index` is left at (size_t)-1; after a `break` it keeps the index of the
 * entry that was being visited.
 */
#define TS_STACK_FROM_TOP(stack, entry, index)                              \
  size_t index = (stack).size - 1;                                          \
  for (TSStackEntry *entry =                                                \
           (stack).size > 0 ? (stack).entries + index : (stack).entries;    \
       index != (size_t)-1; entry = (index-- > 0) ? entry - 1 : entry)
#ifdef __cplusplus
}
#endif