Handle multiple ubiquitous tokens in a row
This commit is contained in:
parent
a75686b017
commit
85d8c9df5c
4 changed files with 144 additions and 39 deletions
|
|
@ -2,6 +2,7 @@
|
|||
#include "runtime/helpers/spy_reader.h"
|
||||
|
||||
extern "C" const TSLanguage * ts_language_json();
|
||||
extern "C" const TSLanguage * ts_language_javascript();
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -116,13 +117,13 @@ describe("Document", [&]() {
|
|||
describe("parsing", [&]() {
|
||||
TSNode *root;
|
||||
|
||||
describe("error handling", [&]() {
|
||||
describe("handling errors", [&]() {
|
||||
before_each([&]() {
|
||||
ts_document_set_language(doc, ts_language_json());
|
||||
});
|
||||
|
||||
describe("when the error occurs at the beginning of a token", [&]() {
|
||||
it("computes the error node's size and position correctly 1", [&]() {
|
||||
it("computes the error node's size and position correctly", [&]() {
|
||||
ts_document_set_input_string(doc, " [123, @@@@@, true]");
|
||||
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
|
||||
"(DOCUMENT (array (number) (ERROR '@') (true)))"));
|
||||
|
|
@ -130,37 +131,47 @@ describe("Document", [&]() {
|
|||
root = ts_document_root_node(doc);
|
||||
TSNode *array = ts_node_child(root, 0);
|
||||
TSNode *error = ts_node_child(array, 1);
|
||||
TSNode *last = ts_node_child(array, 2);
|
||||
|
||||
AssertThat(ts_node_name(error), Equals("error"));
|
||||
AssertThat(ts_node_pos(error), Equals(string(" [123,").length()))
|
||||
AssertThat(ts_node_size(error), Equals(string(" @@@@@").length()))
|
||||
|
||||
AssertThat(ts_node_name(last), Equals("true"));
|
||||
AssertThat(ts_node_pos(last), Equals(string(" [123, @@@@@, ").length()))
|
||||
|
||||
ts_node_release(last);
|
||||
ts_node_release(error);
|
||||
ts_node_release(array);
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the error occurs in the middle of a token", [&]() {
|
||||
it("computes the error node's size and position correctly 2", [&]() {
|
||||
ts_document_set_input_string(doc, " [123, total nonsense, true]");
|
||||
it("computes the error node's size and position correctly", [&]() {
|
||||
ts_document_set_input_string(doc, " [123, faaaaalse, true]");
|
||||
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
|
||||
"(DOCUMENT (array (number) (ERROR 'o') (true)))"));
|
||||
"(DOCUMENT (array (number) (ERROR 'a') (true)))"));
|
||||
|
||||
root = ts_document_root_node(doc);
|
||||
TSNode *array = ts_node_child(root, 0);
|
||||
TSNode *error = ts_node_child(array, 1);
|
||||
TSNode *last = ts_node_child(array, 2);
|
||||
|
||||
AssertThat(ts_node_name(error), Equals("error"));
|
||||
AssertThat(ts_node_pos(error), Equals(string(" [123,").length()))
|
||||
AssertThat(ts_node_size(error), Equals(string(" total nonsense").length()))
|
||||
AssertThat(ts_node_size(error), Equals(string(" faaaaalse").length()))
|
||||
|
||||
AssertThat(ts_node_name(last), Equals("true"));
|
||||
AssertThat(ts_node_pos(last), Equals(string(" [123, faaaaalse, ").length()))
|
||||
|
||||
ts_node_release(last);
|
||||
ts_node_release(error);
|
||||
ts_node_release(array);
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the error occurs after one or more tokens", [&]() {
|
||||
it("computes the error node's size and position correctly 3", [&]() {
|
||||
it("computes the error node's size and position correctly", [&]() {
|
||||
ts_document_set_input_string(doc, " [123, true false, true]");
|
||||
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
|
||||
"(DOCUMENT (array (number) (ERROR 'f') (true)))"));
|
||||
|
|
@ -168,15 +179,89 @@ describe("Document", [&]() {
|
|||
root = ts_document_root_node(doc);
|
||||
TSNode *array = ts_node_child(root, 0);
|
||||
TSNode *error = ts_node_child(array, 1);
|
||||
TSNode *last = ts_node_child(array, 2);
|
||||
|
||||
AssertThat(ts_node_name(error), Equals("error"));
|
||||
AssertThat(ts_node_pos(error), Equals(string(" [123,").length()))
|
||||
AssertThat(ts_node_size(error), Equals(string(" true false").length()))
|
||||
|
||||
AssertThat(ts_node_name(last), Equals("true"));
|
||||
AssertThat(ts_node_pos(last), Equals(string(" [123, true false, ").length()))
|
||||
|
||||
ts_node_release(last);
|
||||
ts_node_release(error);
|
||||
ts_node_release(array);
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the error is an empty string", [&]() {
|
||||
it("computes the error node's size and position correctly", [&]() {
|
||||
ts_document_set_input_string(doc, " [123, , true]");
|
||||
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
|
||||
"(DOCUMENT (array (number) (ERROR ',') (true)))"));
|
||||
|
||||
root = ts_document_root_node(doc);
|
||||
TSNode *array = ts_node_child(root, 0);
|
||||
TSNode *error = ts_node_child(array, 1);
|
||||
TSNode *last = ts_node_child(array, 2);
|
||||
|
||||
AssertThat(ts_node_name(error), Equals("error"));
|
||||
AssertThat(ts_node_pos(error), Equals(string(" [123,").length()))
|
||||
AssertThat(ts_node_size(error), Equals(string(" ").length()))
|
||||
|
||||
AssertThat(ts_node_name(last), Equals("true"));
|
||||
AssertThat(ts_node_pos(last), Equals(string(" [123, , ").length()))
|
||||
|
||||
ts_node_release(last);
|
||||
ts_node_release(error);
|
||||
ts_node_release(array);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("handling ubiquitous tokens", [&]() {
|
||||
|
||||
// In the javascript example grammar, ASI works by using newlines as
|
||||
// terminators in statements, but also as ubiquitous tokens.
|
||||
before_each([&]() {
|
||||
ts_document_set_language(doc, ts_language_javascript());
|
||||
});
|
||||
|
||||
describe("when the token appears as part of a grammar rule", [&]() {
|
||||
it("is incorporated into the tree", [&]() {
|
||||
ts_document_set_input_string(doc, "fn()\n");
|
||||
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
|
||||
"(DOCUMENT (program (expression_statement (function_call (identifier)))))"));
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the token appears somewhere else", [&]() {
|
||||
it("is incorporated into the tree", [&]() {
|
||||
ts_document_set_input_string(doc,
|
||||
"fn()\n"
|
||||
" .otherFn();");
|
||||
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
|
||||
"(DOCUMENT (program "
|
||||
"(expression_statement (function_call "
|
||||
"(property_access (function_call (identifier)) (identifier))))))"));
|
||||
});
|
||||
|
||||
describe("when several ubiquitous tokens appear in a row", [&]() {
|
||||
it("is incorporated into the tree", [&]() {
|
||||
ts_document_set_input_string(doc,
|
||||
"fn()\n\n"
|
||||
"// This is a comment"
|
||||
"\n\n"
|
||||
".otherFn();");
|
||||
AssertThat(ts_node_string(ts_document_root_node(doc)), Equals(
|
||||
"(DOCUMENT (program "
|
||||
"(expression_statement (function_call "
|
||||
"(property_access (function_call (identifier)) "
|
||||
"(comment) "
|
||||
"(identifier))))))"));
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -110,8 +110,15 @@ static void reduce(TSParser *parser, TSSymbol symbol, size_t child_count) {
|
|||
}
|
||||
|
||||
static int reduce_extra(TSParser *parser, TSSymbol symbol) {
|
||||
TSTree *top_node = ts_stack_top_node(&parser->stack);
|
||||
if (top_node->symbol == symbol && !ts_tree_is_extra(top_node)) {
|
||||
TSTree *last_node = NULL;
|
||||
TS_STACK_FROM_TOP(parser->stack, entry, i) {
|
||||
if (!ts_tree_is_extra(entry->node)) {
|
||||
last_node = entry->node;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (last_node && last_node->symbol == symbol) {
|
||||
reduce(parser, symbol, 1);
|
||||
ts_tree_set_extra(parser->lookahead);
|
||||
return 1;
|
||||
|
|
@ -136,28 +143,13 @@ static int handle_error(TSParser *parser) {
|
|||
|
||||
for (;;) {
|
||||
|
||||
/*
|
||||
* If there is no state in the stack for which we can recover with the
|
||||
* current lookahead token, advance to the next token. If no characters
|
||||
* were consumed, advance the lexer to the next character.
|
||||
*/
|
||||
size_t prev_position = ts_lexer_position(&parser->lexer);
|
||||
lex(parser, ts_lex_state_error);
|
||||
if (ts_lexer_position(&parser->lexer) == prev_position)
|
||||
if (!ts_lexer_advance(&parser->lexer)) {
|
||||
DEBUG_PARSE("FAIL TO RECOVER");
|
||||
ts_stack_push(&parser->stack, 0, error);
|
||||
ts_tree_release(error);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unwind the parse stack until a state is found in which an error is
|
||||
* expected and the current lookahead token is expected afterwards.
|
||||
*/
|
||||
size_t error_start_pos = last_token_end;
|
||||
for (size_t i = parser->stack.size - 1; i + 1 > 0; i--) {
|
||||
TSStateId state = parser->stack.entries[i].state;
|
||||
TS_STACK_FROM_TOP(parser->stack, entry, i) {
|
||||
TSStateId state = entry->state;
|
||||
TSParseAction action_on_error =
|
||||
actions_for_state(parser->language, state)[ts_builtin_sym_error];
|
||||
|
||||
|
|
@ -168,7 +160,8 @@ static int handle_error(TSParser *parser) {
|
|||
|
||||
if (action_after_error.type != TSParseActionTypeError) {
|
||||
DEBUG_PARSE("RECOVER %u", state_after_error);
|
||||
error->size = ts_lexer_position(&parser->lexer) - error_start_pos - 1;
|
||||
size_t current_position = ts_lexer_position(&parser->lexer);
|
||||
error->size = current_position - 1 - error_start_pos;
|
||||
ts_stack_shrink(&parser->stack, i + 1);
|
||||
ts_stack_push(&parser->stack, state_after_error, error);
|
||||
ts_tree_release(error);
|
||||
|
|
@ -176,9 +169,25 @@ static int handle_error(TSParser *parser) {
|
|||
}
|
||||
}
|
||||
|
||||
TSTree *removed_tree = parser->stack.entries[i].node;
|
||||
TSTree *removed_tree = entry->node;
|
||||
error_start_pos -= ts_tree_total_size(removed_tree);
|
||||
}
|
||||
|
||||
/*
|
||||
* If there is no state in the stack for which we can recover with the
|
||||
* current lookahead token, advance to the next token. If no characters
|
||||
* were consumed, advance the lexer to the next character.
|
||||
*/
|
||||
size_t prev_position = ts_lexer_position(&parser->lexer);
|
||||
lex(parser, ts_lex_state_error);
|
||||
parser->lookahead->padding = 0;
|
||||
if (ts_lexer_position(&parser->lexer) == prev_position)
|
||||
if (!ts_lexer_advance(&parser->lexer)) {
|
||||
DEBUG_PARSE("FAIL TO RECOVER");
|
||||
ts_stack_push(&parser->stack, 0, error);
|
||||
ts_tree_release(error);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -256,8 +265,12 @@ const TSTree *ts_parser_parse(TSParser *parser, TSInput input,
|
|||
break;
|
||||
|
||||
case TSParseActionTypeReduceExtra:
|
||||
if (!reduce_extra(parser, action.data.symbol)) {
|
||||
DEBUG_PARSE("ERROR");
|
||||
if (!handle_error(parser))
|
||||
return get_root(parser);
|
||||
}
|
||||
DEBUG_PARSE("REDUCE EXTRA");
|
||||
reduce_extra(parser, action.data.symbol);
|
||||
break;
|
||||
|
||||
case TSParseActionTypeAccept:
|
||||
|
|
|
|||
|
|
@ -6,10 +6,10 @@ static size_t INITIAL_STACK_SIZE = 100;
|
|||
static TSStateId INITIAL_STATE = 0;
|
||||
|
||||
TSStack ts_stack_make() {
|
||||
TSStack result = {
|
||||
.entries = calloc(INITIAL_STACK_SIZE, sizeof(*result.entries)), .size = 0,
|
||||
.capacity = INITIAL_STACK_SIZE,
|
||||
};
|
||||
TSStack result = { .size = 0,
|
||||
.capacity = INITIAL_STACK_SIZE,
|
||||
.entries =
|
||||
calloc(INITIAL_STACK_SIZE, sizeof(*result.entries)), };
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -33,7 +33,8 @@ TSTree *ts_stack_top_node(const TSStack *stack) {
|
|||
void ts_stack_push(TSStack *stack, TSStateId state, TSTree *node) {
|
||||
if (stack->size == stack->capacity) {
|
||||
stack->capacity *= 2;
|
||||
stack->entries = realloc(stack->entries, stack->capacity * sizeof(*stack->entries));
|
||||
stack->entries =
|
||||
realloc(stack->entries, stack->capacity * sizeof(*stack->entries));
|
||||
}
|
||||
stack->entries[stack->size].state = state;
|
||||
stack->entries[stack->size].node = node;
|
||||
|
|
|
|||
|
|
@ -7,14 +7,15 @@ extern "C" {
|
|||
|
||||
#include "tree_sitter/parser.h"
|
||||
|
||||
/* One slot of the parse stack: a tree node paired with a parse state. */
typedef struct {
|
||||
TSTree *node; /* tree stored in this slot */
|
||||
TSStateId state; /* state stored alongside the node when it is pushed */
|
||||
} TSStackEntry;
|
||||
|
||||
typedef struct {
|
||||
size_t size;
|
||||
size_t capacity;
|
||||
struct {
|
||||
TSTree *node;
|
||||
TSStateId state;
|
||||
int is_extra;
|
||||
} *entries;
|
||||
TSStackEntry *entries;
|
||||
} TSStack;
|
||||
|
||||
TSStack ts_stack_make();
|
||||
|
|
@ -25,6 +26,11 @@ TSStateId ts_stack_top_state(const TSStack *stack);
|
|||
TSTree *ts_stack_top_node(const TSStack *stack);
|
||||
size_t ts_stack_right_position(const TSStack *stack);
|
||||
|
||||
/*
 * Iterate over a stack's entries from the top (last pushed) down to the
 * bottom.
 *
 *   stack - a TSStack lvalue (not a pointer). It is evaluated more than
 *           once, so it must not have side effects.
 *   entry - name of the `TSStackEntry *` made available to the loop body.
 *   index - name of a `size_t` declared in the ENCLOSING scope; inside the
 *           body it is the position of `entry` within `stack.entries`.
 *
 * The expansion is a declaration followed by a `for` header, so the macro
 * must appear inside a braced block and be followed by the loop body.
 * `break` and `continue` behave as in an ordinary loop.
 *
 * The cursor starts one past the top entry and is stepped down inside the
 * loop condition. An empty stack therefore runs zero iterations, and no
 * pointer before the start of `entries` is ever formed or compared.
 */
#define TS_STACK_FROM_TOP(stack, entry, index)        \
  size_t index = (stack).size;                        \
  for (TSStackEntry *entry = (stack).entries + index; \
       index-- > 0 && (entry--, 1);)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue