Allow multiple top-level nodes

Now, the root node of a document is always a document node.
It will often have only one child node which corresponds to the grammar's
start symbol, but not always. Currently, it may have more than one child
if there are ubiquitous tokens such as comments at the beginning of the
document. In the future, it will also be possible
for the document to have multiple children if the document is partially parsed.
This commit is contained in:
Max Brunsfeld 2014-08-08 23:58:59 -07:00
parent 9302080aa6
commit 1e79ed794b
17 changed files with 78 additions and 39 deletions

View file

@ -1,10 +1,10 @@
#include "tree_sitter/parser.h"
#define STATE_COUNT 32
#define SYMBOL_COUNT 18
#define SYMBOL_COUNT 19
enum {
ts_sym_expression = ts_start_sym,
ts_sym_expression = ts_builtin_sym_start,
ts_sym_sum,
ts_sym_difference,
ts_sym_product,
@ -23,6 +23,7 @@ enum {
};
SYMBOL_NAMES = {
[ts_builtin_sym_document] = "DOCUMENT",
[ts_sym_expression] = "expression",
[ts_sym_sum] = "sum",
[ts_sym_difference] = "difference",

View file

@ -1,10 +1,10 @@
#include "tree_sitter/parser.h"
#define STATE_COUNT 372
#define SYMBOL_COUNT 87
#define SYMBOL_COUNT 88
enum {
ts_sym_program = ts_start_sym,
ts_sym_program = ts_builtin_sym_start,
ts_sym_package_directive,
ts_sym_imports_block,
ts_sym_package_import,
@ -92,6 +92,7 @@ enum {
};
SYMBOL_NAMES = {
[ts_builtin_sym_document] = "DOCUMENT",
[ts_sym_program] = "program",
[ts_sym_package_directive] = "package_directive",
[ts_sym_imports_block] = "imports_block",

View file

@ -1,10 +1,10 @@
#include "tree_sitter/parser.h"
#define STATE_COUNT 2212
#define SYMBOL_COUNT 109
#define SYMBOL_COUNT 110
enum {
ts_sym_program = ts_start_sym,
ts_sym_program = ts_builtin_sym_start,
ts_sym_statement,
ts_sym_statement_block,
ts_sym_for_statement,
@ -114,6 +114,7 @@ enum {
};
SYMBOL_NAMES = {
[ts_builtin_sym_document] = "DOCUMENT",
[ts_sym_program] = "program",
[ts_sym_statement] = "statement",
[ts_sym_statement_block] = "statement_block",

View file

@ -1,10 +1,10 @@
#include "tree_sitter/parser.h"
#define STATE_COUNT 60
#define SYMBOL_COUNT 18
#define SYMBOL_COUNT 19
enum {
ts_sym_value = ts_start_sym,
ts_sym_value = ts_builtin_sym_start,
ts_sym_object,
ts_sym_array,
ts_sym_string,
@ -23,6 +23,7 @@ enum {
};
SYMBOL_NAMES = {
[ts_builtin_sym_document] = "DOCUMENT",
[ts_sym_value] = "value",
[ts_sym_object] = "object",
[ts_sym_array] = "array",
@ -319,6 +320,7 @@ LEX_FN() {
ADVANCE(27);
LEX_ERROR();
case ts_lex_state_error:
START_TOKEN();
if (lookahead == 0)
ADVANCE(25);
if (('\t' <= lookahead && lookahead <= '\n') ||

View file

@ -50,7 +50,8 @@ TSNode *ts_document_root_node(const TSDocument *);
#define ts_builtin_sym_error 0
#define ts_builtin_sym_end 1
#define ts_start_sym 2
#define ts_builtin_sym_document 2
#define ts_builtin_sym_start 3
#ifdef __cplusplus
}

View file

@ -41,7 +41,7 @@ describe("Document", [&]() {
it("parses the document", [&]() {
AssertThat(string(ts_node_string(ts_document_root_node(doc))), Equals(
"(object (string) (array (number) (number)))"));
"(DOCUMENT (object (string) (array (number) (number))))"));
});
});
});
@ -62,7 +62,7 @@ describe("Document", [&]() {
it("parses the input", [&]() {
AssertThat(string(ts_node_string(ts_document_root_node(doc))), Equals(
"(object (string) (array (number) (number)))"));
"(DOCUMENT (object (string) (array (number) (number))))"));
});
it("reads the entire input", [&]() {
@ -82,7 +82,7 @@ describe("Document", [&]() {
it("updates the parse tree", [&]() {
AssertThat(string(ts_node_string(ts_document_root_node(doc))), Equals(
"(object (string) (array (number) (number)) (string) (number))"));
"(DOCUMENT (object (string) (array (number) (number)) (string) (number)))"));
});
it("re-reads only the changed portion of the input", [&]() {
@ -102,7 +102,7 @@ describe("Document", [&]() {
it("updates the parse tree", [&]() {
AssertThat(string(ts_node_string(ts_document_root_node(doc))), Equals(
"(object (string) (number) (string) (array (number) (number)))"));
"(DOCUMENT (object (string) (number) (string) (array (number) (number))))"));
});
it_skip("re-reads only the changed portion of the input", [&]() {

View file

@ -3,7 +3,7 @@ recovers from errors at the top level
=====================================================
x * * y
---
(ERROR '*')
(variable) (ERROR '*')
=====================================================
recovers from errors inside parenthesized expressions

View file

@ -39,7 +39,7 @@ var x = {
(statement_block (var_declaration (identifier) (identifier)))))))
==========================================
parses comments. TODO - leading comments
parses comments
==========================================
// this is the beginning of the script.
// here we go.
@ -54,6 +54,8 @@ var thing = {
}
};
---
(comment)
(comment)
(program
(var_declaration (identifier) (object
(comment)

View file

@ -3,7 +3,7 @@ recovers from top-level errors
==========================================
[}
---
(ERROR '}')
(ERROR <EOF>) (ERROR '}')
==========================================
recovers from unexpected tokens

View file

@ -29,7 +29,7 @@ describe("Languages", [&]() {
it(entry.description.c_str(), [&]() {
ts_document_set_input_string(doc, entry.input.c_str());
auto doc_string = ts_node_string(ts_document_root_node(doc));
AssertThat(doc_string, Equals(entry.tree_string.c_str()));
AssertThat(doc_string, Equals(("(DOCUMENT " + entry.tree_string + ")").c_str()));
free((void *)doc_string);
});
}

View file

@ -14,7 +14,7 @@ describe("Node", []() {
ts_document_set_input_string(document, " [12, 5, 345]");
root = ts_document_root_node(document);
AssertThat(ts_node_string(root), Equals("(array (number) (number) (number))"));
AssertThat(ts_node_string(root), Equals("(DOCUMENT (array (number) (number) (number)))"));
});
after_each([&]() {
@ -23,23 +23,28 @@ describe("Node", []() {
describe("child_count", [&]() {
it("returns the number of visible child nodes", [&]() {
AssertThat(ts_node_child_count(root), Equals<size_t>(3));
TSNode *array = ts_node_child(root, 0);
AssertThat(ts_node_child_count(array), Equals<size_t>(3));
ts_node_release(array);
});
});
describe("child(i)", [&]() {
it("returns the child node at the given index", [&]() {
TSNode *number1 = ts_node_child(root, 0);
TSNode *number2 = ts_node_child(root, 1);
TSNode *number3 = ts_node_child(root, 2);
TSNode *array = ts_node_child(root, 0);
TSNode *number1 = ts_node_child(array, 0);
TSNode *number2 = ts_node_child(array, 1);
TSNode *number3 = ts_node_child(array, 2);
AssertThat(ts_node_name(root), Equals("array"));
AssertThat(ts_node_name(array), Equals("array"));
AssertThat(ts_node_name(number1), Equals("number"));
AssertThat(ts_node_name(number2), Equals("number"));
AssertThat(ts_node_name(number3), Equals("number"));
AssertThat(ts_node_pos(root), Equals<size_t>(2));
AssertThat(ts_node_size(root), Equals<size_t>(12));
AssertThat(ts_node_pos(array), Equals<size_t>(2));
AssertThat(ts_node_size(array), Equals<size_t>(12));
AssertThat(ts_node_pos(number1), Equals<size_t>(3));
AssertThat(ts_node_size(number1), Equals<size_t>(2));
@ -50,6 +55,7 @@ describe("Node", []() {
AssertThat(ts_node_pos(number3), Equals<size_t>(10));
AssertThat(ts_node_size(number3), Equals<size_t>(3));
ts_node_release(array);
ts_node_release(number1);
ts_node_release(number2);
ts_node_release(number3);
@ -58,21 +64,28 @@ describe("Node", []() {
describe("parent", [&]() {
it("returns the node's parent node", [&]() {
TSNode *number = ts_node_child(root, 1);
AssertThat(ts_node_parent(number), Equals(root));
TSNode *array = ts_node_child(root, 0);
TSNode *number = ts_node_child(array, 1);
AssertThat(ts_node_parent(number), Equals(array));
AssertThat(ts_node_parent(array), Equals(root));
ts_node_release(array);
ts_node_release(number);
});
});
describe("next_sibling and prev_sibling", [&]() {
it("returns the node's next and previous siblings", [&]() {
TSNode *number1 = ts_node_child(root, 0);
TSNode *number2 = ts_node_child(root, 1);
TSNode *number3 = ts_node_child(root, 2);
TSNode *array = ts_node_child(root, 0);
TSNode *number1 = ts_node_child(array, 0);
TSNode *number2 = ts_node_child(array, 1);
TSNode *number3 = ts_node_child(array, 2);
AssertThat(ts_node_eq(ts_node_next_sibling(number2), number3), IsTrue());
AssertThat(ts_node_eq(ts_node_prev_sibling(number2), number1), IsTrue());
ts_node_release(array);
ts_node_release(number1);
ts_node_release(number2);
ts_node_release(number3);

View file

@ -143,6 +143,7 @@ class ParseTableBuilder {
item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar));
parse_table.symbols.insert(rules::ERROR());
parse_table.symbols.insert(rules::DOCUMENT());
while (!item_sets_to_process.empty()) {
auto pair = item_sets_to_process.back();

View file

@ -77,7 +77,7 @@ class CCodeGenerator {
for (auto symbol : parse_table.symbols)
if (!symbol.is_built_in()) {
if (at_start)
line(symbol_id(symbol) + " = ts_start_sym,");
line(symbol_id(symbol) + " = ts_builtin_sym_start,");
else
line(symbol_id(symbol) + ",");
at_start = false;
@ -88,10 +88,6 @@ class CCodeGenerator {
}
void symbol_names_list() {
set<rules::Symbol> symbols(parse_table.symbols);
symbols.insert(rules::END_OF_INPUT());
symbols.insert(rules::ERROR());
line("SYMBOL_NAMES = {");
indent([&]() {
for (auto symbol : parse_table.symbols)
@ -174,8 +170,12 @@ class CCodeGenerator {
string symbol_id(const rules::Symbol &symbol) {
if (symbol.is_built_in()) {
return (symbol == rules::ERROR()) ? "ts_builtin_sym_error"
: "ts_builtin_sym_end";
if (symbol == rules::ERROR())
return "ts_builtin_sym_error";
else if (symbol == rules::END_OF_INPUT())
return "ts_builtin_sym_end";
else
return "ts_builtin_sym_document";
} else {
string name = sanitize_name(rule_name(symbol));
if (symbol.is_auxiliary())
@ -221,7 +221,12 @@ class CCodeGenerator {
string symbol_name(const rules::Symbol &symbol) {
if (symbol.is_built_in()) {
return (symbol == rules::ERROR()) ? "error" : "end";
if (symbol == rules::ERROR())
return "error";
else if (symbol == rules::END_OF_INPUT())
return "end";
else
return "DOCUMENT";
} else if (symbol.is_token() && symbol.is_auxiliary()) {
return rule_name(symbol);
} else {

View file

@ -6,6 +6,7 @@ namespace rules {
Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); }
Symbol ERROR() { return Symbol(-2, SymbolOptionToken); }
Symbol START() { return Symbol(-3); }
Symbol DOCUMENT() { return Symbol(-4); }
} // namespace rules
} // namespace tree_sitter

View file

@ -9,6 +9,7 @@ namespace rules {
Symbol ERROR();
Symbol START();
Symbol END_OF_INPUT();
Symbol DOCUMENT();
} // namespace rules
} // namespace tree_sitter

View file

@ -177,7 +177,15 @@ int ts_parser_handle_error(TSParser *parser) {
}
TSTree *ts_parser_tree_root(TSParser *parser) {
return ts_stack_top_node(&parser->stack);
TSStack *stack = &parser->stack;
if (stack->size == 0)
return NULL;
TSTree *tree = ts_stack_reduce(stack, ts_builtin_sym_document,
stack->size, parser->language->hidden_symbol_flags);
tree->options = 0;
ts_stack_push(stack, 0, tree);
return tree;
}
TSParseAction ts_parser_next_action(TSParser *parser) {

View file

@ -58,6 +58,8 @@ TSTree *ts_stack_reduce(TSStack *stack, TSSymbol symbol, size_t child_count,
// The child node count is known ahead of time, but some children may be
// ubiquitous tokens, which don't count.
for (size_t i = 0; i < child_count; i++) {
if (child_count == stack->size)
break;
TSTree *child = stack->entries[stack->size - 1 - i].node;
if (ts_tree_is_extra(child))
child_count++;