diff --git a/examples/parsers/arithmetic.c b/examples/parsers/arithmetic.c index 7e79db8f..e644fd75 100644 --- a/examples/parsers/arithmetic.c +++ b/examples/parsers/arithmetic.c @@ -1,10 +1,10 @@ #include "tree_sitter/parser.h" #define STATE_COUNT 32 -#define SYMBOL_COUNT 18 +#define SYMBOL_COUNT 19 enum { - ts_sym_expression = ts_start_sym, + ts_sym_expression = ts_builtin_sym_start, ts_sym_sum, ts_sym_difference, ts_sym_product, @@ -23,6 +23,7 @@ enum { }; SYMBOL_NAMES = { + [ts_builtin_sym_document] = "DOCUMENT", [ts_sym_expression] = "expression", [ts_sym_sum] = "sum", [ts_sym_difference] = "difference", diff --git a/examples/parsers/golang.c b/examples/parsers/golang.c index cbc6a529..2b171aaf 100644 --- a/examples/parsers/golang.c +++ b/examples/parsers/golang.c @@ -1,10 +1,10 @@ #include "tree_sitter/parser.h" #define STATE_COUNT 372 -#define SYMBOL_COUNT 87 +#define SYMBOL_COUNT 88 enum { - ts_sym_program = ts_start_sym, + ts_sym_program = ts_builtin_sym_start, ts_sym_package_directive, ts_sym_imports_block, ts_sym_package_import, @@ -92,6 +92,7 @@ enum { }; SYMBOL_NAMES = { + [ts_builtin_sym_document] = "DOCUMENT", [ts_sym_program] = "program", [ts_sym_package_directive] = "package_directive", [ts_sym_imports_block] = "imports_block", diff --git a/examples/parsers/javascript.c b/examples/parsers/javascript.c index bc2a6a13..06fe2a49 100644 --- a/examples/parsers/javascript.c +++ b/examples/parsers/javascript.c @@ -1,10 +1,10 @@ #include "tree_sitter/parser.h" #define STATE_COUNT 2212 -#define SYMBOL_COUNT 109 +#define SYMBOL_COUNT 110 enum { - ts_sym_program = ts_start_sym, + ts_sym_program = ts_builtin_sym_start, ts_sym_statement, ts_sym_statement_block, ts_sym_for_statement, @@ -114,6 +114,7 @@ enum { }; SYMBOL_NAMES = { + [ts_builtin_sym_document] = "DOCUMENT", [ts_sym_program] = "program", [ts_sym_statement] = "statement", [ts_sym_statement_block] = "statement_block", diff --git a/examples/parsers/json.c b/examples/parsers/json.c index aabf1c69..af4c2a42 100644 --- a/examples/parsers/json.c +++ b/examples/parsers/json.c @@ -1,10 +1,10 @@ #include "tree_sitter/parser.h" #define STATE_COUNT 60 -#define SYMBOL_COUNT 18 +#define SYMBOL_COUNT 19 enum { - ts_sym_value = ts_start_sym, + ts_sym_value = ts_builtin_sym_start, ts_sym_object, ts_sym_array, ts_sym_string, @@ -23,6 +23,7 @@ enum { }; SYMBOL_NAMES = { + [ts_builtin_sym_document] = "DOCUMENT", [ts_sym_value] = "value", [ts_sym_object] = "object", [ts_sym_array] = "array", @@ -319,6 +320,7 @@ LEX_FN() { ADVANCE(27); LEX_ERROR(); case ts_lex_state_error: + START_TOKEN(); if (lookahead == 0) ADVANCE(25); if (('\t' <= lookahead && lookahead <= '\n') || diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index ece23a10..889e2054 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -50,7 +50,8 @@ TSNode *ts_document_root_node(const TSDocument *); #define ts_builtin_sym_error 0 #define ts_builtin_sym_end 1 -#define ts_start_sym 2 +#define ts_builtin_sym_document 2 +#define ts_builtin_sym_start 3 #ifdef __cplusplus } diff --git a/spec/runtime/document_spec.cc b/spec/runtime/document_spec.cc index 795b8edf..b3a28178 100644 --- a/spec/runtime/document_spec.cc +++ b/spec/runtime/document_spec.cc @@ -41,7 +41,7 @@ describe("Document", [&]() { it("parses the document", [&]() { AssertThat(string(ts_node_string(ts_document_root_node(doc))), Equals( - "(object (string) (array (number) (number)))")); + "(DOCUMENT (object (string) (array (number) (number))))")); }); }); }); @@ -62,7 +62,7 @@ describe("Document", [&]() { it("parses the input", [&]() { AssertThat(string(ts_node_string(ts_document_root_node(doc))), Equals( - "(object (string) (array (number) (number)))")); + "(DOCUMENT (object (string) (array (number) (number))))")); }); it("reads the entire input", [&]() { @@ -82,7 +82,7 @@ describe("Document", [&]() { it("updates the parse tree", [&]() { AssertThat(string(ts_node_string(ts_document_root_node(doc))), Equals( - "(object (string) (array (number) (number)) (string) (number))")); + "(DOCUMENT (object (string) (array (number) (number)) (string) (number)))")); }); it("re-reads only the changed portion of the input", [&]() { @@ -102,7 +102,7 @@ describe("Document", [&]() { it("updates the parse tree", [&]() { AssertThat(string(ts_node_string(ts_document_root_node(doc))), Equals( - "(object (string) (number) (string) (array (number) (number)))")); + "(DOCUMENT (object (string) (number) (string) (array (number) (number))))")); }); it_skip("re-reads only the changed portion of the input", [&]() { diff --git a/spec/runtime/languages/arithmetic/errors.txt b/spec/runtime/languages/arithmetic/errors.txt index 56b0bca6..9e9319b1 100644 --- a/spec/runtime/languages/arithmetic/errors.txt +++ b/spec/runtime/languages/arithmetic/errors.txt @@ -3,7 +3,7 @@ recovers from errors at the top level ===================================================== x * * y --- -(ERROR '*') +(variable) (ERROR '*') ===================================================== recovers from errors inside parenthesized expressions diff --git a/spec/runtime/languages/javascript/literals.txt b/spec/runtime/languages/javascript/literals.txt index 41f7da00..1e9dfca2 100644 --- a/spec/runtime/languages/javascript/literals.txt +++ b/spec/runtime/languages/javascript/literals.txt @@ -39,7 +39,7 @@ var x = { (statement_block (var_declaration (identifier) (identifier))))))) ========================================== -parses comments. TODO - leading comments +parses comments ========================================== // this is the beginning of the script. // here we go. @@ -54,6 +54,8 @@ var thing = { } }; --- +(comment) +(comment) (program (var_declaration (identifier) (object (comment) diff --git a/spec/runtime/languages/json/errors.txt b/spec/runtime/languages/json/errors.txt index 567155f5..96a19a18 100644 --- a/spec/runtime/languages/json/errors.txt +++ b/spec/runtime/languages/json/errors.txt @@ -3,7 +3,7 @@ recovers from top-level errors ========================================== [} --- -(ERROR '}') +(ERROR ) (ERROR '}') ========================================== recovers from unexpected tokens diff --git a/spec/runtime/languages/language_specs.cc b/spec/runtime/languages/language_specs.cc index 1c147693..f2386283 100644 --- a/spec/runtime/languages/language_specs.cc +++ b/spec/runtime/languages/language_specs.cc @@ -29,7 +29,7 @@ describe("Languages", [&]() { it(entry.description.c_str(), [&]() { ts_document_set_input_string(doc, entry.input.c_str()); auto doc_string = ts_node_string(ts_document_root_node(doc)); - AssertThat(doc_string, Equals(entry.tree_string.c_str())); + AssertThat(doc_string, Equals(("(DOCUMENT " + entry.tree_string + ")").c_str())); free((void *)doc_string); }); } diff --git a/spec/runtime/node_spec.cc b/spec/runtime/node_spec.cc index 6cc2ce8e..99888006 100644 --- a/spec/runtime/node_spec.cc +++ b/spec/runtime/node_spec.cc @@ -14,7 +14,7 @@ describe("Node", []() { ts_document_set_input_string(document, " [12, 5, 345]"); root = ts_document_root_node(document); - AssertThat(ts_node_string(root), Equals("(array (number) (number) (number))")); + AssertThat(ts_node_string(root), Equals("(DOCUMENT (array (number) (number) (number)))")); }); after_each([&]() { @@ -23,23 +23,28 @@ describe("Node", []() { describe("child_count", [&]() { it("returns the number of visible child nodes", [&]() { - AssertThat(ts_node_child_count(root), Equals(3)); + TSNode *array = ts_node_child(root, 0); + + AssertThat(ts_node_child_count(array), Equals(3)); + + ts_node_release(array); }); }); describe("child(i)", [&]() { it("returns the child node at the given index", [&]() { - TSNode *number1 = ts_node_child(root, 0); - TSNode *number2 = ts_node_child(root, 1); - TSNode *number3 = ts_node_child(root, 2); + TSNode *array = ts_node_child(root, 0); + TSNode *number1 = ts_node_child(array, 0); + TSNode *number2 = ts_node_child(array, 1); + TSNode *number3 = ts_node_child(array, 2); - AssertThat(ts_node_name(root), Equals("array")); + AssertThat(ts_node_name(array), Equals("array")); AssertThat(ts_node_name(number1), Equals("number")); AssertThat(ts_node_name(number2), Equals("number")); AssertThat(ts_node_name(number3), Equals("number")); - AssertThat(ts_node_pos(root), Equals(2)); - AssertThat(ts_node_size(root), Equals(12)); + AssertThat(ts_node_pos(array), Equals(2)); + AssertThat(ts_node_size(array), Equals(12)); AssertThat(ts_node_pos(number1), Equals(3)); AssertThat(ts_node_size(number1), Equals(2)); @@ -50,6 +55,7 @@ describe("Node", []() { AssertThat(ts_node_pos(number3), Equals(10)); AssertThat(ts_node_size(number3), Equals(3)); + ts_node_release(array); ts_node_release(number1); ts_node_release(number2); ts_node_release(number3); @@ -58,21 +64,28 @@ describe("Node", []() { describe("parent", [&]() { it("returns the node's parent node", [&]() { - TSNode *number = ts_node_child(root, 1); - AssertThat(ts_node_parent(number), Equals(root)); + TSNode *array = ts_node_child(root, 0); + TSNode *number = ts_node_child(array, 1); + + AssertThat(ts_node_parent(number), Equals(array)); + AssertThat(ts_node_parent(array), Equals(root)); + + ts_node_release(array); ts_node_release(number); }); }); describe("next_sibling and prev_sibling", [&]() { it("returns the node's next and previous siblings", [&]() { - TSNode *number1 = ts_node_child(root, 0); - TSNode *number2 = ts_node_child(root, 1); - TSNode *number3 = ts_node_child(root, 2); + TSNode *array = ts_node_child(root, 0); + TSNode *number1 = ts_node_child(array, 0); + TSNode *number2 = ts_node_child(array, 1); + TSNode *number3 = ts_node_child(array, 2); AssertThat(ts_node_eq(ts_node_next_sibling(number2), number3), IsTrue()); AssertThat(ts_node_eq(ts_node_prev_sibling(number2), number1), IsTrue()); + ts_node_release(array); ts_node_release(number1); ts_node_release(number2); ts_node_release(number3); diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index d9118b5a..e9be8f95 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -143,6 +143,7 @@ class ParseTableBuilder { item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar)); parse_table.symbols.insert(rules::ERROR()); + parse_table.symbols.insert(rules::DOCUMENT()); while (!item_sets_to_process.empty()) { auto pair = item_sets_to_process.back(); diff --git a/src/compiler/generate_code/c_code.cc b/src/compiler/generate_code/c_code.cc index d3538991..9573baa8 100644 --- a/src/compiler/generate_code/c_code.cc +++ b/src/compiler/generate_code/c_code.cc @@ -77,7 +77,7 @@ class CCodeGenerator { for (auto symbol : parse_table.symbols) if (!symbol.is_built_in()) { if (at_start) - line(symbol_id(symbol) + " = ts_start_sym,"); + line(symbol_id(symbol) + " = ts_builtin_sym_start,"); else line(symbol_id(symbol) + ","); at_start = false; @@ -88,10 +88,6 @@ class CCodeGenerator { } void symbol_names_list() { - set symbols(parse_table.symbols); - symbols.insert(rules::END_OF_INPUT()); - symbols.insert(rules::ERROR()); - line("SYMBOL_NAMES = {"); indent([&]() { for (auto symbol : parse_table.symbols) @@ -174,8 +170,12 @@ class CCodeGenerator { string symbol_id(const rules::Symbol &symbol) { if (symbol.is_built_in()) { - return (symbol == rules::ERROR()) ? "ts_builtin_sym_error" - : "ts_builtin_sym_end"; + if (symbol == rules::ERROR()) + return "ts_builtin_sym_error"; + else if (symbol == rules::END_OF_INPUT()) + return "ts_builtin_sym_end"; + else + return "ts_builtin_sym_document"; } else { string name = sanitize_name(rule_name(symbol)); if (symbol.is_auxiliary()) @@ -221,7 +221,12 @@ class CCodeGenerator { string symbol_name(const rules::Symbol &symbol) { if (symbol.is_built_in()) { - return (symbol == rules::ERROR()) ? "error" : "end"; + if (symbol == rules::ERROR()) + return "error"; + else if (symbol == rules::END_OF_INPUT()) + return "end"; + else + return "DOCUMENT"; } else if (symbol.is_token() && symbol.is_auxiliary()) { return rule_name(symbol); } else { diff --git a/src/compiler/rules/built_in_symbols.cc b/src/compiler/rules/built_in_symbols.cc index a6befc8e..7a648a3d 100644 --- a/src/compiler/rules/built_in_symbols.cc +++ b/src/compiler/rules/built_in_symbols.cc @@ -6,6 +6,7 @@ namespace rules { Symbol END_OF_INPUT() { return Symbol(-1, SymbolOptionToken); } Symbol ERROR() { return Symbol(-2, SymbolOptionToken); } Symbol START() { return Symbol(-3); } +Symbol DOCUMENT() { return Symbol(-4); } } // namespace rules } // namespace tree_sitter diff --git a/src/compiler/rules/built_in_symbols.h b/src/compiler/rules/built_in_symbols.h index 10f1f2da..63ad3df4 100644 --- a/src/compiler/rules/built_in_symbols.h +++ b/src/compiler/rules/built_in_symbols.h @@ -9,6 +9,7 @@ namespace rules { Symbol ERROR(); Symbol START(); Symbol END_OF_INPUT(); +Symbol DOCUMENT(); } // namespace rules } // namespace tree_sitter diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 3b0da1cf..3c732d49 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -177,7 +177,15 @@ int ts_parser_handle_error(TSParser *parser) { } TSTree *ts_parser_tree_root(TSParser *parser) { - return ts_stack_top_node(&parser->stack); + TSStack *stack = &parser->stack; + if (stack->size == 0) + return NULL; + + TSTree *tree = ts_stack_reduce(stack, ts_builtin_sym_document, + stack->size, parser->language->hidden_symbol_flags); + tree->options = 0; + ts_stack_push(stack, 0, tree); + return tree; } TSParseAction ts_parser_next_action(TSParser *parser) { diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 374b7243..43a20a9d 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -58,6 +58,8 @@ TSTree *ts_stack_reduce(TSStack *stack, TSSymbol symbol, size_t child_count, // The child node count is known ahead of time, but some children may be // ubiquitous tokens, which don't count. for (size_t i = 0; i < child_count; i++) { + if (child_count == stack->size) + break; TSTree *child = stack->entries[stack->size - 1 - i].node; if (ts_tree_is_extra(child)) child_count++;