diff --git a/.travis.yml b/.travis.yml index 5a8e2be9..b37fa80d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,3 +16,7 @@ install: script: - script/ci + +cache: + directories: + - test/fixtures/grammars diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 95da0787..638bc5bd 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -31,6 +31,7 @@ typedef struct { const char *(*read)(void *payload, uint32_t *bytes_read); int (*seek)(void *payload, uint32_t character_index, uint32_t byte_index); TSInputEncoding encoding; + bool measure_columns_in_bytes; } TSInput; typedef enum { diff --git a/script/fetch-fixtures b/script/fetch-fixtures index a829962f..7709b219 100755 --- a/script/fetch-fixtures +++ b/script/fetch-fixtures @@ -21,8 +21,8 @@ fetch_grammar() { ) } -fetch_grammar 'javascript' '76cd7dd5eb793db21640c725e58301bde83781f7' +fetch_grammar 'javascript' 'origin/master' fetch_grammar 'json' 'origin/master' fetch_grammar 'c' 'origin/master' fetch_grammar 'cpp' 'origin/master' -fetch_grammar 'python' '179cb35e5b35baeef4a37f00732ff2de15e2e8bd' +fetch_grammar 'python' 'origin/master' diff --git a/src/runtime/document.c b/src/runtime/document.c index 6bcc5fbc..64677cb4 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -26,8 +26,13 @@ void ts_document_free(TSDocument *self) { parser_destroy(&self->parser); if (self->tree) ts_tree_release(self->tree); - ts_document_set_input(self, - (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8 }); + ts_document_set_input(self, (TSInput){ + NULL, + NULL, + NULL, + TSInputEncodingUTF8, + false + }); ts_free(self); } diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 7e0ef51f..21ce2b96 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -60,6 +60,8 @@ static void ts_lexer__advance(void *payload, bool skip) { if (self->data.lookahead == '\n') { self->current_position.extent.row++; self->current_position.extent.column = 0; + } else if (self->input.measure_columns_in_bytes) { + self->current_position.extent.column += self->lookahead_size; } else { self->current_position.extent.column++; } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index ef9cd31c..2bb86381 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -243,8 +243,14 @@ static Tree *parser__lex(Parser *self, StackVersion version) { ts_lexer_start(&self->lexer); if (self->language->external_scanner.scan(self->external_scanner_payload, &self->lexer.data, valid_external_tokens)) { - found_external_token = true; - break; + if (length_has_unknown_chars(self->lexer.token_end_position)) { + self->lexer.token_end_position = self->lexer.current_position; + } + if (lex_mode.lex_state != 0 || + self->lexer.token_end_position.bytes > current_position.bytes) { + found_external_token = true; + break; + } } ts_lexer_reset(&self->lexer, current_position); } @@ -253,6 +259,9 @@ static Tree *parser__lex(Parser *self, StackVersion version) { current_position.extent.row, current_position.extent.column); ts_lexer_start(&self->lexer); if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) { + if (length_has_unknown_chars(self->lexer.token_end_position)) { + self->lexer.token_end_position = self->lexer.current_position; + } break; } @@ -298,9 +307,6 @@ static Tree *parser__lex(Parser *self, StackVersion version) { symbol = self->language->external_scanner.symbol_map[symbol]; } - if (length_has_unknown_chars(self->lexer.token_end_position)) { - self->lexer.token_end_position = self->lexer.current_position; - } Length padding = length_sub(self->lexer.token_start_position, start_position); Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position); TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol); @@ -910,6 +916,11 @@ static StackIterateAction parser__skip_preceding_trees_callback( void *payload, TSStateId state, TreeArray *trees, uint32_t tree_count, bool is_done, bool is_pending) { if (tree_count > 0 && state != ERROR_STATE) { + uint32_t bytes_skipped = 0; + for (uint32_t i = 0; i < trees->size; i++) { + bytes_skipped += ts_tree_total_bytes(trees->contents[i]); + } + if (bytes_skipped == 0) return StackIterateNone; SkipPrecedingTreesSession *session = payload; Parser *self = session->parser; TSSymbol lookahead_symbol = session->lookahead_symbol; diff --git a/src/runtime/string_input.c b/src/runtime/string_input.c index 3b951a10..6cbf5b2c 100644 --- a/src/runtime/string_input.c +++ b/src/runtime/string_input.c @@ -43,8 +43,9 @@ TSInput ts_string_input_make_with_length(const char *string, uint32_t length) { .read = ts_string_input_read, .seek = ts_string_input_seek, .encoding = TSInputEncodingUTF8, + .measure_columns_in_bytes = false, }; error: - return (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8 }; + return (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8, false }; } diff --git a/src/runtime/tree_path.h b/src/runtime/tree_path.h index f64dd02f..b8f3aa8b 100644 --- a/src/runtime/tree_path.h +++ b/src/runtime/tree_path.h @@ -126,6 +126,7 @@ static bool tree_must_eq(Tree *old_tree, Tree *new_tree) { return old_tree == new_tree || ( !old_tree->has_changes && old_tree->symbol == new_tree->symbol && + old_tree->symbol != ts_builtin_sym_error && old_tree->size.bytes == new_tree->size.bytes && old_tree->parse_state != TS_TREE_STATE_NONE && new_tree->parse_state != TS_TREE_STATE_NONE && diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index 39f54f9a..5d6a7eb0 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -14,10 +14,8 @@ e f; (ERROR (identifier)) (identifier) (statement_block - (ERROR (identifier)) - (expression_statement (identifier)))) - (ERROR (identifier)) - (expression_statement (identifier))) + (expression_statement (ERROR (identifier)) (identifier)))) + (expression_statement (ERROR (identifier)) (identifier))) ======================================================= multiple invalid tokens right after the viable prefix @@ -35,10 +33,11 @@ h i j k; (ERROR (identifier) (identifier)) (identifier) (statement_block - (ERROR (identifier) (identifier) (identifier)) - (expression_statement (identifier)))) + (expression_statement + (ERROR (identifier) (jsx_attribute (identifier)) (jsx_attribute (identifier))) + (identifier)))) (expression_statement - (ERROR (identifier) (identifier) (identifier)) + (ERROR (identifier) (jsx_attribute (identifier)) (jsx_attribute (identifier))) (identifier))) =================================================== @@ -53,8 +52,8 @@ if ({a: 'b'} {c: 'd'}) { (program (if_statement - (object (pair (identifier) (string))) (ERROR (object (pair (identifier) (string)))) + (object (pair (identifier) (string))) (statement_block (expression_statement (assignment (identifier) @@ -75,9 +74,7 @@ a.b = (program (comment) - (trailing_expression_statement - (member_access (identifier) (identifier))) - (ERROR)) + (ERROR (member_access (identifier) (identifier)))) ================================================================= An invalid token at the end of a construct with extra line breaks @@ -97,27 +94,6 @@ a( (identifier) (ERROR))))) -=================================================== -Multi-line chained expressions in var declarations -=================================================== - -const one = two - .three(four) - .five() - ---- - -(program - (var_declaration (var_assignment - (identifier) - (function_call - (member_access - (function_call - (member_access (identifier) (identifier)) - (arguments (identifier))) - (identifier)) - (arguments))))) - =================================================== Errors after a sequence of function declarations =================================================== @@ -146,12 +122,12 @@ var x = !!! (program (comment) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (trailing_var_declaration (identifier)) (ERROR)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (ERROR (identifier))) diff --git a/test/helpers/load_language.cc b/test/helpers/load_language.cc index d5e70db0..d057e130 100644 --- a/test/helpers/load_language.cc +++ b/test/helpers/load_language.cc @@ -149,7 +149,10 @@ const TSLanguage *load_real_language(const string &language_name) { string parser_filename = language_dir + "/src/parser.c"; string external_scanner_filename = language_dir + "/src/scanner.cc"; if (!file_exists(external_scanner_filename)) { - external_scanner_filename = ""; + external_scanner_filename = language_dir + "/src/scanner.c"; + if (!file_exists(external_scanner_filename)) { + external_scanner_filename = ""; + } } int grammar_mtime = get_modified_time(grammar_filename); diff --git a/test/helpers/spy_input.cc b/test/helpers/spy_input.cc index 6c479c16..9edaf554 100644 --- a/test/helpers/spy_input.cc +++ b/test/helpers/spy_input.cc @@ -6,6 +6,7 @@ using std::pair; using std::string; +using std::vector; static const size_t UTF8_MAX_CHAR_SIZE = 4; @@ -16,12 +17,25 @@ SpyInput::SpyInput(string content, size_t chars_per_chunk) : byte_offset(0), content(content), encoding(TSInputEncodingUTF8), - strings_read({""}) {} + ranges_read({}) {} SpyInput::~SpyInput() { delete[] buffer; } +static void add_byte_range(vector> *ranges, + uint32_t start, uint32_t count) { + uint32_t end = start + count; + for (auto &range : *ranges) { + if (range.first <= start && start <= range.second) { + if (start < range.first) range.first = start; + if (end > range.second) range.second = end; + return; + } + } + ranges->push_back({start, end}); +} + const char * SpyInput::read(void *payload, uint32_t *bytes_read) { auto spy = static_cast(payload); @@ -36,7 +50,7 @@ const char * SpyInput::read(void *payload, uint32_t *bytes_read) { string result = spy->content.substr(spy->byte_offset, byte_count); *bytes_read = byte_count; - spy->strings_read.back() += result; + add_byte_range(&spy->ranges_read, spy->byte_offset, byte_count); spy->byte_offset += byte_count; /* @@ -54,18 +68,25 @@ const char * SpyInput::read(void *payload, uint32_t *bytes_read) { int SpyInput::seek(void *payload, uint32_t character, uint32_t byte) { auto spy = static_cast(payload); - if (spy->strings_read.size() == 0 || spy->strings_read.back().size() > 0) - spy->strings_read.push_back(""); spy->byte_offset = byte; return 0; } +vector SpyInput::strings_read() const { + vector result; + for (auto &range : ranges_read) { + result.push_back(content.substr(range.first, range.second - range.first)); + } + return result; +} + TSInput SpyInput::input() { TSInput result; result.payload = this; result.encoding = encoding; result.seek = seek; result.read = read; + result.measure_columns_in_bytes = true; return result; } @@ -129,5 +150,5 @@ pair SpyInput::swap_substr(size_t start_byte, size_t bytes_remo } void SpyInput::clear() { - strings_read.clear(); + ranges_read.clear(); } diff --git a/test/helpers/spy_input.h b/test/helpers/spy_input.h index a81213eb..9e0ee8d1 100644 --- a/test/helpers/spy_input.h +++ b/test/helpers/spy_input.h @@ -30,10 +30,11 @@ class SpyInput { void clear(); TSInputEdit replace(size_t start_char, size_t chars_removed, std::string text); TSInputEdit undo(); + std::vector strings_read() const; std::string content; TSInputEncoding encoding; - std::vector strings_read; + std::vector> ranges_read; }; #endif // HELPERS_SPY_INPUT_H_ diff --git a/test/helpers/stream_methods.cc b/test/helpers/stream_methods.cc index 91f9e87f..23a03d21 100644 --- a/test/helpers/stream_methods.cc +++ b/test/helpers/stream_methods.cc @@ -30,7 +30,7 @@ ostream &operator<<(ostream &stream, Associativity associativity) { return stream << "AssociativityLeft"; case AssociativityRight: return stream << "AssociativityRight"; - case AssociativityNone: + default: return stream << "AssociativityNone"; } } diff --git a/test/integration/real_grammars.cc b/test/integration/real_grammars.cc index d20f119b..d89c97fd 100644 --- a/test/integration/real_grammars.cc +++ b/test/integration/real_grammars.cc @@ -120,8 +120,8 @@ for (auto &language_name : test_languages) { ts_document_parse(document); }); - std::set> deletions; - std::set> insertions; + set> deletions; + set> insertions; for (size_t i = 0; i < 60; i++) { size_t edit_position = random() % utf8_char_count(entry.input); diff --git a/test/runtime/document_test.cc b/test/runtime/document_test.cc index 7757823c..6c321d75 100644 --- a/test/runtime/document_test.cc +++ b/test/runtime/document_test.cc @@ -76,11 +76,22 @@ describe("Document", [&]() { const char16_t content[] = u"[true, false]"; spy_input->content = string((const char *)content, sizeof(content)); spy_input->encoding = TSInputEncodingUTF16; - // spy_input->measure_columns_in_bytes + TSInput input = spy_input->input(); - ts_document_set_input(document, spy_input->input()); + input.measure_columns_in_bytes = false; + ts_document_set_input(document, input); ts_document_invalidate(document); ts_document_parse(document); + + TSNode root = ts_document_root_node(document); + AssertThat(ts_node_end_point(root), Equals({0, 13})); + + input.measure_columns_in_bytes = true; + ts_document_set_input(document, input); + ts_document_invalidate(document); + ts_document_parse(document); + root = ts_document_root_node(document); + AssertThat(ts_node_end_point(root), Equals({0, 26})); }); it("allows the input to be retrieved later", [&]() { @@ -94,7 +105,7 @@ describe("Document", [&]() { ts_document_set_input(document, spy_input->input()); AssertThat(ts_document_root_node(document), Equals(root)); AssertThat(ts_node_has_changes(root), IsFalse()); - AssertThat(spy_input->strings_read, Equals(vector({ "" }))); + AssertThat(spy_input->strings_read(), IsEmpty()); }); it("reads text from the new input for future parses", [&]() { @@ -113,7 +124,7 @@ describe("Document", [&]() { assert_node_string_equals( new_root, "(object (pair (string) (array (null) (number))))"); - AssertThat(spy_input->strings_read, Equals(vector({" [null, 2" }))); + AssertThat(spy_input->strings_read(), Equals(vector({" [null, 2" }))); }); it("allows setting input string with length", [&]() { diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index e390b164..d9aee54a 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -254,7 +254,7 @@ describe("Parser", [&]() { "(identifier) " "(math_op (number) (member_access (identifier) (identifier))))))"); - AssertThat(input->strings_read, Equals(vector({ " + abc.d)" }))); + AssertThat(input->strings_read(), Equals(vector({ " abc.d);" }))); }); }); @@ -279,7 +279,7 @@ describe("Parser", [&]() { "(number) " "(math_op (number) (math_op (number) (identifier)))))))"); - AssertThat(input->strings_read, Equals(vector({ "123 || 5 +" }))); + AssertThat(input->strings_read(), Equals(vector({"123 || 5 ", ";"}))); }); }); @@ -289,19 +289,19 @@ describe("Parser", [&]() { set_text("var x = y;"); assert_root_node( - "(program (var_declaration (var_assignment " + "(program (variable_declaration (variable_declarator " "(identifier) (identifier))))"); insert_text(strlen("var x = y"), " *"); assert_root_node( - "(program (var_declaration (var_assignment " + "(program (variable_declaration (variable_declarator " "(identifier) (identifier)) (ERROR)))"); insert_text(strlen("var x = y *"), " z"); assert_root_node( - "(program (var_declaration (var_assignment " + "(program (variable_declaration (variable_declarator " "(identifier) (math_op (identifier) (identifier)))))"); }); });