From 97bcc86a3ac24fafb8c7f78483116e3fdb4336ba Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 19 Mar 2017 22:19:35 -0700 Subject: [PATCH 01/17] Use master version of all fixture grammars --- script/fetch-fixtures | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/fetch-fixtures b/script/fetch-fixtures index a829962f..7709b219 100755 --- a/script/fetch-fixtures +++ b/script/fetch-fixtures @@ -21,8 +21,8 @@ fetch_grammar() { ) } -fetch_grammar 'javascript' '76cd7dd5eb793db21640c725e58301bde83781f7' +fetch_grammar 'javascript' 'origin/master' fetch_grammar 'json' 'origin/master' fetch_grammar 'c' 'origin/master' fetch_grammar 'cpp' 'origin/master' -fetch_grammar 'python' '179cb35e5b35baeef4a37f00732ff2de15e2e8bd' +fetch_grammar 'python' 'origin/master' From 20b8983749c68e19c001091340ba79138cb40d71 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 19 Mar 2017 22:20:16 -0700 Subject: [PATCH 02/17] Handle external scanner w/ .c extension in test helper --- test/helpers/load_language.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/helpers/load_language.cc b/test/helpers/load_language.cc index d5e70db0..d057e130 100644 --- a/test/helpers/load_language.cc +++ b/test/helpers/load_language.cc @@ -149,7 +149,10 @@ const TSLanguage *load_real_language(const string &language_name) { string parser_filename = language_dir + "/src/parser.c"; string external_scanner_filename = language_dir + "/src/scanner.cc"; if (!file_exists(external_scanner_filename)) { - external_scanner_filename = ""; + external_scanner_filename = language_dir + "/src/scanner.c"; + if (!file_exists(external_scanner_filename)) { + external_scanner_filename = ""; + } } int grammar_mtime = get_modified_time(grammar_filename); From ed31e82ee60ba05c699f6fd5077bb543a0f73fe8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 19 Mar 2017 22:20:59 -0700 Subject: [PATCH 03/17] Skip empty tokens when recovering from errors --- src/runtime/parser.c 
| 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index af65c7ea..5b6866d1 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -231,8 +231,14 @@ static Tree *parser__lex(Parser *self, StackVersion version) { ts_lexer_start(&self->lexer); if (self->language->external_scanner.scan(self->external_scanner_payload, &self->lexer.data, valid_external_tokens)) { - found_external_token = true; - break; + if (length_has_unknown_chars(self->lexer.token_end_position)) { + self->lexer.token_end_position = self->lexer.current_position; + } + if (lex_mode.lex_state != 0 || + self->lexer.token_end_position.bytes > current_position.bytes) { + found_external_token = true; + break; + } } ts_lexer_reset(&self->lexer, current_position); } @@ -241,6 +247,9 @@ static Tree *parser__lex(Parser *self, StackVersion version) { current_position.extent.row, current_position.extent.column); ts_lexer_start(&self->lexer); if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) { + if (length_has_unknown_chars(self->lexer.token_end_position)) { + self->lexer.token_end_position = self->lexer.current_position; + } break; } @@ -286,9 +295,6 @@ static Tree *parser__lex(Parser *self, StackVersion version) { symbol = self->language->external_scanner.symbol_map[symbol]; } - if (length_has_unknown_chars(self->lexer.token_end_position)) { - self->lexer.token_end_position = self->lexer.current_position; - } Length padding = length_sub(self->lexer.token_start_position, start_position); Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position); TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol); From af553420bfb5d23ce8bdba263009983c9d2ff591 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 19 Mar 2017 22:21:12 -0700 Subject: [PATCH 04/17] Update JS error corpus --- .../error_corpus/javascript_errors.txt | 58 ++++++------------- 1 file changed, 
17 insertions(+), 41 deletions(-) diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index 39f54f9a..5d6a7eb0 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -14,10 +14,8 @@ e f; (ERROR (identifier)) (identifier) (statement_block - (ERROR (identifier)) - (expression_statement (identifier)))) - (ERROR (identifier)) - (expression_statement (identifier))) + (expression_statement (ERROR (identifier)) (identifier)))) + (expression_statement (ERROR (identifier)) (identifier))) ======================================================= multiple invalid tokens right after the viable prefix @@ -35,10 +33,11 @@ h i j k; (ERROR (identifier) (identifier)) (identifier) (statement_block - (ERROR (identifier) (identifier) (identifier)) - (expression_statement (identifier)))) + (expression_statement + (ERROR (identifier) (jsx_attribute (identifier)) (jsx_attribute (identifier))) + (identifier)))) (expression_statement - (ERROR (identifier) (identifier) (identifier)) + (ERROR (identifier) (jsx_attribute (identifier)) (jsx_attribute (identifier))) (identifier))) =================================================== @@ -53,8 +52,8 @@ if ({a: 'b'} {c: 'd'}) { (program (if_statement - (object (pair (identifier) (string))) (ERROR (object (pair (identifier) (string)))) + (object (pair (identifier) (string))) (statement_block (expression_statement (assignment (identifier) @@ -75,9 +74,7 @@ a.b = (program (comment) - (trailing_expression_statement - (member_access (identifier) (identifier))) - (ERROR)) + (ERROR (member_access (identifier) (identifier)))) ================================================================= An invalid token at the end of a construct with extra line breaks @@ -97,27 +94,6 @@ a( (identifier) (ERROR))))) -=================================================== -Multi-line chained expressions in var declarations 
-=================================================== - -const one = two - .three(four) - .five() - ---- - -(program - (var_declaration (var_assignment - (identifier) - (function_call - (member_access - (function_call - (member_access (identifier) (identifier)) - (arguments (identifier))) - (identifier)) - (arguments))))) - =================================================== Errors after a sequence of function declarations =================================================== @@ -146,12 +122,12 @@ var x = !!! (program (comment) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (expression_statement (function (identifier) (formal_parameters) (statement_block))) - (trailing_var_declaration (identifier)) (ERROR)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (function (identifier) (formal_parameters) (statement_block)) + (ERROR (identifier))) From f394a48c0b87fb05988480d1c526486651492949 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Mon, 20 Mar 2017 16:54:19 -0700 Subject: [PATCH 05/17] utf8proc_iterate can set 
codepoint_ref to -1 and returns negative error --- src/runtime/lexer.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 123a29fd..5646e101 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -36,11 +36,17 @@ static void ts_lexer__get_lookahead(Lexer *self) { const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk; uint32_t size = self->chunk_size - position_in_chunk + 1; - if (self->input.encoding == TSInputEncodingUTF8) - self->lookahead_size = - utf8proc_iterate(chunk, size, &self->data.lookahead); - else + if (self->input.encoding == TSInputEncodingUTF8) { + int64_t lookahead_size = utf8proc_iterate(chunk, size, &self->data.lookahead); + if (lookahead_size < 0) { + self->lookahead_size = 1; + } else { + self->lookahead_size = lookahead_size; + } + } + else { self->lookahead_size = utf16_iterate(chunk, size, &self->data.lookahead); + } } static void ts_lexer__advance(void *payload, bool skip) { From 7092d4522a8d8928d5540c1d33f1d7bcbc036a04 Mon Sep 17 00:00:00 2001 From: Timothy Clem Date: Tue, 21 Mar 2017 09:58:35 -0700 Subject: [PATCH 06/17] Test demonstrating non-UTF8 input failure --- test/runtime/parser_test.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index 7dfcf26b..7e409d45 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -473,6 +473,16 @@ describe("Parser", [&]() { AssertThat(ts_node_end_char(root), Equals(strlen("'OOO - DD';"))); AssertThat(ts_node_end_byte(root), Equals(strlen("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';"))); }); + + it("handles non-UTF8 characters", [&]() { + // ts_document_set_logger(document, stderr_logger_new(true)); + ts_document_print_debugging_graphs(document, true); + ts_document_set_language(document, load_real_language("javascript")); + ts_document_set_input_string(document, "cons\xeb\x00e=ls\x83l6hi');\x0a"); 
ts_document_parse(document); + + AssertThat(ts_node_end_byte(root), Equals(strlen("cons\xeb\x00e=ls\x83l6hi');\x0a"))); + }); }); }); From f032da198e127c8da97bcfbb9b360b3f65db83b4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 21 Mar 2017 11:05:32 -0700 Subject: [PATCH 07/17] Finish test for invalid UTF8 handling Signed-off-by: Tim Clem --- test/runtime/parser_test.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index 7e409d45..e390b164 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -475,13 +475,14 @@ describe("Parser", [&]() { }); it("handles non-UTF8 characters", [&]() { - // ts_document_set_logger(document, stderr_logger_new(true)); - ts_document_print_debugging_graphs(document, true); + const char *string = "cons\xeb\x00e=ls\x83l6hi');\x0a"; + ts_document_set_language(document, load_real_language("javascript")); - ts_document_set_input_string(document, "cons\xeb\x00e=ls\x83l6hi');\x0a"); + ts_document_set_input_string(document, string); ts_document_parse(document); - AssertThat(ts_node_end_byte(root), Equals(strlen("cons\xeb\x00e=ls\x83l6hi');\x0a"))); + TSNode root = ts_document_root_node(document); + AssertThat(ts_node_end_byte(root), Equals(strlen(string))); }); }); }); From 7e13eac296de2e3c0104b32b56a199f09eead4af Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 21 Mar 2017 11:05:48 -0700 Subject: [PATCH 08/17] Fix lookahead_char type in ts_tree_make_error function --- src/runtime/tree.c | 2 +- src/runtime/tree.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 49f81e9c..195b6260 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -99,7 +99,7 @@ TreeArray ts_tree_array_remove_trailing_extras(TreeArray *self) { return result; } -Tree *ts_tree_make_error(Length size, Length padding, char lookahead_char) { +Tree *ts_tree_make_error(Length size, Length 
padding, int32_t lookahead_char) { Tree *result = ts_tree_make_leaf(ts_builtin_sym_error, padding, size, (TSSymbolMetadata){ .visible = true, .named = true, diff --git a/src/runtime/tree.h b/src/runtime/tree.h index c08ba24b..f205af97 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -76,7 +76,7 @@ Tree *ts_tree_make_leaf(TSSymbol, Length, Length, TSSymbolMetadata); Tree *ts_tree_make_node(TSSymbol, uint32_t, Tree **, TSSymbolMetadata); Tree *ts_tree_make_copy(Tree *child); Tree *ts_tree_make_error_node(TreeArray *); -Tree *ts_tree_make_error(Length, Length, char); +Tree *ts_tree_make_error(Length, Length, int32_t); void ts_tree_retain(Tree *tree); void ts_tree_release(Tree *tree); bool ts_tree_eq(const Tree *tree1, const Tree *tree2); From 7e0ae4505a4a286bf85c85ade559973e38dac8fe Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 21 Mar 2017 11:12:08 -0700 Subject: [PATCH 09/17] Handle invalid UTF8 in encoding test helpers Signed-off-by: Tim Clem --- test/helpers/encoding_helpers.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test/helpers/encoding_helpers.cc b/test/helpers/encoding_helpers.cc index 8ef9fec1..1169bb2d 100644 --- a/test/helpers/encoding_helpers.cc +++ b/test/helpers/encoding_helpers.cc @@ -4,10 +4,16 @@ #include "utf8proc.h" static inline int string_iterate(TSInputEncoding encoding, const uint8_t *string, size_t length, int32_t *code_point) { - if (encoding == TSInputEncodingUTF8) - return utf8proc_iterate(string, length, code_point); - else + if (encoding == TSInputEncodingUTF8) { + int32_t character_size = utf8proc_iterate(string, length, code_point); + if (character_size < 0) { + return 1; + } else { + return character_size; + } + } else { return utf16_iterate(string, length, code_point); + } } size_t string_char_count(TSInputEncoding encoding, const std::string &input) { From ca943f09a45acc85a1c4804e8b9863032b442662 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 21 Mar 2017 11:41:01 
-0700 Subject: [PATCH 10/17] Update expected trees in error recovery test --- test/runtime/parser_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index e390b164..0a075aba 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -289,19 +289,19 @@ describe("Parser", [&]() { set_text("var x = y;"); assert_root_node( - "(program (var_declaration (var_assignment " + "(program (variable_declaration (variable_declarator " "(identifier) (identifier))))"); insert_text(strlen("var x = y"), " *"); assert_root_node( - "(program (var_declaration (var_assignment " + "(program (variable_declaration (variable_declarator " "(identifier) (identifier)) (ERROR)))"); insert_text(strlen("var x = y *"), " z"); assert_root_node( - "(program (var_declaration (var_assignment " + "(program (variable_declaration (variable_declarator " "(identifier) (math_op (identifier) (identifier)))))"); }); }); From a15e9741504d8c8cae3e2b73e3e07287e72e4362 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 21 Mar 2017 12:14:04 -0700 Subject: [PATCH 11/17] Make clearer assertions about SpyInput's read strings --- test/helpers/spy_input.cc | 30 +++++++++++++++++++++++++----- test/helpers/spy_input.h | 3 ++- test/runtime/document_test.cc | 4 ++-- test/runtime/parser_test.cc | 4 ++-- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/test/helpers/spy_input.cc b/test/helpers/spy_input.cc index 6c479c16..bdcb2709 100644 --- a/test/helpers/spy_input.cc +++ b/test/helpers/spy_input.cc @@ -6,6 +6,7 @@ using std::pair; using std::string; +using std::vector; static const size_t UTF8_MAX_CHAR_SIZE = 4; @@ -16,12 +17,25 @@ SpyInput::SpyInput(string content, size_t chars_per_chunk) : byte_offset(0), content(content), encoding(TSInputEncodingUTF8), - strings_read({""}) {} + ranges_read({}) {} SpyInput::~SpyInput() { delete[] buffer; } +static void add_byte_range(vector> *ranges, + uint32_t 
start, uint32_t count) { + uint32_t end = start + count; + for (auto &range : *ranges) { + if (range.first <= start && start <= range.second) { + if (start < range.first) range.first = start; + if (end > range.second) range.second = end; + return; + } + } + ranges->push_back({start, end}); +} + const char * SpyInput::read(void *payload, uint32_t *bytes_read) { auto spy = static_cast(payload); @@ -36,7 +50,7 @@ const char * SpyInput::read(void *payload, uint32_t *bytes_read) { string result = spy->content.substr(spy->byte_offset, byte_count); *bytes_read = byte_count; - spy->strings_read.back() += result; + add_byte_range(&spy->ranges_read, spy->byte_offset, byte_count); spy->byte_offset += byte_count; /* @@ -54,12 +68,18 @@ const char * SpyInput::read(void *payload, uint32_t *bytes_read) { int SpyInput::seek(void *payload, uint32_t character, uint32_t byte) { auto spy = static_cast(payload); - if (spy->strings_read.size() == 0 || spy->strings_read.back().size() > 0) - spy->strings_read.push_back(""); spy->byte_offset = byte; return 0; } +vector SpyInput::strings_read() const { + vector result; + for (auto &range : ranges_read) { + result.push_back(content.substr(range.first, range.second - range.first)); + } + return result; +} + TSInput SpyInput::input() { TSInput result; result.payload = this; @@ -129,5 +149,5 @@ pair SpyInput::swap_substr(size_t start_byte, size_t bytes_remo } void SpyInput::clear() { - strings_read.clear(); + ranges_read.clear(); } diff --git a/test/helpers/spy_input.h b/test/helpers/spy_input.h index a81213eb..9e0ee8d1 100644 --- a/test/helpers/spy_input.h +++ b/test/helpers/spy_input.h @@ -30,10 +30,11 @@ class SpyInput { void clear(); TSInputEdit replace(size_t start_char, size_t chars_removed, std::string text); TSInputEdit undo(); + std::vector strings_read() const; std::string content; TSInputEncoding encoding; - std::vector strings_read; + std::vector> ranges_read; }; #endif // HELPERS_SPY_INPUT_H_ diff --git 
a/test/runtime/document_test.cc b/test/runtime/document_test.cc index 71d7b8c7..d732fbc8 100644 --- a/test/runtime/document_test.cc +++ b/test/runtime/document_test.cc @@ -94,7 +94,7 @@ describe("Document", [&]() { ts_document_set_input(document, spy_input->input()); AssertThat(ts_document_root_node(document), Equals(root)); AssertThat(ts_node_has_changes(root), IsFalse()); - AssertThat(spy_input->strings_read, Equals(vector({ "" }))); + AssertThat(spy_input->strings_read(), IsEmpty()); }); it("reads text from the new input for future parses", [&]() { @@ -113,7 +113,7 @@ describe("Document", [&]() { assert_node_string_equals( new_root, "(object (pair (string) (array (null) (number))))"); - AssertThat(spy_input->strings_read, Equals(vector({" [null, 2" }))); + AssertThat(spy_input->strings_read(), Equals(vector({" [null, 2" }))); }); it("allows setting input string with length", [&]() { diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index 0a075aba..d9aee54a 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -254,7 +254,7 @@ describe("Parser", [&]() { "(identifier) " "(math_op (number) (member_access (identifier) (identifier))))))"); - AssertThat(input->strings_read, Equals(vector({ " + abc.d)" }))); + AssertThat(input->strings_read(), Equals(vector({ " abc.d);" }))); }); }); @@ -279,7 +279,7 @@ describe("Parser", [&]() { "(number) " "(math_op (number) (math_op (number) (identifier)))))))"); - AssertThat(input->strings_read, Equals(vector({ "123 || 5 +" }))); + AssertThat(input->strings_read(), Equals(vector({"123 || 5 ", ";"}))); }); }); From 1f908324dc60417904369cf7f690a2b30d488fdc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 21 Mar 2017 12:14:44 -0700 Subject: [PATCH 12/17] Prevent infinite loop in skip_preceding_trees error recovery strategy --- src/runtime/parser.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 5b6866d1..22c1639d 100644 --- 
a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -904,6 +904,11 @@ static StackIterateAction parser__skip_preceding_trees_callback( void *payload, TSStateId state, TreeArray *trees, uint32_t tree_count, bool is_done, bool is_pending) { if (tree_count > 0 && state != ERROR_STATE) { + uint32_t bytes_skipped = 0; + for (uint32_t i = 0; i < trees->size; i++) { + bytes_skipped += ts_tree_total_bytes(trees->contents[i]); + } + if (bytes_skipped == 0) return StackIterateNone; SkipPrecedingTreesSession *session = payload; Parser *self = session->parser; TSSymbol lookahead_symbol = session->lookahead_symbol; From c66fddd3aa7d7e8d1dd0286c0f77d467081d2636 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 15 Jun 2017 16:35:34 -0700 Subject: [PATCH 13/17] Add TSInput option to measure columns in bytes not characters --- include/tree_sitter/runtime.h | 1 + src/runtime/document.c | 9 +++++++-- src/runtime/lexer.c | 2 ++ src/runtime/string_input.c | 3 ++- test/helpers/spy_input.cc | 1 + test/runtime/document_test.cc | 15 +++++++++++++-- 6 files changed, 26 insertions(+), 5 deletions(-) diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index 95da0787..638bc5bd 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -31,6 +31,7 @@ typedef struct { const char *(*read)(void *payload, uint32_t *bytes_read); int (*seek)(void *payload, uint32_t character_index, uint32_t byte_index); TSInputEncoding encoding; + bool measure_columns_in_bytes; } TSInput; typedef enum { diff --git a/src/runtime/document.c b/src/runtime/document.c index 6bcc5fbc..64677cb4 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -26,8 +26,13 @@ void ts_document_free(TSDocument *self) { parser_destroy(&self->parser); if (self->tree) ts_tree_release(self->tree); - ts_document_set_input(self, - (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8 }); + ts_document_set_input(self, (TSInput){ + NULL, + NULL, + NULL, + TSInputEncodingUTF8, + false + 
}); ts_free(self); } diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 7e0ef51f..21ce2b96 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -60,6 +60,8 @@ static void ts_lexer__advance(void *payload, bool skip) { if (self->data.lookahead == '\n') { self->current_position.extent.row++; self->current_position.extent.column = 0; + } else if (self->input.measure_columns_in_bytes) { + self->current_position.extent.column += self->lookahead_size; } else { self->current_position.extent.column++; } diff --git a/src/runtime/string_input.c b/src/runtime/string_input.c index 3b951a10..6cbf5b2c 100644 --- a/src/runtime/string_input.c +++ b/src/runtime/string_input.c @@ -43,8 +43,9 @@ TSInput ts_string_input_make_with_length(const char *string, uint32_t length) { .read = ts_string_input_read, .seek = ts_string_input_seek, .encoding = TSInputEncodingUTF8, + .measure_columns_in_bytes = false, }; error: - return (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8 }; + return (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8, false }; } diff --git a/test/helpers/spy_input.cc b/test/helpers/spy_input.cc index bdcb2709..9edaf554 100644 --- a/test/helpers/spy_input.cc +++ b/test/helpers/spy_input.cc @@ -86,6 +86,7 @@ TSInput SpyInput::input() { result.encoding = encoding; result.seek = seek; result.read = read; + result.measure_columns_in_bytes = true; return result; } diff --git a/test/runtime/document_test.cc b/test/runtime/document_test.cc index df71ea02..6c321d75 100644 --- a/test/runtime/document_test.cc +++ b/test/runtime/document_test.cc @@ -76,11 +76,22 @@ describe("Document", [&]() { const char16_t content[] = u"[true, false]"; spy_input->content = string((const char *)content, sizeof(content)); spy_input->encoding = TSInputEncodingUTF16; - // spy_input->measure_columns_in_bytes + TSInput input = spy_input->input(); - ts_document_set_input(document, spy_input->input()); + input.measure_columns_in_bytes = false; + ts_document_set_input(document, input); 
ts_document_invalidate(document); ts_document_parse(document); + + TSNode root = ts_document_root_node(document); + AssertThat(ts_node_end_point(root), Equals({0, 13})); + + input.measure_columns_in_bytes = true; + ts_document_set_input(document, input); + ts_document_invalidate(document); + ts_document_parse(document); + root = ts_document_root_node(document); + AssertThat(ts_node_end_point(root), Equals({0, 26})); }); it("allows the input to be retrieved later", [&]() { From 599367d36d8ac345379049cfd84fcf415f5f51e4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 15 Jun 2017 17:06:48 -0700 Subject: [PATCH 14/17] Always recur into error nodes when reporting changed ranges --- src/runtime/tree_path.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runtime/tree_path.h b/src/runtime/tree_path.h index f64dd02f..b8f3aa8b 100644 --- a/src/runtime/tree_path.h +++ b/src/runtime/tree_path.h @@ -126,6 +126,7 @@ static bool tree_must_eq(Tree *old_tree, Tree *new_tree) { return old_tree == new_tree || ( !old_tree->has_changes && old_tree->symbol == new_tree->symbol && + old_tree->symbol != ts_builtin_sym_error && old_tree->size.bytes == new_tree->size.bytes && old_tree->parse_state != TS_TREE_STATE_NONE && new_tree->parse_state != TS_TREE_STATE_NONE && From fa81a764fbca647c47982df7b8f73b9ab8b61c3d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 15 Jun 2017 17:12:14 -0700 Subject: [PATCH 15/17] Cache test grammar directory on Travis --- .travis.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index 5a8e2be9..b37fa80d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,3 +16,7 @@ install: script: - script/ci + +cache: + directories: + - test/fixtures/grammars From 932feb2498f7e591516b03b96e7b1c3659105086 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 15 Jun 2017 17:13:01 -0700 Subject: [PATCH 16/17] Fix gcc warning in test helper file --- test/helpers/stream_methods.cc | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/test/helpers/stream_methods.cc b/test/helpers/stream_methods.cc index 91f9e87f..23a03d21 100644 --- a/test/helpers/stream_methods.cc +++ b/test/helpers/stream_methods.cc @@ -30,7 +30,7 @@ ostream &operator<<(ostream &stream, Associativity associativity) { return stream << "AssociativityLeft"; case AssociativityRight: return stream << "AssociativityRight"; - case AssociativityNone: + default: return stream << "AssociativityNone"; } } From e19393eff592333dec6c5549448a6dca64cce576 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 15 Jun 2017 17:25:21 -0700 Subject: [PATCH 17/17] :art: --- test/integration/real_grammars.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/integration/real_grammars.cc b/test/integration/real_grammars.cc index d20f119b..d89c97fd 100644 --- a/test/integration/real_grammars.cc +++ b/test/integration/real_grammars.cc @@ -120,8 +120,8 @@ for (auto &language_name : test_languages) { ts_document_parse(document); }); - std::set> deletions; - std::set> insertions; + set> deletions; + set> insertions; for (size_t i = 0; i < 60; i++) { size_t edit_position = random() % utf8_char_count(entry.input);