From 87c992a7f0b65f9374a10111a9abc5e2c9a1dcab Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 17 Jul 2018 13:58:26 -0700 Subject: [PATCH 1/6] Add lexer API for detecting boundaries of included ranges Co-Authored-By: Ashi Krishnan --- include/tree_sitter/parser.h | 13 ++++++++----- include/tree_sitter/runtime.h | 2 +- src/runtime/lexer.c | 19 ++++++++++++++----- src/runtime/parser.c | 4 ++-- test/helpers/point_helpers.cc | 26 ++++++++++++++++++++++++++ test/helpers/point_helpers.h | 4 ++++ test/helpers/spy_input.cc | 22 +++++----------------- test/runtime/parser_test.cc | 23 +++++++++++++++++++++++ test/runtime/tree_test.cc | 19 +++++-------------- 9 files changed, 88 insertions(+), 44 deletions(-) diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 1b6c2b13..a757eac0 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -25,13 +25,16 @@ typedef struct { bool named : 1; } TSSymbolMetadata; -typedef struct { - void (*advance)(void *, bool); - void (*mark_end)(void *); - uint32_t (*get_column)(void *); +typedef struct TSLexer TSLexer; + +struct TSLexer { int32_t lookahead; TSSymbol result_symbol; -} TSLexer; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(TSLexer *); +}; typedef enum { TSParseActionTypeShift, diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index b2ee39a3..95bd0e42 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -10,7 +10,7 @@ extern "C" { #include #include -#define TREE_SITTER_LANGUAGE_VERSION 8 +#define TREE_SITTER_LANGUAGE_VERSION 9 typedef uint16_t TSSymbol; typedef struct TSLanguage TSLanguage; diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index fcd1fd3f..5ecd5a84 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -50,7 +50,7 @@ static void ts_lexer__get_lookahead(Lexer *self) { } } -static void ts_lexer__advance(void *payload, bool skip) { +static void ts_lexer__advance(TSLexer *payload, bool skip) { Lexer *self = (Lexer *)payload; if (self->chunk == empty_chunk) return; @@ -95,7 +95,7 @@ static void ts_lexer__advance(void *payload, bool skip) { ts_lexer__get_lookahead(self); } -static void ts_lexer__mark_end(void *payload) { +static void ts_lexer__mark_end(TSLexer *payload) { Lexer *self = (Lexer *)payload; TSRange *current_included_range = &self->included_ranges[self->current_included_range_index]; if (self->current_included_range_index > 0 && @@ -110,7 +110,7 @@ static void ts_lexer__mark_end(void *payload) { } } -static uint32_t ts_lexer__get_column(void *payload) { +static uint32_t ts_lexer__get_column(TSLexer *payload) { Lexer *self = (Lexer *)payload; uint32_t goal_byte = self->current_position.bytes; @@ -123,13 +123,19 @@ static uint32_t ts_lexer__get_column(void *payload) { uint32_t result = 0; while (self->current_position.bytes < goal_byte) { - ts_lexer__advance(self, false); + ts_lexer__advance(payload, false); result++; } return result; } +static bool ts_lexer__is_at_included_range_start(TSLexer *payload) { + const Lexer *self = (const Lexer *)payload; + TSRange *current_range = &self->included_ranges[self->current_included_range_index]; + return self->current_position.bytes == current_range->start_byte; +} + // The lexer's methods are stored as a struct field so that generated // parsers can call them without needing to be linked against this library. @@ -139,6 +145,7 @@ void ts_lexer_init(Lexer *self) { .advance = ts_lexer__advance, .mark_end = ts_lexer__mark_end, .get_column = ts_lexer__get_column, + .is_at_included_range_start = ts_lexer__is_at_included_range_start, .lookahead = 0, .result_symbol = 0, }, @@ -227,7 +234,9 @@ void ts_lexer_start(Lexer *self) { } void ts_lexer_advance_to_end(Lexer *self) { - while (self->data.lookahead != 0) ts_lexer__advance(self, false); + while (self->data.lookahead != 0) { + ts_lexer__advance((TSLexer *)self, false); + } } static const TSRange DEFAULT_RANGES[] = { diff --git a/src/runtime/parser.c b/src/runtime/parser.c index cda33c58..52e7c96b 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -327,7 +327,7 @@ static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSSta valid_external_tokens )) { if (length_is_undefined(self->lexer.token_end_position)) { - self->lexer.token_end_position = self->lexer.current_position; + self->lexer.data.mark_end(&self->lexer.data); } if (!error_mode || self->lexer.token_end_position.bytes > current_position.bytes) { @@ -380,7 +380,7 @@ static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSSta self->lexer.data.result_symbol = ts_builtin_sym_error; break; } - self->lexer.data.advance(&self->lexer, false); + self->lexer.data.advance(&self->lexer.data, false); } error_end_position = self->lexer.current_position; diff --git a/test/helpers/point_helpers.cc b/test/helpers/point_helpers.cc index fd6d8bb1..42152b50 100644 --- a/test/helpers/point_helpers.cc +++ b/test/helpers/point_helpers.cc @@ -45,3 +45,29 @@ std::ostream &operator<<(std::ostream &stream, const TSRange &range) { ostream &operator<<(ostream &stream, const Length &length) { return stream << "{bytes:" << length.bytes << ", extent:" << length.extent << "}"; } + +TSPoint extent_for_string(const string &text, size_t end_index) { + if (end_index > text.size()) end_index = text.size(); + TSPoint result = {0, 0}; + for (size_t i = 0; i < end_index; i++) { + if (text[i] == '\n') { + result.row++; + result.column = 0; + } else { + result.column++; + } + } + return result; +} + +TSRange range_for_substring(const string &text, const string &substring) { + size_t start = text.find(substring); + assert(start != string::npos); + size_t end = start + substring.size(); + return TSRange { + extent_for_string(text, start), + extent_for_string(text, end), + static_cast(start), + static_cast(end), + }; +}; diff --git a/test/helpers/point_helpers.h b/test/helpers/point_helpers.h index 3e954a7a..58558663 100644 --- a/test/helpers/point_helpers.h +++ b/test/helpers/point_helpers.h @@ -20,4 +20,8 @@ std::ostream &operator<<(std::ostream &stream, const TSRange &range); std::ostream &operator<<(std::ostream &stream, const Length &length); +TSPoint extent_for_string(const std::string &text, size_t end_index = std::string::npos); + +TSRange range_for_substring(const std::string &text, const std::string &substring); + #endif // HELPERS_POINT_HELPERS_H_ diff --git a/test/helpers/spy_input.cc b/test/helpers/spy_input.cc index 1b89e270..34c5d997 100644 --- a/test/helpers/spy_input.cc +++ b/test/helpers/spy_input.cc @@ -1,5 +1,6 @@ #include "helpers/spy_input.h" #include "helpers/encoding_helpers.h" +#include "helpers/point_helpers.h" #include "runtime/point.h" #include #include @@ -95,19 +96,6 @@ TSInput SpyInput::input() { return result; } -static TSPoint get_extent(string text) { - TSPoint result = {0, 0}; - for (auto i = text.begin(); i != text.end(); i++) { - if (*i == '\n') { - result.row++; - result.column = 0; - } else { - result.column++; - } - } - return result; -} - TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string text) { auto swap = swap_substr(start_byte, bytes_removed, text); size_t bytes_added = text.size(); @@ -117,8 +105,8 @@ TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string te result.old_end_byte = start_byte + bytes_removed; result.new_end_byte = start_byte + bytes_added; result.start_point = swap.second; - result.old_end_point = result.start_point + get_extent(swap.first); - result.new_end_point = result.start_point + get_extent(text); + result.old_end_point = result.start_point + extent_for_string(swap.first); + result.new_end_point = result.start_point + extent_for_string(text); return result; } @@ -131,8 +119,8 @@ TSInputEdit SpyInput::undo() { result.old_end_byte = entry.start_byte + entry.bytes_removed; result.new_end_byte = entry.start_byte + entry.text_inserted.size(); result.start_point = swap.second; - result.old_end_point = result.start_point + get_extent(swap.first); - result.new_end_point = result.start_point + get_extent(entry.text_inserted); + result.old_end_point = result.start_point + extent_for_string(swap.first); + result.new_end_point = result.start_point + extent_for_string(entry.text_inserted); return result; } diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index 1367849e..5bd3fb2d 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -918,6 +918,29 @@ describe("Parser", [&]() { assert_root_node("(program (ERROR (identifier)))"); }); + + it("allows external scanners to detect the boundaries of included ranges", [&]() { + string source_code = "a <%= b() %> c <% d() %>"; + + TSRange included_ranges[] = { + range_for_substring(source_code, "b()"), + range_for_substring(source_code, "d()"), + }; + + ts_parser_set_included_ranges(parser, included_ranges, 2); + ts_parser_set_language(parser, load_real_language("javascript")); + tree = ts_parser_parse_string(parser, nullptr, source_code.c_str(), source_code.size()); + + assert_root_node("(program " + "(expression_statement (call_expression (identifier) (arguments))) " + "(expression_statement (call_expression (identifier) (arguments))))"); + + TSNode statement_node1 = ts_node_child(ts_tree_root_node(tree), 0); + TSNode statement_node2 = ts_node_child(ts_tree_root_node(tree), 1); + + AssertThat(ts_node_end_point(statement_node1), Equals(extent_for_string("a <%= b()"))); + AssertThat(ts_node_end_point(statement_node2), Equals(extent_for_string("a <%= b() %> c <% d()"))); + }); }); }); diff --git a/test/runtime/tree_test.cc b/test/runtime/tree_test.cc index d703cd60..0993944a 100644 --- a/test/runtime/tree_test.cc +++ b/test/runtime/tree_test.cc @@ -131,22 +131,13 @@ describe("Tree", [&]() { return result; }; - auto range_for_text = [&](string start_text, string end_text) { - return TSRange { - point(0, input->content.find(start_text)), - point(0, input->content.find(end_text)), - static_cast(input->content.find(start_text)), - static_cast(input->content.find(end_text)), - }; - }; - it("reports changes when one token has been updated", [&]() { // Replace `null` with `nothing` auto ranges = get_changed_ranges_for_edit([&]() { return input->replace(input->content.find("ull"), 1, "othing"); }); AssertThat(ranges, Equals(vector({ - range_for_text("nothing", "}"), + range_for_substring(input->content, "nothing"), }))); // Replace `nothing` with `null` again @@ -154,7 +145,7 @@ describe("Tree", [&]() { return input->undo(); }); AssertThat(ranges, Equals(vector({ - range_for_text("null", "}"), + range_for_substring(input->content, "null"), }))); }); @@ -195,7 +186,7 @@ describe("Tree", [&]() { return input->replace(input->content.find("}"), 0, ", b: false"); }); AssertThat(ranges, Equals(vector({ - range_for_text(",", "}"), + range_for_substring(input->content, ", b: false"), }))); // Add a third key-value pair in between the first two @@ -209,7 +200,7 @@ describe("Tree", [&]() { "(pair (property_identifier) (false)))))" ); AssertThat(ranges, Equals(vector({ - range_for_text(", c", ", b"), + range_for_substring(input->content, ", c: 1, b: false"), }))); // Delete the middle pair. @@ -244,7 +235,7 @@ describe("Tree", [&]() { "(pair (property_identifier) (binary_expression (identifier) (null))))))" ); AssertThat(ranges, Equals(vector({ - range_for_text("b ===", "}"), + range_for_substring(input->content, "b === null"), }))); }); }); From d8a420cad1c05a3d403c8045871232a2d9e9601a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 17 Jul 2018 14:04:37 -0700 Subject: [PATCH 2/6] Use included range branch of javascript parser for testing --- script/fetch-fixtures | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/fetch-fixtures b/script/fetch-fixtures index 89b9c5fa..fae41b4e 100755 --- a/script/fetch-fixtures +++ b/script/fetch-fixtures @@ -21,7 +21,7 @@ fetch_grammar() { ) } -fetch_grammar javascript master +fetch_grammar javascript included-range-boundaries fetch_grammar json master fetch_grammar c master fetch_grammar cpp master From 8b772df3fff2707adb3e34093b5585f569a08ed5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 17 Jul 2018 14:13:08 -0700 Subject: [PATCH 3/6] Add missing #include in point_helpers Co-Authored-By: Ashi Krishnan --- test/helpers/point_helpers.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/test/helpers/point_helpers.cc b/test/helpers/point_helpers.cc index 42152b50..eabd0a82 100644 --- a/test/helpers/point_helpers.cc +++ b/test/helpers/point_helpers.cc @@ -1,6 +1,7 @@ #include "./point_helpers.h" #include #include +#include #include "runtime/length.h" #include "tree_sitter/runtime.h" From 999ef4fd127a9631c70fa1b14bd4a9e051848349 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 17 Jul 2018 15:18:46 -0700 Subject: [PATCH 4/6] Fix get_changed_ranges tests Co-Authored-By: Ashi Krishnan --- test/runtime/tree_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/runtime/tree_test.cc b/test/runtime/tree_test.cc index 0993944a..c60f2af6 100644 --- a/test/runtime/tree_test.cc +++ b/test/runtime/tree_test.cc @@ -134,7 +134,7 @@ describe("Tree", [&]() { it("reports changes when one token has been updated", [&]() { // Replace `null` with `nothing` auto ranges = get_changed_ranges_for_edit([&]() { - return input->replace(input->content.find("ull"), 1, "othing"); + return input->replace(input->content.find("ull"), 3, "othing"); }); AssertThat(ranges, Equals(vector({ range_for_substring(input->content, "nothing"), @@ -200,7 +200,7 @@ describe("Tree", [&]() { "(pair (property_identifier) (false)))))" ); AssertThat(ranges, Equals(vector({ - range_for_substring(input->content, ", c: 1, b: false"), + range_for_substring(input->content, ", c: 1"), }))); // Delete the middle pair. From 483881ec6a063065f687818e555c1a1dc78eba6c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 17 Jul 2018 15:26:29 -0700 Subject: [PATCH 5/6] Use included range branch of javascript parser for testing on windows CI Co-Authored-By: Ashi Krishnan --- script/fetch-fixtures.cmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/fetch-fixtures.cmd b/script/fetch-fixtures.cmd index 187aba6d..86c65e81 100644 --- a/script/fetch-fixtures.cmd +++ b/script/fetch-fixtures.cmd @@ -1,6 +1,6 @@ @echo off -call:fetch_grammar javascript master +call:fetch_grammar javascript included-range-boundaries call:fetch_grammar json master call:fetch_grammar c master call:fetch_grammar cpp master From 9ecb20650bb8106d15fe85a92cb982d8aeca84da Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 17 Jul 2018 15:46:14 -0700 Subject: [PATCH 6/6] Fix logic for checking out branches in fixture repos --- script/fetch-fixtures | 4 ++-- script/fetch-fixtures.cmd | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/script/fetch-fixtures b/script/fetch-fixtures index fae41b4e..c8306185 100755 --- a/script/fetch-fixtures +++ b/script/fetch-fixtures @@ -15,9 +15,9 @@ fetch_grammar() { fi ( - cd $grammar_dir; + cd $grammar_dir git fetch origin $ref --depth=1 - git reset --hard origin/$ref; + git reset --hard FETCH_HEAD ) } diff --git a/script/fetch-fixtures.cmd b/script/fetch-fixtures.cmd index 86c65e81..73e0a535 100644 --- a/script/fetch-fixtures.cmd +++ b/script/fetch-fixtures.cmd @@ -22,6 +22,6 @@ SET grammar_branch=%~2 ) pushd %grammar_dir% git fetch origin %2 --depth=1 -git reset --hard origin/%grammar_branch% +git reset --hard FETCH_HEAD popd EXIT /B 0