diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index 1b6c2b13..a757eac0 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -25,13 +25,16 @@ typedef struct { bool named : 1; } TSSymbolMetadata; -typedef struct { - void (*advance)(void *, bool); - void (*mark_end)(void *); - uint32_t (*get_column)(void *); +typedef struct TSLexer TSLexer; + +struct TSLexer { int32_t lookahead; TSSymbol result_symbol; -} TSLexer; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(TSLexer *); +}; typedef enum { TSParseActionTypeShift, diff --git a/include/tree_sitter/runtime.h b/include/tree_sitter/runtime.h index b2ee39a3..95bd0e42 100644 --- a/include/tree_sitter/runtime.h +++ b/include/tree_sitter/runtime.h @@ -10,7 +10,7 @@ extern "C" { #include #include -#define TREE_SITTER_LANGUAGE_VERSION 8 +#define TREE_SITTER_LANGUAGE_VERSION 9 typedef uint16_t TSSymbol; typedef struct TSLanguage TSLanguage; diff --git a/script/fetch-fixtures b/script/fetch-fixtures index 89b9c5fa..c8306185 100755 --- a/script/fetch-fixtures +++ b/script/fetch-fixtures @@ -15,13 +15,13 @@ fetch_grammar() { fi ( - cd $grammar_dir; + cd $grammar_dir git fetch origin $ref --depth=1 - git reset --hard origin/$ref; + git reset --hard FETCH_HEAD ) } -fetch_grammar javascript master +fetch_grammar javascript included-range-boundaries fetch_grammar json master fetch_grammar c master fetch_grammar cpp master diff --git a/script/fetch-fixtures.cmd b/script/fetch-fixtures.cmd index 187aba6d..73e0a535 100644 --- a/script/fetch-fixtures.cmd +++ b/script/fetch-fixtures.cmd @@ -1,6 +1,6 @@ @echo off -call:fetch_grammar javascript master +call:fetch_grammar javascript included-range-boundaries call:fetch_grammar json master call:fetch_grammar c master call:fetch_grammar cpp master @@ -22,6 +22,6 @@ SET grammar_branch=%~2 ) pushd %grammar_dir% git fetch origin %2 --depth=1 -git reset --hard origin/%grammar_branch% +git reset --hard FETCH_HEAD popd EXIT /B 0 diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index fcd1fd3f..5ecd5a84 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -50,7 +50,7 @@ static void ts_lexer__get_lookahead(Lexer *self) { } } -static void ts_lexer__advance(void *payload, bool skip) { +static void ts_lexer__advance(TSLexer *payload, bool skip) { Lexer *self = (Lexer *)payload; if (self->chunk == empty_chunk) return; @@ -95,7 +95,7 @@ static void ts_lexer__advance(void *payload, bool skip) { ts_lexer__get_lookahead(self); } -static void ts_lexer__mark_end(void *payload) { +static void ts_lexer__mark_end(TSLexer *payload) { Lexer *self = (Lexer *)payload; TSRange *current_included_range = &self->included_ranges[self->current_included_range_index]; if (self->current_included_range_index > 0 && @@ -110,7 +110,7 @@ static void ts_lexer__mark_end(void *payload) { } } -static uint32_t ts_lexer__get_column(void *payload) { +static uint32_t ts_lexer__get_column(TSLexer *payload) { Lexer *self = (Lexer *)payload; uint32_t goal_byte = self->current_position.bytes; @@ -123,13 +123,19 @@ static uint32_t ts_lexer__get_column(void *payload) { uint32_t result = 0; while (self->current_position.bytes < goal_byte) { - ts_lexer__advance(self, false); + ts_lexer__advance(payload, false); result++; } return result; } +static bool ts_lexer__is_at_included_range_start(TSLexer *payload) { + const Lexer *self = (const Lexer *)payload; + TSRange *current_range = &self->included_ranges[self->current_included_range_index]; + return self->current_position.bytes == current_range->start_byte; +} + // The lexer's methods are stored as a struct field so that generated // parsers can call them without needing to be linked against this library. @@ -139,6 +145,7 @@ void ts_lexer_init(Lexer *self) { .advance = ts_lexer__advance, .mark_end = ts_lexer__mark_end, .get_column = ts_lexer__get_column, + .is_at_included_range_start = ts_lexer__is_at_included_range_start, .lookahead = 0, .result_symbol = 0, }, @@ -227,7 +234,9 @@ void ts_lexer_start(Lexer *self) { } void ts_lexer_advance_to_end(Lexer *self) { - while (self->data.lookahead != 0) ts_lexer__advance(self, false); + while (self->data.lookahead != 0) { + ts_lexer__advance((TSLexer *)self, false); + } } static const TSRange DEFAULT_RANGES[] = { diff --git a/src/runtime/parser.c b/src/runtime/parser.c index cda33c58..52e7c96b 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -327,7 +327,7 @@ static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSSta valid_external_tokens )) { if (length_is_undefined(self->lexer.token_end_position)) { - self->lexer.token_end_position = self->lexer.current_position; + self->lexer.data.mark_end(&self->lexer.data); } if (!error_mode || self->lexer.token_end_position.bytes > current_position.bytes) { @@ -380,7 +380,7 @@ static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSSta self->lexer.data.result_symbol = ts_builtin_sym_error; break; } - self->lexer.data.advance(&self->lexer, false); + self->lexer.data.advance(&self->lexer.data, false); } error_end_position = self->lexer.current_position; diff --git a/test/helpers/point_helpers.cc b/test/helpers/point_helpers.cc index fd6d8bb1..eabd0a82 100644 --- a/test/helpers/point_helpers.cc +++ b/test/helpers/point_helpers.cc @@ -1,6 +1,7 @@ #include "./point_helpers.h" #include #include +#include #include "runtime/length.h" #include "tree_sitter/runtime.h" @@ -45,3 +46,29 @@ std::ostream &operator<<(std::ostream &stream, const TSRange &range) { ostream &operator<<(ostream &stream, const Length &length) { return stream << "{bytes:" << length.bytes << ", extent:" << length.extent << "}"; } + +TSPoint extent_for_string(const string &text, size_t end_index) { + if (end_index > text.size()) end_index = text.size(); + TSPoint result = {0, 0}; + for (size_t i = 0; i < end_index; i++) { + if (text[i] == '\n') { + result.row++; + result.column = 0; + } else { + result.column++; + } + } + return result; +} + +TSRange range_for_substring(const string &text, const string &substring) { + size_t start = text.find(substring); + assert(start != string::npos); + size_t end = start + substring.size(); + return TSRange { + extent_for_string(text, start), + extent_for_string(text, end), + static_cast(start), + static_cast(end), + }; +}; diff --git a/test/helpers/point_helpers.h b/test/helpers/point_helpers.h index 3e954a7a..58558663 100644 --- a/test/helpers/point_helpers.h +++ b/test/helpers/point_helpers.h @@ -20,4 +20,8 @@ std::ostream &operator<<(std::ostream &stream, const TSRange &range); std::ostream &operator<<(std::ostream &stream, const Length &length); +TSPoint extent_for_string(const std::string &text, size_t end_index = std::string::npos); + +TSRange range_for_substring(const std::string &text, const std::string &substring); + #endif // HELPERS_POINT_HELPERS_H_ diff --git a/test/helpers/spy_input.cc b/test/helpers/spy_input.cc index 1b89e270..34c5d997 100644 --- a/test/helpers/spy_input.cc +++ b/test/helpers/spy_input.cc @@ -1,5 +1,6 @@ #include "helpers/spy_input.h" #include "helpers/encoding_helpers.h" +#include "helpers/point_helpers.h" #include "runtime/point.h" #include #include @@ -95,19 +96,6 @@ TSInput SpyInput::input() { return result; } -static TSPoint get_extent(string text) { - TSPoint result = {0, 0}; - for (auto i = text.begin(); i != text.end(); i++) { - if (*i == '\n') { - result.row++; - result.column = 0; - } else { - result.column++; - } - } - return result; -} - TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string text) { auto swap = swap_substr(start_byte, bytes_removed, text); size_t bytes_added = text.size(); @@ -117,8 +105,8 @@ TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string te result.old_end_byte = start_byte + bytes_removed; result.new_end_byte = start_byte + bytes_added; result.start_point = swap.second; - result.old_end_point = result.start_point + get_extent(swap.first); - result.new_end_point = result.start_point + get_extent(text); + result.old_end_point = result.start_point + extent_for_string(swap.first); + result.new_end_point = result.start_point + extent_for_string(text); return result; } @@ -131,8 +119,8 @@ TSInputEdit SpyInput::undo() { result.old_end_byte = entry.start_byte + entry.bytes_removed; result.new_end_byte = entry.start_byte + entry.text_inserted.size(); result.start_point = swap.second; - result.old_end_point = result.start_point + get_extent(swap.first); - result.new_end_point = result.start_point + get_extent(entry.text_inserted); + result.old_end_point = result.start_point + extent_for_string(swap.first); + result.new_end_point = result.start_point + extent_for_string(entry.text_inserted); return result; } diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index 1367849e..5bd3fb2d 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -918,6 +918,29 @@ describe("Parser", [&]() { assert_root_node("(program (ERROR (identifier)))"); }); + + it("allows external scanners to detect the boundaries of included ranges", [&]() { + string source_code = "a <%= b() %> c <% d() %>"; + + TSRange included_ranges[] = { + range_for_substring(source_code, "b()"), + range_for_substring(source_code, "d()"), + }; + + ts_parser_set_included_ranges(parser, included_ranges, 2); + ts_parser_set_language(parser, load_real_language("javascript")); + tree = ts_parser_parse_string(parser, nullptr, source_code.c_str(), source_code.size()); + + assert_root_node("(program " + "(expression_statement (call_expression (identifier) (arguments))) " + "(expression_statement (call_expression (identifier) (arguments))))"); + + TSNode statement_node1 = ts_node_child(ts_tree_root_node(tree), 0); + TSNode statement_node2 = ts_node_child(ts_tree_root_node(tree), 1); + + AssertThat(ts_node_end_point(statement_node1), Equals(extent_for_string("a <%= b()"))); + AssertThat(ts_node_end_point(statement_node2), Equals(extent_for_string("a <%= b() %> c <% d()"))); + }); }); }); diff --git a/test/runtime/tree_test.cc b/test/runtime/tree_test.cc index d703cd60..c60f2af6 100644 --- a/test/runtime/tree_test.cc +++ b/test/runtime/tree_test.cc @@ -131,22 +131,13 @@ describe("Tree", [&]() { return result; }; - auto range_for_text = [&](string start_text, string end_text) { - return TSRange { - point(0, input->content.find(start_text)), - point(0, input->content.find(end_text)), - static_cast(input->content.find(start_text)), - static_cast(input->content.find(end_text)), - }; - }; - it("reports changes when one token has been updated", [&]() { // Replace `null` with `nothing` auto ranges = get_changed_ranges_for_edit([&]() { - return input->replace(input->content.find("ull"), 1, "othing"); + return input->replace(input->content.find("ull"), 3, "othing"); }); AssertThat(ranges, Equals(vector({ - range_for_text("nothing", "}"), + range_for_substring(input->content, "nothing"), }))); // Replace `nothing` with `null` again @@ -154,7 +145,7 @@ describe("Tree", [&]() { return input->undo(); }); AssertThat(ranges, Equals(vector({ - range_for_text("null", "}"), + range_for_substring(input->content, "null"), }))); }); @@ -195,7 +186,7 @@ describe("Tree", [&]() { return input->replace(input->content.find("}"), 0, ", b: false"); }); AssertThat(ranges, Equals(vector({ - range_for_text(",", "}"), + range_for_substring(input->content, ", b: false"), }))); // Add a third key-value pair in between the first two @@ -209,7 +200,7 @@ describe("Tree", [&]() { "(pair (property_identifier) (false)))))" ); AssertThat(ranges, Equals(vector({ - range_for_text(", c", ", b"), + range_for_substring(input->content, ", c: 1"), }))); // Delete the middle pair. @@ -244,7 +235,7 @@ describe("Tree", [&]() { "(pair (property_identifier) (binary_expression (identifier) (null))))))" ); AssertThat(ranges, Equals(vector({ - range_for_text("b ===", "}"), + range_for_substring(input->content, "b === null"), }))); }); });