Merge pull request #183 from tree-sitter/detect-included-range-boundaries
Add lexer API for detecting boundaries of included ranges
This commit is contained in:
commit
16376c43f5
11 changed files with 95 additions and 50 deletions
|
|
@ -25,13 +25,16 @@ typedef struct {
|
|||
bool named : 1;
|
||||
} TSSymbolMetadata;
|
||||
|
||||
typedef struct {
|
||||
void (*advance)(void *, bool);
|
||||
void (*mark_end)(void *);
|
||||
uint32_t (*get_column)(void *);
|
||||
typedef struct TSLexer TSLexer;
|
||||
|
||||
struct TSLexer {
|
||||
int32_t lookahead;
|
||||
TSSymbol result_symbol;
|
||||
} TSLexer;
|
||||
void (*advance)(TSLexer *, bool);
|
||||
void (*mark_end)(TSLexer *);
|
||||
uint32_t (*get_column)(TSLexer *);
|
||||
bool (*is_at_included_range_start)(TSLexer *);
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
TSParseActionTypeShift,
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ extern "C" {
|
|||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#define TREE_SITTER_LANGUAGE_VERSION 8
|
||||
#define TREE_SITTER_LANGUAGE_VERSION 9
|
||||
|
||||
typedef uint16_t TSSymbol;
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
|
|
|
|||
|
|
@ -15,13 +15,13 @@ fetch_grammar() {
|
|||
fi
|
||||
|
||||
(
|
||||
cd $grammar_dir;
|
||||
cd $grammar_dir
|
||||
git fetch origin $ref --depth=1
|
||||
git reset --hard origin/$ref;
|
||||
git reset --hard FETCH_HEAD
|
||||
)
|
||||
}
|
||||
|
||||
fetch_grammar javascript master
|
||||
fetch_grammar javascript included-range-boundaries
|
||||
fetch_grammar json master
|
||||
fetch_grammar c master
|
||||
fetch_grammar cpp master
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
@echo off
|
||||
|
||||
call:fetch_grammar javascript master
|
||||
call:fetch_grammar javascript included-range-boundaries
|
||||
call:fetch_grammar json master
|
||||
call:fetch_grammar c master
|
||||
call:fetch_grammar cpp master
|
||||
|
|
@ -22,6 +22,6 @@ SET grammar_branch=%~2
|
|||
)
|
||||
pushd %grammar_dir%
|
||||
git fetch origin %2 --depth=1
|
||||
git reset --hard origin/%grammar_branch%
|
||||
git reset --hard FETCH_HEAD
|
||||
popd
|
||||
EXIT /B 0
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ static void ts_lexer__get_lookahead(Lexer *self) {
|
|||
}
|
||||
}
|
||||
|
||||
static void ts_lexer__advance(void *payload, bool skip) {
|
||||
static void ts_lexer__advance(TSLexer *payload, bool skip) {
|
||||
Lexer *self = (Lexer *)payload;
|
||||
if (self->chunk == empty_chunk)
|
||||
return;
|
||||
|
|
@ -95,7 +95,7 @@ static void ts_lexer__advance(void *payload, bool skip) {
|
|||
ts_lexer__get_lookahead(self);
|
||||
}
|
||||
|
||||
static void ts_lexer__mark_end(void *payload) {
|
||||
static void ts_lexer__mark_end(TSLexer *payload) {
|
||||
Lexer *self = (Lexer *)payload;
|
||||
TSRange *current_included_range = &self->included_ranges[self->current_included_range_index];
|
||||
if (self->current_included_range_index > 0 &&
|
||||
|
|
@ -110,7 +110,7 @@ static void ts_lexer__mark_end(void *payload) {
|
|||
}
|
||||
}
|
||||
|
||||
static uint32_t ts_lexer__get_column(void *payload) {
|
||||
static uint32_t ts_lexer__get_column(TSLexer *payload) {
|
||||
Lexer *self = (Lexer *)payload;
|
||||
uint32_t goal_byte = self->current_position.bytes;
|
||||
|
||||
|
|
@ -123,13 +123,19 @@ static uint32_t ts_lexer__get_column(void *payload) {
|
|||
|
||||
uint32_t result = 0;
|
||||
while (self->current_position.bytes < goal_byte) {
|
||||
ts_lexer__advance(self, false);
|
||||
ts_lexer__advance(payload, false);
|
||||
result++;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Reports whether the lexer's current byte offset equals the starting byte
// of the included range selected by `current_included_range_index`.
static bool ts_lexer__is_at_included_range_start(TSLexer *payload) {
  // The callback receives the `TSLexer` handle; cast it to the `Lexer`
  // that holds the position and included-range state.
  const Lexer *lexer = (const Lexer *)payload;
  const TSRange *active_range = lexer->included_ranges + lexer->current_included_range_index;
  return active_range->start_byte == lexer->current_position.bytes;
}
|
||||
|
||||
// The lexer's methods are stored as a struct field so that generated
|
||||
// parsers can call them without needing to be linked against this library.
|
||||
|
||||
|
|
@ -139,6 +145,7 @@ void ts_lexer_init(Lexer *self) {
|
|||
.advance = ts_lexer__advance,
|
||||
.mark_end = ts_lexer__mark_end,
|
||||
.get_column = ts_lexer__get_column,
|
||||
.is_at_included_range_start = ts_lexer__is_at_included_range_start,
|
||||
.lookahead = 0,
|
||||
.result_symbol = 0,
|
||||
},
|
||||
|
|
@ -227,7 +234,9 @@ void ts_lexer_start(Lexer *self) {
|
|||
}
|
||||
|
||||
void ts_lexer_advance_to_end(Lexer *self) {
|
||||
while (self->data.lookahead != 0) ts_lexer__advance(self, false);
|
||||
while (self->data.lookahead != 0) {
|
||||
ts_lexer__advance((TSLexer *)self, false);
|
||||
}
|
||||
}
|
||||
|
||||
static const TSRange DEFAULT_RANGES[] = {
|
||||
|
|
|
|||
|
|
@ -327,7 +327,7 @@ static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSSta
|
|||
valid_external_tokens
|
||||
)) {
|
||||
if (length_is_undefined(self->lexer.token_end_position)) {
|
||||
self->lexer.token_end_position = self->lexer.current_position;
|
||||
self->lexer.data.mark_end(&self->lexer.data);
|
||||
}
|
||||
|
||||
if (!error_mode || self->lexer.token_end_position.bytes > current_position.bytes) {
|
||||
|
|
@ -380,7 +380,7 @@ static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSSta
|
|||
self->lexer.data.result_symbol = ts_builtin_sym_error;
|
||||
break;
|
||||
}
|
||||
self->lexer.data.advance(&self->lexer, false);
|
||||
self->lexer.data.advance(&self->lexer.data, false);
|
||||
}
|
||||
|
||||
error_end_position = self->lexer.current_position;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "./point_helpers.h"
|
||||
#include <string>
|
||||
#include <ostream>
|
||||
#include <cassert>
|
||||
#include "runtime/length.h"
|
||||
#include "tree_sitter/runtime.h"
|
||||
|
||||
|
|
@ -45,3 +46,29 @@ std::ostream &operator<<(std::ostream &stream, const TSRange &range) {
|
|||
ostream &operator<<(ostream &stream, const Length &length) {
|
||||
return stream << "{bytes:" << length.bytes << ", extent:" << length.extent << "}";
|
||||
}
|
||||
|
||||
// Computes the {row, column} point reached after walking the first
// `end_index` bytes of `text`. Each '\n' moves to the next row and resets the
// column to zero; every other byte advances the column by one. An `end_index`
// past the end of the string is clamped to the string's length.
TSPoint extent_for_string(const string &text, size_t end_index) {
  const size_t limit = end_index < text.size() ? end_index : text.size();
  TSPoint extent = {0, 0};
  const char *cursor = text.data();
  const char *const stop = cursor + limit;
  while (cursor != stop) {
    if (*cursor != '\n') {
      extent.column++;
    } else {
      extent.row++;
      extent.column = 0;
    }
    ++cursor;
  }
  return extent;
}
|
||||
|
||||
// Builds the TSRange that covers the first occurrence of `substring` within
// `text`.
//
// The range's two points are computed with extent_for_string() at the start
// and end offsets of the match, and its two byte fields are those same
// offsets narrowed to uint32_t.
//
// Precondition: `substring` must occur in `text`; this is enforced with
// assert(), so it is only checked in builds where assertions are enabled.
TSRange range_for_substring(const string &text, const string &substring) {
  size_t start = text.find(substring);
  assert(start != string::npos);
  size_t end = start + substring.size();
  return TSRange {
    extent_for_string(text, start),
    extent_for_string(text, end),
    static_cast<uint32_t>(start),
    static_cast<uint32_t>(end),
  };
}
|
||||
|
|
|
|||
|
|
@ -20,4 +20,8 @@ std::ostream &operator<<(std::ostream &stream, const TSRange &range);
|
|||
|
||||
std::ostream &operator<<(std::ostream &stream, const Length &length);
|
||||
|
||||
TSPoint extent_for_string(const std::string &text, size_t end_index = std::string::npos);
|
||||
|
||||
TSRange range_for_substring(const std::string &text, const std::string &substring);
|
||||
|
||||
#endif // HELPERS_POINT_HELPERS_H_
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include "helpers/spy_input.h"
|
||||
#include "helpers/encoding_helpers.h"
|
||||
#include "helpers/point_helpers.h"
|
||||
#include "runtime/point.h"
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
|
|
@ -95,19 +96,6 @@ TSInput SpyInput::input() {
|
|||
return result;
|
||||
}
|
||||
|
||||
static TSPoint get_extent(string text) {
|
||||
TSPoint result = {0, 0};
|
||||
for (auto i = text.begin(); i != text.end(); i++) {
|
||||
if (*i == '\n') {
|
||||
result.row++;
|
||||
result.column = 0;
|
||||
} else {
|
||||
result.column++;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string text) {
|
||||
auto swap = swap_substr(start_byte, bytes_removed, text);
|
||||
size_t bytes_added = text.size();
|
||||
|
|
@ -117,8 +105,8 @@ TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string te
|
|||
result.old_end_byte = start_byte + bytes_removed;
|
||||
result.new_end_byte = start_byte + bytes_added;
|
||||
result.start_point = swap.second;
|
||||
result.old_end_point = result.start_point + get_extent(swap.first);
|
||||
result.new_end_point = result.start_point + get_extent(text);
|
||||
result.old_end_point = result.start_point + extent_for_string(swap.first);
|
||||
result.new_end_point = result.start_point + extent_for_string(text);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -131,8 +119,8 @@ TSInputEdit SpyInput::undo() {
|
|||
result.old_end_byte = entry.start_byte + entry.bytes_removed;
|
||||
result.new_end_byte = entry.start_byte + entry.text_inserted.size();
|
||||
result.start_point = swap.second;
|
||||
result.old_end_point = result.start_point + get_extent(swap.first);
|
||||
result.new_end_point = result.start_point + get_extent(entry.text_inserted);
|
||||
result.old_end_point = result.start_point + extent_for_string(swap.first);
|
||||
result.new_end_point = result.start_point + extent_for_string(entry.text_inserted);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -918,6 +918,29 @@ describe("Parser", [&]() {
|
|||
|
||||
assert_root_node("(program (ERROR (identifier)))");
|
||||
});
|
||||
|
||||
it("allows external scanners to detect the boundaries of included ranges", [&]() {
  string source_code = "a <%= b() %> c <% d() %>";

  // Only the two call expressions are handed to the parser; the rest of the
  // text lies outside the included ranges.
  TSRange included_ranges[] = {
    range_for_substring(source_code, "b()"),
    range_for_substring(source_code, "d()"),
  };

  ts_parser_set_included_ranges(parser, included_ranges, 2);
  ts_parser_set_language(parser, load_real_language("javascript"));
  tree = ts_parser_parse_string(parser, nullptr, source_code.c_str(), source_code.size());

  // Each included range is expected to produce its own statement.
  assert_root_node("(program "
    "(expression_statement (call_expression (identifier) (arguments))) "
    "(expression_statement (call_expression (identifier) (arguments))))");

  TSNode root = ts_tree_root_node(tree);
  TSNode first_statement = ts_node_child(root, 0);
  TSNode second_statement = ts_node_child(root, 1);

  // Each statement must end where its call expression ends in the source,
  // i.e. right after `b()` and right after `d()`.
  TSPoint expected_first_end = extent_for_string("a <%= b()");
  TSPoint expected_second_end = extent_for_string("a <%= b() %> c <% d()");

  AssertThat(ts_node_end_point(first_statement), Equals(expected_first_end));
  AssertThat(ts_node_end_point(second_statement), Equals(expected_second_end));
});
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -131,22 +131,13 @@ describe("Tree", [&]() {
|
|||
return result;
|
||||
};
|
||||
|
||||
auto range_for_text = [&](string start_text, string end_text) {
|
||||
return TSRange {
|
||||
point(0, input->content.find(start_text)),
|
||||
point(0, input->content.find(end_text)),
|
||||
static_cast<uint32_t>(input->content.find(start_text)),
|
||||
static_cast<uint32_t>(input->content.find(end_text)),
|
||||
};
|
||||
};
|
||||
|
||||
it("reports changes when one token has been updated", [&]() {
|
||||
// Replace `null` with `nothing`
|
||||
auto ranges = get_changed_ranges_for_edit([&]() {
|
||||
return input->replace(input->content.find("ull"), 1, "othing");
|
||||
return input->replace(input->content.find("ull"), 3, "othing");
|
||||
});
|
||||
AssertThat(ranges, Equals(vector<TSRange>({
|
||||
range_for_text("nothing", "}"),
|
||||
range_for_substring(input->content, "nothing"),
|
||||
})));
|
||||
|
||||
// Replace `nothing` with `null` again
|
||||
|
|
@ -154,7 +145,7 @@ describe("Tree", [&]() {
|
|||
return input->undo();
|
||||
});
|
||||
AssertThat(ranges, Equals(vector<TSRange>({
|
||||
range_for_text("null", "}"),
|
||||
range_for_substring(input->content, "null"),
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
@ -195,7 +186,7 @@ describe("Tree", [&]() {
|
|||
return input->replace(input->content.find("}"), 0, ", b: false");
|
||||
});
|
||||
AssertThat(ranges, Equals(vector<TSRange>({
|
||||
range_for_text(",", "}"),
|
||||
range_for_substring(input->content, ", b: false"),
|
||||
})));
|
||||
|
||||
// Add a third key-value pair in between the first two
|
||||
|
|
@ -209,7 +200,7 @@ describe("Tree", [&]() {
|
|||
"(pair (property_identifier) (false)))))"
|
||||
);
|
||||
AssertThat(ranges, Equals(vector<TSRange>({
|
||||
range_for_text(", c", ", b"),
|
||||
range_for_substring(input->content, ", c: 1"),
|
||||
})));
|
||||
|
||||
// Delete the middle pair.
|
||||
|
|
@ -244,7 +235,7 @@ describe("Tree", [&]() {
|
|||
"(pair (property_identifier) (binary_expression (identifier) (null))))))"
|
||||
);
|
||||
AssertThat(ranges, Equals(vector<TSRange>({
|
||||
range_for_text("b ===", "}"),
|
||||
range_for_substring(input->content, "b === null"),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue