Merge pull request #183 from tree-sitter/detect-included-range-boundaries

Add lexer API for detecting boundaries of included ranges
This commit is contained in:
Max Brunsfeld 2018-07-18 09:37:16 -07:00 committed by GitHub
commit 16376c43f5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 95 additions and 50 deletions

View file

@ -25,13 +25,16 @@ typedef struct {
bool named : 1;
} TSSymbolMetadata;
typedef struct {
void (*advance)(void *, bool);
void (*mark_end)(void *);
uint32_t (*get_column)(void *);
typedef struct TSLexer TSLexer;
struct TSLexer {
int32_t lookahead;
TSSymbol result_symbol;
} TSLexer;
void (*advance)(TSLexer *, bool);
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(TSLexer *);
};
typedef enum {
TSParseActionTypeShift,

View file

@ -10,7 +10,7 @@ extern "C" {
#include <stdint.h>
#include <stdbool.h>
#define TREE_SITTER_LANGUAGE_VERSION 8
#define TREE_SITTER_LANGUAGE_VERSION 9
typedef uint16_t TSSymbol;
typedef struct TSLanguage TSLanguage;

View file

@ -15,13 +15,13 @@ fetch_grammar() {
fi
(
cd $grammar_dir;
cd $grammar_dir
git fetch origin $ref --depth=1
git reset --hard origin/$ref;
git reset --hard FETCH_HEAD
)
}
fetch_grammar javascript master
fetch_grammar javascript included-range-boundaries
fetch_grammar json master
fetch_grammar c master
fetch_grammar cpp master

View file

@ -1,6 +1,6 @@
@echo off
call:fetch_grammar javascript master
call:fetch_grammar javascript included-range-boundaries
call:fetch_grammar json master
call:fetch_grammar c master
call:fetch_grammar cpp master
@ -22,6 +22,6 @@ SET grammar_branch=%~2
)
pushd %grammar_dir%
git fetch origin %2 --depth=1
git reset --hard origin/%grammar_branch%
git reset --hard FETCH_HEAD
popd
EXIT /B 0

View file

@ -50,7 +50,7 @@ static void ts_lexer__get_lookahead(Lexer *self) {
}
}
static void ts_lexer__advance(void *payload, bool skip) {
static void ts_lexer__advance(TSLexer *payload, bool skip) {
Lexer *self = (Lexer *)payload;
if (self->chunk == empty_chunk)
return;
@ -95,7 +95,7 @@ static void ts_lexer__advance(void *payload, bool skip) {
ts_lexer__get_lookahead(self);
}
static void ts_lexer__mark_end(void *payload) {
static void ts_lexer__mark_end(TSLexer *payload) {
Lexer *self = (Lexer *)payload;
TSRange *current_included_range = &self->included_ranges[self->current_included_range_index];
if (self->current_included_range_index > 0 &&
@ -110,7 +110,7 @@ static void ts_lexer__mark_end(void *payload) {
}
}
static uint32_t ts_lexer__get_column(void *payload) {
static uint32_t ts_lexer__get_column(TSLexer *payload) {
Lexer *self = (Lexer *)payload;
uint32_t goal_byte = self->current_position.bytes;
@ -123,13 +123,19 @@ static uint32_t ts_lexer__get_column(void *payload) {
uint32_t result = 0;
while (self->current_position.bytes < goal_byte) {
ts_lexer__advance(self, false);
ts_lexer__advance(payload, false);
result++;
}
return result;
}
// Reports whether the lexer's current byte offset is exactly the starting
// byte of the included range that the lexer is currently positioned in.
static bool ts_lexer__is_at_included_range_start(TSLexer *payload) {
  // Callbacks receive a TSLexer pointer, which is cast back to the enclosing
  // Lexer just like the other callbacks in this file do.
  const Lexer *lexer = (const Lexer *)payload;

  // NOTE(review): assumes current_included_range_index always refers to a
  // valid entry of included_ranges -- confirm for the end-of-input case.
  const TSRange *active_range = lexer->included_ranges + lexer->current_included_range_index;

  return active_range->start_byte == lexer->current_position.bytes;
}
// The lexer's methods are stored as struct fields so that generated
// parsers can call them without needing to be linked against this library.
@ -139,6 +145,7 @@ void ts_lexer_init(Lexer *self) {
.advance = ts_lexer__advance,
.mark_end = ts_lexer__mark_end,
.get_column = ts_lexer__get_column,
.is_at_included_range_start = ts_lexer__is_at_included_range_start,
.lookahead = 0,
.result_symbol = 0,
},
@ -227,7 +234,9 @@ void ts_lexer_start(Lexer *self) {
}
void ts_lexer_advance_to_end(Lexer *self) {
while (self->data.lookahead != 0) ts_lexer__advance(self, false);
while (self->data.lookahead != 0) {
ts_lexer__advance((TSLexer *)self, false);
}
}
static const TSRange DEFAULT_RANGES[] = {

View file

@ -327,7 +327,7 @@ static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSSta
valid_external_tokens
)) {
if (length_is_undefined(self->lexer.token_end_position)) {
self->lexer.token_end_position = self->lexer.current_position;
self->lexer.data.mark_end(&self->lexer.data);
}
if (!error_mode || self->lexer.token_end_position.bytes > current_position.bytes) {
@ -380,7 +380,7 @@ static const Subtree *ts_parser__lex(TSParser *self, StackVersion version, TSSta
self->lexer.data.result_symbol = ts_builtin_sym_error;
break;
}
self->lexer.data.advance(&self->lexer, false);
self->lexer.data.advance(&self->lexer.data, false);
}
error_end_position = self->lexer.current_position;

View file

@ -1,6 +1,7 @@
#include "./point_helpers.h"
#include <string>
#include <ostream>
#include <cassert>
#include "runtime/length.h"
#include "tree_sitter/runtime.h"
@ -45,3 +46,29 @@ std::ostream &operator<<(std::ostream &stream, const TSRange &range) {
ostream &operator<<(ostream &stream, const Length &length) {
return stream << "{bytes:" << length.bytes << ", extent:" << length.extent << "}";
}
// Computes the {row, column} position reached after walking the first
// `end_index` characters of `text`. An `end_index` beyond the end of the
// string is clamped to the string's length. Every '\n' starts a new row at
// column zero; every other char advances the column by one.
TSPoint extent_for_string(const string &text, size_t end_index) {
  const size_t limit = end_index < text.size() ? end_index : text.size();

  TSPoint extent = {0, 0};
  for (auto cursor = text.begin(), stop = text.begin() + limit; cursor != stop; ++cursor) {
    if (*cursor != '\n') {
      extent.column++;
      continue;
    }
    // Line break: move to the beginning of the next row.
    extent.row++;
    extent.column = 0;
  }
  return extent;
}
// Builds the TSRange that covers the first occurrence of `substring` inside
// `text`. The substring is required to be present; this is checked with an
// assert, so the check disappears when asserts are compiled out.
TSRange range_for_substring(const string &text, const string &substring) {
  const size_t start = text.find(substring);
  assert(start != string::npos);
  const size_t end = start + substring.size();

  // Positional initialization: the two points first, then the two byte
  // offsets (assumes TSRange declares its fields in this order -- the byte
  // offsets are narrowed explicitly to uint32_t).
  return TSRange {
    extent_for_string(text, start),
    extent_for_string(text, end),
    static_cast<uint32_t>(start),
    static_cast<uint32_t>(end),
  };
}

View file

@ -20,4 +20,8 @@ std::ostream &operator<<(std::ostream &stream, const TSRange &range);
std::ostream &operator<<(std::ostream &stream, const Length &length);
TSPoint extent_for_string(const std::string &text, size_t end_index = std::string::npos);
TSRange range_for_substring(const std::string &text, const std::string &substring);
#endif // HELPERS_POINT_HELPERS_H_

View file

@ -1,5 +1,6 @@
#include "helpers/spy_input.h"
#include "helpers/encoding_helpers.h"
#include "helpers/point_helpers.h"
#include "runtime/point.h"
#include <string.h>
#include <algorithm>
@ -95,19 +96,6 @@ TSInput SpyInput::input() {
return result;
}
static TSPoint get_extent(string text) {
TSPoint result = {0, 0};
for (auto i = text.begin(); i != text.end(); i++) {
if (*i == '\n') {
result.row++;
result.column = 0;
} else {
result.column++;
}
}
return result;
}
TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string text) {
auto swap = swap_substr(start_byte, bytes_removed, text);
size_t bytes_added = text.size();
@ -117,8 +105,8 @@ TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string te
result.old_end_byte = start_byte + bytes_removed;
result.new_end_byte = start_byte + bytes_added;
result.start_point = swap.second;
result.old_end_point = result.start_point + get_extent(swap.first);
result.new_end_point = result.start_point + get_extent(text);
result.old_end_point = result.start_point + extent_for_string(swap.first);
result.new_end_point = result.start_point + extent_for_string(text);
return result;
}
@ -131,8 +119,8 @@ TSInputEdit SpyInput::undo() {
result.old_end_byte = entry.start_byte + entry.bytes_removed;
result.new_end_byte = entry.start_byte + entry.text_inserted.size();
result.start_point = swap.second;
result.old_end_point = result.start_point + get_extent(swap.first);
result.new_end_point = result.start_point + get_extent(entry.text_inserted);
result.old_end_point = result.start_point + extent_for_string(swap.first);
result.new_end_point = result.start_point + extent_for_string(entry.text_inserted);
return result;
}

View file

@ -918,6 +918,29 @@ describe("Parser", [&]() {
assert_root_node("(program (ERROR (identifier)))");
});
// Parses a template-like string where only the two call expressions are
// included ranges, then checks that one statement is produced per range and
// that each statement ends where its range's text ends.
it("allows external scanners to detect the boundaries of included ranges", [&]() {
string source_code = "a <%= b() %> c <% d() %>";
// Restrict parsing to the `b()` and `d()` substrings; the surrounding
// template text is not part of any included range.
TSRange included_ranges[] = {
range_for_substring(source_code, "b()"),
range_for_substring(source_code, "d()"),
};
ts_parser_set_included_ranges(parser, included_ranges, 2);
ts_parser_set_language(parser, load_real_language("javascript"));
tree = ts_parser_parse_string(parser, nullptr, source_code.c_str(), source_code.size());
// Each included range is expected to parse as its own expression statement.
assert_root_node("(program "
"(expression_statement (call_expression (identifier) (arguments))) "
"(expression_statement (call_expression (identifier) (arguments))))");
TSNode statement_node1 = ts_node_child(ts_tree_root_node(tree), 0);
TSNode statement_node2 = ts_node_child(ts_tree_root_node(tree), 1);
// The expected end points are the extents of the source text up to and
// including `b()` and `d()` respectively, i.e. the statements must not extend
// into the text that follows their range.
AssertThat(ts_node_end_point(statement_node1), Equals(extent_for_string("a <%= b()")));
AssertThat(ts_node_end_point(statement_node2), Equals(extent_for_string("a <%= b() %> c <% d()")));
});
});
});

View file

@ -131,22 +131,13 @@ describe("Tree", [&]() {
return result;
};
auto range_for_text = [&](string start_text, string end_text) {
return TSRange {
point(0, input->content.find(start_text)),
point(0, input->content.find(end_text)),
static_cast<uint32_t>(input->content.find(start_text)),
static_cast<uint32_t>(input->content.find(end_text)),
};
};
it("reports changes when one token has been updated", [&]() {
// Replace `null` with `nothing`
auto ranges = get_changed_ranges_for_edit([&]() {
return input->replace(input->content.find("ull"), 1, "othing");
return input->replace(input->content.find("ull"), 3, "othing");
});
AssertThat(ranges, Equals(vector<TSRange>({
range_for_text("nothing", "}"),
range_for_substring(input->content, "nothing"),
})));
// Replace `nothing` with `null` again
@ -154,7 +145,7 @@ describe("Tree", [&]() {
return input->undo();
});
AssertThat(ranges, Equals(vector<TSRange>({
range_for_text("null", "}"),
range_for_substring(input->content, "null"),
})));
});
@ -195,7 +186,7 @@ describe("Tree", [&]() {
return input->replace(input->content.find("}"), 0, ", b: false");
});
AssertThat(ranges, Equals(vector<TSRange>({
range_for_text(",", "}"),
range_for_substring(input->content, ", b: false"),
})));
// Add a third key-value pair in between the first two
@ -209,7 +200,7 @@ describe("Tree", [&]() {
"(pair (property_identifier) (false)))))"
);
AssertThat(ranges, Equals(vector<TSRange>({
range_for_text(", c", ", b"),
range_for_substring(input->content, ", c: 1"),
})));
// Delete the middle pair.
@ -244,7 +235,7 @@ describe("Tree", [&]() {
"(pair (property_identifier) (binary_expression (identifier) (null))))))"
);
AssertThat(ranges, Equals(vector<TSRange>({
range_for_text("b ===", "}"),
range_for_substring(input->content, "b === null"),
})));
});
});