Merge pull request #178 from tree-sitter/simplify-input-interface
Consolidate TSInput interface down to one function
This commit is contained in:
commit
9b05142439
8 changed files with 39 additions and 65 deletions
|
|
@ -40,8 +40,7 @@ typedef struct {
|
|||
|
||||
typedef struct {
|
||||
void *payload;
|
||||
const char *(*read)(void *payload, uint32_t *bytes_read);
|
||||
int (*seek)(void *payload, uint32_t byte_index, TSPoint position);
|
||||
const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read);
|
||||
TSInputEncoding encoding;
|
||||
} TSInput;
|
||||
|
||||
|
|
|
|||
|
|
@ -17,14 +17,13 @@
|
|||
static const char empty_chunk[2] = { 0, 0 };
|
||||
|
||||
static void ts_lexer__get_chunk(Lexer *self) {
|
||||
TSInput input = self->input;
|
||||
if (!self->chunk ||
|
||||
self->current_position.bytes != self->chunk_start + self->chunk_size) {
|
||||
input.seek(input.payload, self->current_position.bytes, self->current_position.extent);
|
||||
}
|
||||
|
||||
self->chunk_start = self->current_position.bytes;
|
||||
self->chunk = input.read(input.payload, &self->chunk_size);
|
||||
self->chunk = self->input.read(
|
||||
self->input.payload,
|
||||
self->current_position.bytes,
|
||||
self->current_position.extent,
|
||||
&self->chunk_size
|
||||
);
|
||||
if (!self->chunk_size) self->chunk = empty_chunk;
|
||||
}
|
||||
|
||||
|
|
@ -74,8 +73,9 @@ static void ts_lexer__advance(void *payload, bool skip) {
|
|||
LOG_CHARACTER("consume", self->data.lookahead);
|
||||
}
|
||||
|
||||
if (self->current_position.bytes >= self->chunk_start + self->chunk_size)
|
||||
if (self->current_position.bytes >= self->chunk_start + self->chunk_size) {
|
||||
ts_lexer__get_chunk(self);
|
||||
}
|
||||
|
||||
ts_lexer__get_lookahead(self);
|
||||
}
|
||||
|
|
@ -105,10 +105,8 @@ static uint32_t ts_lexer__get_column(void *payload) {
|
|||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* The lexer's advance method is stored as a struct field so that generated
|
||||
* parsers can call it without needing to be linked against this library.
|
||||
*/
|
||||
// The lexer's methods are stored as a struct field so that generated
|
||||
// parsers can call them without needing to be linked against this library.
|
||||
|
||||
void ts_lexer_init(Lexer *self) {
|
||||
*self = (Lexer){
|
||||
|
|
@ -163,11 +161,8 @@ void ts_lexer_start(Lexer *self) {
|
|||
self->token_start_position = self->current_position;
|
||||
self->token_end_position = LENGTH_UNDEFINED;
|
||||
self->data.result_symbol = 0;
|
||||
|
||||
if (!self->chunk)
|
||||
ts_lexer__get_chunk(self);
|
||||
if (!self->lookahead_size)
|
||||
ts_lexer__get_lookahead(self);
|
||||
if (!self->chunk) ts_lexer__get_chunk(self);
|
||||
if (!self->lookahead_size) ts_lexer__get_lookahead(self);
|
||||
}
|
||||
|
||||
void ts_lexer_advance_to_end(Lexer *self) {
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ struct TSParser {
|
|||
size_t operation_limit;
|
||||
volatile bool enabled;
|
||||
bool halt_on_error;
|
||||
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -1461,7 +1462,7 @@ TSTree *ts_parser_resume(TSParser *self) {
|
|||
self->finished_tree = NULL;
|
||||
ts_stack_clear(self->stack);
|
||||
ts_parser__set_cached_token(self, 0, NULL, NULL);
|
||||
ts_lexer_set_input(&self->lexer, (TSInput) { NULL, NULL, NULL, 0 });
|
||||
ts_lexer_set_input(&self->lexer, (TSInput) { NULL, NULL, 0 });
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -1475,5 +1476,5 @@ TSTree *ts_parser_parse_string(TSParser *self, const TSTree *old_tree,
|
|||
const char *string, uint32_t length) {
|
||||
TSStringInput input;
|
||||
ts_string_input_init(&input, string, length);
|
||||
return ts_parser_parse(self, old_tree, input.input);
|
||||
return ts_parser_parse(self, old_tree, ts_string_input_get(&input));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,32 +2,27 @@
|
|||
#include "runtime/string_input.h"
|
||||
#include <string.h>
|
||||
|
||||
static const char *ts_string_input__read(void *payload, uint32_t *bytes_read) {
|
||||
static const char *ts_string_input__read(void *payload, uint32_t byte_offset,
|
||||
TSPoint _, uint32_t *bytes_read) {
|
||||
TSStringInput *input = (TSStringInput *)payload;
|
||||
if (input->position >= input->length) {
|
||||
if (byte_offset >= input->length) {
|
||||
*bytes_read = 0;
|
||||
return "";
|
||||
} else {
|
||||
*bytes_read = input->length - byte_offset;
|
||||
return input->string + byte_offset;
|
||||
}
|
||||
uint32_t previous_position = input->position;
|
||||
input->position = input->length;
|
||||
*bytes_read = input->position - previous_position;
|
||||
return input->string + previous_position;
|
||||
}
|
||||
|
||||
static int ts_string_input__seek(void *payload, uint32_t byte, TSPoint _) {
|
||||
TSStringInput *input = (TSStringInput *)payload;
|
||||
input->position = byte;
|
||||
return (byte < input->length);
|
||||
}
|
||||
|
||||
void ts_string_input_init(TSStringInput *self, const char *string, uint32_t length) {
|
||||
self->string = string;
|
||||
self->position = 0;
|
||||
self->length = length;
|
||||
self->input = (TSInput) {
|
||||
}
|
||||
|
||||
TSInput ts_string_input_get(TSStringInput *self) {
|
||||
return (TSInput) {
|
||||
.payload = self,
|
||||
.read = ts_string_input__read,
|
||||
.seek = ts_string_input__seek,
|
||||
.encoding = TSInputEncodingUTF8,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,12 +9,11 @@ extern "C" {
|
|||
|
||||
typedef struct {
|
||||
const char *string;
|
||||
uint32_t position;
|
||||
uint32_t length;
|
||||
TSInput input;
|
||||
} TSStringInput;
|
||||
|
||||
void ts_string_input_init(TSStringInput *, const char *, uint32_t);
|
||||
TSInput ts_string_input_get(TSStringInput *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ using std::vector;
|
|||
|
||||
SpyInput::SpyInput(string content, size_t chars_per_chunk) :
|
||||
buffer(nullptr),
|
||||
byte_offset(0),
|
||||
chars_per_chunk(chars_per_chunk),
|
||||
content(content),
|
||||
encoding(TSInputEncodingUTF8),
|
||||
|
|
@ -42,22 +41,23 @@ static void add_byte_range(vector<pair<uint32_t, uint32_t>> *ranges,
|
|||
ranges->push_back({start, end});
|
||||
}
|
||||
|
||||
const char * SpyInput::read(void *payload, uint32_t *bytes_read) {
|
||||
const char *SpyInput::read(void *payload, uint32_t byte_offset,
|
||||
TSPoint position, uint32_t *bytes_read) {
|
||||
auto spy = static_cast<SpyInput *>(payload);
|
||||
|
||||
if (spy->byte_offset > spy->content.size()) {
|
||||
if (byte_offset >= spy->content.size()) {
|
||||
*bytes_read = 0;
|
||||
return "";
|
||||
}
|
||||
|
||||
long byte_count = string_byte_for_character(spy->encoding, spy->content, spy->byte_offset, spy->chars_per_chunk);
|
||||
if (byte_count < 0)
|
||||
byte_count = spy->content.size() - spy->byte_offset;
|
||||
long byte_count = string_byte_for_character(spy->encoding, spy->content, byte_offset, spy->chars_per_chunk);
|
||||
if (byte_count < 0) {
|
||||
byte_count = spy->content.size() - byte_offset;
|
||||
}
|
||||
|
||||
string result = spy->content.substr(spy->byte_offset, byte_count);
|
||||
string result = spy->content.substr(byte_offset, byte_count);
|
||||
*bytes_read = byte_count;
|
||||
add_byte_range(&spy->ranges_read, spy->byte_offset, byte_count);
|
||||
spy->byte_offset += byte_count;
|
||||
add_byte_range(&spy->ranges_read, byte_offset, byte_count);
|
||||
|
||||
/*
|
||||
* This class stores its entire `content` in a contiguous buffer, but we want
|
||||
|
|
@ -79,12 +79,6 @@ const char * SpyInput::read(void *payload, uint32_t *bytes_read) {
|
|||
return spy->buffer;
|
||||
}
|
||||
|
||||
int SpyInput::seek(void *payload, uint32_t byte, TSPoint position) {
|
||||
auto spy = static_cast<SpyInput *>(payload);
|
||||
spy->byte_offset = byte;
|
||||
return 0;
|
||||
}
|
||||
|
||||
vector<string> SpyInput::strings_read() const {
|
||||
vector<string> result;
|
||||
for (auto &range : ranges_read) {
|
||||
|
|
@ -97,7 +91,6 @@ TSInput SpyInput::input() {
|
|||
TSInput result;
|
||||
result.payload = this;
|
||||
result.encoding = encoding;
|
||||
result.seek = seek;
|
||||
result.read = read;
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,11 +13,9 @@ struct SpyInputEdit {
|
|||
|
||||
class SpyInput {
|
||||
char *buffer;
|
||||
uint32_t byte_offset;
|
||||
std::vector<SpyInputEdit> undo_stack;
|
||||
|
||||
static const char * read(void *, uint32_t *);
|
||||
static int seek(void *, uint32_t, TSPoint);
|
||||
static const char *read(void *, uint32_t, TSPoint, uint32_t *);
|
||||
std::pair<std::string, TSPoint> swap_substr(size_t, size_t, std::string);
|
||||
|
||||
public:
|
||||
|
|
|
|||
|
|
@ -630,15 +630,12 @@ describe("Parser", [&]() {
|
|||
size_t read_count = 0;
|
||||
TSInput infinite_input = {
|
||||
&read_count,
|
||||
[](void *payload, uint32_t *bytes_read) {
|
||||
[](void *payload, uint32_t byte, TSPoint position, uint32_t *bytes_read) {
|
||||
size_t *read_count = static_cast<size_t *>(payload);
|
||||
assert((*read_count)++ < 100000);
|
||||
*bytes_read = 1;
|
||||
return "[";
|
||||
},
|
||||
[](void *payload, unsigned byte, TSPoint position) -> int {
|
||||
return true;
|
||||
},
|
||||
TSInputEncodingUTF8
|
||||
};
|
||||
|
||||
|
|
@ -681,15 +678,12 @@ describe("Parser", [&]() {
|
|||
// it has been read.
|
||||
TSInput infinite_input = {
|
||||
&state,
|
||||
[](void *payload, uint32_t *bytes_read) {
|
||||
[](void *payload, uint32_t byte, TSPoint position, uint32_t *bytes_read) {
|
||||
InputState *state = static_cast<InputState *>(payload);
|
||||
assert(state->read_count++ <= 10);
|
||||
*bytes_read = strlen(state->string);
|
||||
return state->string;
|
||||
},
|
||||
[](void *payload, unsigned byte, TSPoint position) -> int {
|
||||
return true;
|
||||
},
|
||||
TSInputEncodingUTF8
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue