Merge pull request #178 from tree-sitter/simplify-input-interface

Consolidate TSInput interface down to one function
This commit is contained in:
Max Brunsfeld 2018-06-19 11:13:26 -07:00 committed by GitHub
commit 9b05142439
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 39 additions and 65 deletions

View file

@ -40,8 +40,7 @@ typedef struct {
typedef struct {
void *payload;
const char *(*read)(void *payload, uint32_t *bytes_read);
int (*seek)(void *payload, uint32_t byte_index, TSPoint position);
const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read);
TSInputEncoding encoding;
} TSInput;

View file

@ -17,14 +17,13 @@
static const char empty_chunk[2] = { 0, 0 };
static void ts_lexer__get_chunk(Lexer *self) {
TSInput input = self->input;
if (!self->chunk ||
self->current_position.bytes != self->chunk_start + self->chunk_size) {
input.seek(input.payload, self->current_position.bytes, self->current_position.extent);
}
self->chunk_start = self->current_position.bytes;
self->chunk = input.read(input.payload, &self->chunk_size);
self->chunk = self->input.read(
self->input.payload,
self->current_position.bytes,
self->current_position.extent,
&self->chunk_size
);
if (!self->chunk_size) self->chunk = empty_chunk;
}
@ -74,8 +73,9 @@ static void ts_lexer__advance(void *payload, bool skip) {
LOG_CHARACTER("consume", self->data.lookahead);
}
if (self->current_position.bytes >= self->chunk_start + self->chunk_size)
if (self->current_position.bytes >= self->chunk_start + self->chunk_size) {
ts_lexer__get_chunk(self);
}
ts_lexer__get_lookahead(self);
}
@ -105,10 +105,8 @@ static uint32_t ts_lexer__get_column(void *payload) {
return result;
}
/*
* The lexer's advance method is stored as a struct field so that generated
* parsers can call it without needing to be linked against this library.
*/
// The lexer's methods are stored as struct fields so that generated
// parsers can call them without needing to be linked against this library.
void ts_lexer_init(Lexer *self) {
*self = (Lexer){
@ -163,11 +161,8 @@ void ts_lexer_start(Lexer *self) {
self->token_start_position = self->current_position;
self->token_end_position = LENGTH_UNDEFINED;
self->data.result_symbol = 0;
if (!self->chunk)
ts_lexer__get_chunk(self);
if (!self->lookahead_size)
ts_lexer__get_lookahead(self);
if (!self->chunk) ts_lexer__get_chunk(self);
if (!self->lookahead_size) ts_lexer__get_lookahead(self);
}
void ts_lexer_advance_to_end(Lexer *self) {

View file

@ -62,6 +62,7 @@ struct TSParser {
size_t operation_limit;
volatile bool enabled;
bool halt_on_error;
};
typedef struct {
@ -1461,7 +1462,7 @@ TSTree *ts_parser_resume(TSParser *self) {
self->finished_tree = NULL;
ts_stack_clear(self->stack);
ts_parser__set_cached_token(self, 0, NULL, NULL);
ts_lexer_set_input(&self->lexer, (TSInput) { NULL, NULL, NULL, 0 });
ts_lexer_set_input(&self->lexer, (TSInput) { NULL, NULL, 0 });
return result;
}
@ -1475,5 +1476,5 @@ TSTree *ts_parser_parse_string(TSParser *self, const TSTree *old_tree,
const char *string, uint32_t length) {
TSStringInput input;
ts_string_input_init(&input, string, length);
return ts_parser_parse(self, old_tree, input.input);
return ts_parser_parse(self, old_tree, ts_string_input_get(&input));
}

View file

@ -2,32 +2,27 @@
#include "runtime/string_input.h"
#include <string.h>
static const char *ts_string_input__read(void *payload, uint32_t *bytes_read) {
static const char *ts_string_input__read(void *payload, uint32_t byte_offset,
TSPoint _, uint32_t *bytes_read) {
TSStringInput *input = (TSStringInput *)payload;
if (input->position >= input->length) {
if (byte_offset >= input->length) {
*bytes_read = 0;
return "";
} else {
*bytes_read = input->length - byte_offset;
return input->string + byte_offset;
}
uint32_t previous_position = input->position;
input->position = input->length;
*bytes_read = input->position - previous_position;
return input->string + previous_position;
}
static int ts_string_input__seek(void *payload, uint32_t byte, TSPoint _) {
TSStringInput *input = (TSStringInput *)payload;
input->position = byte;
return (byte < input->length);
}
void ts_string_input_init(TSStringInput *self, const char *string, uint32_t length) {
self->string = string;
self->position = 0;
self->length = length;
self->input = (TSInput) {
}
TSInput ts_string_input_get(TSStringInput *self) {
return (TSInput) {
.payload = self,
.read = ts_string_input__read,
.seek = ts_string_input__seek,
.encoding = TSInputEncodingUTF8,
};
}

View file

@ -9,12 +9,11 @@ extern "C" {
typedef struct {
const char *string;
uint32_t position;
uint32_t length;
TSInput input;
} TSStringInput;
void ts_string_input_init(TSStringInput *, const char *, uint32_t);
TSInput ts_string_input_get(TSStringInput *);
#ifdef __cplusplus
}

View file

@ -11,7 +11,6 @@ using std::vector;
SpyInput::SpyInput(string content, size_t chars_per_chunk) :
buffer(nullptr),
byte_offset(0),
chars_per_chunk(chars_per_chunk),
content(content),
encoding(TSInputEncodingUTF8),
@ -42,22 +41,23 @@ static void add_byte_range(vector<pair<uint32_t, uint32_t>> *ranges,
ranges->push_back({start, end});
}
const char * SpyInput::read(void *payload, uint32_t *bytes_read) {
const char *SpyInput::read(void *payload, uint32_t byte_offset,
TSPoint position, uint32_t *bytes_read) {
auto spy = static_cast<SpyInput *>(payload);
if (spy->byte_offset > spy->content.size()) {
if (byte_offset >= spy->content.size()) {
*bytes_read = 0;
return "";
}
long byte_count = string_byte_for_character(spy->encoding, spy->content, spy->byte_offset, spy->chars_per_chunk);
if (byte_count < 0)
byte_count = spy->content.size() - spy->byte_offset;
long byte_count = string_byte_for_character(spy->encoding, spy->content, byte_offset, spy->chars_per_chunk);
if (byte_count < 0) {
byte_count = spy->content.size() - byte_offset;
}
string result = spy->content.substr(spy->byte_offset, byte_count);
string result = spy->content.substr(byte_offset, byte_count);
*bytes_read = byte_count;
add_byte_range(&spy->ranges_read, spy->byte_offset, byte_count);
spy->byte_offset += byte_count;
add_byte_range(&spy->ranges_read, byte_offset, byte_count);
/*
* This class stores its entire `content` in a contiguous buffer, but we want
@ -79,12 +79,6 @@ const char * SpyInput::read(void *payload, uint32_t *bytes_read) {
return spy->buffer;
}
int SpyInput::seek(void *payload, uint32_t byte, TSPoint position) {
auto spy = static_cast<SpyInput *>(payload);
spy->byte_offset = byte;
return 0;
}
vector<string> SpyInput::strings_read() const {
vector<string> result;
for (auto &range : ranges_read) {
@ -97,7 +91,6 @@ TSInput SpyInput::input() {
TSInput result;
result.payload = this;
result.encoding = encoding;
result.seek = seek;
result.read = read;
return result;
}

View file

@ -13,11 +13,9 @@ struct SpyInputEdit {
class SpyInput {
char *buffer;
uint32_t byte_offset;
std::vector<SpyInputEdit> undo_stack;
static const char * read(void *, uint32_t *);
static int seek(void *, uint32_t, TSPoint);
static const char *read(void *, uint32_t, TSPoint, uint32_t *);
std::pair<std::string, TSPoint> swap_substr(size_t, size_t, std::string);
public:

View file

@ -630,15 +630,12 @@ describe("Parser", [&]() {
size_t read_count = 0;
TSInput infinite_input = {
&read_count,
[](void *payload, uint32_t *bytes_read) {
[](void *payload, uint32_t byte, TSPoint position, uint32_t *bytes_read) {
size_t *read_count = static_cast<size_t *>(payload);
assert((*read_count)++ < 100000);
*bytes_read = 1;
return "[";
},
[](void *payload, unsigned byte, TSPoint position) -> int {
return true;
},
TSInputEncodingUTF8
};
@ -681,15 +678,12 @@ describe("Parser", [&]() {
// it has been read.
TSInput infinite_input = {
&state,
[](void *payload, uint32_t *bytes_read) {
[](void *payload, uint32_t byte, TSPoint position, uint32_t *bytes_read) {
InputState *state = static_cast<InputState *>(payload);
assert(state->read_count++ <= 10);
*bytes_read = strlen(state->string);
return state->string;
},
[](void *payload, unsigned byte, TSPoint position) -> int {
return true;
},
TSInputEncodingUTF8
};