In parser, read chunked input correctly
commit 42e9a264f3 (parent 3aaa08b948)
6 changed files with 134 additions and 18 deletions

@@ -38,11 +38,15 @@ typedef struct {
 typedef struct {
   ts_input input;
-  const char *current_chunk;
-  size_t current_chunk_end;
-  size_t position;
+
+  const char *chunk;
+  size_t chunk_start;
+  size_t chunk_size;
+  size_t position_in_chunk;
+
   size_t token_end_position;
   size_t token_start_position;
+
   ts_tree *lookahead_node;
   ts_tree *prev_lookahead_node;
   ts_state lex_state;
 

@@ -60,9 +64,12 @@ static ts_parser ts_parser_make(ts_input input) {
     .input = input,
     .token_start_position = 0,
     .token_end_position = 0,
-    .position = 0,
-    .current_chunk = chunk,
-    .current_chunk_end = bytes_read,
+
+    .chunk = chunk,
+    .chunk_size = bytes_read,
+    .chunk_start = 0,
+    .position_in_chunk = 0,
+
     .lookahead_node = NULL,
     .prev_lookahead_node = NULL,
     .lex_state = 0,

@@ -71,9 +78,13 @@ static ts_parser ts_parser_make(ts_input input) {
   };
   return result;
 }
 
+static size_t ts_parser_position(const ts_parser *parser) {
+  return parser->chunk_start + parser->position_in_chunk;
+}
+
 static char ts_parser_lookahead_char(const ts_parser *parser) {
-  return parser->current_chunk[parser->position];
+  return parser->chunk[parser->position_in_chunk];
 }
 
 static ts_symbol ts_parser_lookahead_sym(const ts_parser *parser) {
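
The new layout splits the old single absolute position into a chunk-local offset plus the absolute start of the current chunk, so the absolute position is always chunk_start + position_in_chunk and the lookahead character comes straight out of the current chunk. A minimal standalone sketch of that arithmetic (the ChunkCursor type and helper names below are hypothetical, not part of the runtime):

#include <cassert>
#include <cstddef>

// Hypothetical mirror of the parser's chunk bookkeeping.
struct ChunkCursor {
  const char *chunk;         // current chunk returned by the reader
  size_t chunk_start;        // absolute offset of the chunk's first byte
  size_t position_in_chunk;  // offset within the current chunk
};

// Absolute position in the document, as in ts_parser_position.
static size_t absolute_position(const ChunkCursor &c) {
  return c.chunk_start + c.position_in_chunk;
}

// Lookahead character, as in ts_parser_lookahead_char.
static char lookahead_char(const ChunkCursor &c) {
  return c.chunk[c.position_in_chunk];
}

int main() {
  // Second chunk of "hello world": "world" begins at absolute offset 6.
  ChunkCursor c = { "world", 6, 2 };
  assert(absolute_position(c) == 8);  // points at 'r'
  assert(lookahead_char(c) == 'r');
  return 0;
}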

@@ -143,13 +154,19 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_
   DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_parser_parse_state(parser));
 }
 
+static const char empty_chunk[1] = { '\0' };
+
 static void ts_parser_advance(ts_parser *parser) {
-  if (parser->position < parser->current_chunk_end) {
-    parser->position++;
+  if (parser->position_in_chunk < parser->chunk_size - 1) {
+    parser->position_in_chunk++;
   } else {
-    size_t bytes_read = 0;
-    parser->current_chunk = parser->input.read_fn(parser->input.data, &bytes_read);
-    parser->current_chunk_end += bytes_read;
+    parser->chunk_start += parser->chunk_size;
+    parser->chunk = parser->input.read_fn(parser->input.data, &parser->chunk_size);
+    if (parser->chunk_size == 0) {
+      parser->chunk = empty_chunk;
+      parser->chunk_size = 1;
+    }
+    parser->position_in_chunk = 0;
   }
 }
 
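
This is the heart of the fix: advancing steps through the current chunk and, once its last byte has been consumed, pulls the next chunk from the input's read callback, adding the old chunk's size to chunk_start; a zero-length read is replaced with a one-byte '\0' sentinel chunk so the lexer sees end of input. A self-contained sketch of the same scheme, assuming a hypothetical reader that hands out fixed 4-byte slices of a string (none of these names belong to the runtime):

#include <cstddef>
#include <cstdio>
#include <cstring>

// Hypothetical stand-ins for ts_input.read_fn and the parser's chunk state.
typedef const char *(*read_fn_t)(void *data, size_t *bytes_read);

struct FakeInput { const char *text; size_t pos; };

static const char *fake_read(void *data, size_t *bytes_read) {
  FakeInput *in = static_cast<FakeInput *>(data);
  size_t remaining = std::strlen(in->text) - in->pos;
  size_t size = remaining < 4 ? remaining : 4;  // fixed 4-byte chunks
  const char *result = in->text + in->pos;
  in->pos += size;
  *bytes_read = size;
  return result;
}

static const char empty_chunk[1] = { '\0' };

struct Cursor {
  read_fn_t read;
  void *data;
  const char *chunk;
  size_t chunk_start, chunk_size, position_in_chunk;
};

// Mirrors the commit's ts_parser_advance: step within the chunk, and pull the
// next chunk once the last byte has been consumed. A zero-length read is
// replaced by a 1-byte '\0' sentinel chunk.
static void advance(Cursor *c) {
  if (c->position_in_chunk < c->chunk_size - 1) {
    c->position_in_chunk++;
  } else {
    c->chunk_start += c->chunk_size;
    c->chunk = c->read(c->data, &c->chunk_size);
    if (c->chunk_size == 0) {
      c->chunk = empty_chunk;
      c->chunk_size = 1;
    }
    c->position_in_chunk = 0;
  }
}

int main() {
  FakeInput in = { "chunked input", 0 };
  Cursor c = { fake_read, &in, NULL, 0, 0, 0 };
  c.chunk = fake_read(&in, &c.chunk_size);  // prime the first chunk
  while (c.chunk[c.position_in_chunk] != '\0') {
    std::putchar(c.chunk[c.position_in_chunk]);
    advance(&c);
  }
  std::putchar('\n');  // prints: chunked input
  return 0;
}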

@@ -161,10 +178,11 @@ static void ts_parser_advance_to_state(ts_parser *parser, ts_state lex_state) {
 static void ts_parser_set_lookahead_sym(ts_parser *parser, ts_symbol symbol) {
   DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]);
-  size_t size = parser->position - parser->token_start_position;
+  size_t position = ts_parser_position(parser);
+  size_t size = position - parser->token_start_position;
   size_t offset = parser->token_start_position - parser->token_end_position;
   parser->lookahead_node = ts_tree_make_leaf(symbol, size, offset);
-  parser->token_end_position = parser->position;
+  parser->token_end_position = position;
 }
 
 static ts_tree * ts_parser_tree(ts_parser *parser) {
 
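
With the absolute position available, the lookahead token's size and offset are computed against the previous token's end rather than the old chunk-local position. Roughly: if the previous token ended at absolute offset 5, skipping whitespace pushed the new token's start to 7, and lexing stopped at 10, the new leaf gets offset 7 - 5 = 2 and size 10 - 7 = 3. A throwaway check of that arithmetic with hypothetical values:

#include <cassert>
#include <cstddef>

int main() {
  // Hypothetical values; mirrors size/offset in ts_parser_set_lookahead_sym.
  size_t token_end_position = 5;    // end of the previous token
  size_t token_start_position = 7;  // set after skipping whitespace
  size_t position = 10;             // absolute position when the token ends
  size_t size = position - token_start_position;
  size_t offset = token_start_position - token_end_position;
  assert(size == 3 && offset == 2);
  return 0;
}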

@@ -175,7 +193,7 @@ static ts_tree * ts_parser_tree(ts_parser *parser) {
 static void ts_parser_skip_whitespace(ts_parser *parser) {
   while (isspace(ts_parser_lookahead_char(parser)))
     ts_parser_advance(parser);
-  parser->token_start_position = parser->position;
+  parser->token_start_position = ts_parser_position(parser);
 }
 
 static int ts_parser_handle_error(ts_parser *parser, size_t count, const ts_symbol *expected_symbols) {

spec/runtime/helpers/spy_reader.cc (new file, 37 lines)
@@ -0,0 +1,37 @@
+#include "helpers/spy_reader.h"
+#include <algorithm>
+
+using std::string;
+
+static const char * spy_read(void *data, size_t *bytes_read) {
+  SpyReader *reader = static_cast<SpyReader *>(data);
+  size_t size = std::min(reader->chunk_size,
+                         reader->content.length() - reader->position);
+  const char *result = reader->content.data() + reader->position;
+  reader->chunks_read.push_back(string(result, size));
+  reader->position += size;
+  *bytes_read = size;
+  return result;
+}
+
+static int spy_seek(void *data, size_t position) {
+  SpyReader *reader = static_cast<SpyReader *>(data);
+  reader->position = position;
+  return 0;
+}
+
+static void spy_release(void *data) {
+  SpyReader *reader = static_cast<SpyReader *>(data);
+  delete reader;
+}
+
+SpyReader::SpyReader(string content, size_t chunk_size) :
+  content(content),
+  position(0),
+  chunk_size(chunk_size),
+  input({
+    .read_fn = spy_read,
+    .seek_fn = spy_seek,
+    .release_fn = spy_release,
+    .data = this
+  }) {}
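
The spy hands back at most chunk_size bytes per call (std::min of the configured size and what remains), records each slice in chunks_read, and reports zero bytes once the content is exhausted. A standalone sketch of that slicing, using the same string and chunk size as the test further below (this is an illustration, not part of the helper):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

using std::string;
using std::vector;

int main() {
  string content = "\"ok go do it!\"";  // 14 characters, including the quotes
  size_t chunk_size = 3, position = 0;
  vector<string> chunks;
  do {
    // Same arithmetic as spy_read: at most chunk_size bytes, never past the end.
    size_t size = std::min<size_t>(chunk_size, content.length() - position);
    chunks.push_back(content.substr(position, size));
    position += size;
  } while (!chunks.back().empty());  // the final, empty slice signals end of input
  assert((chunks == vector<string>{ "\"ok", " go", " do", " it", "!\"", "" }));
  return 0;
}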

spec/runtime/helpers/spy_reader.h (new file, 19 lines)
@@ -0,0 +1,19 @@
+#ifndef HELPERS_SPY_READER_H_
+#define HELPERS_SPY_READER_H_
+
+#include <string>
+#include <vector>
+#include "tree_sitter/runtime.h"
+
+class SpyReader {
+ public:
+  SpyReader(std::string content, size_t chunk_size);
+
+  std::string content;
+  size_t position;
+  size_t chunk_size;
+  ts_input input;
+  std::vector<std::string> chunks_read;
+};
+
+#endif  // HELPERS_SPY_READER_H_

spec/runtime/parser_spec.cc (new file, 36 lines)
@@ -0,0 +1,36 @@
+#include "runtime_spec_helper.h"
+#include "helpers/spy_reader.h"
+
+extern ts_parse_config ts_parse_config_json;
+
+START_TEST
+
+describe("reading from an input", [&]() {
+  ts_document *doc;
+
+  before_each([&]() {
+    doc = ts_document_make();
+    ts_document_set_parser(doc, ts_parse_config_json);
+  });
+
+  after_each([&]() {
+    ts_document_free(doc);
+  });
+
+  it("reads the entire input", [&]() {
+    SpyReader reader("\"ok go do it!\"", 3);
+    ts_document_set_input(doc, reader.input);
+
+    AssertThat(string(ts_document_string(doc)), Equals("(value (string))"));
+    AssertThat(reader.chunks_read, Equals(vector<string>({
+      "\"ok",
+      " go",
+      " do",
+      " it",
+      "!\"",
+      ""
+    })));
+  });
+});
+
+END_TEST

@@ -9,7 +9,7 @@
 namespace tree_sitter {
     class PreparedGrammar : public Grammar {
-        public:
+    public:
         PreparedGrammar(std::string start_rule_name,
                         const std::map<const std::string, const rules::rule_ptr> &rules,
                         const std::map<const std::string, const rules::rule_ptr> &aux_rules);
 

@@ -39,8 +39,14 @@ typedef struct {
 const char * ts_string_input_read(void *d, size_t *bytes_read) {
   ts_string_input_data *data = (ts_string_input_data *)d;
-  *bytes_read = data->length;
-  return data->string + data->position;
+  if (data->position >= data->length) {
+    *bytes_read = 0;
+    return "";
+  }
+  size_t previous_position = data->position;
+  data->position = data->length;
+  *bytes_read = data->position - previous_position;
+  return data->string + previous_position;
 }
 
 int ts_string_input_seek(void *d, size_t position) {
 
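
The string input now follows the same end-of-input contract as the chunked reader: the first call returns everything after the current position and advances position to the end, and any later call reports zero bytes instead of repeatedly claiming the full length. A standalone sketch of that behavior (the StringInput struct and string_read function are illustrative stand-ins, not the runtime's types):

#include <cassert>
#include <cstddef>
#include <cstring>

// Hypothetical mirror of the updated ts_string_input_read.
struct StringInput { const char *string; size_t position; size_t length; };

static const char *string_read(StringInput *data, size_t *bytes_read) {
  if (data->position >= data->length) {
    *bytes_read = 0;   // input exhausted: report zero bytes from now on
    return "";
  }
  size_t previous_position = data->position;
  data->position = data->length;  // hand out the whole remainder at once
  *bytes_read = data->position - previous_position;
  return data->string + previous_position;
}

int main() {
  StringInput input = { "[1, 2]", 0, std::strlen("[1, 2]") };
  size_t bytes_read = 0;
  const char *chunk = string_read(&input, &bytes_read);
  assert(bytes_read == 6 && std::strncmp(chunk, "[1, 2]", 6) == 0);
  chunk = string_read(&input, &bytes_read);  // second call: nothing left
  assert(bytes_read == 0 && chunk[0] == '\0');
  return 0;
}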