In parser, read chunked input correctly

This commit is contained in:
Max Brunsfeld 2014-03-10 13:25:31 -07:00
parent 3aaa08b948
commit 42e9a264f3
6 changed files with 134 additions and 18 deletions

View file

@ -38,11 +38,15 @@ typedef struct {
typedef struct {
ts_input input;
const char *current_chunk;
size_t current_chunk_end;
size_t position;
const char *chunk;
size_t chunk_start;
size_t chunk_size;
size_t position_in_chunk;
size_t token_end_position;
size_t token_start_position;
ts_tree *lookahead_node;
ts_tree *prev_lookahead_node;
ts_state lex_state;
@ -60,9 +64,12 @@ static ts_parser ts_parser_make(ts_input input) {
.input = input,
.token_start_position = 0,
.token_end_position = 0,
.position = 0,
.current_chunk = chunk,
.current_chunk_end = bytes_read,
.chunk = chunk,
.chunk_size = bytes_read,
.chunk_start = 0,
.position_in_chunk = 0,
.lookahead_node = NULL,
.prev_lookahead_node = NULL,
.lex_state = 0,
@ -71,9 +78,13 @@ static ts_parser ts_parser_make(ts_input input) {
};
return result;
}
/*
 * Absolute read position of the parser: the offset at which the current
 * chunk starts plus the offset already consumed inside that chunk.
 */
static size_t ts_parser_position(const ts_parser *parser) {
  const size_t offset_in_chunk = parser->position_in_chunk;
  return parser->chunk_start + offset_in_chunk;
}
/*
 * Returns the character at the parser's current read position.
 *
 * NOTE(review): two return statements appear back to back. As written only
 * the first one (current_chunk[position]) can execute; the
 * chunk[position_in_chunk] form after it is unreachable. This looks like a
 * superseded line sitting next to its replacement -- confirm which one is
 * meant to remain.
 */
static char ts_parser_lookahead_char(const ts_parser *parser) {
return parser->current_chunk[parser->position];
return parser->chunk[parser->position_in_chunk];
}
static ts_symbol ts_parser_lookahead_sym(const ts_parser *parser) {
@ -143,13 +154,19 @@ static void ts_parser_reduce(ts_parser *parser, ts_symbol symbol, int immediate_
DEBUG_PARSE("reduce: %s, state: %u \n", ts_symbol_names[symbol], ts_parser_parse_state(parser));
}
/* One-byte buffer holding only '\0'; used as the chunk when read_fn reports zero bytes. */
static const char empty_chunk[1] = { '\0' };
/*
 * Moves the parser one character forward. While there are characters left in
 * the current chunk only the in-chunk position is bumped; otherwise the next
 * chunk is requested from input.read_fn.
 *
 * NOTE(review): the body contains two variants of the same logic side by side
 * (position/current_chunk_end and position_in_chunk/chunk_size) and the braces
 * do not balance as written -- this looks like superseded lines left next to
 * their replacements; confirm which set is meant to remain.
 */
static void ts_parser_advance(ts_parser *parser) {
if (parser->position < parser->current_chunk_end) {
parser->position++;
/*
 * NOTE(review): chunk_size is a size_t, so `chunk_size - 1` wraps around if
 * chunk_size is ever 0 here -- confirm the initial chunk can never be empty.
 */
if (parser->position_in_chunk < parser->chunk_size - 1) {
parser->position_in_chunk++;
} else {
size_t bytes_read = 0;
parser->current_chunk = parser->input.read_fn(parser->input.data, &bytes_read);
parser->current_chunk_end += bytes_read;
/* The next chunk starts where the one just finished ended. */
parser->chunk_start += parser->chunk_size;
/* read_fn returns the new chunk and writes its length into chunk_size. */
parser->chunk = parser->input.read_fn(parser->input.data, &parser->chunk_size);
/* Nothing was read: substitute the one-byte '\0' chunk so the lookahead reads '\0'. */
if (parser->chunk_size == 0) {
parser->chunk = empty_chunk;
parser->chunk_size = 1;
}
parser->position_in_chunk = 0;
}
}
@ -161,10 +178,11 @@ static void ts_parser_advance_to_state(ts_parser *parser, ts_state lex_state) {
/*
 * Creates the lookahead leaf node for `symbol` and records where the token
 * ends.
 *
 * The leaf's size is the distance from token_start_position to the current
 * position; its offset is the gap between the previously recorded
 * token_end_position and token_start_position.
 *
 * NOTE(review): `size` is declared twice and token_end_position is assigned
 * twice (once from parser->position, once from the local `position`). This
 * looks like superseded lines left next to their replacements -- confirm
 * which set is meant to remain.
 */
static void ts_parser_set_lookahead_sym(ts_parser *parser, ts_symbol symbol) {
DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]);
size_t size = parser->position - parser->token_start_position;
/* Absolute position (chunk_start + position_in_chunk) at the end of the token. */
size_t position = ts_parser_position(parser);
size_t size = position - parser->token_start_position;
/* Must be computed before token_end_position is overwritten below. */
size_t offset = parser->token_start_position - parser->token_end_position;
parser->lookahead_node = ts_tree_make_leaf(symbol, size, offset);
parser->token_end_position = parser->position;
parser->token_end_position = position;
}
static ts_tree * ts_parser_tree(ts_parser *parser) {
@ -175,7 +193,7 @@ static ts_tree * ts_parser_tree(ts_parser *parser) {
/*
 * Advances past every character for which isspace() is true, then records the
 * resulting position as the start of the next token.
 *
 * NOTE(review): isspace() is called with a plain `char`; if `char` is signed
 * on the target and the input holds bytes >= 0x80, the argument is negative,
 * which isspace() does not accept -- consider casting to unsigned char.
 *
 * NOTE(review): token_start_position is assigned twice in a row, so the
 * second assignment wins. This looks like a superseded line left next to its
 * replacement -- confirm which one is meant to remain.
 */
static void ts_parser_skip_whitespace(ts_parser *parser) {
while (isspace(ts_parser_lookahead_char(parser)))
ts_parser_advance(parser);
parser->token_start_position = parser->position;
parser->token_start_position = ts_parser_position(parser);
}
static int ts_parser_handle_error(ts_parser *parser, size_t count, const ts_symbol *expected_symbols) {

View file

@ -0,0 +1,37 @@
#include "helpers/spy_reader.h"
#include <algorithm>
using std::string;
// Read callback for SpyReader.
//
// Hands out the next slice of `content`, at most `chunk_size` bytes long,
// records that slice in `chunks_read`, and moves `position` past it.
//
// data:       the SpyReader, passed as an opaque pointer.
// bytes_read: receives the length of the returned slice (0 once the content
//             is exhausted).
// Returns a pointer into the reader's `content` buffer; it is not
// NUL-terminated at the slice boundary.
static const char * spy_read(void *data, size_t *bytes_read) {
  SpyReader *reader = static_cast<SpyReader *>(data);
  const size_t length = reader->content.length();

  // spy_seek() stores whatever position it is given, so the position may lie
  // past the end of the content. Clamp it first: `length - position` would
  // otherwise wrap around and the slice below would read out of bounds.
  const size_t start = std::min(reader->position, length);
  const size_t size = std::min(reader->chunk_size, length - start);

  const char *result = reader->content.data() + start;
  reader->chunks_read.push_back(std::string(result, size));
  reader->position = start + size;
  *bytes_read = size;
  return result;
}
static int spy_seek(void *data, size_t position) {
SpyReader *reader = static_cast<SpyReader *>(data);
reader->position = position;
return 0;
}
// Release callback for SpyReader: destroys the reader with `delete`, so the
// reader passed in must have been allocated with `new`.
static void spy_release(void *data) {
  delete static_cast<SpyReader *>(data);
}
// Builds a reader over `content` that starts at offset 0 and returns at most
// `chunk_size` bytes per read. `input` is wired to the spy_* callbacks with
// this object as their data pointer.
SpyReader::SpyReader(string content, size_t chunk_size)
    : content(content),
      position(0),
      chunk_size(chunk_size),
      input() {  // value-initialise so any field not assigned below is zeroed
  input.read_fn = spy_read;
  input.seek_fn = spy_seek;
  input.release_fn = spy_release;
  input.data = this;
}

View file

@ -0,0 +1,19 @@
#ifndef HELPERS_SPY_READER_H_
#define HELPERS_SPY_READER_H_
#include <string>
#include <vector>
#include "tree_sitter/runtime.h"
// Spec helper that serves a fixed string through a ts_input in bounded-size
// chunks and keeps a record of every chunk it returned.
//
// NOTE(review): the constructor stores `this` in input.data, so a copied
// SpyReader would carry an `input` that still points at the original object
// -- confirm instances are never copied.
class SpyReader {
public:
// content:    the text to serve.
// chunk_size: upper bound on the number of bytes returned by one read.
SpyReader(std::string content, size_t chunk_size);
// The full text being served.
std::string content;
// Offset into `content` at which the next read starts.
size_t position;
// Maximum number of bytes returned by a single read.
size_t chunk_size;
// Input handle whose callbacks operate on this reader.
ts_input input;
// Every chunk returned by the read callback, in the order it was returned.
std::vector<std::string> chunks_read;
};
#endif // HELPERS_SPY_READER_H_

View file

@ -0,0 +1,36 @@
#include "runtime_spec_helper.h"
#include "helpers/spy_reader.h"
extern ts_parse_config ts_parse_config_json;
START_TEST
// Checks that a document consumes its input chunk by chunk until the reader
// reports an empty chunk.
describe("reading from an input", [&]() {
ts_document *doc;
// Each example gets a fresh document configured with the JSON parse config.
before_each([&]() {
doc = ts_document_make();
ts_document_set_parser(doc, ts_parse_config_json);
});
after_each([&]() {
ts_document_free(doc);
});
it("reads the entire input", [&]() {
// 14 characters served 3 at a time: four full chunks, one 2-character
// chunk, then an empty chunk.
SpyReader reader("\"ok go do it!\"", 3);
ts_document_set_input(doc, reader.input);
AssertThat(string(ts_document_string(doc)), Equals("(value (string))"));
// The trailing "" is the zero-length read recorded after the content ran out.
AssertThat(reader.chunks_read, Equals(vector<string>({
"\"ok",
" go",
" do",
" it",
"!\"",
""
})));
});
});
END_TEST

View file

@ -9,7 +9,7 @@
namespace tree_sitter {
class PreparedGrammar : public Grammar {
public:
public:
PreparedGrammar(std::string start_rule_name,
const std::map<const std::string, const rules::rule_ptr> &rules,
const std::map<const std::string, const rules::rule_ptr> &aux_rules);

View file

@ -39,8 +39,14 @@ typedef struct {
/*
 * Read callback for string-backed input. `d` is the ts_string_input_data and
 * `bytes_read` receives the number of bytes made available by the call.
 *
 * NOTE(review): the first two statements of the body return unconditionally
 * (length as bytes_read, string + position as the result), so everything
 * after them is unreachable as written. This looks like a superseded
 * implementation left next to its replacement -- confirm which one is meant
 * to remain.
 */
const char * ts_string_input_read(void *d, size_t *bytes_read) {
ts_string_input_data *data = (ts_string_input_data *)d;
*bytes_read = data->length;
return data->string + data->position;
/* Position already at or past the end: report zero bytes and return an empty string. */
if (data->position >= data->length) {
*bytes_read = 0;
return "";
}
/* Hand out everything from the current position to the end in one chunk and move the position to the end. */
size_t previous_position = data->position;
data->position = data->length;
*bytes_read = data->position - previous_position;
return data->string + previous_position;
}
int ts_string_input_seek(void *d, size_t position) {