Merge pull request #71 from tree-sitter/update-fixture-grammars

Run randomized tests against the latest JavaScript grammar
This commit is contained in:
Max Brunsfeld 2017-06-15 17:29:39 -07:00 committed by GitHub
commit f29c41b7e8
16 changed files with 107 additions and 70 deletions

View file

@ -16,3 +16,7 @@ install:
script:
- script/ci
cache:
directories:
- test/fixtures/grammars

View file

@ -31,6 +31,7 @@ typedef struct {
const char *(*read)(void *payload, uint32_t *bytes_read);
int (*seek)(void *payload, uint32_t character_index, uint32_t byte_index);
TSInputEncoding encoding;
bool measure_columns_in_bytes;
} TSInput;
typedef enum {

View file

@ -21,8 +21,8 @@ fetch_grammar() {
)
}
fetch_grammar 'javascript' '76cd7dd5eb793db21640c725e58301bde83781f7'
fetch_grammar 'javascript' 'origin/master'
fetch_grammar 'json' 'origin/master'
fetch_grammar 'c' 'origin/master'
fetch_grammar 'cpp' 'origin/master'
fetch_grammar 'python' '179cb35e5b35baeef4a37f00732ff2de15e2e8bd'
fetch_grammar 'python' 'origin/master'

View file

@ -26,8 +26,13 @@ void ts_document_free(TSDocument *self) {
parser_destroy(&self->parser);
if (self->tree)
ts_tree_release(self->tree);
ts_document_set_input(self,
(TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8 });
ts_document_set_input(self, (TSInput){
NULL,
NULL,
NULL,
TSInputEncodingUTF8,
false
});
ts_free(self);
}

View file

@ -60,6 +60,8 @@ static void ts_lexer__advance(void *payload, bool skip) {
if (self->data.lookahead == '\n') {
self->current_position.extent.row++;
self->current_position.extent.column = 0;
} else if (self->input.measure_columns_in_bytes) {
self->current_position.extent.column += self->lookahead_size;
} else {
self->current_position.extent.column++;
}

View file

@ -243,8 +243,14 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
ts_lexer_start(&self->lexer);
if (self->language->external_scanner.scan(self->external_scanner_payload,
&self->lexer.data, valid_external_tokens)) {
found_external_token = true;
break;
if (length_has_unknown_chars(self->lexer.token_end_position)) {
self->lexer.token_end_position = self->lexer.current_position;
}
if (lex_mode.lex_state != 0 ||
self->lexer.token_end_position.bytes > current_position.bytes) {
found_external_token = true;
break;
}
}
ts_lexer_reset(&self->lexer, current_position);
}
@ -253,6 +259,9 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
current_position.extent.row, current_position.extent.column);
ts_lexer_start(&self->lexer);
if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) {
if (length_has_unknown_chars(self->lexer.token_end_position)) {
self->lexer.token_end_position = self->lexer.current_position;
}
break;
}
@ -298,9 +307,6 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
symbol = self->language->external_scanner.symbol_map[symbol];
}
if (length_has_unknown_chars(self->lexer.token_end_position)) {
self->lexer.token_end_position = self->lexer.current_position;
}
Length padding = length_sub(self->lexer.token_start_position, start_position);
Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position);
TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol);
@ -910,6 +916,11 @@ static StackIterateAction parser__skip_preceding_trees_callback(
void *payload, TSStateId state, TreeArray *trees, uint32_t tree_count,
bool is_done, bool is_pending) {
if (tree_count > 0 && state != ERROR_STATE) {
uint32_t bytes_skipped = 0;
for (uint32_t i = 0; i < trees->size; i++) {
bytes_skipped += ts_tree_total_bytes(trees->contents[i]);
}
if (bytes_skipped == 0) return StackIterateNone;
SkipPrecedingTreesSession *session = payload;
Parser *self = session->parser;
TSSymbol lookahead_symbol = session->lookahead_symbol;

View file

@ -43,8 +43,9 @@ TSInput ts_string_input_make_with_length(const char *string, uint32_t length) {
.read = ts_string_input_read,
.seek = ts_string_input_seek,
.encoding = TSInputEncodingUTF8,
.measure_columns_in_bytes = false,
};
error:
return (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8 };
return (TSInput){ NULL, NULL, NULL, TSInputEncodingUTF8, false };
}

View file

@ -126,6 +126,7 @@ static bool tree_must_eq(Tree *old_tree, Tree *new_tree) {
return old_tree == new_tree || (
!old_tree->has_changes &&
old_tree->symbol == new_tree->symbol &&
old_tree->symbol != ts_builtin_sym_error &&
old_tree->size.bytes == new_tree->size.bytes &&
old_tree->parse_state != TS_TREE_STATE_NONE &&
new_tree->parse_state != TS_TREE_STATE_NONE &&

View file

@ -14,10 +14,8 @@ e f;
(ERROR (identifier))
(identifier)
(statement_block
(ERROR (identifier))
(expression_statement (identifier))))
(ERROR (identifier))
(expression_statement (identifier)))
(expression_statement (ERROR (identifier)) (identifier))))
(expression_statement (ERROR (identifier)) (identifier)))
=======================================================
multiple invalid tokens right after the viable prefix
@ -35,10 +33,11 @@ h i j k;
(ERROR (identifier) (identifier))
(identifier)
(statement_block
(ERROR (identifier) (identifier) (identifier))
(expression_statement (identifier))))
(expression_statement
(ERROR (identifier) (jsx_attribute (identifier)) (jsx_attribute (identifier)))
(identifier))))
(expression_statement
(ERROR (identifier) (identifier) (identifier))
(ERROR (identifier) (jsx_attribute (identifier)) (jsx_attribute (identifier)))
(identifier)))
===================================================
@ -53,8 +52,8 @@ if ({a: 'b'} {c: 'd'}) {
(program
(if_statement
(object (pair (identifier) (string)))
(ERROR (object (pair (identifier) (string))))
(object (pair (identifier) (string)))
(statement_block
(expression_statement (assignment
(identifier)
@ -75,9 +74,7 @@ a.b =
(program
(comment)
(trailing_expression_statement
(member_access (identifier) (identifier)))
(ERROR))
(ERROR (member_access (identifier) (identifier))))
=================================================================
An invalid token at the end of a construct with extra line breaks
@ -97,27 +94,6 @@ a(
(identifier)
(ERROR)))))
===================================================
Multi-line chained expressions in var declarations
===================================================
const one = two
.three(four)
.five()
---
(program
(var_declaration (var_assignment
(identifier)
(function_call
(member_access
(function_call
(member_access (identifier) (identifier))
(arguments (identifier)))
(identifier))
(arguments)))))
===================================================
Errors after a sequence of function declarations
===================================================
@ -146,12 +122,12 @@ var x = !!!
(program
(comment)
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
(trailing_var_declaration (identifier)) (ERROR))
(function (identifier) (formal_parameters) (statement_block))
(function (identifier) (formal_parameters) (statement_block))
(function (identifier) (formal_parameters) (statement_block))
(function (identifier) (formal_parameters) (statement_block))
(function (identifier) (formal_parameters) (statement_block))
(function (identifier) (formal_parameters) (statement_block))
(function (identifier) (formal_parameters) (statement_block))
(function (identifier) (formal_parameters) (statement_block))
(ERROR (identifier)))

View file

@ -149,7 +149,10 @@ const TSLanguage *load_real_language(const string &language_name) {
string parser_filename = language_dir + "/src/parser.c";
string external_scanner_filename = language_dir + "/src/scanner.cc";
if (!file_exists(external_scanner_filename)) {
external_scanner_filename = "";
external_scanner_filename = language_dir + "/src/scanner.c";
if (!file_exists(external_scanner_filename)) {
external_scanner_filename = "";
}
}
int grammar_mtime = get_modified_time(grammar_filename);

View file

@ -6,6 +6,7 @@
using std::pair;
using std::string;
using std::vector;
static const size_t UTF8_MAX_CHAR_SIZE = 4;
@ -16,12 +17,25 @@ SpyInput::SpyInput(string content, size_t chars_per_chunk) :
byte_offset(0),
content(content),
encoding(TSInputEncodingUTF8),
strings_read({""}) {}
ranges_read({}) {}
// Destructor: frees the `buffer` array with delete[].
SpyInput::~SpyInput() {
delete[] buffer;
}
// Records that the bytes [start, start + count) were read.
//
// `ranges` holds [first, second) byte intervals. If `start` lies within an
// existing interval (or exactly at its end), that interval's end is extended
// to cover the new bytes and nothing else changes. Otherwise a new interval
// is appended. Only the first matching interval is touched; existing
// intervals are never merged with one another.
static void add_byte_range(vector<pair<uint32_t, uint32_t>> *ranges,
                           uint32_t start, uint32_t count) {
  uint32_t end = start + count;
  for (auto &range : *ranges) {
    if (range.first <= start && start <= range.second) {
      // The guard guarantees range.first <= start, so the lower bound can
      // never need to move; only the upper bound may grow.
      if (end > range.second) range.second = end;
      return;
    }
  }
  ranges->push_back({start, end});
}
const char * SpyInput::read(void *payload, uint32_t *bytes_read) {
auto spy = static_cast<SpyInput *>(payload);
@ -36,7 +50,7 @@ const char * SpyInput::read(void *payload, uint32_t *bytes_read) {
string result = spy->content.substr(spy->byte_offset, byte_count);
*bytes_read = byte_count;
spy->strings_read.back() += result;
add_byte_range(&spy->ranges_read, spy->byte_offset, byte_count);
spy->byte_offset += byte_count;
/*
@ -54,18 +68,25 @@ const char * SpyInput::read(void *payload, uint32_t *bytes_read) {
int SpyInput::seek(void *payload, uint32_t character, uint32_t byte) {
auto spy = static_cast<SpyInput *>(payload);
if (spy->strings_read.size() == 0 || spy->strings_read.back().size() > 0)
spy->strings_read.push_back("");
spy->byte_offset = byte;
return 0;
}
// Returns the slice of `content` covered by each entry of `ranges_read`,
// in the order the entries are stored.
vector<string> SpyInput::strings_read() const {
  vector<string> texts;
  texts.reserve(ranges_read.size());
  for (size_t i = 0; i < ranges_read.size(); i++) {
    const uint32_t first_byte = ranges_read[i].first;
    const uint32_t byte_length = ranges_read[i].second - first_byte;
    texts.push_back(content.substr(first_byte, byte_length));
  }
  return texts;
}
// Builds a TSInput that uses this object as its payload, with this class's
// `read` and `seek` as callbacks and the current `encoding`.
// Inputs created here always set measure_columns_in_bytes to true.
TSInput SpyInput::input() {
  TSInput spy_input;
  spy_input.payload = this;
  spy_input.read = read;
  spy_input.seek = seek;
  spy_input.encoding = encoding;
  spy_input.measure_columns_in_bytes = true;
  return spy_input;
}
@ -129,5 +150,5 @@ pair<string, TSPoint> SpyInput::swap_substr(size_t start_byte, size_t bytes_remo
}
void SpyInput::clear() {
strings_read.clear();
ranges_read.clear();
}

View file

@ -30,10 +30,11 @@ class SpyInput {
void clear();
TSInputEdit replace(size_t start_char, size_t chars_removed, std::string text);
TSInputEdit undo();
std::vector<std::string> strings_read() const;
std::string content;
TSInputEncoding encoding;
std::vector<std::string> strings_read;
std::vector<std::pair<uint32_t, uint32_t>> ranges_read;
};
#endif // HELPERS_SPY_INPUT_H_

View file

@ -30,7 +30,7 @@ ostream &operator<<(ostream &stream, Associativity associativity) {
return stream << "AssociativityLeft";
case AssociativityRight:
return stream << "AssociativityRight";
case AssociativityNone:
default:
return stream << "AssociativityNone";
}
}

View file

@ -120,8 +120,8 @@ for (auto &language_name : test_languages) {
ts_document_parse(document);
});
std::set<std::pair<size_t, size_t>> deletions;
std::set<std::pair<size_t, string>> insertions;
set<pair<size_t, size_t>> deletions;
set<pair<size_t, string>> insertions;
for (size_t i = 0; i < 60; i++) {
size_t edit_position = random() % utf8_char_count(entry.input);

View file

@ -76,11 +76,22 @@ describe("Document", [&]() {
const char16_t content[] = u"[true, false]";
spy_input->content = string((const char *)content, sizeof(content));
spy_input->encoding = TSInputEncodingUTF16;
// spy_input->measure_columns_in_bytes
TSInput input = spy_input->input();
ts_document_set_input(document, spy_input->input());
input.measure_columns_in_bytes = false;
ts_document_set_input(document, input);
ts_document_invalidate(document);
ts_document_parse(document);
TSNode root = ts_document_root_node(document);
AssertThat(ts_node_end_point(root), Equals<TSPoint>({0, 13}));
input.measure_columns_in_bytes = true;
ts_document_set_input(document, input);
ts_document_invalidate(document);
ts_document_parse(document);
root = ts_document_root_node(document);
AssertThat(ts_node_end_point(root), Equals<TSPoint>({0, 26}));
});
it("allows the input to be retrieved later", [&]() {
@ -94,7 +105,7 @@ describe("Document", [&]() {
ts_document_set_input(document, spy_input->input());
AssertThat(ts_document_root_node(document), Equals<TSNode>(root));
AssertThat(ts_node_has_changes(root), IsFalse());
AssertThat(spy_input->strings_read, Equals(vector<string>({ "" })));
AssertThat(spy_input->strings_read(), IsEmpty());
});
it("reads text from the new input for future parses", [&]() {
@ -113,7 +124,7 @@ describe("Document", [&]() {
assert_node_string_equals(
new_root,
"(object (pair (string) (array (null) (number))))");
AssertThat(spy_input->strings_read, Equals(vector<string>({" [null, 2" })));
AssertThat(spy_input->strings_read(), Equals(vector<string>({" [null, 2" })));
});
it("allows setting input string with length", [&]() {

View file

@ -254,7 +254,7 @@ describe("Parser", [&]() {
"(identifier) "
"(math_op (number) (member_access (identifier) (identifier))))))");
AssertThat(input->strings_read, Equals(vector<string>({ " + abc.d)" })));
AssertThat(input->strings_read(), Equals(vector<string>({ " abc.d);" })));
});
});
@ -279,7 +279,7 @@ describe("Parser", [&]() {
"(number) "
"(math_op (number) (math_op (number) (identifier)))))))");
AssertThat(input->strings_read, Equals(vector<string>({ "123 || 5 +" })));
AssertThat(input->strings_read(), Equals(vector<string>({"123 || 5 ", ";"})));
});
});
@ -289,19 +289,19 @@ describe("Parser", [&]() {
set_text("var x = y;");
assert_root_node(
"(program (var_declaration (var_assignment "
"(program (variable_declaration (variable_declarator "
"(identifier) (identifier))))");
insert_text(strlen("var x = y"), " *");
assert_root_node(
"(program (var_declaration (var_assignment "
"(program (variable_declaration (variable_declarator "
"(identifier) (identifier)) (ERROR)))");
insert_text(strlen("var x = y *"), " z");
assert_root_node(
"(program (var_declaration (var_assignment "
"(program (variable_declaration (variable_declarator "
"(identifier) (math_op (identifier) (identifier)))))");
});
});