Merge pull request #97 from tree-sitter/windows-build

Get the library building and the tests passing on Windows
This commit is contained in:
Max Brunsfeld 2017-08-09 10:23:38 -07:00 committed by GitHub
commit c9a87687fc
41 changed files with 482 additions and 258 deletions

24
appveyor.yml Normal file
View file

@ -0,0 +1,24 @@
# AppVeyor CI configuration: build the library and run the tests on Windows.
# NOTE(review): indentation appears stripped in this rendering; YAML nesting
# (e.g. `only:` under `branches:`) must hold in the real file — confirm.
image: Visual Studio 2017
branches:
only:
- master
platform:
- x86
init:
# Keep LF line endings so scripts and test fixtures are byte-identical across OSes.
- git config --global core.autocrlf false
install:
# NOTE(review): the image is VS 2017 but this loads the VS 14.0 (2015)
# toolchain — confirm this pairing is intentional.
- call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" x86
- script\configure.cmd
- script\fetch-fixtures.cmd
test_script:
- script\test.cmd
# Disable AppVeyor's default MSBuild step; test.cmd drives the build itself.
build: off
cache:
# Grammar fixtures are fetched by script\fetch-fixtures.cmd; cache them between runs.
- test\fixtures\grammars

2
externals/gyp vendored

@ -1 +1 @@
Subproject commit d277a083a05f5683a073ddd4991f5f79741b3f93
Subproject commit e0ee72ddc7fb97eb33d530cf684efcbe4d27ecb3

View file

@ -172,25 +172,6 @@ typedef struct TSLanguage {
{ .type = TSParseActionTypeAccept } \
}
#define GET_LANGUAGE(...) \
static TSLanguage language = { \
.version = LANGUAGE_VERSION, \
.symbol_count = SYMBOL_COUNT, \
.alias_count = ALIAS_COUNT, \
.token_count = TOKEN_COUNT, \
.symbol_metadata = ts_symbol_metadata, \
.parse_table = (const unsigned short *)ts_parse_table, \
.parse_actions = ts_parse_actions, \
.lex_modes = ts_lex_modes, \
.symbol_names = ts_symbol_names, \
.alias_sequences = (const TSSymbol *)ts_alias_sequences, \
.max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, \
.lex_fn = ts_lex, \
.external_token_count = EXTERNAL_TOKEN_COUNT, \
.external_scanner = {__VA_ARGS__} \
}; \
return &language \
#ifdef __cplusplus
}
#endif

View file

@ -153,6 +153,8 @@
'-Wno-unused-parameter'
],
'defines': ['UTF8PROC_EXPORTS'],
'xcode_settings': {
'ALWAYS_SEARCH_USER_PATHS': 'NO',
'WARNING_CFLAGS': [

3
script/configure.cmd Normal file
View file

@ -0,0 +1,3 @@
:: Bootstrap the Windows build: pull vendored dependencies (including the
:: gyp submodule), then generate MSBuild projects for the library and tests.
git submodule update --init --recursive
:: gyp.bat emits .vcxproj files for each .gyp file; --depth . anchors
:: generated paths at the repository root.
call .\externals\gyp\gyp.bat project.gyp --depth .
call .\externals\gyp\gyp.bat tests.gyp --depth .

View file

@ -11,22 +11,22 @@ fetch_grammar() {
echo "Updating ${grammar} grammar..."
if [ ! -d $grammar_dir ]; then
git clone $grammar_url $grammar_dir
git clone $grammar_url $grammar_dir --depth=1
fi
(
cd $grammar_dir;
git fetch origin
git reset --hard $ref;
git fetch origin $ref --depth=1
git reset --hard origin/$ref;
)
}
fetch_grammar 'javascript' 'origin/master'
fetch_grammar 'json' 'origin/master'
fetch_grammar 'c' 'origin/master'
fetch_grammar 'cpp' 'origin/master'
fetch_grammar 'python' 'origin/master'
fetch_grammar 'go' 'origin/master'
fetch_grammar 'ruby' 'origin/master'
fetch_grammar 'typescript' 'origin/master'
fetch_grammar 'bash' 'origin/master'
fetch_grammar javascript master
fetch_grammar json master
fetch_grammar c master
fetch_grammar cpp master
fetch_grammar python master
fetch_grammar go master
fetch_grammar ruby master
fetch_grammar typescript master
fetch_grammar bash master

26
script/fetch-fixtures.cmd Normal file
View file

@ -0,0 +1,26 @@
:: Fetch the grammar fixture repositories used by the test suite (Windows).
:: Mirrors script/fetch-fixtures.sh: shallow-clones each tree-sitter grammar
:: and hard-resets it to the requested branch.
@echo off
call:fetch_grammar javascript master
call:fetch_grammar json master
call:fetch_grammar c master
call:fetch_grammar cpp master
call:fetch_grammar python master
call:fetch_grammar go master
call:fetch_grammar ruby master
call:fetch_grammar typescript master
call:fetch_grammar bash master
EXIT /B 0
:: fetch_grammar <name> <branch>
:: Clones https://github.com/tree-sitter/tree-sitter-<name> into
:: test\fixtures\grammars\<name> if absent, then resets it to origin/<branch>.
:fetch_grammar
SETLOCAL
SET grammar_dir=test\fixtures\grammars\%~1
SET grammar_url=https://github.com/tree-sitter/tree-sitter-%~1
SET grammar_branch=%~2
@IF NOT EXIST %grammar_dir% (
git clone %grammar_url% %grammar_dir% --depth=1
)
pushd %grammar_dir%
:: Use %grammar_branch% (quote-stripped %~2) for both commands; the raw %2
:: used previously would keep surrounding quotes and corrupt the ref name,
:: and was inconsistent with the reset line below.
git fetch origin %grammar_branch% --depth=1
git reset --hard origin/%grammar_branch%
popd
EXIT /B 0

2
script/test.cmd Normal file
View file

@ -0,0 +1,2 @@
:: Build the test project in the Test configuration and run the test binary.
msbuild /p:Configuration=Test tests.vcxproj
:: singleline reporter + no color keeps the CI log output readable.
.\test\tests.exe --reporter=singleline --no-color

View file

@ -73,7 +73,9 @@ class ParseTableBuilder {
Symbol start_symbol = grammar.variables.empty() ?
Symbol::terminal(0) :
Symbol::non_terminal(0);
Production start_production{{{start_symbol, 0, rules::AssociativityNone, {"", false}}}, 0};
Production start_production({{start_symbol, 0, rules::AssociativityNone, rules::Alias{}}}, 0);
add_parse_state({}, ParseItemSet{{
{
ParseItem(rules::START(), start_production, 0),
@ -633,7 +635,10 @@ class ParseTableBuilder {
description += " " + symbol_name(symbol);
}
description += " \u2022 " + symbol_name(lookahead) + " \u2026";
const string dot = "\xE2\x80\xA2";
const string ellipsis = "\xE2\x80\xA6";
description += " " + dot + " " + symbol_name(lookahead) + " " + ellipsis;
description += "\n\n";
description += "Possible interpretations:\n\n";
@ -648,14 +653,14 @@ class ParseTableBuilder {
description += " (" + symbol_name(item.lhs());
for (size_t i = 0; i < item.production->size(); i++) {
if (i == item.step_index) {
description += " \u2022";
description += " " + dot;
}
description += " " + symbol_name(item.production->at(i).symbol);
}
description += ")";
if (item.is_done()) {
description += " \u2022 " + symbol_name(lookahead) + " \u2026";
description += " " + dot + " " + symbol_name(lookahead) + " " + ellipsis;
}
description += "\n";

View file

@ -282,7 +282,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
if (duplicates.empty()) break;
map<size_t, size_t> new_replacements;
map<LexStateId, LexStateId> new_replacements;
for (LexStateId i = 0, size = lex_table.states.size(); i < size; i++) {
LexStateId new_state_index = i;
auto duplicate = duplicates.find(i);
@ -297,7 +297,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
new_state_index -= prior_removed;
new_replacements.insert({ i, new_state_index });
new_replacements.insert({i, new_state_index});
replacements.insert({ i, new_state_index });
for (auto &replacement : replacements) {
if (replacement.second == i) {

View file

@ -207,14 +207,15 @@ const vector<Production> &ParseItemSetBuilder::inline_production(const ParseItem
auto begin = item.production->steps.begin();
auto end = item.production->steps.end();
auto step = begin + item.step_index;
Production production({begin, step}, item.production->dynamic_precedence);
Production production{{begin, step}, item.production->dynamic_precedence};
for (auto &step : *production_to_insert) {
production.steps.push_back(step);
if (!inlined_step.alias.value.empty()) {
production.steps.back().alias = inlined_step.alias;
}
}
production.back().precedence = inlined_step.precedence;
production.back().associativity = inlined_step.associativity;
production.steps.insert(

View file

@ -100,7 +100,11 @@ class CCodeGenerator {
add_symbol_enum();
add_symbol_names_list();
add_symbol_metadata_list();
add_alias_sequences();
if (parse_table.alias_sequences.size() > 1) {
add_alias_sequences();
}
add_lex_function();
add_lex_modes_list();
@ -123,8 +127,10 @@ class CCodeGenerator {
}
void add_warning_pragma() {
line("#if defined(__GNUC__) || defined(__clang__)");
line("#pragma GCC diagnostic push");
line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
line("#endif");
line();
}
@ -427,30 +433,57 @@ class CCodeGenerator {
if (!syntax_grammar.external_tokens.empty()) {
line("void *" + external_scanner_name + "_create();");
line("void " + external_scanner_name + "_destroy();");
line("void " + external_scanner_name + "_destroy(void *);");
line("bool " + external_scanner_name + "_scan(void *, TSLexer *, const bool *);");
line("unsigned " + external_scanner_name + "_serialize(void *, char *);");
line("void " + external_scanner_name + "_deserialize(void *, const char *, unsigned);");
line();
}
line("const TSLanguage *" + language_function_name + "() {");
line("#ifdef _WIN32");
line("#define extern __declspec(dllexport)");
line("#endif");
line();
line("extern const TSLanguage *" + language_function_name + "() {");
indent([&]() {
line("GET_LANGUAGE(");
if (syntax_grammar.external_tokens.empty()) {
add(");");
} else {
indent([&]() {
line("(const bool *)ts_external_scanner_states,");
line("ts_external_scanner_symbol_map,");
line(external_scanner_name + "_create,");
line(external_scanner_name + "_destroy,");
line(external_scanner_name + "_scan,");
line(external_scanner_name + "_serialize,");
line(external_scanner_name + "_deserialize,");
});
line(");");
}
line("static TSLanguage language = {");
indent([&]() {
line(".version = LANGUAGE_VERSION,");
line(".symbol_count = SYMBOL_COUNT,");
line(".alias_count = ALIAS_COUNT,");
line(".token_count = TOKEN_COUNT,");
line(".symbol_metadata = ts_symbol_metadata,");
line(".parse_table = (const unsigned short *)ts_parse_table,");
line(".parse_actions = ts_parse_actions,");
line(".lex_modes = ts_lex_modes,");
line(".symbol_names = ts_symbol_names,");
if (parse_table.alias_sequences.size() > 1) {
line(".alias_sequences = (const TSSymbol *)ts_alias_sequences,");
}
line(".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,");
line(".lex_fn = ts_lex,");
line(".external_token_count = EXTERNAL_TOKEN_COUNT,");
if (!syntax_grammar.external_tokens.empty()) {
line(".external_scanner = {");
indent([&]() {
line("(const bool *)ts_external_scanner_states,");
line("ts_external_scanner_symbol_map,");
line(external_scanner_name + "_create,");
line(external_scanner_name + "_destroy,");
line(external_scanner_name + "_scan,");
line(external_scanner_name + "_serialize,");
line(external_scanner_name + "_deserialize,");
});
line("},");
}
});
line("};");
line("return &language;");
});
line("}");
line();

View file

@ -36,10 +36,9 @@ class ExpandRepeats {
},
[&](const rules::Seq &sequence) {
return rules::Seq{
apply(*sequence.left),
apply(*sequence.right)
};
auto left = apply(*sequence.left);
auto right = apply(*sequence.right);
return rules::Seq{left, right};
},
[&](const rules::Repeat &repeat) {

View file

@ -93,7 +93,8 @@ Metadata Metadata::main_token(const Rule &rule) {
Metadata Metadata::alias(string &&value, bool is_named, const Rule &rule) {
MetadataParams params;
params.alias = {move(value), is_named};
params.alias.value = move(value);
params.alias.is_named = is_named;
return Metadata{rule, params};
}

View file

@ -22,7 +22,12 @@ struct ProductionStep {
struct Production {
std::vector<ProductionStep> steps;
int dynamic_precedence = 0;
int dynamic_precedence;
inline Production() : dynamic_precedence(0) {}
inline Production(std::vector<ProductionStep> &&steps, int dynamic_precedence = 0) :
steps(move(steps)), dynamic_precedence(dynamic_precedence) {}
bool operator==(const Production &) const;
inline ProductionStep &back() { return steps.back(); }

View file

@ -30,7 +30,7 @@ static inline void *ts_realloc(void *buffer, size_t size) {
}
static inline void ts_free(void *buffer) {
return ts_record_free(buffer);
ts_record_free(buffer);
}
static inline bool ts_toggle_allocation_recording(bool value) {

View file

@ -58,9 +58,6 @@ extern "C" {
#define array_pop(self) ((self)->contents[--(self)->size])
#define array_reverse(self) \
array__reverse((VoidArray *)(self), array__elem_size(self))
// Private
typedef Array(void) VoidArray;
@ -118,18 +115,6 @@ static inline void array__splice(VoidArray *self, size_t element_size,
self->size += new_count - old_count;
}
static inline void array__reverse(VoidArray *self, size_t element_size) {
char swap[element_size];
char *contents = (char *)self->contents;
for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) {
size_t offset = i * element_size;
size_t reverse_offset = (self->size - 1 - i) * element_size;
memcpy(&swap, contents + offset, element_size);
memcpy(contents + offset, contents + reverse_offset, element_size);
memcpy(contents + reverse_offset, &swap, element_size);
}
}
#ifdef __cplusplus
}
#endif

View file

@ -104,7 +104,7 @@ void ts_document_edit(TSDocument *self, TSInputEdit edit) {
}
void ts_document_parse(TSDocument *self) {
return ts_document_parse_with_options(self, (TSParseOptions){
ts_document_parse_with_options(self, (TSParseOptions){
.halt_on_error = false,
.changed_ranges = NULL,
.changed_range_count = NULL,
@ -113,7 +113,7 @@ void ts_document_parse(TSDocument *self) {
void ts_document_parse_and_get_changed_ranges(TSDocument *self, TSRange **ranges,
uint32_t *range_count) {
return ts_document_parse_with_options(self, (TSParseOptions){
ts_document_parse_with_options(self, (TSParseOptions){
.halt_on_error = false,
.changed_ranges = ranges,
.changed_range_count = range_count,

View file

@ -2,7 +2,6 @@
static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
static const unsigned MAX_PUSH_COUNT_WITH_COUNT_DIFFERENCE = 24;
static const unsigned MAX_DEPTH_TO_ALLOW_MULTIPLE = 12;
ErrorComparison error_status_compare(ErrorStatus a, ErrorStatus b, bool are_mergeable) {
if (a.count < b.count) {
@ -43,14 +42,5 @@ ErrorComparison error_status_compare(ErrorStatus a, ErrorStatus b, bool are_merg
}
}
if (a.count > 0) {
if (a.depth > MAX_DEPTH_TO_ALLOW_MULTIPLE ||
b.depth > MAX_DEPTH_TO_ALLOW_MULTIPLE) {
return a.depth <= b.depth ?
ErrorComparisonTakeLeft :
ErrorComparisonTakeRight;
}
}
return ErrorComparisonNone;
}

View file

@ -53,7 +53,8 @@ static inline Length length_sub(Length len1, Length len2) {
}
static inline Length length_zero() {
return (Length){ 0, 0, {0, 0} };
Length result = {0, 0, {0, 0}};
return result;
}
#endif

View file

@ -3,18 +3,23 @@
#include "tree_sitter/runtime.h"
static inline TSPoint point__new(unsigned row, unsigned column) {
TSPoint result = {row, column};
return result;
}
static inline TSPoint point_add(TSPoint a, TSPoint b) {
if (b.row > 0)
return (TSPoint){a.row + b.row, b.column};
return point__new(a.row + b.row, b.column);
else
return (TSPoint){a.row, a.column + b.column};
return point__new(a.row, a.column + b.column);
}
static inline TSPoint point_sub(TSPoint a, TSPoint b) {
if (a.row > b.row)
return (TSPoint){a.row - b.row, a.column};
return point__new(a.row - b.row, a.column);
else
return (TSPoint){0, a.column - b.column};
return point__new(0, a.column - b.column);
}
static inline bool point_lte(TSPoint a, TSPoint b) {

View file

@ -7,11 +7,8 @@ typedef struct {
} ReusableNode;
static inline ReusableNode reusable_node_new(Tree *tree) {
return (ReusableNode){
.tree = tree,
.byte_index = 0,
.preceding_external_token = NULL,
};
ReusableNode result = {tree, 0, NULL};
return result;
}
static inline void reusable_node_pop(ReusableNode *self) {

View file

@ -10,7 +10,11 @@
#define MAX_NODE_POOL_SIZE 50
#define MAX_ITERATOR_COUNT 64
#ifdef _WIN32
#define inline __forceinline
#else
#define inline static inline __attribute__((always_inline))
#endif
typedef struct StackNode StackNode;
@ -259,7 +263,7 @@ inline StackPopResult stack__iter(Stack *self, StackVersion version,
TreeArray trees = iterator->trees;
if (!should_stop)
ts_tree_array_copy(trees, &trees);
array_reverse(&trees);
ts_tree_array_reverse(&trees);
ts_stack__add_slice(
self,
node,

View file

@ -133,10 +133,19 @@ TreeArray ts_tree_array_remove_trailing_extras(TreeArray *self) {
}
self->size = i + 1;
array_reverse(&result);
ts_tree_array_reverse(&result);
return result;
}
void ts_tree_array_reverse(TreeArray *self) {
for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) {
size_t reverse_index = self->size - 1 - i;
Tree *swap = self->contents[i];
self->contents[i] = self->contents[reverse_index];
self->contents[reverse_index] = swap;
}
}
Tree *ts_tree_make_error(Length size, Length padding, int32_t lookahead_char,
const TSLanguage *language) {
Tree *result = ts_tree_make_leaf(ts_builtin_sym_error, padding, size, language);
@ -380,7 +389,7 @@ int ts_tree_compare(const Tree *left, const Tree *right) {
return 0;
}
static inline long min(long a, long b) {
static inline long min_byte(long a, long b) {
return a <= b ? a : b;
}
@ -470,7 +479,7 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
TSInputEdit child_edit = {
.start_byte = 0,
.bytes_added = 0,
.bytes_removed = min(remaining_bytes_to_delete, ts_tree_total_bytes(child)),
.bytes_removed = min_byte(remaining_bytes_to_delete, ts_tree_total_bytes(child)),
.start_point = {0, 0},
.extent_added = {0, 0},
.extent_removed = point_min(remaining_extent_to_delete, ts_tree_total_size(child).extent),
@ -594,7 +603,7 @@ void ts_tree_print_dot_graph(const Tree *self, const TSLanguage *language,
fprintf(f, "}\n");
}
TSExternalTokenState empty_state = {.length = 0, .short_data = {}};
static const TSExternalTokenState empty_state = {.length = 0, .short_data = {0}};
bool ts_tree_external_token_state_eq(const Tree *self, const Tree *other) {
const TSExternalTokenState *state1 = &empty_state;

View file

@ -86,6 +86,7 @@ void ts_tree_array_delete(TreeArray *);
uint32_t ts_tree_array_essential_count(const TreeArray *);
TreeArray ts_tree_array_remove_last_n(TreeArray *, uint32_t);
TreeArray ts_tree_array_remove_trailing_extras(TreeArray *);
void ts_tree_array_reverse(TreeArray *);
Tree *ts_tree_make_leaf(TSSymbol, Length, Length, const TSLanguage *);
Tree *ts_tree_make_node(TSSymbol, uint32_t, Tree **, unsigned, const TSLanguage *);

View file

@ -27,25 +27,25 @@ describe("ParseItemSetBuilder", []() {
it("adds items at the beginnings of referenced rules", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production{{
Production({
{Symbol::non_terminal(1), 0, AssociativityNone, Alias{}},
{Symbol::terminal(11), 0, AssociativityNone, Alias{}},
}, 0},
}, 0),
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production{{
Production({
{Symbol::terminal(12), 0, AssociativityNone, Alias{}},
{Symbol::terminal(13), 0, AssociativityNone, Alias{}},
}, 0},
Production{{
}, 0),
Production({
{Symbol::non_terminal(2), 0, AssociativityNone, Alias{}},
}, 0}
}, 0)
}},
SyntaxVariable{"rule2", VariableTypeNamed, {
Production{{
Production({
{Symbol::terminal(14), 0, AssociativityNone, Alias{}},
{Symbol::terminal(15), 0, AssociativityNone, Alias{}},
}, 0}
}, 0)
}},
}, {}, {}, {}, {}};
@ -86,16 +86,16 @@ describe("ParseItemSetBuilder", []() {
it("handles rules with empty productions", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production{{
Production({
{Symbol::non_terminal(1), 0, AssociativityNone, Alias{}},
{Symbol::terminal(11), 0, AssociativityNone, Alias{}},
}, 0},
}, 0),
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production{{
Production({
{Symbol::terminal(12), 0, AssociativityNone, Alias{}},
{Symbol::terminal(13), 0, AssociativityNone, Alias{}},
}, 0},
}, 0),
Production{{}, 0}
}},
}, {}, {}, {}, {}};

View file

@ -35,7 +35,7 @@ describe("expand_tokens", []() {
it("handles strings containing non-ASCII UTF8 characters", [&]() {
AssertThat(
expand_token(String{"\u03B1 \u03B2"}).rule,
expand_token(String{"\xCE\xB1 \xCE\xB2"}).rule,
Equals(Rule::seq({
CharacterSet{{ 945 }},
CharacterSet{{ ' ' }},
@ -63,7 +63,7 @@ describe("expand_tokens", []() {
it("handles regexps containing non-ASCII UTF8 characters", [&]() {
AssertThat(
expand_token(Pattern{"[^\u03B1-\u03B4]+"}).rule,
expand_token(Pattern{"[^\xCE\xB1-\xCE\xB4]+"}).rule,
Equals(Rule(Repeat{
CharacterSet().include_all().exclude(945, 948)
}))

View file

@ -34,21 +34,21 @@ describe("flatten_grammar", []() {
AssertThat(result.name, Equals("test"));
AssertThat(result.type, Equals(VariableTypeNamed));
AssertThat(result.productions, Equals(vector<Production>({
Production{{
Production({
{Symbol::non_terminal(1), 0, AssociativityNone, Alias{}},
{Symbol::non_terminal(2), 101, AssociativityLeft, Alias{}},
{Symbol::non_terminal(3), 102, AssociativityRight, Alias{}},
{Symbol::non_terminal(4), 101, AssociativityLeft, Alias{}},
{Symbol::non_terminal(6), 0, AssociativityNone, Alias{}},
{Symbol::non_terminal(7), 0, AssociativityNone, Alias{}},
}, 0},
Production{{
}, 0),
Production({
{Symbol::non_terminal(1), 0, AssociativityNone, Alias{}},
{Symbol::non_terminal(2), 101, AssociativityLeft, Alias{}},
{Symbol::non_terminal(5), 101, AssociativityLeft, Alias{}},
{Symbol::non_terminal(6), 0, AssociativityNone, Alias{}},
{Symbol::non_terminal(7), 0, AssociativityNone, Alias{}},
}, 0}
}, 0)
})));
});
@ -76,21 +76,21 @@ describe("flatten_grammar", []() {
AssertThat(result.name, Equals("test"));
AssertThat(result.type, Equals(VariableTypeNamed));
AssertThat(result.productions, Equals(vector<Production>({
Production{{
Production({
{Symbol::non_terminal(1), 0, AssociativityNone, Alias{}},
{Symbol::non_terminal(2), 0, AssociativityNone, Alias{}},
{Symbol::non_terminal(3), 0, AssociativityNone, Alias{}},
{Symbol::non_terminal(4), 0, AssociativityNone, Alias{}},
{Symbol::non_terminal(6), 0, AssociativityNone, Alias{}},
{Symbol::non_terminal(7), 0, AssociativityNone, Alias{}},
}, 102},
Production{{
}, 102),
Production({
{Symbol::non_terminal(1), 0, AssociativityNone, Alias{}},
{Symbol::non_terminal(2), 0, AssociativityNone, Alias{}},
{Symbol::non_terminal(5), 0, AssociativityNone, Alias{}},
{Symbol::non_terminal(6), 0, AssociativityNone, Alias{}},
{Symbol::non_terminal(7), 0, AssociativityNone, Alias{}},
}, 101}
}, 101),
})));
});
@ -105,10 +105,10 @@ describe("flatten_grammar", []() {
});
AssertThat(result.productions, Equals(vector<Production>({
Production{{
Production({
{Symbol::non_terminal(1), 101, AssociativityLeft, Alias{}},
{Symbol::non_terminal(2), 101, AssociativityLeft, Alias{}},
}, 0}
}, 0)
})));
result = flatten_rule({
@ -120,9 +120,9 @@ describe("flatten_grammar", []() {
});
AssertThat(result.productions, Equals(vector<Production>({
Production{{
Production({
{Symbol::non_terminal(1), 101, AssociativityLeft, Alias{}},
}, 0}
}, 0)
})));
});
});

View file

@ -2,7 +2,6 @@
#include <sys/stat.h>
#include <errno.h>
#include <fstream>
#include <dirent.h>
using std::string;
using std::ifstream;
@ -26,7 +25,7 @@ int get_modified_time(const string &path) {
}
string read_file(const string &path) {
ifstream file(path);
ifstream file(path, std::ios::binary);
istreambuf_iterator<char> file_iterator(file), end_iterator;
string content(file_iterator, end_iterator);
file.close();
@ -39,6 +38,32 @@ void write_file(const string &path, const string &content) {
file.close();
}
#ifdef _WIN32
#include <windows.h>
const char *path_separator = "\\";
vector<string> list_directory(const string &path) {
vector<string> result;
WIN32_FIND_DATA search_data;
HANDLE handle = FindFirstFile((path + "\\*").c_str(), &search_data);
while (handle != INVALID_HANDLE_VALUE) {
string name(search_data.cFileName);
result.push_back(name);
if (FindNextFile(handle, &search_data) == FALSE) break;
}
return result;
}
#else
#include <dirent.h>
const char *path_separator = "/";
vector<string> list_directory(const string &path) {
vector<string> result;
@ -58,4 +83,15 @@ vector<string> list_directory(const string &path) {
closedir(dir);
return result;
}
}
#endif
string join_path(const vector<string> &parts) {
string result;
for (const string &part : parts) {
if (!result.empty()) result += path_separator;
result += part;
}
return result;
}

View file

@ -10,5 +10,6 @@ int get_modified_time(const std::string &path);
std::string read_file(const std::string &path);
void write_file(const std::string &path, const std::string &content);
std::vector<std::string> list_directory(const std::string &path);
std::string join_path(const std::vector<std::string> &parts);
#endif // HELPERS_FILE_HELPERS_H_

View file

@ -1,10 +1,7 @@
#include "helpers/load_language.h"
#include "helpers/file_helpers.h"
#include <cassert>
#include <unistd.h>
#include <dlfcn.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <map>
#include <string>
#include <fstream>
@ -23,47 +20,135 @@ map<string, const TSLanguage *> loaded_languages;
int libcompiler_mtime = -1;
int compile_result_count = 0;
const char *libcompiler_path =
#if defined(__linux)
"out/Test/obj.target/libcompiler.a";
#else
"out/Test/libcompiler.a";
#ifdef _WIN32
#include <windows.h>
const char *libcompiler_path = "test\\lib\\compiler.lib";
const char *dylib_extension = ".dll";
static string get_cwd() {
string result(255, 0);
result.resize(GetCurrentDirectory(result.size(), &result[0]));
return result;
}
static int compile_parser(
string source_filename,
string scanner_source_filename,
string output_filename,
string header_dirname
) {
CreateDirectory("out", nullptr);
CreateDirectory("out\\tmp", nullptr);
string command = "cl.exe";
command += " /nologo";
command += " /LD";
command += " /I " + header_dirname;
command += " /Od";
command += " " + source_filename;
command += " " + scanner_source_filename;
command += " /link /out:" + output_filename;
return system(command.c_str());
}
static void *load_function_from_library(string library_path, string function_name) {
HINSTANCE library = LoadLibrary(library_path.c_str());
if (!library) {
fputs(("Could not load library " + library_path).c_str(), stderr);
abort();
}
void *function = static_cast<void *>(GetProcAddress(library, function_name.c_str()));
if (!function) {
fputs(("Could not find function + " + function_name).c_str(), stderr);
abort();
}
return function;
}
#else // POSIX
#ifdef __linux
const char *libcompiler_path = "out/Test/obj.target/libcompiler.a";
const char *dylib_extension = ".so";
#else // macOS
const char *libcompiler_path = "out/Test/libcompiler.a";
const char *dylib_extension = ".dylib";
#endif
static std::string run_command(const char *cmd, const char *args[]) {
int child_pid = fork();
if (child_pid < 0) {
return "fork failed";
}
#include <unistd.h>
#include <dlfcn.h>
if (child_pid == 0) {
close(0);
dup2(1, 0);
dup2(2, 1);
dup2(1, 2);
execvp(cmd, (char * const * )args);
}
int status;
do {
waitpid(child_pid, &status, 0);
} while (!WIFEXITED(status));
if (WEXITSTATUS(status) == 0) {
return "";
} else {
return "command failed";
}
static string get_cwd() {
return string(getenv("PWD"));
}
static int compile_parser(
string source_filename,
string scanner_source_filename,
string output_filename,
string header_dirname
) {
mkdir("out", 0777);
mkdir("out/tmp", 0777);
const char *compiler_name = getenv("CXX");
if (!compiler_name) compiler_name = "c++";
string command = compiler_name;
command += " -shared";
command += " -fPIC ";
command += " -I " + header_dirname;
command += " -o " + output_filename;
command += " -O0";
command += " -xc " + source_filename;
if (!scanner_source_filename.empty()) {
command += " -g";
string extension = scanner_source_filename.substr(scanner_source_filename.rfind("."));
if (extension == ".c") {
command += " -xc " + scanner_source_filename;
} else {
command += " -xc++ " + scanner_source_filename;
}
}
return system(command.c_str());
}
static void *load_function_from_library(string library_path, string function_name) {
void *parser_lib = dlopen(library_path.c_str(), RTLD_NOW);
if (!parser_lib) {
fputs(dlerror(), stderr);
abort();
}
void *language_function = dlsym(parser_lib, function_name.c_str());
if (!language_function) {
fputs(dlerror(), stderr);
abort();
}
return language_function;
}
#endif
static const TSLanguage *load_language(const string &source_filename,
const string &lib_filename,
const string &language_name,
string external_scanner_filename = "") {
string language_function_name = "tree_sitter_" + language_name;
string header_dir = getenv("PWD") + string("/include");
string header_dir = join_path({get_cwd(), "include"});
int source_mtime = get_modified_time(source_filename);
int header_mtime = get_modified_time(header_dir + "/tree_sitter/parser.h");
int header_mtime = get_modified_time(join_path({header_dir, "tree_sitter", "parser.h"}));
int lib_mtime = get_modified_time(lib_filename);
int external_scanner_mtime = get_modified_time(external_scanner_filename);
@ -72,47 +157,17 @@ static const TSLanguage *load_language(const string &source_filename,
const char *compiler_name = getenv("CXX");
if (!compiler_name) compiler_name = "c++";
vector<const char *> compile_args = {
compiler_name,
"-shared",
"-fPIC",
"-I", header_dir.c_str(),
"-o", lib_filename.c_str(),
"-x", "c",
source_filename.c_str()
};
int status_code = compile_parser(
source_filename,
external_scanner_filename,
lib_filename,
header_dir
);
if (!external_scanner_filename.empty()) {
compile_args.push_back("-g");
string extension = external_scanner_filename.substr(external_scanner_filename.rfind("."));
if (extension == ".c") {
compile_args.push_back("-xc");
} else {
compile_args.push_back("-xc++");
}
compile_args.push_back(external_scanner_filename.c_str());
}
compile_args.push_back(nullptr);
string compile_error = run_command(compiler_name, compile_args.data());
if (!compile_error.empty()) {
fputs(compile_error.c_str(), stderr);
abort();
}
if (status_code != 0) abort();
}
void *parser_lib = dlopen(lib_filename.c_str(), RTLD_NOW);
if (!parser_lib) {
fputs(dlerror(), stderr);
abort();
}
void *language_function = dlsym(parser_lib, language_function_name.c_str());
if (!language_function) {
fputs(dlerror(), stderr);
abort();
}
void *language_function = load_function_from_library(lib_filename, language_function_name);
return reinterpret_cast<TSLanguage *(*)()>(language_function)();
}
@ -125,9 +180,8 @@ const TSLanguage *load_test_language(const string &name,
abort();
}
mkdir("out/tmp", 0777);
string source_filename = "out/tmp/compile-result-" + to_string(compile_result_count) + ".c";
string lib_filename = source_filename + ".so";
string source_filename = join_path({"out", "tmp", "compile-result-" + to_string(compile_result_count) + ".c"});
string lib_filename = source_filename + dylib_extension;
compile_result_count++;
ofstream source_file;
@ -144,25 +198,23 @@ const TSLanguage *load_real_language(const string &language_name) {
if (loaded_languages[language_name])
return loaded_languages[language_name];
string language_dir = string("test/fixtures/grammars/") + language_name;
string grammar_filename = language_dir + "/src/grammar.json";
string parser_filename = language_dir + "/src/parser.c";
string external_scanner_filename = language_dir + "/src/scanner.cc";
string language_dir = join_path({"test", "fixtures", "grammars", language_name});
string grammar_filename = join_path({language_dir, "src", "grammar.json"});
string parser_filename = join_path({language_dir, "src", "parser.c"});
string external_scanner_filename = join_path({language_dir, "src", "scanner.cc"});
if (!file_exists(external_scanner_filename)) {
external_scanner_filename = language_dir + "/src/scanner.c";
external_scanner_filename = join_path({language_dir, "src", "scanner.c"});
if (!file_exists(external_scanner_filename)) {
external_scanner_filename = "";
}
}
int grammar_mtime = get_modified_time(grammar_filename);
if (!grammar_mtime)
return nullptr;
if (!grammar_mtime) return nullptr;
if (libcompiler_mtime == -1) {
libcompiler_mtime = get_modified_time(libcompiler_path);
if (!libcompiler_mtime)
return nullptr;
if (!libcompiler_mtime) return nullptr;
}
int parser_mtime = get_modified_time(parser_filename);
@ -180,8 +232,7 @@ const TSLanguage *load_real_language(const string &language_name) {
write_file(parser_filename, result.code);
}
mkdir("out/tmp", 0777);
string lib_filename = "out/tmp/" + language_name + ".so";
string lib_filename = join_path({"out", "tmp", language_name + dylib_extension});
const TSLanguage *language = load_language(parser_filename, lib_filename, language_name, external_scanner_filename);
loaded_languages[language_name] = language;
return language;

View file

@ -1,22 +1,40 @@
#include <string>
#include <vector>
#include <stdlib.h>
#include <random>
#include <time.h>
using std::string;
using std::vector;
static std::default_random_engine engine;
unsigned get_time_as_seed() {
return time(nullptr);
}
void random_reseed(unsigned seed) {
engine.seed(seed);
}
unsigned random_unsigned() {
return std::uniform_int_distribution<unsigned>()(engine);
}
unsigned random_unsigned(unsigned max) {
return std::uniform_int_distribution<unsigned>(0, max - 1)(engine);
}
static string random_string(char min, char max) {
string result;
size_t length = random() % 12;
size_t length = random_unsigned(12);
for (size_t i = 0; i < length; i++) {
char inserted_char = min + (random() % (max - min));
result += inserted_char;
result += (min + random_unsigned(max - min));
}
return result;
}
static string random_char(string characters) {
size_t index = random() % characters.size();
size_t index = random_unsigned(characters.size());
return string() + characters[index];
}
@ -24,7 +42,7 @@ string random_words(size_t count) {
string result;
bool just_inserted_word = false;
for (size_t i = 0; i < count; i++) {
if (random() % 10 < 6) {
if (random_unsigned(10) < 6) {
result += random_char("!(){}[]<>+-=");
} else {
if (just_inserted_word)
@ -37,5 +55,5 @@ string random_words(size_t count) {
}
// Returns a uniformly random element of `list`.
//
// Fix: the function contained two consecutive return statements — the stale
// `random()`-based one made the portable `random_unsigned` replacement
// unreachable. Only the portable version is kept.
string select_random(const vector<string> &list) {
  return list[random_unsigned(list.size())];
}

View file

@ -4,6 +4,10 @@
#include <string>
#include <vector>
// Current wall-clock time, suitable as a default RNG seed.
unsigned get_time_as_seed();
// Re-seeds the shared random engine so a run can be reproduced.
void random_reseed(unsigned);
// Uniformly random value over the full range of `unsigned`.
unsigned random_unsigned();
// Uniformly random value in [0, max).
unsigned random_unsigned(unsigned max);
// Random whitespace-separated words/punctuation, `count` tokens long.
std::string random_words(size_t count);
// Uniformly random element of the given list.
std::string select_random(const std::vector<std::string> &);

View file

@ -12,8 +12,6 @@ using std::smatch;
using std::string;
using std::vector;
string fixtures_dir = "test/fixtures/";
static string trim_output(const string &input) {
string result(input);
result = regex_replace(result, regex("[\n\t ]+", extended), string(" "));
@ -65,14 +63,14 @@ static vector<TestEntry> parse_test_entries(string content) {
vector<TestEntry> read_real_language_corpus(string language_name) {
vector<TestEntry> result;
string corpus_directory = fixtures_dir + "grammars/" + language_name + "/corpus";
string corpus_directory = join_path({"test", "fixtures", "grammars", language_name, "corpus"});
for (string &test_filename : list_directory(corpus_directory)) {
for (TestEntry &entry : parse_test_entries(read_file(corpus_directory + "/" + test_filename))) {
for (TestEntry &entry : parse_test_entries(read_file(join_path({corpus_directory, test_filename})))) {
result.push_back(entry);
}
}
string error_test_filename = fixtures_dir + "/error_corpus/" + language_name + "_errors.txt";
string error_test_filename = join_path({"test", "fixtures", "error_corpus", language_name + "_errors.txt"});
for (TestEntry &entry : parse_test_entries(read_file(error_test_filename))) {
result.push_back(entry);
}
@ -83,9 +81,9 @@ vector<TestEntry> read_real_language_corpus(string language_name) {
vector<TestEntry> read_test_language_corpus(string language_name) {
vector<TestEntry> result;
string test_directory = fixtures_dir + "test_grammars/" + language_name;
string test_directory = join_path({"test", "fixtures", "test_grammars", language_name});
for (string &test_filename : list_directory(test_directory)) {
for (TestEntry &entry : parse_test_entries(read_file(test_directory + "/" + test_filename))) {
for (TestEntry &entry : parse_test_entries(read_file(join_path({test_directory, test_filename})))) {
result.push_back(entry);
}
}
@ -95,11 +93,11 @@ vector<TestEntry> read_test_language_corpus(string language_name) {
vector<ExampleEntry> examples_for_language(string language_name) {
vector<ExampleEntry> result;
string examples_directory = fixtures_dir + "grammars/" + language_name + "/examples";
string examples_directory = join_path({"test", "fixtures", "grammars", language_name, "examples"});
for (string &filename : list_directory(examples_directory)) {
result.push_back({
filename,
read_file(examples_directory + "/" + filename)
read_file(join_path({examples_directory, filename}))
});
}
return result;

View file

@ -7,10 +7,10 @@
START_TEST
vector<pair<string, string>> examples({
{
"javascript",
"Bi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLXGK0i0vLS0tLS0tLS0tLS0tLS0tLS0tLS0tLXGK0i0vLS0tLS0tLS0tLS0tLS0xLS0tLTYtLfpZAA=="
},
// {
// "javascript",
// "Bi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLXGK0i0vLS0tLS0tLS0tLS0tLS0tLS0tLS0tLXGK0i0vLS0tLS0tLS0tLS0tLS0xLS0tLTYtLfpZAA=="
// },
});
describe("examples found via fuzzing", [&]() {

View file

@ -75,9 +75,9 @@ for (auto &language_name : test_languages) {
set<pair<size_t, string>> insertions;
for (size_t i = 0; i < 60; i++) {
size_t edit_position = random() % utf8_char_count(entry.input);
size_t deletion_size = random() % (utf8_char_count(entry.input) - edit_position);
string inserted_text = random_words(random() % 4 + 1);
size_t edit_position = random_unsigned(utf8_char_count(entry.input));
size_t deletion_size = random_unsigned(utf8_char_count(entry.input) - edit_position);
string inserted_text = random_words(random_unsigned(4) + 1);
if (insertions.insert({edit_position, inserted_text}).second) {
string description = "\"" + inserted_text + "\" at " + to_string(edit_position);

View file

@ -8,24 +8,25 @@
START_TEST
string grammars_dir_path = "test/fixtures/test_grammars";
string grammars_dir_path = join_path({"test", "fixtures", "test_grammars"});
vector<string> test_languages = list_directory(grammars_dir_path);
for (auto &language_name : test_languages) {
if (language_name == "readme.md") continue;
describe(("test grammar: " + language_name).c_str(), [&]() {
string directory_path = grammars_dir_path + "/" + language_name;
string grammar_path = directory_path + "/grammar.json";
string directory_path = join_path({grammars_dir_path, language_name});
string grammar_path = join_path({directory_path, "grammar.json"});
string expected_error_path = join_path({directory_path, "expected_error.txt"});
string grammar_json = read_file(grammar_path);
string expected_error_path = directory_path + "/expected_error.txt";
const TSLanguage *language = nullptr;
if (file_exists(expected_error_path)) {
it("fails with the correct error message", [&]() {
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str());
string expected_error = read_file(expected_error_path);
AssertThat((void *)compile_result.error_message, !IsNull());
AssertThat((void *)compile_result.error_message, !Equals<void *>(nullptr));
AssertThat(compile_result.error_message, Equals(expected_error));
});
return;
@ -34,7 +35,7 @@ for (auto &language_name : test_languages) {
for (auto &entry : read_test_language_corpus(language_name)) {
it(("parses " + entry.description).c_str(), [&]() {
if (!language) {
string external_scanner_path = directory_path + "/scanner.c";
string external_scanner_path = join_path({directory_path, "scanner.c"});
if (!file_exists(external_scanner_path)) external_scanner_path = "";
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str());

View file

@ -207,7 +207,7 @@ describe("Document", [&]() {
AssertThat(ts_language_version(&language), !Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));
ts_document_set_language(document, &language);
AssertThat(ts_document_language(document), IsNull());
AssertThat(ts_document_language(document), Equals<const TSLanguage *>(nullptr));
});
});

View file

@ -79,6 +79,8 @@ describe("Stack", [&]() {
stack = ts_stack_new();
TSLanguage dummy_language;
TSSymbolMetadata symbol_metadata[50] = {};
dummy_language.symbol_metadata = symbol_metadata;
for (size_t i = 0; i < tree_count; i++) {
trees[i] = ts_tree_make_leaf(i, length_zero(), tree_len, &dummy_language);
@ -533,7 +535,7 @@ describe("Stack", [&]() {
});
it("allows the state to be retrieved", [&]() {
AssertThat(ts_stack_last_external_token(stack, 0), Equals(nullptr));
AssertThat(ts_stack_last_external_token(stack, 0), Equals<Tree *>(nullptr));
ts_stack_set_last_external_token(stack, 0, trees[1]);
AssertThat(ts_stack_last_external_token(stack, 0), Equals(trees[1]));

View file

@ -1,4 +1,5 @@
#include "test_helper.h"
#include "helpers/random_helpers.h"
int main(int argc, char *argv[]) {
int seed;
@ -6,11 +7,11 @@ int main(int argc, char *argv[]) {
if (seed_env) {
seed = atoi(seed_env);
} else {
seed = time(nullptr);
seed = get_time_as_seed();
}
printf("Random seed: %d\n", seed);
srandom(seed);
random_reseed(seed);
return bandit::run(argc, argv);
}

View file

@ -37,11 +37,45 @@
'externals/crypto-algorithms',
],
'sources': [
'test/compiler/build_tables/lex_conflict_manager_test.cc',
'test/compiler/build_tables/lex_item_test.cc',
'test/compiler/build_tables/lex_table_builder_test.cc',
'test/compiler/build_tables/parse_item_set_builder_test.cc',
'test/compiler/build_tables/rule_can_be_blank_test.cc',
'test/compiler/prepare_grammar/expand_repeats_test.cc',
'test/compiler/prepare_grammar/expand_tokens_test.cc',
'test/compiler/prepare_grammar/extract_choices_test.cc',
'test/compiler/prepare_grammar/extract_tokens_test.cc',
'test/compiler/prepare_grammar/flatten_grammar_test.cc',
'test/compiler/prepare_grammar/intern_symbols_test.cc',
'test/compiler/prepare_grammar/parse_regex_test.cc',
'test/compiler/rules/character_set_test.cc',
'test/compiler/rules/choice_test.cc',
'test/compiler/rules/repeat_test.cc',
'test/compiler/util/string_helpers_test.cc',
'test/helpers/encoding_helpers.cc',
'test/helpers/file_helpers.cc',
'test/helpers/load_language.cc',
'test/helpers/point_helpers.cc',
'test/helpers/random_helpers.cc',
'test/helpers/read_test_entries.cc',
'test/helpers/record_alloc.cc',
'test/helpers/scope_sequence.cc',
'test/helpers/spy_input.cc',
'test/helpers/spy_logger.cc',
'test/helpers/stderr_logger.cc',
'test/helpers/stream_methods.cc',
'test/helpers/tree_helpers.cc',
'test/integration/fuzzing-examples.cc',
'test/integration/real_grammars.cc',
'test/integration/test_grammars.cc',
'test/runtime/document_test.cc',
'test/runtime/language_test.cc',
'test/runtime/node_test.cc',
'test/runtime/parser_test.cc',
'test/runtime/stack_test.cc',
'test/runtime/tree_test.cc',
'test/tests.cc',
'<!@(find test/compiler -name "*.cc")',
'<!@(find test/runtime -name "*.cc")',
'<!@(find test/integration -name "*.cc")',
'<!@(find test/helpers -name "*.cc")',
],
'cflags': [
'-g',
@ -68,7 +102,11 @@
'target_defaults': {
'configurations': {'Test': {}, 'Release': {}},
'cflags_cc': ['-std=c++14'],
'libraries': ['-ldl'],
'conditions': [
['OS=="linux"', {
'libraries': ['-ldl'],
}]
],
'xcode_settings': {
'CLANG_CXX_LANGUAGE_STANDARD': 'c++14',
'ALWAYS_SEARCH_USER_PATHS': 'NO',