Merge pull request #49 from tree-sitter/better-conflict-handling

Improve conflict handling
This commit is contained in:
Max Brunsfeld 2016-11-18 17:11:34 -08:00 committed by GitHub
commit cac4f5d5bc
26 changed files with 853 additions and 948 deletions

View file

@ -5,8 +5,12 @@ compiler:
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- libboost-regex-dev
- g++-5
install: script/configure -D USE_BOOST_REGEX=true -D USE_LIBPROFILER=false
install:
- export CXX="g++-5"
- script/configure -D USE_LIBPROFILER=false
script: script/ci

2
externals/bandit vendored

@ -1 +1 @@
Subproject commit ae7ed1eb4c0c228f0e899e3f15e455b3ab7cc073
Subproject commit f14ade4fbba72f2e8ba3a7f47318eb07614a39d4

View file

@ -21,7 +21,6 @@
'src/compiler/build_tables/lookahead_set.cc',
'src/compiler/build_tables/parse_item.cc',
'src/compiler/build_tables/parse_item_set_builder.cc',
'src/compiler/build_tables/parse_conflict_manager.cc',
'src/compiler/build_tables/rule_can_be_blank.cc',
'src/compiler/compile.cc',
'src/compiler/generate_code/c_code.cc',

5
script/configure vendored
View file

@ -11,11 +11,6 @@ if [ "$1" == "-h" ]; then
Variables:
-D USE_BOOST_REGEX=true
Use boost regex library for tests, not the built-in regex library. This is
useful when linking against an older version of the standard library.
libboost_regex must be installed.
-D USE_LIBPROFILER=false
Don't link libprofiler into the test binaries. This flag is needed on systems
where libprofiler (from the google-perftools package) is not installed.

View file

@ -32,6 +32,7 @@ EOF
profile=
leak_check=no
mode=normal
verbose=
args=()
target=tests
export BUILDTYPE=Test
@ -60,7 +61,7 @@ while getopts "df:s:gGhpvS" option; do
args+=("--only=${OPTARG}")
;;
v)
args+=("--reporter=spec")
verbose=true
;;
s)
export TREE_SITTER_SEED=${OPTARG}
@ -71,6 +72,12 @@ while getopts "df:s:gGhpvS" option; do
esac
done
if [[ -n $verbose ]]; then
args+=("--reporter=spec")
else
args+=("--reporter=singleline")
fi
make $target
args=${args:-""}

View file

@ -1,194 +0,0 @@
#include "spec_helper.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/parse_table.h"
#include "compiler/build_tables/parse_conflict_manager.h"
using namespace rules;
using namespace build_tables;
START_TEST
// Specs for ParseConflictManager::resolve(). Every case calls resolve() with the
// same two actions in both argument orders and inspects the returned pair:
// `first` is a bool and `second` is the reported ConflictType.
describe("ParseConflictManager", []() {
pair<bool, ConflictType> result;
// NOTE(review): sym1 and lookahead_sym are not referenced by any case in this block.
Symbol sym1(0);
Symbol sym2(1);
Symbol lookahead_sym(1, true);
const Production production;
ParseConflictManager *conflict_manager;
// A fresh manager is allocated for every case and released afterwards.
before_each([&]() {
conflict_manager = new ParseConflictManager;
});
after_each([&]() {
delete conflict_manager;
});
describe(".resolve", [&]() {
// Error actions: expected to lose to any other action, with no conflict reported.
describe("errors", [&]() {
ParseAction error = ParseAction::Error();
ParseAction non_error = ParseAction::Shift(2, { 0, 0 });
it("favors non-errors and reports no conflict", [&]() {
result = conflict_manager->resolve(non_error, error);
AssertThat(result.first, IsTrue());
AssertThat(result.second, Equals(ConflictTypeNone));
result = conflict_manager->resolve(error, non_error);
AssertThat(result.first, IsFalse());
AssertThat(result.second, Equals(ConflictTypeNone));
});
});
// Shift-extra actions: expected to lose to both shifts and reduces, with no
// conflict reported.
describe("shift-extra actions", [&]() {
ParseAction shift_extra = ParseAction::ShiftExtra();
ParseAction shift = ParseAction::Shift(2, { 0, 0 });
ParseAction reduce = ParseAction::Reduce(sym2, 1, -1, AssociativityRight, production);
it("favors any shift action over a shift-extra actions", [&]() {
result = conflict_manager->resolve(shift, shift_extra);
AssertThat(result.first, IsTrue());
AssertThat(result.second, Equals(ConflictTypeNone));
result = conflict_manager->resolve(shift_extra, shift);
AssertThat(result.first, IsFalse());
AssertThat(result.second, Equals(ConflictTypeNone));
});
it("favors any reduce action over a shift-extra actions", [&]() {
result = conflict_manager->resolve(reduce, shift_extra);
AssertThat(result.first, IsTrue());
AssertThat(result.second, Equals(ConflictTypeNone));
result = conflict_manager->resolve(shift_extra, reduce);
AssertThat(result.first, IsFalse());
AssertThat(result.second, Equals(ConflictTypeNone));
});
});
// Shift/reduce conflicts. Judging by the case names, the braces passed to
// Shift() are a precedence range and the third argument of Reduce() is the
// reduction's precedence -- confirm against ParseAction's declaration.
describe("shift/reduce conflicts", [&]() {
describe("when the shift has higher precedence", [&]() {
ParseAction shift = ParseAction::Shift(2, {3, 4});
ParseAction reduce = ParseAction::Reduce(sym2, 1, 2, AssociativityLeft, production);
it("favors the shift and reports the conflict as resolved", [&]() {
result = conflict_manager->resolve(shift, reduce);
AssertThat(result.first, IsTrue());
AssertThat(result.second, Equals(ConflictTypeResolved));
result = conflict_manager->resolve(reduce, shift);
AssertThat(result.first, IsFalse());
AssertThat(result.second, Equals(ConflictTypeResolved));
});
});
describe("when the reduce has higher precedence", [&]() {
ParseAction shift = ParseAction::Shift(2, {1, 2});
ParseAction reduce = ParseAction::Reduce(sym2, 1, 3, AssociativityLeft, production);
it("favors the reduce and reports the conflict as resolved", [&]() {
result = conflict_manager->resolve(shift, reduce);
AssertThat(result.first, IsFalse());
AssertThat(result.second, Equals(ConflictTypeResolved));
result = conflict_manager->resolve(reduce, shift);
AssertThat(result.first, IsTrue());
AssertThat(result.second, Equals(ConflictTypeResolved));
});
});
// With equal precedences the reduce's associativity is the tie-breaker.
describe("when the precedences are equal and the reduce's rule is left associative", [&]() {
ParseAction shift = ParseAction::Shift(2, { 0, 0 });
ParseAction reduce = ParseAction::Reduce(sym2, 1, 0, AssociativityLeft, production);
it("favors the reduce and reports the conflict as resolved", [&]() {
result = conflict_manager->resolve(reduce, shift);
AssertThat(result.first, IsTrue());
AssertThat(result.second, Equals(ConflictTypeResolved));
result = conflict_manager->resolve(shift, reduce);
AssertThat(result.first, IsFalse());
AssertThat(result.second, Equals(ConflictTypeResolved));
});
});
describe("when the precedences are equal and the reduce's rule is right-associative", [&]() {
ParseAction shift = ParseAction::Shift(2, { 0, 0 });
ParseAction reduce = ParseAction::Reduce(sym2, 1, 0, AssociativityRight, production);
it("favors the shift, and reports the conflict as resolved", [&]() {
result = conflict_manager->resolve(reduce, shift);
AssertThat(result.first, IsFalse());
AssertThat(result.second, Equals(ConflictTypeResolved));
result = conflict_manager->resolve(shift, reduce);
AssertThat(result.first, IsTrue());
AssertThat(result.second, Equals(ConflictTypeResolved));
});
});
describe("when the precedences are equal and the reduce's rule has no associativity", [&]() {
it("reports an unresolved conflict", [&]() {
ParseAction shift = ParseAction::Shift(2, { 0, 0 });
ParseAction reduce = ParseAction::Reduce(Symbol(2), 1, 0, AssociativityNone, production);
result = conflict_manager->resolve(reduce, shift);
AssertThat(result.first, IsFalse());
AssertThat(result.second, Equals(ConflictTypeUnresolved));
// NOTE(review): unlike the sibling cases, the reversed call below only
// checks `first`; `second` is not asserted.
result = conflict_manager->resolve(shift, reduce);
AssertThat(result.first, IsTrue());
});
});
// The reduce's precedence (2) lies inside the shift's range {1, 3}, so neither
// side clearly outranks the other.
describe("when the shift has conflicting precedences compared to the reduce", [&]() {
ParseAction shift = ParseAction::Shift(2, { 1, 3 });
ParseAction reduce = ParseAction::Reduce(Symbol(2), 1, 2, AssociativityLeft, production);
it("returns false and reports an unresolved conflict", [&]() {
result = conflict_manager->resolve(reduce, shift);
AssertThat(result.first, IsFalse());
AssertThat(result.second, Equals(ConflictTypeUnresolved));
result = conflict_manager->resolve(shift, reduce);
AssertThat(result.first, IsTrue());
AssertThat(result.second, Equals(ConflictTypeUnresolved));
});
});
});
// Reduce/reduce conflicts: the higher precedence wins; equal precedences are
// expected to stay unresolved, with `first` false for both argument orders.
describe("reduce/reduce conflicts", [&]() {
describe("when one action has higher precedence", [&]() {
ParseAction left = ParseAction::Reduce(sym2, 1, 0, AssociativityLeft, production);
ParseAction right = ParseAction::Reduce(sym2, 1, 2, AssociativityLeft, production);
it("favors that action", [&]() {
result = conflict_manager->resolve(left, right);
AssertThat(result.first, IsFalse());
AssertThat(result.second, Equals(ConflictTypeResolved));
result = conflict_manager->resolve(right, left);
AssertThat(result.first, IsTrue());
AssertThat(result.second, Equals(ConflictTypeResolved));
});
});
describe("when the actions have the same precedence", [&]() {
it("returns false and reports a conflict", [&]() {
ParseAction left = ParseAction::Reduce(Symbol(2), 1, 0, AssociativityLeft, production);
ParseAction right = ParseAction::Reduce(Symbol(3), 1, 0, AssociativityLeft, production);
result = conflict_manager->resolve(right, left);
AssertThat(result.first, IsFalse());
AssertThat(result.second, Equals(ConflictTypeUnresolved));
result = conflict_manager->resolve(left, right);
AssertThat(result.first, IsFalse());
AssertThat(result.second, Equals(ConflictTypeUnresolved));
});
});
});
});
});
END_TEST

View file

@ -1,156 +0,0 @@
#include "spec_helper.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/syntax_grammar.h"
#include "helpers/rule_helpers.h"
using namespace rules;
using namespace build_tables;
START_TEST
// Specs for ParseItem::completion_status(): builds items at various positions
// within the three productions of a single-variable grammar and checks the
// reported is_done / precedence / associativity fields.
describe("ParseItem::completion_status()", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable("rule_0", VariableTypeNamed, {
// Four steps; the last one carries precedence 4, left-associative.
Production({
{Symbol(11, true), 0, AssociativityNone},
{Symbol(12, true), 0, AssociativityNone},
{Symbol(13), 0, AssociativityNone},
{Symbol(14, true), 4, AssociativityLeft},
}),
// Three steps; the last one carries precedence 5, right-associative.
Production({
{Symbol(15, true), 0, AssociativityNone},
{Symbol(16, true), 0, AssociativityNone},
{Symbol(17, true), 5, AssociativityRight},
}),
// Empty production.
Production({}),
}),
}, {}, {}};
// Shorthand for looking up a production by variable index and production index.
auto production = [&](int variable_index, int production_index) -> const Production & {
return grammar.variables[variable_index].productions[production_index];
};
it("indicates whether the parse item is done, and its associativity and precedence", [&]() {
// Position 3 of 4 in the first production: not finished yet.
ParseItem item(Symbol(0), production(0, 0), 3);
AssertThat(item.completion_status().is_done, IsFalse());
AssertThat(item.completion_status().precedence, Equals(0));
AssertThat(item.completion_status().associativity, Equals(AssociativityNone));
// Position 4 of 4: finished; the values match the production's last step.
item = ParseItem(Symbol(0), production(0, 0), 4);
AssertThat(item.completion_status().is_done, IsTrue());
AssertThat(item.completion_status().precedence, Equals(4));
AssertThat(item.completion_status().associativity, Equals(AssociativityLeft));
// End of the second production: values match its last step.
item = ParseItem(Symbol(0), production(0, 1), 3);
AssertThat(item.completion_status().is_done, IsTrue());
AssertThat(item.completion_status().precedence, Equals(5));
AssertThat(item.completion_status().associativity, Equals(AssociativityRight));
// The empty production is done at position 0, with default precedence and associativity.
item = ParseItem(Symbol(0), production(0, 2), 0);
AssertThat(item.completion_status().is_done, IsTrue());
AssertThat(item.completion_status().precedence, Equals(0));
AssertThat(item.completion_status().associativity, Equals(AssociativityNone));
});
});
// Specs for ParseItemSet::transitions(): starting from a set of four items, checks
// the map from each next symbol to the successor item set and the range of
// precedences attached to that symbol.
describe("ParseItemSet::transitions())", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable("rule_0", VariableTypeNamed, {
Production({
{Symbol(11, true), 0, AssociativityNone},
{Symbol(12, true), 0, AssociativityNone},
{Symbol(13), 5, AssociativityNone},
{Symbol(14, true), 0, AssociativityNone},
}),
Production({
{Symbol(11, true), 0, AssociativityNone},
{Symbol(12, true), 0, AssociativityNone},
{Symbol(15), 6, AssociativityNone},
})
}),
SyntaxVariable("rule_1", VariableTypeNamed, {
Production({
{Symbol(15), 7, AssociativityNone},
{Symbol(16, true), 0, AssociativityNone},
})
}),
SyntaxVariable("rule_2", VariableTypeNamed, {
Production({
{Symbol(18, true), 0, AssociativityNone},
})
})
}, {}, {}};
// Shorthand for looking up a production by variable index and production index.
auto production = [&](int variable_index, int production_index) -> const Production & {
return grammar.variables[variable_index].productions[production_index];
};
it("computes the ParseItemSet that would occur after consuming each lookahead symbol, along with its precedence", [&]() {
ParseItemSet item_set({
// Two symbols into the first production for rule_0
{
ParseItem(Symbol(0), production(0, 0), 2),
LookaheadSet({ 21 })
},
// Two symbols into the second production for rule_0
{
ParseItem(Symbol(0), production(0, 1), 2),
LookaheadSet({ 21 })
},
// At the beginning of the first production for rule_1
{
ParseItem(Symbol(1), production(1, 0), 0),
LookaheadSet({ 22 })
},
// At the end of the first production for rule_2
{
ParseItem(Symbol(2), production(2, 0), 1),
LookaheadSet({ 22 })
}
});
AssertThat(item_set.transitions(), Equals(ParseItemSet::TransitionMap({
// For the first item, symbol 13 is next, with precedence 5.
{
Symbol(13),
{
ParseItemSet({
{
ParseItem(Symbol(0), production(0, 0), 3),
LookaheadSet({ 21 })
}
}),
PrecedenceRange(5, 5)
}
},
// For the second and third item, symbol 15 is next, with two different
// precedence values (6 and 7), so the expected range spans both.
{
Symbol(15),
{
ParseItemSet({
{
ParseItem(Symbol(0), production(0, 1), 3),
LookaheadSet({ 21 })
},
{
ParseItem(Symbol(1), production(1, 0), 1),
LookaheadSet({ 22 })
},
}),
PrecedenceRange(6, 7)
}
},
// The fourth item (rule_2) is at the end of its production: no transitions.
})));
});
});
END_TEST

View file

@ -2,153 +2,87 @@
#include "compiler/prepare_grammar/flatten_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/rules/built_in_symbols.h"
#include "helpers/rule_helpers.h"
// Applies `f` to every element of `v`, in order, and returns the results as a
// new vector with one entry per input element. The result's element type is
// whatever `Func` yields when invoked with a `T`.
template<typename T, typename Func>
std::vector<typename std::result_of<Func(T)>::type> collect(const std::vector<T> &v, Func f) {
  std::vector<typename std::result_of<Func(T)>::type> result;
  // The output length is known up front, so allocate once instead of letting
  // the vector reallocate repeatedly while it grows.
  result.reserve(v.size());
  for (const T &item : v)
    result.push_back(f(item));
  return result;
}
#include "helpers/stream_methods.h"
START_TEST
using namespace rules;
using prepare_grammar::flatten_grammar;
using prepare_grammar::InitialSyntaxGrammar;
using prepare_grammar::flatten_rule;
describe("flatten_grammar", []() {
auto get_symbol_sequences = [&](vector<Production> productions) {
return collect(productions, [](Production p) {
return collect(p, [](ProductionStep e) {
return e.symbol;
});
});
};
it("associates each symbol with the precedence and associativity binding it to its successor", [&]() {
SyntaxVariable result = flatten_rule(Variable(
"test",
VariableTypeNamed,
seq({
i_sym(1),
prec_left(101, seq({
i_sym(2),
choice({
prec_right(102, seq({
i_sym(3),
i_sym(4)
})),
i_sym(5),
}),
i_sym(6),
})),
i_sym(7),
})
));
auto get_precedence_sequences = [&](vector<Production> productions) {
return collect(productions, [](Production p) {
return collect(p, [](ProductionStep e) {
return e.precedence;
});
});
};
AssertThat(result.name, Equals("test"));
AssertThat(result.type, Equals(VariableTypeNamed));
AssertThat(result.productions, Equals(vector<Production>({
Production({
{Symbol(1), 0, AssociativityNone},
{Symbol(2), 101, AssociativityLeft},
{Symbol(3), 102, AssociativityRight},
{Symbol(4), 101, AssociativityLeft},
{Symbol(6), 0, AssociativityNone},
{Symbol(7), 0, AssociativityNone},
}),
Production({
{Symbol(1), 0, AssociativityNone},
{Symbol(2), 101, AssociativityLeft},
{Symbol(5), 101, AssociativityLeft},
{Symbol(6), 0, AssociativityNone},
{Symbol(7), 0, AssociativityNone},
})
})))
});
auto get_associativity_sequences = [&](vector<Production> productions) {
return collect(productions, [](Production p) {
return collect(p, [](ProductionStep e) {
return e.associativity;
});
});
};
InitialSyntaxGrammar input_grammar{{
// Choices within rules are extracted, resulting in multiple productions.
Variable("variable0", VariableTypeNamed, seq({
i_sym(1),
choice({ i_sym(2), i_sym(3) }),
i_sym(4),
})),
// When multiple precedence values are nested, the inner precedence wins.
Variable("variable1", VariableTypeNamed, seq({
i_sym(1),
it("uses the last assigned precedence", [&]() {
SyntaxVariable result = flatten_rule(Variable(
"test1",
VariableTypeNamed,
prec_left(101, seq({
i_sym(2),
choice({
prec_right(102, seq({
i_sym(3),
i_sym(4)
})),
i_sym(5),
}),
i_sym(6),
})),
i_sym(7),
})),
// When a precedence is applied to the end of a rule, its value is assigned
// to the last step of the corresponding production.
Variable("variable2", VariableTypeHidden, seq({
prec_left(102, seq({
i_sym(1),
i_sym(2),
})),
prec_left(103, seq({
i_sym(3),
i_sym(4),
})),
}))
}, {}, {}};
}))
));
SyntaxGrammar grammar = flatten_grammar(input_grammar);
AssertThat(result.productions, Equals(vector<Production>({
Production({
{Symbol(1), 101, AssociativityLeft},
{Symbol(2), 101, AssociativityLeft},
})
})))
it("preserves the names and types of the grammar's variables", [&]() {
AssertThat(grammar.variables[0].name, Equals("variable0"));
AssertThat(grammar.variables[1].name, Equals("variable1"));
AssertThat(grammar.variables[2].name, Equals("variable2"));
result = flatten_rule(Variable(
"test2",
VariableTypeNamed,
prec_left(101, seq({
i_sym(1),
}))
));
AssertThat(grammar.variables[0].type, Equals(VariableTypeNamed));
AssertThat(grammar.variables[1].type, Equals(VariableTypeNamed));
AssertThat(grammar.variables[2].type, Equals(VariableTypeHidden));
});
it("turns each variable's rule with a vector of possible symbol sequences", [&]() {
AssertThat(
get_symbol_sequences(grammar.variables[0].productions),
Equals(vector<vector<Symbol>>({
{ Symbol(1), Symbol(2), Symbol(4) },
{ Symbol(1), Symbol(3), Symbol(4) }
})));
AssertThat(
get_symbol_sequences(grammar.variables[1].productions),
Equals(vector<vector<Symbol>>({
{ Symbol(1), Symbol(2), Symbol(3), Symbol(4), Symbol(6), Symbol(7) },
{ Symbol(1), Symbol(2), Symbol(5), Symbol(6), Symbol(7) }
})));
AssertThat(
get_symbol_sequences(grammar.variables[2].productions),
Equals(vector<vector<Symbol>>({
{ Symbol(1), Symbol(2), Symbol(3), Symbol(4) },
})));
});
it("associates each symbol with the precedence binding it to its previous neighbor", [&]() {
AssertThat(
get_precedence_sequences(grammar.variables[0].productions),
Equals(vector<vector<int>>({
{ 0, 0, 0 },
{ 0, 0, 0 }
})));
AssertThat(
get_precedence_sequences(grammar.variables[1].productions),
Equals(vector<vector<int>>({
{ 0, 101, 102, 101, 0, 0 },
{ 0, 101, 101, 0, 0 }
})));
AssertThat(
get_precedence_sequences(grammar.variables[2].productions),
Equals(vector<vector<int>>({
{ 102, 0, 103, 103 },
})));
});
it("associates each symbol with the correct associativity", [&]() {
Associativity none = AssociativityNone;
AssertThat(
get_associativity_sequences(grammar.variables[1].productions),
Equals(vector<vector<Associativity>>({
{ none, AssociativityLeft, AssociativityRight, AssociativityLeft, none, none },
{ none, AssociativityLeft, AssociativityLeft, none, none }
})));
AssertThat(result.productions, Equals(vector<Production>({
Production({
{Symbol(1), 101, AssociativityLeft},
})
})))
});
});

View file

@ -8,6 +8,7 @@
#include <string>
#include <sys/stat.h>
#include <fstream>
#include <stdlib.h>
#include "tree_sitter/compiler.h"
using std::map;
@ -18,6 +19,7 @@ using std::istreambuf_iterator;
map<string, const TSLanguage *> loaded_languages;
int libcompiler_mtime = -1;
int compile_result_count = 0;
const char *libcompiler_path =
#if defined(__linux)
@ -63,24 +65,17 @@ static int get_modified_time(const string &path) {
return file_stat.st_mtime;
}
const TSLanguage *load_language(const string &name, const string &code, int timestamp) {
mkdir("out/tmp", 0777);
string pwd(getenv("PWD"));
string language_function_name = "ts_language_" + name;
string header_dir = pwd + "/include";
string source_filename = pwd + "/out/tmp/" + name + ".c";
string obj_filename = source_filename + ".o";
string lib_filename = source_filename + ".so";
int lib_mtime = get_modified_time(lib_filename);
const TSLanguage *load_language(const string &source_filename,
const string &lib_filename,
const string &language_name) {
string language_function_name = "ts_language_" + language_name;
string header_dir = getenv("PWD") + string("/include");
int source_mtime = get_modified_time(source_filename);
int header_mtime = get_modified_time(header_dir + "/tree_sitter/parser.h");
if (!timestamp || !header_mtime || lib_mtime < timestamp || lib_mtime < header_mtime) {
ofstream source_file;
source_file.open(source_filename);
source_file << code;
source_file.close();
int lib_mtime = get_modified_time(lib_filename);
if (!header_mtime || lib_mtime < header_mtime || lib_mtime < source_mtime) {
string obj_filename = lib_filename + ".o";
const char *compiler_name = getenv("CC");
if (!compiler_name) {
compiler_name = "gcc";
@ -135,13 +130,23 @@ const TSLanguage *load_language(const string &name, const string &code, int time
return language_fn();
}
const TSLanguage *load_language(const string &name, const TSCompileResult &compile_result) {
const TSLanguage *load_compile_result(const string &name, const TSCompileResult &compile_result) {
if (compile_result.error_type != TSCompileErrorTypeNone) {
Assert::Failure(string("Compilation failed ") + compile_result.error_message);
return nullptr;
}
const TSLanguage *language = load_language(name, compile_result.code, 0);
mkdir("out/tmp", 0777);
string source_filename = "out/tmp/compile-result-" + to_string(compile_result_count) + ".c";
string lib_filename = source_filename + ".so";
compile_result_count++;
ofstream source_file;
source_file.open(source_filename);
source_file << compile_result.code;
source_file.close();
const TSLanguage *language = load_language(source_filename, lib_filename, name);
free(compile_result.code);
return language;
}
@ -150,12 +155,6 @@ const TSLanguage *get_test_language(const string &language_name) {
if (loaded_languages[language_name])
return loaded_languages[language_name];
if (libcompiler_mtime == -1) {
libcompiler_mtime = get_modified_time(libcompiler_path);
if (!libcompiler_mtime)
return nullptr;
}
string language_dir = string("spec/fixtures/grammars/") + language_name;
string grammar_filename = language_dir + "/src/grammar.json";
string parser_filename = language_dir + "/src/parser.c";
@ -164,19 +163,21 @@ const TSLanguage *get_test_language(const string &language_name) {
if (!grammar_mtime)
return nullptr;
if (libcompiler_mtime == -1) {
libcompiler_mtime = get_modified_time(libcompiler_path);
if (!libcompiler_mtime)
return nullptr;
}
int parser_mtime = get_modified_time(parser_filename);
int input_mtime = (grammar_mtime > libcompiler_mtime) ?
grammar_mtime :
libcompiler_mtime;
string parser_code;
if (!parser_mtime || parser_mtime < input_mtime) {
if (parser_mtime < grammar_mtime || parser_mtime < libcompiler_mtime) {
printf("\n" "Regenerating the %s parser...\n", language_name.c_str());
ifstream grammar_file(grammar_filename);
istreambuf_iterator<char> grammar_file_iterator(grammar_file), end_iterator;
std::string grammar_json(grammar_file_iterator, end_iterator);
string grammar_json(grammar_file_iterator, end_iterator);
grammar_file.close();
TSCompileResult result = ts_compile_grammar(grammar_json.c_str());
if (result.error_type != TSCompileErrorTypeNone) {
@ -186,17 +187,12 @@ const TSLanguage *get_test_language(const string &language_name) {
ofstream parser_file(parser_filename);
parser_file << result.code;
parser_code = result.code;
grammar_file.close();
parser_file.close();
} else {
ifstream parser_file(parser_filename);
istreambuf_iterator<char> grammar_file_iterator(parser_file), end_iterator;
parser_code.assign(grammar_file_iterator, end_iterator);
}
const TSLanguage *language = load_language(language_name, parser_code, input_mtime);
mkdir("out/tmp", 0777);
string lib_filename = "out/tmp/" + language_name + ".so";
const TSLanguage *language = load_language(parser_filename, lib_filename, language_name);
loaded_languages[language_name] = language;
return language;
};

View file

@ -5,7 +5,7 @@
#include "tree_sitter/runtime.h"
#include <string>
const TSLanguage *load_language(const std::string &, const TSCompileResult &);
const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &);
const TSLanguage *get_test_language(const std::string &language_name);
#endif // HELPERS_LOAD_LANGUAGE_H_

View file

@ -4,17 +4,6 @@
#include <streambuf>
#include <dirent.h>
#ifdef USE_BOOST_REGEX
#include "boost/regex.hpp"
using boost::regex;
using boost::regex_search;
using boost::regex_replace;
using boost::smatch;
using boost::regex_constants::extended;
#else
#include <regex>
using std::regex;
using std::regex_search;
@ -22,8 +11,6 @@ using std::regex_replace;
using std::smatch;
using std::regex_constants::extended;
#endif
using std::string;
using std::vector;
using std::ifstream;

View file

@ -65,8 +65,7 @@ ostream &operator<<(ostream &stream, const ParseAction &action) {
case ParseActionTypeAccept:
return stream << string("#<accept>");
case ParseActionTypeShift:
return stream << string("#<shift state:") << to_string(action.state_index) <<
string(" precedence:") << action.precedence_range << ">";
return stream << string("#<shift state:") << to_string(action.state_index) << ">";
case ParseActionTypeReduce:
return stream << ("#<reduce sym" + to_string(action.symbol.index) + " " +
to_string(action.consumed_symbol_count) + ">");
@ -87,7 +86,16 @@ ostream &operator<<(ostream &stream, const ParseState &state) {
}
ostream &operator<<(ostream &stream, const ProductionStep &step) {
return stream << string("(production_step symbol:") << step.symbol << string(" precedence:") << to_string(step.precedence) << ")";
stream << "(symbol: " << step.symbol << ", precedence:" << to_string(step.precedence);
stream << ", associativity: ";
switch (step.associativity) {
case rules::AssociativityLeft:
return stream << "left)";
case rules::AssociativityRight:
return stream << "right)";
default:
return stream << "none)";
}
}
ostream &operator<<(ostream &stream, const PrecedenceRange &range) {

View file

@ -1,6 +1,26 @@
#include "spec_helper.h"
#include "runtime/alloc.h"
#include "helpers/load_language.h"
#include "compiler/util/string_helpers.h"
#include <map>
// Removes the indentation shared by the lines of an indented multi-line
// literal, using the indent of the first line that has content as the width.
static string dedent(string input) {
  // Indent width = distance between the first non-newline character and the
  // first character that is neither a newline nor a space.
  const size_t first_content = input.find_first_not_of("\n ");
  const size_t first_non_newline = input.find_first_not_of("\n");
  const string indented_break = "\n" + string(first_content - first_non_newline, ' ');

  // Rewrite "newline + indent" as a bare newline (str_replace presumably
  // handles every occurrence -- see compiler/util/string_helpers.h).
  util::str_replace(&input, indented_break, "\n");

  // NOTE(review): substr's second argument is a character COUNT, but it is
  // given the index just past the last non-blank character. The result can
  // therefore keep up to `start` trailing blank characters. Left as-is because
  // the expected strings in the specs may depend on it.
  const size_t start = input.find_first_not_of("\n ");
  const size_t count = input.find_last_not_of("\n ") + 1;
  return input.substr(start, count);
}
// Returns a copy of `input` in which each "{{key}}" placeholder has been
// substituted with the value stored under `key` in `parameters`. Keys are
// processed in the map's iteration order.
static string fill_template(string input, map<string, string> parameters) {
  string result = input;
  for (auto entry = parameters.begin(); entry != parameters.end(); ++entry) {
    const string placeholder = "{{" + entry->first + "}}";
    util::str_replace(&result, placeholder, entry->second);
  }
  return result;
}
START_TEST
@ -22,6 +42,290 @@ describe("compile_grammar", []() {
ts_free(node_string);
};
describe("conflicts", [&]() {
// Compiles the same grammar three times, varying only the wrapper type of
// `math_operation` (PREC, PREC_LEFT, PREC_RIGHT), and checks that plain PREC
// yields a conflict error while the two associativities group "x+y+z"
// to the left and to the right respectively.
it("can resolve shift/reduce conflicts using associativities", [&]() {
string grammar_template = R"JSON({
"name": "associativity_example",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "math_operation"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"math_operation": {
"type": "{{math_operation_prec_type}}",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
})JSON";
// Ambiguity, which '+' applies first?
ts_document_set_input_string(document, "x+y+z");
// No associativity: compilation is expected to fail with a conflict report.
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
{"math_operation_prec_type", "PREC"}
}).c_str());
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
Unresolved conflict for symbol sequence:
expression '+' expression '+'
Possible interpretations:
1: (math_operation expression '+' expression) '+'
2: expression '+' (math_operation expression '+' expression)
Possible resolutions:
1: Specify a left or right associativity in `math_operation`
2: Add a conflict for these rules: `math_operation`
)MESSAGE")));
// Left associativity: the expected tree nests the first '+' inside the second.
result = ts_compile_grammar(fill_template(grammar_template, {
{"math_operation_prec_type", "PREC_LEFT"}
}).c_str());
ts_document_set_language(document, load_compile_result("associativity_example", result));
ts_document_parse(document);
assert_root_node("(expression (math_operation "
"(expression (math_operation (expression (identifier)) (expression (identifier)))) "
"(expression (identifier))))");
// Right associativity: the expected tree nests the second '+' inside the first.
result = ts_compile_grammar(fill_template(grammar_template, {
{"math_operation_prec_type", "PREC_RIGHT"}
}).c_str());
ts_document_set_language(document, load_compile_result("associativity_example", result));
ts_document_parse(document);
assert_root_node("(expression (math_operation "
"(expression (identifier)) "
"(expression (math_operation (expression (identifier)) (expression (identifier))))))");
});
// Compiles one grammar with three different precedences for `function_call`
// (0, -1, 1) and checks the conflict error for 0 and the two different parse
// trees for "foo bar { baz }" otherwise.
// NOTE(review): the grammar's "name" is "associativity_example", the same as in
// the associativity case -- looks like a copy-paste; confirm it is intended.
it("can resolve shift/reduce conflicts involving single-child rules using precedence", [&]() {
string grammar_template = R"JSON({
"name": "associativity_example",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "function_call"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"function_call": {
"type": "PREC_RIGHT",
"value": {{function_call_precedence}},
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "block"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "block"}
]
}
]
}
},
"block": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "{"},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "}"}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
})JSON";
// Ambiguity: is the trailing block associated with `bar` or `foo`?
ts_document_set_input_string(document, "foo bar { baz }");
// Precedence 0: compilation is expected to fail with a conflict report.
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
{"function_call_precedence", "0"}
}).c_str());
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
Unresolved conflict for symbol sequence:
identifier '{'
Possible interpretations:
1: (expression identifier) '{'
2: (function_call identifier block)
Possible resolutions:
1: Specify a higher precedence in `function_call` than in the other rules.
2: Specify a higher precedence in `expression` than in the other rules.
3: Specify a left or right associativity in `expression`
4: Add a conflict for these rules: `expression` `function_call`
)MESSAGE")));
// Giving function calls lower precedence than expressions causes `bar`
// to be treated as an expression passed to `foo`, not as a function
// that's being called with a block.
result = ts_compile_grammar(fill_template(grammar_template, {
{"function_call_precedence", "-1"}
}).c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result("associativity_example", result));
ts_document_parse(document);
assert_root_node("(expression (function_call "
"(identifier) "
"(expression (identifier)) "
"(block (expression (identifier)))))");
// Giving function calls higher precedence than expressions causes `bar`
// to be treated as a function that's being called with a block, not as
// an expression passed to `foo`.
result = ts_compile_grammar(fill_template(grammar_template, {
{"function_call_precedence", "1"}
}).c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result("associativity_example", result));
ts_document_set_input_string(document, "foo bar { baz }");
ts_document_parse(document);
assert_root_node("(expression (function_call "
"(identifier) "
"(expression (function_call "
"(identifier) "
"(block (expression (identifier)))))))");
});
// Compiles a grammar in which `sum` (precedence 0) competes with two rules that
// both continue with '*': `product` (precedence 1) and `other_thing`
// (precedence -1). Checks that compilation reports an unresolved conflict
// listing all three interpretations.
it("does not allow conflicting precedences", [&]() {
string grammar_template = R"JSON({
"name": "conflicting_precedence_example",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "product"},
{"type": "SYMBOL", "name": "other_thing"}
]
},
"sum": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"product": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"other_thing": {
"type": "PREC_LEFT",
"value": -1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "STRING", "value": "*"}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
})JSON";
// The template has no placeholders, so it is filled with an empty parameter map.
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
}).c_str());
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
Unresolved conflict for symbol sequence:
expression '+' expression '*'
Possible interpretations:
1: (sum expression '+' expression) '*'
2: expression '+' (product expression '*' expression)
3: expression '+' (other_thing expression '*' '*')
Possible resolutions:
1: Specify a higher precedence in `product` and `other_thing` than in the other rules.
2: Specify a higher precedence in `sum` than in the other rules.
3: Add a conflict for these rules: `sum` `product` `other_thing`
)MESSAGE")));
});
});
describe("when the grammar's start symbol is a token", [&]() {
it("parses the token", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON(
@ -33,7 +337,7 @@ describe("compile_grammar", []() {
}
)JSON");
ts_document_set_language(document, load_language("one_token_language", result));
ts_document_set_language(document, load_compile_result("one_token_language", result));
ts_document_set_input_string(document, "the-value");
ts_document_parse(document);
@ -52,7 +356,7 @@ describe("compile_grammar", []() {
}
)JSON");
ts_document_set_language(document, load_language("blank_language", result));
ts_document_set_language(document, load_compile_result("blank_language", result));
ts_document_set_input_string(document, "");
ts_document_parse(document);
@ -79,7 +383,7 @@ describe("compile_grammar", []() {
}
)JSON");
ts_document_set_language(document, load_language("escaped_char_language", result));
ts_document_set_language(document, load_compile_result("escaped_char_language", result));
ts_document_set_input_string(document, "1234");
ts_document_parse(document);
@ -167,7 +471,7 @@ describe("compile_grammar", []() {
}
)JSON");
const TSLanguage *language = load_language("arithmetic", result);
const TSLanguage *language = load_compile_result("arithmetic", result);
ts_document_set_language(document, language);
ts_document_set_input_string(document, "a + b * c");

View file

@ -1,4 +1,4 @@
#include "compiler/build_tables/build_parse_table.h"
#include "compiler/build_tables/build_parse_table.h"
#include <algorithm>
#include <map>
#include <set>
@ -6,7 +6,6 @@
#include <unordered_map>
#include <utility>
#include "compiler/parse_table.h"
#include "compiler/build_tables/parse_conflict_manager.h"
#include "compiler/build_tables/remove_duplicate_states.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/parse_item_set_builder.h"
@ -28,13 +27,13 @@ using std::string;
using std::to_string;
using std::unordered_map;
using std::make_shared;
using rules::Associativity;
using rules::Symbol;
using rules::END_OF_INPUT;
class ParseTableBuilder {
const SyntaxGrammar grammar;
const LexicalGrammar lexical_grammar;
ParseConflictManager conflict_manager;
unordered_map<Symbol, ParseItemSet> recovery_states;
unordered_map<ParseItemSet, ParseStateId> parse_state_ids;
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
@ -95,13 +94,10 @@ class ParseTableBuilder {
item_sets_to_process.pop_back();
item_set_builder.apply_transitive_closure(&item_set);
add_reduce_actions(item_set, state_id);
add_shift_actions(item_set, state_id);
add_shift_extra_actions(state_id);
string conflict = add_actions(item_set, state_id);
if (!conflicts.empty()) {
return CompileError(TSCompileErrorTypeParseConflict,
"Unresolved conflict.\n\n" + *conflicts.begin());
if (!conflict.empty()) {
return CompileError(TSCompileErrorTypeParseConflict, conflict);
}
}
@ -155,81 +151,120 @@ class ParseTableBuilder {
}
}
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &transition : item_set.transitions()) {
const Symbol &symbol = transition.first;
const ParseItemSet &next_item_set = transition.second.first;
const PrecedenceRange &precedence = transition.second.second;
string add_actions(const ParseItemSet &item_set, ParseStateId state_id) {
map<Symbol::Index, ParseItemSet> terminal_successors;
map<Symbol::Index, ParseItemSet> nonterminal_successors;
set<Symbol::Index> lookaheads_with_conflicts;
if (!allow_any_conflict) {
recovery_states[symbol].add(next_item_set);
}
if (symbol.is_token) {
ParseAction *new_action = add_terminal_action(
state_id, symbol.index, ParseAction::Shift(0, precedence), item_set);
if (new_action) {
new_action->state_index = add_parse_state(next_item_set);
}
} else {
ParseStateId next_state = add_parse_state(next_item_set);
parse_table.set_nonterminal_action(state_id, symbol.index, next_state);
}
}
}
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &pair : item_set.entries) {
const ParseItem &item = pair.first;
const auto &lookahead_symbols = pair.second;
const LookaheadSet &lookahead_symbols = pair.second;
ParseItem::CompletionStatus status = item.completion_status();
if (status.is_done) {
ParseAction action;
if (item.lhs() == rules::START()) {
action = ParseAction::Accept();
} else {
action = ParseAction::Reduce(Symbol(item.variable_index),
item.step_index, status.precedence,
status.associativity, *item.production);
// If the item is finished, immediately add a Reduce or Accept action to
// the parse table for each of its lookahead terminals.
if (item.is_done()) {
ParseAction action = (item.lhs() == rules::START()) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs(), item.step_index, *item.production);
int precedence = item.precedence();
for (const Symbol::Index lookahead : *lookahead_symbols.entries) {
ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
// Only add the highest-precedence Reduce actions to the parse table.
// If other lower-precedence actions are possible, ignore them.
if (entry.actions.empty()) {
parse_table.add_terminal_action(state_id, lookahead, action);
} else {
ParseAction &existing_action = entry.actions[0];
if (allow_any_conflict) {
entry.actions.push_back(action);
} else {
int existing_precedence = existing_action.precedence();
if (precedence > existing_precedence) {
for (const ParseAction &old_action : entry.actions)
fragile_productions.insert(old_action.production);
entry.actions.clear();
entry.actions.push_back(action);
lookaheads_with_conflicts.erase(lookahead);
} else if (precedence == existing_precedence) {
entry.actions.push_back(action);
lookaheads_with_conflicts.insert(lookahead);
} else {
fragile_productions.insert(item.production);
}
}
}
}
for (const Symbol::Index lookahead : *lookahead_symbols.entries) {
add_terminal_action(state_id, lookahead, action, item_set);
// If the item is unfinished, create a new item by advancing one symbol.
// Add that new item to a successor item set.
} else {
Symbol symbol = item.production->at(item.step_index).symbol;
ParseItem new_item(item.lhs(), *item.production, item.step_index + 1);
if (symbol.is_token) {
terminal_successors[symbol.index].entries[new_item] = lookahead_symbols;
} else {
nonterminal_successors[symbol.index].entries[new_item] = lookahead_symbols;
}
}
}
}
void add_shift_extra_actions(ParseStateId state_id) {
ParseAction action = ParseAction::ShiftExtra();
// Add a Shift action for each possible successor state. Shift actions for
// terminal lookaheads can conflict with Reduce actions added previously.
for (auto &pair : terminal_successors) {
Symbol::Index lookahead = pair.first;
ParseItemSet &next_item_set = pair.second;
ParseStateId next_state_id = add_parse_state(next_item_set);
ParseState &state = parse_table.states[state_id];
bool had_existing_action = !state.terminal_entries[lookahead].actions.empty();
parse_table.add_terminal_action(state_id, lookahead, ParseAction::Shift(next_state_id));
if (!allow_any_conflict) {
if (had_existing_action)
lookaheads_with_conflicts.insert(lookahead);
recovery_states[Symbol(lookahead, true)].add(next_item_set);
}
}
// Add a Shift action for each non-terminal transition.
for (auto &pair : nonterminal_successors) {
Symbol::Index lookahead = pair.first;
ParseItemSet &next_item_set = pair.second;
ParseStateId next_state = add_parse_state(next_item_set);
parse_table.set_nonterminal_action(state_id, lookahead, next_state);
if (!allow_any_conflict)
recovery_states[Symbol(lookahead, false)].add(next_item_set);
}
for (Symbol::Index lookahead : lookaheads_with_conflicts) {
string conflict = handle_conflict(item_set, state_id, lookahead);
if (!conflict.empty()) return conflict;
}
ParseAction shift_extra = ParseAction::ShiftExtra();
ParseState &state = parse_table.states[state_id];
for (const Symbol &extra_symbol : grammar.extra_tokens)
for (const Symbol &extra_symbol : grammar.extra_tokens) {
if (!state.terminal_entries.count(extra_symbol.index) ||
state.has_shift_action() || allow_any_conflict)
parse_table.add_terminal_action(state_id, extra_symbol.index, action);
state.has_shift_action() || allow_any_conflict) {
parse_table.add_terminal_action(state_id, extra_symbol.index, shift_extra);
}
}
return "";
}
void mark_fragile_actions() {
for (ParseState &state : parse_table.states) {
set<Symbol> symbols_with_multiple_actions;
for (auto &entry : state.terminal_entries) {
const Symbol symbol(entry.first, true);
auto &actions = entry.second.actions;
if (actions.size() > 1) {
symbols_with_multiple_actions.insert(symbol);
}
for (ParseAction &action : actions) {
if (action.type == ParseActionTypeReduce) {
if (has_fragile_production(action.production))
action.fragile = true;
action.production = NULL;
action.precedence_range = PrecedenceRange();
action.associativity = rules::AssociativityNone;
}
}
@ -299,7 +334,7 @@ class ParseTableBuilder {
vector<ParseStateId> new_state_ids(parse_table.states.size());
size_t deleted_state_count = 0;
auto deleted_state_iter = deleted_states.begin();
for (size_t i = 0; i < new_state_ids.size(); i++) {
for (ParseStateId i = 0; i < new_state_ids.size(); i++) {
while (deleted_state_iter != deleted_states.end() && *deleted_state_iter < i) {
deleted_state_count++;
deleted_state_iter++;
@ -323,154 +358,204 @@ class ParseTableBuilder {
}
}
ParseAction *add_terminal_action(ParseStateId state_id, Symbol::Index lookahead,
const ParseAction &new_action,
const ParseItemSet &item_set) {
const ParseState &state = parse_table.states[state_id];
const auto &current_entry = state.terminal_entries.find(lookahead);
if (current_entry == state.terminal_entries.end())
return &parse_table.set_terminal_action(state_id, lookahead, new_action);
if (allow_any_conflict)
return &parse_table.add_terminal_action(state_id, lookahead, new_action);
string handle_conflict(const ParseItemSet &item_set, ParseStateId state_id,
Symbol::Index lookahead) {
ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
int reduction_precedence = entry.actions.front().precedence();
set<ParseItem> shift_items;
bool considered_associativity = false;
const ParseAction old_action = current_entry->second.actions[0];
auto resolution = conflict_manager.resolve(new_action, old_action);
for (const ParseAction &action : entry.actions)
if (action.type == ParseActionTypeReduce)
fragile_productions.insert(action.production);
switch (resolution.second) {
case ConflictTypeNone:
if (resolution.first) {
return &parse_table.set_terminal_action(state_id, lookahead, new_action);
}
break;
case ConflictTypeResolved: {
if (resolution.first) {
if (old_action.type == ParseActionTypeReduce)
fragile_productions.insert(old_action.production);
return &parse_table.set_terminal_action(state_id, lookahead, new_action);
} else {
if (new_action.type == ParseActionTypeReduce)
fragile_productions.insert(new_action.production);
break;
}
}
case ConflictTypeUnresolved: {
if (handle_unresolved_conflict(item_set, lookahead)) {
if (old_action.type == ParseActionTypeReduce)
fragile_productions.insert(old_action.production);
if (new_action.type == ParseActionTypeReduce)
fragile_productions.insert(new_action.production);
return &parse_table.add_terminal_action(state_id, lookahead, new_action);
}
break;
}
}
return nullptr;
}
bool handle_unresolved_conflict(const ParseItemSet &item_set,
const Symbol::Index lookahead) {
set<Symbol> involved_symbols;
set<ParseItem> reduce_items;
set<ParseItem> core_shift_items;
set<ParseItem> other_shift_items;
for (const auto &pair : item_set.entries) {
const ParseItem &item = pair.first;
const LookaheadSet &lookahead_set = pair.second;
Symbol next_symbol = item.next_symbol();
if (next_symbol == rules::NONE()) {
if (lookahead_set.contains(lookahead)) {
involved_symbols.insert(item.lhs());
reduce_items.insert(item);
}
} else {
if (item.step_index > 0) {
LookaheadSet first_set = item_set_builder.get_first_set(next_symbol);
if (entry.actions.back().type == ParseActionTypeShift) {
PrecedenceRange shift_precedence;
for (const auto &item_set_entry : item_set.entries) {
const ParseItem &item = item_set_entry.first;
if (item.step_index > 0 && !item.is_done()) {
LookaheadSet first_set = item_set_builder.get_first_set(item.next_symbol());
if (first_set.contains(lookahead)) {
involved_symbols.insert(item.lhs());
core_shift_items.insert(item);
shift_items.insert(item);
shift_precedence.add(item.precedence());
}
}
}
// If the shift action has higher precedence, prefer it over any of the
// reduce actions.
if (shift_precedence.min > reduction_precedence ||
(shift_precedence.min == reduction_precedence &&
shift_precedence.max > reduction_precedence)) {
for (const ParseAction &action : entry.actions) {
if (action.type == ParseActionTypeShift) break;
fragile_productions.insert(action.production);
}
entry.actions.assign({ entry.actions.back() });
}
// If the shift action has lower precedence, prefer the reduce actions.
else if (shift_precedence.max < reduction_precedence ||
(shift_precedence.max == reduction_precedence &&
shift_precedence.min < reduction_precedence)) {
entry.actions.pop_back();
}
// If the shift action has the same precedence as the reduce actions,
// consider the reduce actions' associativity. If they are all left
// associative, prefer the reduce actions. If they are all right
// associative, prefer the shift.
else if (shift_precedence.min == reduction_precedence &&
shift_precedence.max == reduction_precedence) {
considered_associativity = true;
bool has_non_associative_reductions = false;
bool has_left_associative_reductions = false;
bool has_right_associative_reductions = false;
for (const ParseAction &action : entry.actions) {
if (action.type != ParseActionTypeReduce) break;
switch (action.associativity()) {
case rules::AssociativityLeft:
has_left_associative_reductions = true;
break;
case rules::AssociativityRight:
has_right_associative_reductions = true;
break;
default:
has_non_associative_reductions = true;
break;
}
}
if (!has_non_associative_reductions) {
if (has_right_associative_reductions && !has_left_associative_reductions) {
for (const ParseAction &action : entry.actions) {
if (action.type == ParseActionTypeShift) break;
fragile_productions.insert(action.production);
}
entry.actions.assign({ entry.actions.back() });
} else if (has_left_associative_reductions && !has_right_associative_reductions) {
entry.actions.pop_back();
}
} else if (next_symbol.is_token && next_symbol.index == lookahead) {
other_shift_items.insert(item);
}
}
}
for (const auto &conflict_set : grammar.expected_conflicts)
if (involved_symbols == conflict_set)
return true;
if (entry.actions.size() == 1) return "";
string description = "Lookahead symbol: " + symbol_name(Symbol(lookahead, true)) + "\n";
set<Symbol> actual_conflict;
for (const ParseItem &item : shift_items)
actual_conflict.insert(item.lhs());
for (const ParseAction &action : entry.actions)
if (action.type == ParseActionTypeReduce)
actual_conflict.insert(action.symbol);
if (!reduce_items.empty()) {
description += "Reduce items:\n";
for (const ParseItem &item : reduce_items)
description += " " + item_string(item) + "\n";
for (const auto &expected_conflict : grammar.expected_conflicts)
if (expected_conflict == actual_conflict)
return "";
ParseItem earliest_starting_item;
for (const ParseAction &action : entry.actions)
if (action.type == ParseActionTypeReduce)
if (action.consumed_symbol_count > earliest_starting_item.step_index)
earliest_starting_item = ParseItem(action.symbol, *action.production, action.consumed_symbol_count);
for (const ParseItem &shift_item : shift_items)
if (shift_item.step_index > earliest_starting_item.step_index)
earliest_starting_item = shift_item;
string description = "Unresolved conflict for symbol sequence:\n\n";
for (size_t i = 0; i < earliest_starting_item.step_index; i++) {
description += " " + symbol_name(earliest_starting_item.production->at(i).symbol);
}
if (!core_shift_items.empty()) {
description += "Core shift items:\n";
for (const ParseItem &item : core_shift_items)
description += " " + item_string(item) + "\n";
description += " \u2022 " + symbol_name(Symbol(lookahead, true)) + " \u2026";
description += "\n\n";
description += "Possible interpretations:\n\n";
size_t interpretation_count = 1;
for (const ParseAction &action : entry.actions) {
if (action.type == ParseActionTypeReduce) {
description += " " + to_string(interpretation_count++) + ":";
for (size_t i = 0; i < earliest_starting_item.step_index - action.consumed_symbol_count; i++) {
description += " " + symbol_name(earliest_starting_item.production->at(i).symbol);
}
description += " (" + symbol_name(action.symbol);
for (const ProductionStep &step : *action.production) {
description += " " + symbol_name(step.symbol);
}
description += ")";
description += " \u2022 " + symbol_name(Symbol(lookahead, true)) + " \u2026";
description += "\n";
}
}
if (!other_shift_items.empty()) {
description += "Other shift items:\n";
for (const ParseItem &item : other_shift_items)
description += " " + item_string(item) + "\n";
for (const ParseItem &shift_item : shift_items) {
description += " " + to_string(interpretation_count++) + ":";
for (size_t i = 0; i < earliest_starting_item.step_index - shift_item.step_index; i++) {
description += " " + symbol_name(earliest_starting_item.production->at(i).symbol);
}
description += " (" + symbol_name(shift_item.lhs());
for (size_t i = 0; i < shift_item.production->size(); i++) {
if (i == shift_item.step_index)
description += " \u2022";
description += " " + symbol_name(shift_item.production->at(i).symbol);
}
description += ")";
description += "\n";
}
conflicts.insert(description);
return false;
}
description += "\nPossible resolutions:\n\n";
string item_string(const ParseItem &item) const {
string result = symbol_name(item.lhs()) + " ->";
size_t i = 0;
for (const ProductionStep &step : *item.production) {
if (i == item.step_index)
result += " \u2022";
result += " " + symbol_name(step.symbol);
i++;
}
if (i == item.step_index)
result += " \u2022";
size_t resolution_count = 1;
if (actual_conflict.size() > 1) {
if (!shift_items.empty()) {
description += " " + to_string(resolution_count++) + ": ";
description += "Specify a higher precedence in";
bool is_first = true;
for (const ParseItem &shift_item : shift_items) {
if (!is_first) description += " and";
description += " `" + symbol_name(shift_item.lhs()) + "`";
is_first = false;
}
description += " than in the other rules.\n";
}
result += " (prec " + to_string(item.precedence());
switch (item.associativity()) {
case rules::AssociativityNone:
result += ")";
break;
case rules::AssociativityLeft:
result += ", assoc left)";
break;
case rules::AssociativityRight:
result += ", assoc right)";
break;
for (const ParseAction &action : entry.actions) {
if (action.type == ParseActionTypeReduce) {
description += " " + to_string(resolution_count++) + ": ";
description += "Specify a higher precedence in `";
description += symbol_name(action.symbol);
description += "` than in the other rules.\n";
}
}
}
return result;
}
set<Symbol> get_first_set(const Symbol &start_symbol) {
set<Symbol> result;
vector<Symbol> symbols_to_process({ start_symbol });
while (!symbols_to_process.empty()) {
Symbol symbol = symbols_to_process.back();
symbols_to_process.pop_back();
if (result.insert(symbol).second)
for (const Production &production : grammar.productions(symbol))
if (!production.empty())
symbols_to_process.push_back(production[0].symbol);
// When the tie between the shift and the reductions came down to
// associativity, suggest declaring one on every rule being reduced.
if (considered_associativity) {
  description += " " + to_string(resolution_count++) + ": ";
  description += "Specify a left or right associativity in";
  // Tracks whether a rule name has already been written. It must live outside
  // the loop; otherwise it is reset on every iteration and the " and"
  // separator between rule names is never emitted.
  bool is_first = true;
  for (const ParseAction &action : entry.actions) {
    if (action.type == ParseActionTypeReduce) {
      if (!is_first) description += " and";
      description += " `" + symbol_name(action.symbol) + "`";
      is_first = false;
    }
  }
  description += "\n";
}
return result;
description += " " + to_string(resolution_count++) + ": ";
description += "Add a conflict for these rules:";
for (const Symbol &conflict_symbol : actual_conflict) {
description += " `" + symbol_name(conflict_symbol) + "`";
}
description += "\n";
return description;
}
string symbol_name(const rules::Symbol &symbol) const {
@ -491,8 +576,7 @@ class ParseTableBuilder {
}
bool has_fragile_production(const Production *production) {
auto end = fragile_productions.end();
return std::find(fragile_productions.begin(), end, production) != end;
return fragile_productions.find(production) != fragile_productions.end();
}
};

View file

@ -1,80 +0,0 @@
#include "compiler/build_tables/parse_conflict_manager.h"
#include <utility>
#include "compiler/parse_table.h"
#include "compiler/rules/built_in_symbols.h"
namespace tree_sitter {
namespace build_tables {
using std::pair;
using std::vector;
// Decides between two parse actions competing for the same table entry.
//
// Returns a pair {take_new, conflict_type}:
//   - `first` is true when `new_action` is preferred over `old_action`;
//   - `second` says whether there was no real conflict, a conflict that was
//     settled by precedence/associativity, or one that could not be settled.
pair<bool, ConflictType> ParseConflictManager::resolve(
const ParseAction &new_action, const ParseAction &old_action) const {
// Normalize the argument order: when the new action's type sorts before the
// old action's, evaluate the swapped pair and invert the "new wins" flag. The
// code below can therefore assume new_action.type >= old_action.type.
if (new_action.type < old_action.type) {
auto opposite = resolve(old_action, new_action);
return { !opposite.first, opposite.second };
}
switch (old_action.type) {
case ParseActionTypeError:
// The new action always replaces an error action.
return { true, ConflictTypeNone };
case ParseActionTypeShift:
if (new_action.extra) {
// An `extra` new action never displaces the existing shift.
return { false, ConflictTypeNone };
} else if (old_action.extra) {
// An existing `extra` shift always yields to the new action.
return { true, ConflictTypeNone };
} else if (new_action.type == ParseActionTypeReduce) {
// Shift/reduce: compare the reduction's precedence against the range of
// precedences recorded on the shift.
int min_precedence = old_action.precedence_range.min;
int max_precedence = old_action.precedence_range.max;
int new_precedence = new_action.precedence_range.max;
if (new_precedence < min_precedence ||
(new_precedence == min_precedence &&
min_precedence < max_precedence)) {
// Reduction is below the shift range (or ties only its lower bound while
// the range extends higher): keep the shift.
return { false, ConflictTypeResolved };
} else if (new_precedence > max_precedence ||
(new_precedence == max_precedence &&
min_precedence < max_precedence)) {
// Reduction is above the shift range (or ties only its upper bound while
// the range extends lower): take the reduction.
return { true, ConflictTypeResolved };
} else if (min_precedence == max_precedence) {
// Reaching here means all three precedences are equal, so the reduction's
// associativity breaks the tie: left prefers the reduction, right prefers
// the shift, anything else is left unresolved.
switch (new_action.associativity) {
case rules::AssociativityLeft:
return { true, ConflictTypeResolved };
case rules::AssociativityRight:
return { false, ConflictTypeResolved };
default:
return { false, ConflictTypeUnresolved };
}
} else {
// The reduction's precedence lies strictly inside the shift's range.
return { false, ConflictTypeUnresolved };
}
}
break;
case ParseActionTypeReduce:
if (new_action.type == ParseActionTypeReduce) {
// Reduce/reduce: `extra` actions lose to non-extra ones; otherwise the
// higher precedence wins and equal precedence is unresolved.
if (new_action.extra)
return { false, ConflictTypeNone };
if (old_action.extra)
return { true, ConflictTypeNone };
int old_precedence = old_action.precedence_range.min;
int new_precedence = new_action.precedence_range.min;
if (new_precedence > old_precedence) {
return { true, ConflictTypeResolved };
} else if (new_precedence < old_precedence) {
return { false, ConflictTypeResolved };
} else {
return { false, ConflictTypeUnresolved };
}
}
// No `break` here: when the new action is not a reduction, control falls
// through into `default`, which only breaks out of the switch.
default:
break;
}
// Any combination not handled above keeps the old action with no conflict.
return { false, ConflictTypeNone };
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -1,32 +0,0 @@
#ifndef COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_
#define COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_
#include <utility>
#include "compiler/syntax_grammar.h"
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
class ParseAction;
namespace rules {
class Symbol;
}
namespace build_tables {
// Classification returned as the second element of
// ParseConflictManager::resolve()'s result.
enum ConflictType {
// The two actions did not require a precedence/associativity decision.
ConflictTypeNone,
// The choice between the actions was made by precedence or associativity.
ConflictTypeResolved,
// Neither precedence nor associativity could choose between the actions.
ConflictTypeUnresolved
};
// Stateless helper (no data members are declared here) that compares two
// parse actions competing for the same parse-table entry.
class ParseConflictManager {
public:
// Takes two parse actions and returns a pair: a flag for which action is
// preferred, and the kind of conflict that was encountered.
// NOTE(review): the parameters are unnamed here; presumably (new, old) in
// that order — confirm against the definition.
std::pair<bool, ConflictType> resolve(const ParseAction &,
const ParseAction &) const;
};
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_

View file

@ -12,6 +12,7 @@ using std::pair;
using std::string;
using std::to_string;
using rules::Symbol;
using rules::Associativity;
using util::hash_combine;
ParseItem::ParseItem() : variable_index(-1), production(nullptr), step_index(0) {}
@ -43,26 +44,32 @@ Symbol ParseItem::lhs() const {
return Symbol(variable_index);
}
ParseItem::CompletionStatus ParseItem::completion_status() const {
CompletionStatus result = { false, 0, rules::AssociativityNone };
if (step_index == production->size()) {
result.is_done = true;
if (step_index > 0) {
const ProductionStep &last_step = production->at(step_index - 1);
result.precedence = last_step.precedence;
result.associativity = last_step.associativity;
}
}
return result;
// Reports whether this item's position has reached (or passed) the end of its
// production, i.e. there is no further step to advance over.
bool ParseItem::is_done() const {
  const auto production_length = production->size();
  return !(step_index < production_length);
}
int ParseItem::precedence() const {
if (production->empty())
return 0;
else if (completion_status().is_done)
return production->back().precedence;
else
if (is_done()) {
if (production->empty()) {
return 0;
} else {
return production->back().precedence;
}
} else {
return production->at(step_index).precedence;
}
}
// Associativity attached to this item: taken from the step at the current
// position while the item is unfinished, from the last step once it is
// finished, and AssociativityNone for a finished item over an empty production.
rules::Associativity ParseItem::associativity() const {
  // Unfinished item: the step about to be consumed decides.
  if (!is_done())
    return production->at(step_index).associativity;

  // Finished item with no steps at all has nothing to consult.
  if (production->empty())
    return rules::AssociativityNone;

  return production->back().associativity;
}
Symbol ParseItem::next_symbol() const {
@ -72,15 +79,6 @@ Symbol ParseItem::next_symbol() const {
return production->at(step_index).symbol;
}
rules::Associativity ParseItem::associativity() const {
if (production->empty())
return rules::AssociativityNone;
else if (completion_status().is_done)
return production->back().associativity;
else
return production->at(step_index).associativity;
}
ParseItemSet::ParseItemSet() {}
ParseItemSet::ParseItemSet(const map<ParseItem, LookaheadSet> &entries)
@ -107,21 +105,33 @@ size_t ParseItemSet::unfinished_item_signature() const {
return result;
}
ParseItemSet::TransitionMap ParseItemSet::transitions() const {
ParseItemSet::TransitionMap result;
ParseItemSet::ActionMap ParseItemSet::actions() const {
ParseItemSet::ActionMap result;
for (const auto &pair : entries) {
const ParseItem &item = pair.first;
const LookaheadSet &lookahead_symbols = pair.second;
if (item.step_index == item.production->size())
continue;
size_t step = item.step_index + 1;
Symbol symbol = item.production->at(item.step_index).symbol;
int precedence = item.production->at(item.step_index).precedence;
ParseItem new_item(item.lhs(), *item.production, step);
if (item.step_index == item.production->size()) {
int precedence = item.precedence();
for (const Symbol::Index lookahead : *lookahead_symbols.entries) {
Action &action = result.terminal_actions[lookahead];
if (precedence > action.completion_precedence) {
action.completions.assign({ &item });
} else if (precedence == action.completion_precedence) {
action.completions.push_back({ &item });
}
}
} else {
Symbol symbol = item.production->at(item.step_index).symbol;
ParseItem new_item(item.lhs(), *item.production, item.step_index + 1);
result[symbol].first.entries[new_item] = lookahead_symbols;
result[symbol].second.add(precedence);
if (symbol.is_token) {
result.terminal_actions[symbol.index].continuation.entries[new_item] = lookahead_symbols;
} else {
result.nonterminal_continuations[symbol.index].entries[new_item] = lookahead_symbols;
}
}
}
return result;

View file

@ -29,7 +29,7 @@ class ParseItem {
rules::Symbol next_symbol() const;
int precedence() const;
rules::Associativity associativity() const;
CompletionStatus completion_status() const;
bool is_done() const;
int variable_index;
const Production *production;
@ -41,10 +41,16 @@ class ParseItemSet {
ParseItemSet();
explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
typedef std::map<rules::Symbol, std::pair<ParseItemSet, PrecedenceRange>>
TransitionMap;
struct Completion;
struct Action;
struct ActionMap {
std::map<rules::Symbol::Index, Action> terminal_actions;
std::map<rules::Symbol::Index, ParseItemSet> nonterminal_continuations;
};
ActionMap actions() const;
TransitionMap transitions() const;
bool operator==(const ParseItemSet &) const;
void add(const ParseItemSet &);
size_t unfinished_item_signature() const;
@ -52,6 +58,22 @@ class ParseItemSet {
std::map<ParseItem, LookaheadSet> entries;
};
// Bundles a pointer to a parse item with a precedence and an associativity.
struct ParseItemSet::Completion {
  const ParseItem *item;
  int precedence;
  rules::Associativity associativity;

  // Orders completions by precedence alone; `item` and `associativity` do not
  // participate. Const-qualified so the comparison can be used on const
  // objects and through const references (as standard algorithms and ordered
  // containers require).
  bool operator<(const ParseItemSet::Completion &other) const {
    return precedence < other.precedence;
  }
};
// Per-lookahead-terminal record filled in by ParseItemSet::actions().
struct ParseItemSet::Action {
// Items obtained by advancing unfinished items past this terminal.
ParseItemSet continuation;
// Finished items that list this terminal as a lookahead; actions() replaces
// the list when a higher precedence is seen and appends on equal precedence.
std::vector<const ParseItem *> completions;
// Precedence that finished items are compared against in actions().
// NOTE(review): no initializer is given here and actions() does not appear to
// assign it; its starting value depends on how the struct is created —
// confirm this is intended.
int completion_precedence;
};
} // namespace build_tables
} // namespace tree_sitter

View file

@ -20,9 +20,9 @@ using std::make_shared;
using rules::Symbol;
using rules::NONE;
static map<Symbol, LookaheadSet> build_first_sets(const SyntaxGrammar &grammar,
const LexicalGrammar &lexical_grammar) {
map<Symbol, LookaheadSet> result;
ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
const LexicalGrammar &lexical_grammar) :
grammar{&grammar} {
vector<Symbol> symbol_stack;
set<Symbol> processed_symbols;
@ -47,20 +47,13 @@ static map<Symbol, LookaheadSet> build_first_sets(const SyntaxGrammar &grammar,
}
}
result.insert({symbol, first_set});
first_sets.insert({symbol, first_set});
}
for (int i = 0; i < lexical_grammar.variables.size(); i++) {
for (size_t i = 0; i < lexical_grammar.variables.size(); i++) {
Symbol symbol(i, true);
result.insert({symbol, LookaheadSet({ i })});
first_sets.insert({symbol, LookaheadSet({ static_cast<Symbol::Index>(i) })});
}
return result;
}
ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
const LexicalGrammar &lexical_grammar) :
grammar{&grammar}, first_sets{build_first_sets(grammar, lexical_grammar)} {
}
void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
@ -109,7 +102,7 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
}
}
LookaheadSet ParseItemSetBuilder::get_first_set(rules::Symbol &symbol) const {
LookaheadSet ParseItemSetBuilder::get_first_set(const rules::Symbol &symbol) const {
return first_sets.find(symbol)->second;
}

View file

@ -20,7 +20,7 @@ class ParseItemSetBuilder {
public:
ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &);
void apply_transitive_closure(ParseItemSet *);
LookaheadSet get_first_set(rules::Symbol &) const;
LookaheadSet get_first_set(const rules::Symbol &) const;
};
} // namespace build_tables

View file

@ -108,7 +108,7 @@ class CCodeGenerator {
private:
void add_includes() {
add("#include \"tree_sitter/parser.h\"");
add("#include <tree_sitter/parser.h>");
line();
}

View file

@ -14,27 +14,22 @@ using rules::Symbol;
ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
Symbol symbol, size_t consumed_symbol_count,
PrecedenceRange precedence_range,
rules::Associativity associativity,
const Production *production)
: type(type),
extra(false),
fragile(false),
symbol(symbol),
state_index(state_index),
symbol(symbol),
consumed_symbol_count(consumed_symbol_count),
precedence_range(precedence_range),
associativity(associativity),
production(production) {}
ParseAction::ParseAction()
: type(ParseActionTypeError),
extra(false),
fragile(false),
symbol(Symbol(-1)),
state_index(-1),
symbol(Symbol(-1)),
consumed_symbol_count(0),
associativity(rules::AssociativityNone),
production(nullptr) {}
ParseAction ParseAction::Error() {
@ -47,15 +42,13 @@ ParseAction ParseAction::Accept() {
return action;
}
ParseAction ParseAction::Shift(ParseStateId state_index,
PrecedenceRange precedence_range) {
return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0,
precedence_range, rules::AssociativityNone, nullptr);
ParseAction ParseAction::Shift(ParseStateId state_index) {
return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0, nullptr);
}
ParseAction ParseAction::Recover(ParseStateId state_index) {
return ParseAction(ParseActionTypeRecover, state_index, Symbol(-1), 0,
PrecedenceRange(), rules::AssociativityNone, nullptr);
nullptr);
}
ParseAction ParseAction::ShiftExtra() {
@ -66,11 +59,33 @@ ParseAction ParseAction::ShiftExtra() {
}
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
int precedence,
rules::Associativity associativity,
const Production &production) {
return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count,
{ precedence, precedence }, associativity, &production);
&production);
}
// Returns the precedence recorded on the production step that this action
// points at.
//
// If `consumed_symbol_count` indexes a step of `production`, that step's
// precedence is returned. If it is at or past the end of the production, the
// last step's precedence is used instead.
//
// Returns 0 when there is nothing to read a precedence from: either the
// production is empty, or the action carries no production at all. The
// default constructor, Shift() and Recover() all store a null `production`,
// so it must be checked before being dereferenced.
int ParseAction::precedence() const {
  if (production == nullptr || production->empty()) {
    return 0;
  }
  if (consumed_symbol_count < production->size()) {
    return production->at(consumed_symbol_count).precedence;
  }
  return production->back().precedence;
}
// Returns the associativity recorded on the production step that this action
// points at.
//
// If `consumed_symbol_count` indexes a step of `production`, that step's
// associativity is returned. If it is at or past the end of the production,
// the last step's associativity is used instead.
//
// Returns rules::AssociativityNone when there is nothing to read from: either
// the production is empty, or the action carries no production at all. The
// default constructor, Shift() and Recover() all store a null `production`,
// so it must be checked before being dereferenced.
rules::Associativity ParseAction::associativity() const {
  if (production == nullptr || production->empty()) {
    return rules::AssociativityNone;
  }
  if (consumed_symbol_count < production->size()) {
    return production->at(consumed_symbol_count).associativity;
  }
  return production->back().associativity;
}
bool ParseAction::operator==(const ParseAction &other) const {

View file

@ -26,30 +26,30 @@ enum ParseActionType {
class ParseAction {
ParseAction(ParseActionType type, ParseStateId state_index,
rules::Symbol symbol, size_t consumed_symbol_count,
PrecedenceRange range, rules::Associativity, const Production *);
const Production *);
public:
ParseAction();
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(ParseStateId state_index, PrecedenceRange precedence);
static ParseAction Shift(ParseStateId state_index);
static ParseAction Recover(ParseStateId state_index);
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count,
int precedence, rules::Associativity,
const Production &);
static ParseAction ShiftExtra();
bool operator==(const ParseAction &) const;
bool operator<(const ParseAction &) const;
rules::Associativity associativity() const;
int precedence() const;
ParseActionType type;
bool extra;
bool fragile;
rules::Symbol symbol;
ParseStateId state_index;
size_t consumed_symbol_count;
PrecedenceRange precedence_range;
rules::Associativity associativity;
rules::Symbol symbol;
size_t consumed_symbol_count;
const Production *production;
};

View file

@ -19,6 +19,8 @@ class FlattenRule : public rules::RuleFn<void> {
private:
vector<int> precedence_stack;
vector<rules::Associativity> associativity_stack;
int last_precedence;
rules::Associativity last_associativity;
Production production;
void apply_to(const rules::Symbol *sym) {
@ -39,11 +41,13 @@ class FlattenRule : public rules::RuleFn<void> {
apply(metadata->rule);
if (precedence.second) {
last_precedence = precedence_stack.back();
precedence_stack.pop_back();
production.back().precedence = precedence_stack.back();
}
if (associativity.second) {
last_associativity = associativity_stack.back();
associativity_stack.pop_back();
production.back().associativity = associativity_stack.back();
}
@ -51,40 +55,49 @@ class FlattenRule : public rules::RuleFn<void> {
// Flattens a sequence rule: the left operand is applied first, then the right.
// Between the two, last_precedence and last_associativity are reset to 0 and
// rules::AssociativityNone, so values recorded while flattening the left
// operand are no longer set when the right operand is processed.
void apply_to(const rules::Seq *seq) {
apply(seq->left);
last_precedence = 0;
last_associativity = rules::AssociativityNone;
apply(seq->right);
}
public:
FlattenRule()
: precedence_stack({ 0 }),
associativity_stack({ rules::AssociativityNone }) {}
associativity_stack({ rules::AssociativityNone }),
last_precedence(0),
last_associativity(rules::AssociativityNone) {}
Production flatten(const rule_ptr &rule) {
apply(rule);
size_t size = production.size();
if (size > 1) {
production[size - 1].precedence = production[size - 2].precedence;
production[size - 1].associativity = production[size - 2].associativity;
if (!production.empty()) {
production.back().precedence = last_precedence;
production.back().associativity = last_associativity;
}
return production;
}
};
// Builds the SyntaxVariable for `variable`: every alternative produced by
// extract_choices() is flattened into a Production, and each distinct
// Production is kept once, in first-seen order. The result carries the
// variable's original name and type.
SyntaxVariable flatten_rule(const Variable &variable) {
  vector<Production> unique_productions;

  for (const rule_ptr &choice : extract_choices(variable.rule)) {
    Production flattened = FlattenRule().flatten(choice);

    // Skip alternatives that flatten to a production we already collected.
    bool already_present =
      find(unique_productions.begin(), unique_productions.end(), flattened) !=
      unique_productions.end();
    if (already_present) {
      continue;
    }

    unique_productions.push_back(flattened);
  }

  return SyntaxVariable(variable.name, variable.type, unique_productions);
}
SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
SyntaxGrammar result;
result.expected_conflicts = grammar.expected_conflicts;
result.extra_tokens = grammar.extra_tokens;
for (const Variable &variable : grammar.variables) {
vector<Production> productions;
for (const rule_ptr &rule_component : extract_choices(variable.rule)) {
Production production = FlattenRule().flatten(rule_component);
auto end = productions.end();
if (find(productions.begin(), end, production) == end)
productions.push_back(production);
}
result.variables.push_back(
SyntaxVariable(variable.name, variable.type, productions));
result.variables.push_back(flatten_rule(variable));
}
return result;

View file

@ -10,6 +10,7 @@ namespace prepare_grammar {
struct InitialSyntaxGrammar;
// Flattens each choice alternative of the variable's rule into a Production
// and returns the distinct productions as a SyntaxVariable that keeps the
// variable's name and type.
SyntaxVariable flatten_rule(const Variable &variable);
// Converts the whole grammar: copies its expected conflicts and extra tokens,
// and flattens every variable with flatten_rule().
SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &);
} // namespace prepare_grammar

View file

@ -44,13 +44,8 @@
],
'variables': {
'USE_LIBPROFILER%': 'true',
'USE_BOOST_REGEX%': 'false',
},
'conditions': [
['USE_BOOST_REGEX != "false"', {
'defines': ['USE_BOOST_REGEX'],
'libraries': ['-lboost_regex'],
}],
['USE_LIBPROFILER != "false"', {
'libraries': ['-lprofiler'],
}]