In compiler, distinguish between anonymous tokens and hidden rules
This commit is contained in:
parent
4b270c8604
commit
5982b77c97
46 changed files with 41131 additions and 40884 deletions
|
|
@ -1,8 +1,7 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
using namespace rules;
|
||||
|
|
@ -12,15 +11,35 @@ START_TEST
|
|||
|
||||
describe("build_parse_table", []() {
|
||||
SyntaxGrammar parse_grammar{{
|
||||
{ "rule0", choice({ i_sym(1), i_sym(2) }) },
|
||||
{ "rule1", i_token(0) },
|
||||
{ "rule2", i_token(1) },
|
||||
}, {}, { Symbol(2, SymbolOptionToken) }, {}};
|
||||
{
|
||||
"rule0",
|
||||
choice({ i_sym(1), i_sym(2) }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule1",
|
||||
i_token(0),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule2",
|
||||
i_token(1),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, { Symbol(2, true) }, {}};
|
||||
|
||||
LexicalGrammar lex_grammar{{
|
||||
{ "token0", pattern("[a-c]") },
|
||||
{ "token1", pattern("[b-d]") },
|
||||
}, {}, {}};
|
||||
{
|
||||
"token0",
|
||||
pattern("[a-c]"),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"token1",
|
||||
pattern("[b-d]"),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}};
|
||||
|
||||
it("first looks for the start rule and its item set closure", [&]() {
|
||||
auto result = build_parse_table(parse_grammar, lex_grammar);
|
||||
|
|
@ -32,11 +51,11 @@ describe("build_parse_table", []() {
|
|||
// expanded from the item set closure of the start item
|
||||
{ Symbol(1), {ParseAction::Shift(2, { 0 })} },
|
||||
{ Symbol(2), {ParseAction::Shift(2, { 0 })} },
|
||||
{ Symbol(0, SymbolOptionToken), {ParseAction::Shift(3, { 0 })} },
|
||||
{ Symbol(1, SymbolOptionToken), {ParseAction::Shift(4, { 0 })} },
|
||||
{ Symbol(0, true), {ParseAction::Shift(3, { 0 })} },
|
||||
{ Symbol(1, true), {ParseAction::Shift(4, { 0 })} },
|
||||
|
||||
// for the ubiquitous_token 'token2'
|
||||
{ Symbol(2, SymbolOptionToken), {ParseAction::ShiftExtra()} },
|
||||
{ Symbol(2, true), {ParseAction::ShiftExtra()} },
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
@ -52,7 +71,7 @@ describe("build_parse_table", []() {
|
|||
{ END_OF_INPUT(), {ParseAction::Accept()} },
|
||||
|
||||
// for the ubiquitous_token 'token2'
|
||||
{ Symbol(2, SymbolOptionToken), {ParseAction::ShiftExtra()} },
|
||||
{ Symbol(2, true), {ParseAction::ShiftExtra()} },
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
@ -63,7 +82,7 @@ describe("build_parse_table", []() {
|
|||
{ END_OF_INPUT(), {ParseAction::Reduce(Symbol(0), 1, 0, AssociativityLeft, 0)} },
|
||||
|
||||
// for the ubiquitous_token 'token2'
|
||||
{ Symbol(2, SymbolOptionToken), {ParseAction::ShiftExtra()} },
|
||||
{ Symbol(2, true), {ParseAction::ShiftExtra()} },
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/build_tables/first_symbols.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
|
|
@ -16,7 +16,7 @@ describe("first_symbols", []() {
|
|||
auto rule = seq({ i_token(0), i_token(1) });
|
||||
|
||||
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
|
||||
Symbol(0, SymbolOptionToken),
|
||||
Symbol(0, true),
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
@ -28,8 +28,8 @@ describe("first_symbols", []() {
|
|||
i_token(1) });
|
||||
|
||||
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
|
||||
Symbol(0, SymbolOptionToken),
|
||||
Symbol(1, SymbolOptionToken)
|
||||
Symbol(0, true),
|
||||
Symbol(1, true)
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
@ -41,16 +41,21 @@ describe("first_symbols", []() {
|
|||
i_sym(0) });
|
||||
|
||||
SyntaxGrammar grammar{{
|
||||
{ "rule0", seq({
|
||||
i_token(2),
|
||||
i_token(3),
|
||||
i_token(4) }) }
|
||||
}, {}, {}, {}};
|
||||
{
|
||||
"rule0",
|
||||
seq({
|
||||
i_token(2),
|
||||
i_token(3),
|
||||
i_token(4),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
}
|
||||
}, {}, {}};
|
||||
|
||||
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0),
|
||||
Symbol(0, SymbolOptionToken),
|
||||
Symbol(2, SymbolOptionToken),
|
||||
Symbol(0, true),
|
||||
Symbol(2, true),
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
@ -60,15 +65,20 @@ describe("first_symbols", []() {
|
|||
i_token(1) });
|
||||
|
||||
SyntaxGrammar grammar{{
|
||||
{ "rule0", choice({
|
||||
i_token(0),
|
||||
blank() }) }
|
||||
}, {}, {}, {}};
|
||||
{
|
||||
"rule0",
|
||||
choice({
|
||||
i_token(0),
|
||||
blank(),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0),
|
||||
Symbol(0, SymbolOptionToken),
|
||||
Symbol(1, SymbolOptionToken),
|
||||
Symbol(0, true),
|
||||
Symbol(1, true),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
@ -76,17 +86,21 @@ describe("first_symbols", []() {
|
|||
describe("when there are left-recursive rules", [&]() {
|
||||
it("terminates", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{ "rule0", choice({
|
||||
seq({ i_sym(0), i_token(10) }),
|
||||
i_token(11),
|
||||
}) },
|
||||
}, {}, {}, {}};
|
||||
{
|
||||
"rule0",
|
||||
choice({
|
||||
seq({ i_sym(0), i_token(10) }),
|
||||
i_token(11),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
auto rule = i_sym(0);
|
||||
|
||||
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0),
|
||||
Symbol(11, SymbolOptionToken)
|
||||
Symbol(11, true)
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
@ -95,7 +109,7 @@ describe("first_symbols", []() {
|
|||
auto rule = make_shared<Metadata>(i_token(3), map<rules::MetadataKey, int>());
|
||||
|
||||
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
|
||||
Symbol(3, SymbolOptionToken),
|
||||
Symbol(3, true),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
|
||||
|
|
@ -10,29 +10,39 @@ START_TEST
|
|||
|
||||
describe("item_set_closure", []() {
|
||||
SyntaxGrammar grammar{{
|
||||
{ "E", seq({
|
||||
i_sym(1),
|
||||
i_token(11) }) },
|
||||
{ "T", seq({
|
||||
i_token(12),
|
||||
i_token(13) }) },
|
||||
}, {}, {}, {}};
|
||||
{
|
||||
"E",
|
||||
seq({
|
||||
i_sym(1),
|
||||
i_token(11),
|
||||
}),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"T",
|
||||
seq({
|
||||
i_token(12),
|
||||
i_token(13),
|
||||
}),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
it("adds items at the beginnings of referenced rules", [&]() {
|
||||
ParseItemSet item_set = item_set_closure(
|
||||
ParseItem(Symbol(0), grammar.rule(Symbol(0)), {}),
|
||||
set<Symbol>({ Symbol(10, SymbolOptionToken) }),
|
||||
ParseItem(Symbol(0), grammar.rules[0].rule, {}),
|
||||
set<Symbol>({ Symbol(10, true) }),
|
||||
grammar
|
||||
);
|
||||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(1), grammar.rule(Symbol(1)), {}),
|
||||
set<Symbol>({ Symbol(11, SymbolOptionToken) }),
|
||||
ParseItem(Symbol(1), grammar.rules[1].rule, {}),
|
||||
set<Symbol>({ Symbol(11, true) }),
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(0), grammar.rule(Symbol(0)), {}),
|
||||
set<Symbol>({ Symbol(10, SymbolOptionToken) }),
|
||||
ParseItem(Symbol(0), grammar.rules[0].rule, {}),
|
||||
set<Symbol>({ Symbol(10, true) }),
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/helpers/rule_helpers.h"
|
||||
|
||||
using namespace rules;
|
||||
|
|
@ -43,29 +43,37 @@ describe("char_transitions(LexItemSet)", []() {
|
|||
|
||||
describe("sym_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{ "A", blank() },
|
||||
{ "B", i_token(21) },
|
||||
}, {}, {}, {}};
|
||||
{
|
||||
"A",
|
||||
blank(),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"B",
|
||||
i_token(21),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
it("computes the closure of the new item sets", [&]() {
|
||||
ParseItemSet set1({
|
||||
{
|
||||
ParseItem(Symbol(0), seq({ i_token(22), i_sym(1) }), { Symbol(101) }),
|
||||
set<Symbol>({ Symbol(23, SymbolOptionToken) })
|
||||
set<Symbol>({ Symbol(23, true) })
|
||||
},
|
||||
});
|
||||
|
||||
AssertThat(sym_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
|
||||
{
|
||||
Symbol(22, SymbolOptionToken),
|
||||
Symbol(22, true),
|
||||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), i_sym(1), { Symbol(101), Symbol(22) }),
|
||||
set<Symbol>({ Symbol(23, SymbolOptionToken) }),
|
||||
set<Symbol>({ Symbol(23, true) }),
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), i_token(21), {}),
|
||||
set<Symbol>({ Symbol(23, SymbolOptionToken) })
|
||||
set<Symbol>({ Symbol(23, true) })
|
||||
},
|
||||
})
|
||||
},
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
|
@ -11,16 +11,24 @@ START_TEST
|
|||
|
||||
describe("LexConflictManager", []() {
|
||||
LexicalGrammar lexical_grammar{{
|
||||
{ "other_token", pattern("[a-b]") },
|
||||
{ "lookahead_token", pattern("[c-d]") },
|
||||
}, {}, {}};
|
||||
{
|
||||
"other_token",
|
||||
pattern("[a-b]"),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"lookahead_token",
|
||||
pattern("[c-d]"),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}};
|
||||
|
||||
LexConflictManager conflict_manager(lexical_grammar);
|
||||
|
||||
bool update;
|
||||
Symbol sym1(0, SymbolOptionToken);
|
||||
Symbol sym2(1, SymbolOptionToken);
|
||||
Symbol sym3(2, SymbolOptionToken);
|
||||
Symbol sym1(0, true);
|
||||
Symbol sym2(1, true);
|
||||
Symbol sym3(2, true);
|
||||
|
||||
it("favors non-errors over lexical errors", [&]() {
|
||||
update = conflict_manager.resolve(LexAction::Advance(2, {0}), LexAction::Error());
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/parse_conflict_manager.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
|
@ -11,17 +11,37 @@ START_TEST
|
|||
|
||||
describe("ParseConflictManager", []() {
|
||||
SyntaxGrammar syntax_grammar{{
|
||||
{ "in_progress_rule1", i_token(0) },
|
||||
{ "in_progress_rule2", i_token(0) },
|
||||
{ "reduced_rule", i_token(0) },
|
||||
{ "other_rule1", i_token(0) },
|
||||
{ "other_rule2", i_token(0) },
|
||||
}, {}, { Symbol(2, SymbolOptionToken) }, {}};
|
||||
{
|
||||
"in_progress_rule1",
|
||||
i_token(0),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"in_progress_rule2",
|
||||
i_token(0),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"reduced_rule",
|
||||
i_token(0),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"other_rule1",
|
||||
i_token(0),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"other_rule2",
|
||||
i_token(0),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
}, { Symbol(2, true) }, {}};
|
||||
|
||||
pair<bool, ConflictType> result;
|
||||
Symbol sym1(0);
|
||||
Symbol sym2(1);
|
||||
Symbol lookahead_sym(1, SymbolOptionToken);
|
||||
Symbol lookahead_sym(1, true);
|
||||
ParseConflictManager *conflict_manager;
|
||||
|
||||
before_each([&]() {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
using namespace rules;
|
||||
using build_tables::rule_can_be_blank;
|
||||
|
|
@ -57,13 +57,23 @@ describe("rule_can_be_blank", [&]() {
|
|||
|
||||
describe("checking recursively (by expanding non-terminals)", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{ "A", choice({
|
||||
seq({ i_sym(0), i_token(11) }),
|
||||
blank() }) },
|
||||
{ "B", choice({
|
||||
seq({ i_sym(1), i_token(12) }),
|
||||
i_token(13) }) },
|
||||
}, {}, {}, {}};
|
||||
{
|
||||
"A",
|
||||
choice({
|
||||
seq({ i_sym(0), i_token(11) }),
|
||||
blank()
|
||||
}),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"B",
|
||||
choice({
|
||||
seq({ i_sym(1), i_token(12) }),
|
||||
i_token(13)
|
||||
}),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
it("terminates for left-recursive rules that can be blank", [&]() {
|
||||
rule = i_sym(0);
|
||||
|
|
|
|||
|
|
@ -48,6 +48,20 @@ class rule_list : public vector<pair<string, rule_ptr>> {
|
|||
vector<pair<string, rule_ptr>>(list) {}
|
||||
};
|
||||
|
||||
// A vector whose equality check against a plain vector is defined here
// explicitly: the two are equal when they have the same length and every
// pair of corresponding elements compares equal with T's own operator==.
// It is constructible from a braced list so it can be written inline as an
// expected value.
template<typename T>
class eq_vector : public vector<T> {
 public:
  // Intentionally not `explicit`, matching how braced lists are passed in.
  eq_vector(const initializer_list<T> &list) : vector<T>(list) {}

  // Element-wise comparison; only `==` on T is required (no `!=`).
  bool operator==(const vector<T> &other) const {
    if (other.size() != this->size())
      return false;

    // Sizes match, so walking `other` in lock-step cannot run off its end.
    auto theirs = other.begin();
    for (const T &mine : *this) {
      if (!(mine == *theirs))
        return false;
      ++theirs;
    }
    return true;
  }
};
|
||||
|
||||
class rule_vector : public vector<rule_ptr> {
|
||||
public:
|
||||
bool operator==(const vector<rule_ptr> &other) const {
|
||||
|
|
|
|||
|
|
@ -6,43 +6,41 @@ namespace tree_sitter {
|
|||
using std::make_shared;
|
||||
using std::set;
|
||||
using std::map;
|
||||
using std::ostream;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
|
||||
namespace rules {
|
||||
rule_ptr character(const set<uint32_t> &ranges) {
|
||||
return character(ranges, true);
|
||||
}
|
||||
// Convenience overload: forwards to the two-argument form with `sign`
// set to true.
rule_ptr character(const set<uint32_t> &ranges) {
  const bool sign = true;
  return character(ranges, sign);
}
|
||||
|
||||
rule_ptr character(const set<uint32_t> &chars, bool sign) {
|
||||
CharacterSet result;
|
||||
if (sign) {
|
||||
for (uint32_t c : chars)
|
||||
result.include(c);
|
||||
} else {
|
||||
result.include_all();
|
||||
for (uint32_t c : chars)
|
||||
result.exclude(c);
|
||||
}
|
||||
return result.copy();
|
||||
// Builds a CharacterSet rule from `chars` and returns a copy of it.
//   sign == true : start empty and include each listed character.
//   sign == false: start from include_all() and exclude each listed one.
rule_ptr character(const set<uint32_t> &chars, bool sign) {
  rules::CharacterSet char_set;

  // For the negated form, everything is included before any exclusion.
  if (!sign)
    char_set.include_all();

  for (uint32_t ch : chars) {
    if (sign)
      char_set.include(ch);
    else
      char_set.exclude(ch);
  }

  return char_set.copy();
}
|
||||
|
||||
rule_ptr i_sym(size_t index) {
|
||||
return make_shared<rules::Symbol>(index);
|
||||
}
|
||||
// Creates a Symbol rule constructed from `index` alone.
rule_ptr i_sym(size_t index) {
  rule_ptr symbol = make_shared<rules::Symbol>(index);
  return symbol;
}
|
||||
|
||||
rule_ptr i_aux_sym(size_t index) {
|
||||
return make_shared<rules::Symbol>(index, SymbolOptionAuxiliary);
|
||||
}
|
||||
// Creates a Symbol rule constructed from `index` and `true`.
// NOTE(review): the second Symbol argument is presumably the "is token"
// flag -- confirm against the Symbol constructor.
rule_ptr i_token(size_t index) {
  rule_ptr symbol = make_shared<rules::Symbol>(index, true);
  return symbol;
}
|
||||
|
||||
rule_ptr i_token(size_t index) {
|
||||
return make_shared<rules::Symbol>(index, SymbolOptionToken);
|
||||
}
|
||||
// Wraps `rule` in a Metadata rule carrying the given key/value map.
rule_ptr metadata(rule_ptr rule, map<rules::MetadataKey, int> values) {
  rule_ptr wrapped = make_shared<rules::Metadata>(rule, values);
  return wrapped;
}
|
||||
|
||||
rule_ptr i_aux_token(size_t index) {
|
||||
return make_shared<rules::Symbol>(index, SymbolOption(SymbolOptionAuxiliary|SymbolOptionToken));
|
||||
}
|
||||
|
||||
rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
|
||||
return make_shared<Metadata>(rule, values);
|
||||
}
|
||||
// Two rule entries are equal when their names match, the rules they point
// to compare equal (via the rule's own operator==), and their types match.
// Both `rule` pointers are dereferenced, so they must be non-null whenever
// the names are equal.
bool operator==(const RuleEntry &left, const RuleEntry &right) {
  if (!(left.name == right.name))
    return false;

  // Compare the pointed-to rules, not the pointers themselves.
  if (!left.rule->operator==(*right.rule))
    return false;

  return left.type == right.type;
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,17 +4,16 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
rule_ptr metadata(rule_ptr, std::map<MetadataKey, int>);
|
||||
rule_ptr character(const std::set<uint32_t> &);
|
||||
rule_ptr character(const std::set<uint32_t> &, bool sign);
|
||||
rule_ptr i_sym(size_t index);
|
||||
rule_ptr i_aux_sym(size_t index);
|
||||
rule_ptr i_token(size_t index);
|
||||
rule_ptr i_aux_token(size_t index);
|
||||
}
|
||||
rule_ptr metadata(rule_ptr, std::map<rules::MetadataKey, int>);
|
||||
rule_ptr character(const std::set<uint32_t> &);
|
||||
rule_ptr character(const std::set<uint32_t> &, bool sign);
|
||||
rule_ptr i_sym(size_t index);
|
||||
rule_ptr i_token(size_t index);
|
||||
|
||||
bool operator==(const RuleEntry &left, const RuleEntry &right);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include <map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
using std::cout;
|
||||
|
||||
|
|
@ -83,4 +84,16 @@ inline std::ostream& operator<<(std::ostream &stream, const std::pair<T1, T2> &p
|
|||
|
||||
} // namespace std
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::ostream;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
|
||||
// Writes a RuleEntry as "{name, rule, type}", where the type is rendered
// through to_string. Returns the stream so calls can be chained.
inline ostream &operator<<(ostream &stream, const RuleEntry &entry) {
  stream << string("{") << entry.name;
  stream << string(", ") << entry.rule;
  stream << string(", ") << to_string(entry.type);
  return stream << string("}");
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
|
|
@ -11,131 +11,223 @@ using prepare_grammar::expand_repeats;
|
|||
describe("expand_repeats", []() {
|
||||
it("replaces repeat rules with pairs of recursive rules", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{ "rule0", repeat(i_token(0)) },
|
||||
}, {}, {}, {}};
|
||||
{
|
||||
"rule0",
|
||||
repeat(i_token(0)),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", choice({ i_aux_sym(0), blank() }) },
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", seq({
|
||||
i_token(0),
|
||||
choice({ i_aux_sym(0), blank() }) }) },
|
||||
AssertThat(match.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"rule0",
|
||||
choice({ i_sym(1), blank() }),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule0_repeat1",
|
||||
seq({
|
||||
i_token(0),
|
||||
choice({ i_sym(1), blank() })
|
||||
}),
|
||||
RuleEntryTypeHidden
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
||||
it("replaces repeats inside of sequences", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{ "rule0", seq({
|
||||
i_token(10),
|
||||
repeat(i_token(11)) }) },
|
||||
}, {}, {}, {}};
|
||||
{
|
||||
"rule0",
|
||||
seq({
|
||||
i_token(10),
|
||||
repeat(i_token(11)),
|
||||
}),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", seq({
|
||||
i_token(10),
|
||||
choice({ i_aux_sym(0), blank() }) }) },
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", seq({
|
||||
i_token(11),
|
||||
choice({ i_aux_sym(0), blank() }) }) },
|
||||
AssertThat(match.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"rule0",
|
||||
seq({
|
||||
i_token(10),
|
||||
choice({ i_sym(1), blank() })
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule0_repeat1",
|
||||
seq({
|
||||
i_token(11),
|
||||
choice({ i_sym(1), blank() })
|
||||
}),
|
||||
RuleEntryTypeHidden
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
||||
it("replaces repeats inside of choices", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{ "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
|
||||
}, {}, {}, {}};
|
||||
{
|
||||
"rule0",
|
||||
choice({ i_token(10), repeat(i_token(11)) }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", choice({ i_token(10), i_aux_sym(0), blank() }) },
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", seq({
|
||||
i_token(11),
|
||||
choice({ i_aux_sym(0), blank() }) }) },
|
||||
AssertThat(match.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"rule0",
|
||||
choice({ i_token(10), i_sym(1), blank() }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule0_repeat1",
|
||||
seq({
|
||||
i_token(11),
|
||||
choice({ i_sym(1), blank() }),
|
||||
}),
|
||||
RuleEntryTypeHidden
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
||||
it("does not create redundant auxiliary rules", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{ "rule0", choice({
|
||||
seq({ i_token(1), repeat(i_token(4)) }),
|
||||
seq({ i_token(2), repeat(i_token(4)) }) }) },
|
||||
{ "rule1", seq({ i_token(3), repeat(i_token(4)) }) },
|
||||
}, {}, {}, {}};
|
||||
{
|
||||
"rule0",
|
||||
choice({
|
||||
seq({ i_token(1), repeat(i_token(4)) }),
|
||||
seq({ i_token(2), repeat(i_token(4)) }),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule1",
|
||||
seq({ i_token(3), repeat(i_token(4)) }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", choice({
|
||||
seq({ i_token(1), choice({ i_aux_sym(0), blank() }) }),
|
||||
seq({ i_token(2), choice({ i_aux_sym(0), blank() }) }) }) },
|
||||
{ "rule1", seq({ i_token(3), choice({ i_aux_sym(0), blank() }) }) },
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", seq({
|
||||
i_token(4),
|
||||
choice({ i_aux_sym(0), blank() }) }) },
|
||||
AssertThat(match.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"rule0",
|
||||
choice({
|
||||
seq({ i_token(1), choice({ i_sym(2), blank() }) }),
|
||||
seq({ i_token(2), choice({ i_sym(2), blank() }) }),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule1",
|
||||
seq({ i_token(3), choice({ i_sym(2), blank() }) }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule0_repeat1",
|
||||
seq({
|
||||
i_token(4),
|
||||
choice({ i_sym(2), blank() }),
|
||||
}),
|
||||
RuleEntryTypeHidden
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
||||
it("can replace multiple repeats in the same rule", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{ "rule0", seq({
|
||||
repeat(i_token(10)),
|
||||
repeat(i_token(11)) }) },
|
||||
}, {}, {}, {}};
|
||||
{
|
||||
"rule0",
|
||||
seq({
|
||||
repeat(i_token(10)),
|
||||
repeat(i_token(11)),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", seq({
|
||||
choice({ i_aux_sym(0), blank() }),
|
||||
choice({ i_aux_sym(1), blank() }) }) },
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", seq({
|
||||
i_token(10),
|
||||
choice({ i_aux_sym(0), blank() }) }) },
|
||||
{ "rule0_repeat1", seq({
|
||||
i_token(11),
|
||||
choice({ i_aux_sym(1), blank() }) }) },
|
||||
AssertThat(match.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"rule0",
|
||||
seq({
|
||||
choice({ i_sym(1), blank() }),
|
||||
choice({ i_sym(2), blank() }),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule0_repeat1",
|
||||
seq({
|
||||
i_token(10),
|
||||
choice({ i_sym(1), blank() }),
|
||||
}),
|
||||
RuleEntryTypeHidden
|
||||
},
|
||||
{
|
||||
"rule0_repeat2",
|
||||
seq({
|
||||
i_token(11),
|
||||
choice({ i_sym(2), blank() }),
|
||||
}),
|
||||
RuleEntryTypeHidden
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
||||
it("can replace repeats in multiple rules", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{ "rule0", repeat(i_token(10)) },
|
||||
{ "rule1", repeat(i_token(11)) },
|
||||
}, {}, {}, {}};
|
||||
{
|
||||
"rule0",
|
||||
repeat(i_token(10)),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule1",
|
||||
repeat(i_token(11)),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(rule_list({
|
||||
{ "rule0", choice({ i_aux_sym(0), blank() }) },
|
||||
{ "rule1", choice({ i_aux_sym(1), blank() }) },
|
||||
})));
|
||||
|
||||
AssertThat(match.aux_rules, Equals(rule_list({
|
||||
{ "rule0_repeat0", seq({
|
||||
i_token(10),
|
||||
choice({ i_aux_sym(0), blank() }) }) },
|
||||
{ "rule1_repeat0", seq({
|
||||
i_token(11),
|
||||
choice({ i_aux_sym(1), blank() }) }) },
|
||||
AssertThat(match.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"rule0",
|
||||
choice({ i_sym(2), blank() }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule1",
|
||||
choice({ i_sym(3), blank() }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule0_repeat1",
|
||||
seq({
|
||||
i_token(10),
|
||||
choice({ i_sym(2), blank() }),
|
||||
}),
|
||||
RuleEntryTypeHidden
|
||||
},
|
||||
{
|
||||
"rule1_repeat1",
|
||||
seq({
|
||||
i_token(11),
|
||||
choice({ i_sym(3), blank() })
|
||||
}),
|
||||
RuleEntryTypeHidden
|
||||
},
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
#include "compiler/prepare_grammar/expand_tokens.h"
|
||||
|
||||
|
|
@ -12,36 +12,64 @@ describe("expand_tokens", []() {
|
|||
describe("string rules", [&]() {
|
||||
it("replaces strings with sequences of character sets", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
{ "rule_A", seq({
|
||||
i_sym(10),
|
||||
str("xyz"),
|
||||
i_sym(11) }) },
|
||||
}, {}, {}};
|
||||
{
|
||||
"rule_A",
|
||||
seq({
|
||||
i_sym(10),
|
||||
str("xyz"),
|
||||
i_sym(11),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({
|
||||
i_sym(10),
|
||||
token(prec(1, seq({ character({ 'x' }), character({ 'y' }), character({ 'z' }) }))),
|
||||
i_sym(11) }) },
|
||||
AssertThat(result.first.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
seq({
|
||||
i_sym(10),
|
||||
metadata(seq({
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
}), {
|
||||
{PRECEDENCE, 1},
|
||||
{IS_TOKEN, 1},
|
||||
}),
|
||||
i_sym(11),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles strings containing non-ASCII UTF8 characters", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
// α β
|
||||
{ "rule_A", str("\u03B1 \u03B2") },
|
||||
}, {}, {}};
|
||||
{
|
||||
"rule_A",
|
||||
str("\u03B1 \u03B2"), // α β
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", token(prec(1, seq({
|
||||
character({ 945 }),
|
||||
character({ ' ' }),
|
||||
character({ 946 }) }))) }
|
||||
AssertThat(result.first.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
metadata(seq({
|
||||
character({ 945 }),
|
||||
character({ ' ' }),
|
||||
character({ 946 }),
|
||||
}), {
|
||||
{PRECEDENCE, 1},
|
||||
{IS_TOKEN, 1},
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
}
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
@ -49,43 +77,65 @@ describe("expand_tokens", []() {
|
|||
describe("regexp rules", [&]() {
|
||||
it("replaces regexps with the equivalent rule tree", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
{ "rule_A", seq({
|
||||
i_sym(10),
|
||||
pattern("x*"),
|
||||
i_sym(11) }) },
|
||||
}, {}, {}};
|
||||
{
|
||||
"rule_A",
|
||||
seq({
|
||||
i_sym(10),
|
||||
pattern("x*"),
|
||||
i_sym(11),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", seq({
|
||||
i_sym(10),
|
||||
repeat(character({ 'x' })),
|
||||
i_sym(11) }) },
|
||||
AssertThat(result.first.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
seq({
|
||||
i_sym(10),
|
||||
repeat(character({ 'x' })),
|
||||
i_sym(11),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles regexps containing non-ASCII UTF8 characters", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
// [^α-δ]
|
||||
{ "rule_A", pattern("[^\u03B1-\u03B4]*") },
|
||||
}, {}, {}};
|
||||
{
|
||||
"rule_A",
|
||||
pattern("[^\u03B1-\u03B4]*"), // [^α-δ]
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "rule_A", repeat(character({ 945, 946, 947, 948 }, false)) }
|
||||
AssertThat(result.first.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
repeat(character({ 945, 946, 947, 948 }, false)),
|
||||
RuleEntryTypeNamed
|
||||
}
|
||||
})));
|
||||
});
|
||||
|
||||
it("returns an error when the grammar contains an invalid regex", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
{ "rule_A", seq({
|
||||
pattern("("),
|
||||
str("xyz"),
|
||||
pattern("[") }) },
|
||||
}, {}, {}};
|
||||
{
|
||||
"rule_A",
|
||||
seq({
|
||||
pattern("("),
|
||||
str("xyz"),
|
||||
pattern("["),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
|
@ -12,271 +11,301 @@ using prepare_grammar::extract_tokens;
|
|||
using prepare_grammar::InternedGrammar;
|
||||
|
||||
describe("extract_tokens", []() {
|
||||
it("moves string rules into the lexical grammar", [&]() {
|
||||
it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
|
||||
{
|
||||
"rule_A",
|
||||
repeat(seq({
|
||||
str("ab"),
|
||||
pattern("cd*"),
|
||||
choice({
|
||||
i_sym(1),
|
||||
i_sym(2),
|
||||
token(repeat(choice({ str("ef"), str("gh") }))),
|
||||
}),
|
||||
})),
|
||||
},
|
||||
{
|
||||
"rule_B",
|
||||
pattern("ij+"),
|
||||
},
|
||||
{
|
||||
"rule_C",
|
||||
choice({ str("kl"), blank() }),
|
||||
},
|
||||
{
|
||||
"rule_D",
|
||||
repeat(i_sym(3))
|
||||
}
|
||||
}, {}, {}});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
SyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
const GrammarError *error = get<2>(result);
|
||||
|
||||
AssertThat(error, Equals<const GrammarError *>(nullptr));
|
||||
|
||||
AssertThat(syntax_grammar.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
repeat(seq({
|
||||
|
||||
// This string is now the first token in the lexical grammar.
|
||||
i_token(0),
|
||||
|
||||
// This pattern is now the second rule in the lexical grammar.
|
||||
i_token(1),
|
||||
|
||||
choice({
|
||||
// Rule 1, which this symbol pointed to, has been moved to the
|
||||
// lexical grammar.
|
||||
i_token(3),
|
||||
|
||||
// This symbol's index has been decremented, because a previous rule
|
||||
// was moved to the lexical grammar.
|
||||
i_sym(1),
|
||||
|
||||
// This token rule is now the third rule in the lexical grammar.
|
||||
i_token(2),
|
||||
}),
|
||||
})),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule_C",
|
||||
choice({ i_token(4), blank() }),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule_D",
|
||||
repeat(i_sym(2)),
|
||||
RuleEntryTypeNamed,
|
||||
}
|
||||
})));
|
||||
AssertThat(get<0>(result).aux_rules, IsEmpty())
|
||||
|
||||
AssertThat(get<1>(result).rules, IsEmpty())
|
||||
AssertThat(get<1>(result).aux_rules, Equals(rule_list({
|
||||
{ "'ab'", str("ab") },
|
||||
AssertThat(lexical_grammar.rules, Equals(eq_vector<RuleEntry>({
|
||||
|
||||
// Strings become anonymous rules.
|
||||
{
|
||||
"ab",
|
||||
str("ab"),
|
||||
RuleEntryTypeAnonymous,
|
||||
},
|
||||
|
||||
// Patterns become hidden rules.
|
||||
{
|
||||
"/cd*/",
|
||||
pattern("cd*"),
|
||||
RuleEntryTypeHidden,
|
||||
},
|
||||
|
||||
// Rules marked as tokens become hidden rules.
|
||||
{
|
||||
"/(ef|gh)*/",
|
||||
repeat(choice({ str("ef"), str("gh") })),
|
||||
RuleEntryTypeHidden,
|
||||
},
|
||||
|
||||
// This named rule was moved wholesale to the lexical grammar.
|
||||
{
|
||||
"rule_B",
|
||||
pattern("ij+"),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
|
||||
// Strings become anonymous rules.
|
||||
{
|
||||
"kl",
|
||||
str("kl"),
|
||||
RuleEntryTypeAnonymous,
|
||||
},
|
||||
|
||||
})));
|
||||
});
|
||||
|
||||
it("moves pattern rules into the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
|
||||
}, {}, {}});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
})));
|
||||
AssertThat(get<0>(result).aux_rules, IsEmpty())
|
||||
|
||||
AssertThat(get<1>(result).rules, IsEmpty())
|
||||
AssertThat(get<1>(result).aux_rules, Equals(rule_list({
|
||||
{ "/a+/", pattern("a+") },
|
||||
})));
|
||||
});
|
||||
|
||||
it("moves other rules marked as tokens into the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", seq({
|
||||
token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
|
||||
i_sym(0) }) }
|
||||
}, {}, {}});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
})));
|
||||
AssertThat(get<0>(result).aux_rules, IsEmpty())
|
||||
|
||||
AssertThat(get<1>(result).rules, IsEmpty())
|
||||
AssertThat(get<1>(result).aux_rules, Equals(rule_list({
|
||||
{ "(seq /./ (choice 'a' 'b'))", token(seq({ pattern("."), choice({ str("a"), str("b") }) })) },
|
||||
})));
|
||||
});
|
||||
|
||||
it("does not move blank rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", choice({ i_sym(0), blank() }) },
|
||||
}, {}, {}});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", choice({ i_sym(0), blank() }) },
|
||||
})));
|
||||
AssertThat(get<0>(result).aux_rules, IsEmpty())
|
||||
|
||||
AssertThat(get<1>(result).rules, IsEmpty())
|
||||
AssertThat(get<1>(result).aux_rules, IsEmpty())
|
||||
});
|
||||
|
||||
it("does not create duplicate tokens in the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
|
||||
{
|
||||
"rule_A",
|
||||
seq({
|
||||
str("ab"),
|
||||
i_sym(0),
|
||||
str("ab"),
|
||||
})
|
||||
},
|
||||
}, {}, {}});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) }
|
||||
})));
|
||||
AssertThat(get<0>(result).aux_rules, IsEmpty())
|
||||
SyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
||||
AssertThat(get<1>(result).rules, IsEmpty())
|
||||
AssertThat(get<1>(result).aux_rules, Equals(rule_list({
|
||||
{ "'ab'", str("ab") },
|
||||
AssertThat(syntax_grammar.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
seq({ i_token(0), i_sym(0), i_token(0) }),
|
||||
RuleEntryTypeNamed
|
||||
}
|
||||
})));
|
||||
|
||||
AssertThat(lexical_grammar.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"ab",
|
||||
str("ab"),
|
||||
RuleEntryTypeAnonymous
|
||||
},
|
||||
})))
|
||||
});
|
||||
|
||||
it("updates the grammar's expected conflict symbols", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{
|
||||
{ "rule_A", str("ok") },
|
||||
{ "rule_B", repeat(i_sym(0)) },
|
||||
{ "rule_C", repeat(seq({ i_sym(0), i_sym(0) })) },
|
||||
"rule_A",
|
||||
seq({ i_sym(1), str("ab") })
|
||||
},
|
||||
{ str(" ") },
|
||||
{ { Symbol(1), Symbol(2) } }
|
||||
});
|
||||
{
|
||||
"rule_B",
|
||||
str("cd")
|
||||
},
|
||||
{
|
||||
"rule_C",
|
||||
seq({ str("ef"), str("cd") })
|
||||
},
|
||||
}, {}, {}});
|
||||
|
||||
AssertThat(get<0>(result).rules.size(), Equals<size_t>(2));
|
||||
AssertThat(get<1>(result).rules.size(), Equals<size_t>(1));
|
||||
AssertThat(get<0>(result).expected_conflicts, Equals(set<set<Symbol>>({
|
||||
SyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
||||
AssertThat(syntax_grammar.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
seq({ i_sym(1), i_token(0) }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule_B",
|
||||
i_token(1),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule_C",
|
||||
seq({ i_token(2), i_token(1) }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
})));
|
||||
|
||||
AssertThat(lexical_grammar.rules, Equals(eq_vector<RuleEntry>({
|
||||
{
|
||||
"ab",
|
||||
str("ab"),
|
||||
RuleEntryTypeAnonymous
|
||||
},
|
||||
{
|
||||
"cd",
|
||||
str("cd"),
|
||||
RuleEntryTypeAnonymous
|
||||
},
|
||||
{
|
||||
"ef",
|
||||
str("ef"),
|
||||
RuleEntryTypeAnonymous
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
||||
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{
|
||||
"rule_A",
|
||||
str("ok")
|
||||
},
|
||||
{
|
||||
"rule_B",
|
||||
repeat(i_sym(0))
|
||||
},
|
||||
{
|
||||
"rule_C",
|
||||
repeat(seq({ i_sym(0), i_sym(0) }))
|
||||
},
|
||||
}, { str(" ") }, { { Symbol(1), Symbol(2) } }});
|
||||
|
||||
SyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
|
||||
AssertThat(syntax_grammar.rules.size(), Equals<size_t>(2));
|
||||
AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
|
||||
{ Symbol(0), Symbol(1) },
|
||||
})));
|
||||
});
|
||||
|
||||
describe("when an entire grammar rule is a token", [&]() {
|
||||
it("moves the rule the lexical grammar and updates referencing symbols", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", i_sym(1) },
|
||||
{ "rule_B", pattern("a|b") },
|
||||
{ "rule_C", token(seq({ str("a"), str("b") })) },
|
||||
}, {}, {}});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", i_token(0) }
|
||||
})));
|
||||
AssertThat(get<0>(result).aux_rules, IsEmpty());
|
||||
|
||||
AssertThat(get<1>(result).rules, Equals(rule_list({
|
||||
{ "rule_B", pattern("a|b") },
|
||||
{ "rule_C", token(seq({ str("a"), str("b") })) },
|
||||
})));
|
||||
|
||||
// TODO put back
|
||||
// AssertThat(get<1>(result).aux_rules, IsEmpty());
|
||||
});
|
||||
|
||||
it("updates symbols whose indices need to change due to deleted rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", i_sym(0) },
|
||||
{ "rule_C", i_sym(1) },
|
||||
}, {}, {}});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_B", i_token(0) },
|
||||
{ "rule_C", i_sym(0) },
|
||||
})));
|
||||
AssertThat(get<0>(result).aux_rules, IsEmpty());
|
||||
|
||||
AssertThat(get<1>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", str("ab") },
|
||||
})));
|
||||
|
||||
// TODO put back
|
||||
// AssertThat(get<1>(result).aux_rules, IsEmpty());
|
||||
});
|
||||
|
||||
it("does not move the rule if its content is used elsewhere in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", seq({ i_sym(1), str("ab") }) },
|
||||
{ "rule_B", str("cd") },
|
||||
{ "rule_C", seq({ str("ef"), str("cd") }) },
|
||||
}, {}, {}});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_sym(1), i_aux_token(0) }) },
|
||||
{ "rule_B", i_aux_token(1) },
|
||||
{ "rule_C", seq({ i_aux_token(2), i_aux_token(1) }) },
|
||||
})));
|
||||
AssertThat(get<0>(result).aux_rules, IsEmpty());
|
||||
|
||||
AssertThat(get<1>(result).rules, IsEmpty())
|
||||
AssertThat(get<1>(result).aux_rules, Equals(rule_list({
|
||||
{ "'ab'", str("ab") },
|
||||
{ "'cd'", str("cd") },
|
||||
{ "'ef'", str("ef") },
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
describe("handling ubiquitous tokens", [&]() {
|
||||
describe("ubiquitous tokens that are not symbols", [&]() {
|
||||
it("adds them to the lexical grammar's separators", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", str("x") },
|
||||
}, {
|
||||
pattern("\\s+"),
|
||||
str("y"),
|
||||
}, {}});
|
||||
it("adds inline ubiquitous tokens to the lexical grammar's separators", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", str("x") },
|
||||
}, {
|
||||
pattern("\\s+"),
|
||||
str("y"),
|
||||
}, {}});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
|
||||
AssertThat(get<1>(result).separators, Equals(rule_vector({
|
||||
pattern("\\s+"),
|
||||
str("y"),
|
||||
})));
|
||||
AssertThat(get<1>(result).separators, Equals(rule_vector({
|
||||
pattern("\\s+"),
|
||||
str("y"),
|
||||
})));
|
||||
|
||||
AssertThat(get<0>(result).ubiquitous_tokens, IsEmpty());
|
||||
});
|
||||
AssertThat(get<0>(result).ubiquitous_tokens, IsEmpty());
|
||||
});
|
||||
|
||||
describe("ubiquitous tokens that point to moved rules", [&]() {
|
||||
it("updates them according to the new symbol numbers", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{ {
|
||||
{ "rule_A", seq({ str("w"), i_sym(1) }) },
|
||||
{ "rule_B", str("x") },
|
||||
{ "rule_C", str("y") },
|
||||
}, {
|
||||
i_sym(2),
|
||||
}, {}});
|
||||
it("updates ubiquitous symbols according to the new symbol numbers", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{ {
|
||||
{ "rule_A", seq({ str("w"), str("x"), i_sym(1) }) },
|
||||
{ "rule_B", str("y") },
|
||||
{ "rule_C", str("z") },
|
||||
}, {
|
||||
i_sym(2),
|
||||
}, {}});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
|
||||
AssertThat(get<0>(result).ubiquitous_tokens, Equals(set<Symbol>({
|
||||
{ Symbol(1, SymbolOptionToken) },
|
||||
})));
|
||||
AssertThat(get<0>(result).ubiquitous_tokens, Equals(set<Symbol>({
|
||||
{ Symbol(3, true) },
|
||||
})));
|
||||
|
||||
AssertThat(get<1>(result).separators, IsEmpty());
|
||||
});
|
||||
AssertThat(get<1>(result).separators, IsEmpty());
|
||||
});
|
||||
|
||||
describe("ubiquitous tokens that are visible", [&]() {
|
||||
it("preserves them in the syntactic grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", str("bc") },
|
||||
}, { i_sym(1) }, {}});
|
||||
it("returns an error if any ubiquitous tokens are non-token symbols", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{
|
||||
"rule_A",
|
||||
seq({ str("x"), i_sym(1) }),
|
||||
},
|
||||
{
|
||||
"rule_B",
|
||||
seq({ str("y"), str("z") })
|
||||
},
|
||||
}, { i_sym(1) }, {}});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
|
||||
AssertThat(get<0>(result).ubiquitous_tokens, Equals(set<Symbol>({
|
||||
Symbol(1, SymbolOptionToken)
|
||||
})));
|
||||
|
||||
AssertThat(get<1>(result).separators, IsEmpty());
|
||||
});
|
||||
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), EqualsPointer(
|
||||
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: rule_B")));
|
||||
});
|
||||
|
||||
describe("ubiquitous tokens that are used in other grammar rules", [&]() {
|
||||
it("preserves them in the syntactic grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", seq({ i_sym(1), str("ab") }) },
|
||||
{ "_rule_B", str("bc") },
|
||||
}, { i_sym(1) }, {}});
|
||||
it("returns an error if any ubiquitous tokens are non-token rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{
|
||||
"rule_A",
|
||||
str("x")
|
||||
},
|
||||
{
|
||||
"rule_B",
|
||||
str("y")
|
||||
},
|
||||
}, { choice({ i_sym(1), blank() }) }, {}});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
|
||||
AssertThat(get<0>(result).ubiquitous_tokens, Equals(set<Symbol>({
|
||||
Symbol(0, SymbolOptionToken),
|
||||
})));
|
||||
|
||||
AssertThat(get<1>(result).separators, IsEmpty());
|
||||
});
|
||||
});
|
||||
|
||||
describe("ubiquitous tokens that are non-token symbols", [&]() {
|
||||
it("returns an error", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", seq({ str("x"), i_sym(1) }), },
|
||||
{ "rule_B", seq({ str("y"), str("z") }) },
|
||||
}, { i_sym(1) }, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), EqualsPointer(
|
||||
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: rule_B")));
|
||||
});
|
||||
});
|
||||
|
||||
describe("ubiquitous tokens that are not symbols", [&]() {
|
||||
it("returns an error", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", str("x") },
|
||||
{ "rule_B", str("y") },
|
||||
}, { choice({ i_sym(1), blank() }) }, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), EqualsPointer(
|
||||
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: (choice (sym 1) (blank))")));
|
||||
});
|
||||
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), EqualsPointer(
|
||||
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: (choice (sym 1) (blank))")));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue