Remove custom LexicalGrammar and SyntaxGrammar constructors
parent 5d41d23ab1
commit 31b2db12d2
15 changed files with 101 additions and 151 deletions
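Every call-site change below follows one pattern: with the user-declared constructors removed and all members public and non-const, SyntaxGrammar and LexicalGrammar become C++11 aggregates, so constructor calls like Grammar(args...) turn into brace initialization Grammar{args...}, and empty-set arguments such as set<set<Symbol>>() shrink to {}. A minimal sketch of the idea, using a hypothetical Grammar type rather than the project's real classes:

#include <string>
#include <utility>
#include <vector>

// No user-declared constructors, all members public and non-const:
// this is an aggregate, so members can be brace-initialized in order.
struct Grammar {
  std::vector<std::pair<std::string, int>> rules;
  std::vector<std::pair<std::string, int>> aux_rules;
  std::vector<int> separators;
};

int main() {
  // Before this commit's pattern: Grammar({ { "rule0", 1 } }, {}) via a constructor.
  // After: aggregate initialization, one braced initializer per member.
  Grammar grammar{{ { "rule0", 1 } }, {}, {}};
  return grammar.rules.size() == 1 ? 0 : 1;
}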
@@ -11,16 +11,16 @@ using namespace build_tables;
 START_TEST

 describe("build_parse_table", []() {
-  SyntaxGrammar parse_grammar({
+  SyntaxGrammar parse_grammar{{
     { "rule0", choice({ i_sym(1), i_sym(2) }) },
     { "rule1", i_token(0) },
     { "rule2", i_token(1) },
-  }, {}, { Symbol(2, SymbolOptionToken) }, set<set<Symbol>>());
+  }, {}, { Symbol(2, SymbolOptionToken) }, {}};

-  LexicalGrammar lex_grammar({
+  LexicalGrammar lex_grammar{{
     { "token0", pattern("[a-c]") },
     { "token1", pattern("[b-d]") },
-  }, {});
+  }, {}, {}};

   it("first looks for the start rule and its item set closure", [&]() {
     auto result = build_parse_table(parse_grammar, lex_grammar);
@@ -9,7 +9,7 @@ using namespace rules;
 START_TEST

 describe("first_symbols", []() {
-  const SyntaxGrammar null_grammar;
+  SyntaxGrammar null_grammar;

   describe("for a sequence AB", [&]() {
     it("ignores B when A cannot be blank", [&]() {
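The dropped const here is forced by the constructor removal: in C++11, default-initializing a const object requires a user-provided default constructor, so const SyntaxGrammar null_grammar; stops compiling once SyntaxGrammar() is gone. A small sketch of that rule, with a hypothetical aggregate:

struct Aggregate {
  int value;  // implicitly defaulted constructor, not user-provided
};

int main() {
  Aggregate a;          // OK: plain default-initialization
  // const Aggregate b; // ill-formed: const default-init needs a user-provided ctor
  const Aggregate c{};  // OK: value-initialization still works for const objects
  (void)a; (void)c;
  return 0;
}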
@@ -40,12 +40,12 @@ describe("first_symbols", []() {
         i_token(1) }),
       i_sym(0) });

-    SyntaxGrammar grammar({
+    SyntaxGrammar grammar{{
       { "rule0", seq({
         i_token(2),
         i_token(3),
         i_token(4) }) }
-    }, {});
+    }, {}, {}, {}};

     AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
       Symbol(0),
@@ -59,11 +59,11 @@ describe("first_symbols", []() {
       i_sym(0),
       i_token(1) });

-    SyntaxGrammar grammar({
+    SyntaxGrammar grammar{{
       { "rule0", choice({
         i_token(0),
         blank() }) }
-    }, {});
+    }, {}, {}, {}};

     AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
       Symbol(0),
@@ -75,12 +75,12 @@ describe("first_symbols", []() {

   describe("when there are left-recursive rules", [&]() {
     it("terminates", [&]() {
-      SyntaxGrammar grammar({
+      SyntaxGrammar grammar{{
        { "rule0", choice({
          seq({ i_sym(0), i_token(10) }),
          i_token(11),
        }) },
-      }, {});
+      }, {}, {}, {}};

       auto rule = i_sym(0);

@@ -9,14 +9,14 @@ using namespace rules;
 START_TEST

 describe("item_set_closure", []() {
-  SyntaxGrammar grammar({
+  SyntaxGrammar grammar{{
     { "E", seq({
       i_sym(1),
       i_token(11) }) },
     { "T", seq({
       i_token(12),
       i_token(13) }) },
-  }, {});
+  }, {}, {}, {}};

   it("adds items at the beginnings of referenced rules", [&]() {
     ParseItemSet item_set = item_set_closure(
@@ -42,10 +42,10 @@ describe("char_transitions(LexItemSet)", []() {
 });

 describe("sym_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
-  SyntaxGrammar grammar({
-    { "A", blank() },
-    { "B", i_token(21) },
-  }, {}, set<Symbol>(), set<set<Symbol>>());
+  SyntaxGrammar grammar{{
+    { "A", blank() },
+    { "B", i_token(21) },
+  }, {}, {}, {}};

   it("computes the closure of the new item sets", [&]() {
     ParseItemSet set1({
@@ -10,10 +10,10 @@ using namespace build_tables;
 START_TEST

 describe("LexConflictManager", []() {
-  LexicalGrammar lexical_grammar({
+  LexicalGrammar lexical_grammar{{
     { "other_token", pattern("[a-b]") },
     { "lookahead_token", pattern("[c-d]") },
-  }, {});
+  }, {}, {}};

   LexConflictManager conflict_manager(lexical_grammar);

@@ -10,13 +10,13 @@ using namespace build_tables;
 START_TEST

 describe("ParseConflictManager", []() {
-  SyntaxGrammar syntax_grammar({
+  SyntaxGrammar syntax_grammar{{
     { "in_progress_rule1", i_token(0) },
     { "in_progress_rule2", i_token(0) },
     { "reduced_rule", i_token(0) },
     { "other_rule1", i_token(0) },
     { "other_rule2", i_token(0) },
-  }, {}, { Symbol(2, SymbolOptionToken) }, set<set<Symbol>>());
+  }, {}, { Symbol(2, SymbolOptionToken) }, {}};

   pair<bool, ConflictType> result;
   Symbol sym1(0);
@@ -56,14 +56,14 @@ describe("rule_can_be_blank", [&]() {
 });

 describe("checking recursively (by expanding non-terminals)", [&]() {
-  SyntaxGrammar grammar({
-    { "A", choice({
-      seq({ i_sym(0), i_token(11) }),
-      blank() }) },
-    { "B", choice({
-      seq({ i_sym(1), i_token(12) }),
-      i_token(13) }) },
-  }, {}, set<Symbol>(), set<set<Symbol>>());
+  SyntaxGrammar grammar{{
+    { "A", choice({
+      seq({ i_sym(0), i_token(11) }),
+      blank() }) },
+    { "B", choice({
+      seq({ i_sym(1), i_token(12) }),
+      i_token(13) }) },
+  }, {}, {}, {}};

   it("terminates for left-recursive rules that can be blank", [&]() {
     rule = i_sym(0);
@@ -10,9 +10,9 @@ using prepare_grammar::expand_repeats;

 describe("expand_repeats", []() {
   it("replaces repeat rules with pairs of recursive rules", [&]() {
-    SyntaxGrammar grammar({
-      { "rule0", repeat(i_token(0)) },
-    }, {}, set<Symbol>(), set<set<Symbol>>());
+    SyntaxGrammar grammar{{
+      { "rule0", repeat(i_token(0)) },
+    }, {}, {}, {}};

     auto match = expand_repeats(grammar);

@@ -28,11 +28,11 @@ describe("expand_repeats", []() {
   });

   it("replaces repeats inside of sequences", [&]() {
-    SyntaxGrammar grammar({
-      { "rule0", seq({
-        i_token(10),
-        repeat(i_token(11)) }) },
-    }, {}, set<Symbol>(), set<set<Symbol>>());
+    SyntaxGrammar grammar{{
+      { "rule0", seq({
+        i_token(10),
+        repeat(i_token(11)) }) },
+    }, {}, {}, {}};

     auto match = expand_repeats(grammar);

@@ -50,9 +50,9 @@ describe("expand_repeats", []() {
   });

   it("replaces repeats inside of choices", [&]() {
-    SyntaxGrammar grammar({
-      { "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
-    }, {}, set<Symbol>(), set<set<Symbol>>());
+    SyntaxGrammar grammar{{
+      { "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
+    }, {}, {}, {}};

     auto match = expand_repeats(grammar);

@@ -68,12 +68,12 @@ describe("expand_repeats", []() {
   });

   it("does not create redundant auxiliary rules", [&]() {
-    SyntaxGrammar grammar({
-      { "rule0", choice({
-        seq({ i_token(1), repeat(i_token(4)) }),
-        seq({ i_token(2), repeat(i_token(4)) }) }) },
-      { "rule1", seq({ i_token(3), repeat(i_token(4)) }) },
-    }, {}, set<Symbol>(), set<set<Symbol>>());
+    SyntaxGrammar grammar{{
+      { "rule0", choice({
+        seq({ i_token(1), repeat(i_token(4)) }),
+        seq({ i_token(2), repeat(i_token(4)) }) }) },
+      { "rule1", seq({ i_token(3), repeat(i_token(4)) }) },
+    }, {}, {}, {}};

     auto match = expand_repeats(grammar);

@@ -92,11 +92,11 @@ describe("expand_repeats", []() {
   });

   it("can replace multiple repeats in the same rule", [&]() {
-    SyntaxGrammar grammar({
-      { "rule0", seq({
-        repeat(i_token(10)),
-        repeat(i_token(11)) }) },
-    }, {}, set<Symbol>(), set<set<Symbol>>());
+    SyntaxGrammar grammar{{
+      { "rule0", seq({
+        repeat(i_token(10)),
+        repeat(i_token(11)) }) },
+    }, {}, {}, {}};

     auto match = expand_repeats(grammar);

@@ -117,10 +117,10 @@ describe("expand_repeats", []() {
   });

   it("can replace repeats in multiple rules", [&]() {
-    SyntaxGrammar grammar({
-      { "rule0", repeat(i_token(10)) },
-      { "rule1", repeat(i_token(11)) },
-    }, {}, set<Symbol>(), set<set<Symbol>>());
+    SyntaxGrammar grammar{{
+      { "rule0", repeat(i_token(10)) },
+      { "rule1", repeat(i_token(11)) },
+    }, {}, {}, {}};

     auto match = expand_repeats(grammar);

@@ -11,12 +11,12 @@ using prepare_grammar::expand_tokens;
 describe("expand_tokens", []() {
   describe("string rules", [&]() {
     it("replaces strings with sequences of character sets", [&]() {
-      LexicalGrammar grammar({
+      LexicalGrammar grammar{{
         { "rule_A", seq({
           i_sym(10),
           str("xyz"),
           i_sym(11) }) },
-      }, {});
+      }, {}, {}};

       auto result = expand_tokens(grammar);

@@ -30,10 +30,10 @@ describe("expand_tokens", []() {
     });

     it("handles strings containing non-ASCII UTF8 characters", [&]() {
-      LexicalGrammar grammar({
-        // α β
-        { "rule_A", str("\u03B1 \u03B2") },
-      }, {});
+      LexicalGrammar grammar{{
+        // α β
+        { "rule_A", str("\u03B1 \u03B2") },
+      }, {}, {}};

       auto result = expand_tokens(grammar);

@@ -48,12 +48,12 @@ describe("expand_tokens", []() {

   describe("regexp rules", [&]() {
     it("replaces regexps with the equivalent rule tree", [&]() {
-      LexicalGrammar grammar({
-        { "rule_A", seq({
-          i_sym(10),
-          pattern("x*"),
-          i_sym(11) }) },
-      }, {});
+      LexicalGrammar grammar{{
+        { "rule_A", seq({
+          i_sym(10),
+          pattern("x*"),
+          i_sym(11) }) },
+      }, {}, {}};

       auto result = expand_tokens(grammar);

@@ -67,10 +67,10 @@ describe("expand_tokens", []() {
     });

     it("handles regexps containing non-ASCII UTF8 characters", [&]() {
-      LexicalGrammar grammar({
-        // [^α-δ]
-        { "rule_A", pattern("[^\u03B1-\u03B4]*") },
-      }, {});
+      LexicalGrammar grammar{{
+        // [^α-δ]
+        { "rule_A", pattern("[^\u03B1-\u03B4]*") },
+      }, {}, {}};

       auto result = expand_tokens(grammar);

@@ -80,12 +80,12 @@ describe("expand_tokens", []() {
     });

     it("returns an error when the grammar contains an invalid regex", [&]() {
-      LexicalGrammar grammar({
-        { "rule_A", seq({
-          pattern("("),
-          str("xyz"),
-          pattern("[") }) },
-      }, {});
+      LexicalGrammar grammar{{
+        { "rule_A", seq({
+          pattern("("),
+          str("xyz"),
+          pattern("[") }) },
+      }, {}, {}};

       auto result = expand_tokens(grammar);

@@ -11,19 +11,6 @@ using std::pair;
 using std::vector;
 using std::set;

-LexicalGrammar::LexicalGrammar() {}
-
-LexicalGrammar::LexicalGrammar(
-    const vector<pair<string, rules::rule_ptr>> &rules,
-    const vector<pair<string, rules::rule_ptr>> &aux_rules)
-    : rules(rules), aux_rules(aux_rules) {}
-
-LexicalGrammar::LexicalGrammar(
-    const vector<pair<string, rules::rule_ptr>> &rules,
-    const vector<pair<string, rules::rule_ptr>> &aux_rules,
-    const vector<rules::rule_ptr> &separators)
-    : rules(rules), aux_rules(aux_rules), separators(separators) {}
-
 const rules::rule_ptr &LexicalGrammar::rule(const rules::Symbol &symbol) const {
   return symbol.is_auxiliary() ? aux_rules[symbol.index].second
                                : rules[symbol.index].second;
@@ -11,20 +11,11 @@ namespace tree_sitter {

 class LexicalGrammar {
  public:
-  LexicalGrammar();
-  LexicalGrammar(
-      const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
-      const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
-  LexicalGrammar(
-      const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
-      const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
-      const std::vector<rules::rule_ptr> &separators);
-
   const std::string &rule_name(const rules::Symbol &symbol) const;
   const rules::rule_ptr &rule(const rules::Symbol &symbol) const;

-  const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
-  const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
+  std::vector<std::pair<std::string, rules::rule_ptr>> rules;
+  std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
   std::vector<rules::rule_ptr> separators;
 };

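Making rules and aux_rules non-const is what enables the default-construct-then-assign style adopted in expand_repeats below: a const member deletes the implicitly generated assignment operators and freezes a default-constructed object. A sketch of the difference, with hypothetical types:

#include <vector>

struct Frozen {
  const std::vector<int> items;  // const member: copy assignment is deleted
};

struct Workable {
  std::vector<int> items;  // non-const member: object can be filled in later
};

int main() {
  // Frozen f; f.items.push_back(1);  // ill-formed: items is const
  Workable w;             // default-construct first...
  w.items.push_back(1);   // ...then populate fields, as the refactor below does
  Workable w2;
  w2 = w;                 // assignment also works once members are non-const
  return w2.items.size() == 1 ? 0 : 1;
}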
@@ -67,19 +67,18 @@ class ExpandRepeats : public rules::IdentityRuleFn {
 };

 SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
-  vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);
+  SyntaxGrammar result;
+  result.aux_rules = grammar.aux_rules;
+  result.ubiquitous_tokens = grammar.ubiquitous_tokens;
+  result.expected_conflicts = grammar.expected_conflicts;

-  ExpandRepeats expander(aux_rules.size());
-  for (auto &pair : grammar.rules) {
-    rules.push_back({ pair.first, expander.expand(pair.second, pair.first) });
-  }
+  ExpandRepeats expander(result.aux_rules.size());
+  for (auto &pair : grammar.rules)
+    result.rules.push_back({ pair.first, expander.expand(pair.second, pair.first) });

-  aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(),
-                   expander.aux_rules.end());
-
-  return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens,
-                       grammar.expected_conflicts);
+  result.aux_rules.insert(result.aux_rules.end(), expander.aux_rules.begin(),
+                          expander.aux_rules.end());
+  return result;
 }

 } // namespace prepare_grammar
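The rewrite above replaces the local-vectors-plus-constructor flow with default-constructing the result grammar, copying the untouched parts of the input forward, and appending the transformed rules in place. The same shape in a self-contained sketch (hypothetical Grammar and transform, not the real ExpandRepeats pass):

#include <string>
#include <utility>
#include <vector>

struct Grammar {
  std::vector<std::pair<std::string, int>> rules;
  std::vector<std::pair<std::string, int>> aux_rules;
};

// Stand-in for ExpandRepeats::expand, which rewrites one rule tree.
static int transform(int rule) { return rule * 10; }

static Grammar expand(const Grammar &grammar) {
  Grammar result;
  result.aux_rules = grammar.aux_rules;  // carry unchanged fields forward
  for (auto &pair : grammar.rules)       // transform each named rule
    result.rules.push_back({ pair.first, transform(pair.second) });
  return result;
}

int main() {
  Grammar g{{ { "rule0", 1 } }, {}};
  return expand(g).rules[0].second == 10 ? 0 : 1;
}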
@@ -56,34 +56,33 @@ class ExpandTokens : public rules::IdentityRuleFn {
   ExpandTokens() : error(nullptr) {}
 };

-pair<LexicalGrammar, const GrammarError *> expand_tokens(
-    const LexicalGrammar &grammar) {
-  vector<pair<string, rule_ptr>> rules, aux_rules;
-  vector<rule_ptr> separators;
+pair<LexicalGrammar, const GrammarError *>
+expand_tokens(const LexicalGrammar &grammar) {
+  LexicalGrammar result;
   ExpandTokens expander;

   for (auto &pair : grammar.rules) {
     auto rule = expander.apply(pair.second);
     if (expander.error)
-      return { LexicalGrammar(), expander.error };
-    rules.push_back({ pair.first, rule });
+      return { result, expander.error };
+    result.rules.push_back({ pair.first, rule });
   }

   for (auto &pair : grammar.aux_rules) {
     auto rule = expander.apply(pair.second);
     if (expander.error)
-      return { LexicalGrammar(), expander.error };
-    aux_rules.push_back({ pair.first, rule });
+      return { result, expander.error };
+    result.aux_rules.push_back({ pair.first, rule });
   }

   for (auto &sep : grammar.separators) {
     auto rule = expander.apply(sep);
     if (expander.error)
-      return { LexicalGrammar(), expander.error };
-    separators.push_back(rule);
+      return { result, expander.error };
+    result.separators.push_back(rule);
   }

-  return { LexicalGrammar(rules, aux_rules, separators), nullptr, };
+  return { result, nullptr, };
 }

 } // namespace prepare_grammar
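expand_tokens keeps its pair-of-grammar-and-error return convention, but the early returns now hand back the partially built result alongside the error pointer instead of constructing a fresh empty grammar; callers must check the error before trusting the grammar either way. A reduced sketch of the convention, with hypothetical names:

#include <string>
#include <utility>
#include <vector>

struct Error { std::string message; };
struct TokenGrammar { std::vector<std::string> rules; };

// Returns the grammar built so far plus an error pointer; a non-null
// error means the caller should discard the grammar half of the pair.
static std::pair<TokenGrammar, const Error *> expand(
    const std::vector<std::string> &inputs) {
  static const Error invalid{ "invalid token" };
  TokenGrammar result;
  for (auto &input : inputs) {
    if (input.empty())
      return { result, &invalid };  // bail out mid-build on the first error
    result.rules.push_back(input);
  }
  return { result, nullptr };
}

int main() {
  return (expand({ "a" }).second == nullptr &&
          expand({ "" }).second != nullptr) ? 0 : 1;
}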
@@ -7,22 +7,6 @@
 namespace tree_sitter {

 using std::string;
 using std::pair;
 using std::vector;
 using std::set;

-SyntaxGrammar::SyntaxGrammar() {}
-
-SyntaxGrammar::SyntaxGrammar(const vector<pair<string, rules::rule_ptr>> &rules,
-                             const vector<pair<string, rules::rule_ptr>> &aux_rules)
-    : rules(rules), aux_rules(aux_rules) {}
-
-SyntaxGrammar::SyntaxGrammar(const vector<pair<string, rules::rule_ptr>> &rules,
-                             const vector<pair<string, rules::rule_ptr>> &aux_rules,
-                             const set<rules::Symbol> &ubiquitous_tokens,
-                             const set<set<rules::Symbol>> &expected_conflicts)
-    : rules(rules), aux_rules(aux_rules),
-      ubiquitous_tokens(ubiquitous_tokens), expected_conflicts(expected_conflicts) {}
-
 const rules::rule_ptr &SyntaxGrammar::rule(const rules::Symbol &symbol) const {
   return symbol.is_auxiliary() ? aux_rules[symbol.index].second
@@ -12,21 +12,11 @@ namespace tree_sitter {

 class SyntaxGrammar {
  public:
-  SyntaxGrammar();
-  SyntaxGrammar(
-      const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
-      const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
-  SyntaxGrammar(
-      const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
-      const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
-      const std::set<rules::Symbol> &ubiquitous_tokens,
-      const std::set<std::set<rules::Symbol>> &expected_conflicts);
-
   const std::string &rule_name(const rules::Symbol &symbol) const;
   const rules::rule_ptr &rule(const rules::Symbol &symbol) const;

-  const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
-  const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
+  std::vector<std::pair<std::string, rules::rule_ptr>> rules;
+  std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
   std::set<rules::Symbol> ubiquitous_tokens;
   std::set<std::set<rules::Symbol>> expected_conflicts;
 };