Allow users to put their own auxiliary rules in grammars
parent 812f27f43a
commit a77ca1ee08
3 changed files with 138 additions and 109 deletions
@@ -7,112 +7,135 @@ using namespace rules;
 using prepare_grammar::perform;
 
 describe("preparing a grammar", []() {
-    it("moves sub-rules that don't contain symbols into a separate 'lexical' grammar", [&]() {
-        pair<Grammar, Grammar> result = perform(Grammar({
-            { "rule1", seq({
-                character('a'),
-                character('b'),
-                seq({
-                    sym("rule2"),
-                    sym("rule3") }),
-                seq({
-                    character('a'),
-                    character('b') }) }) }
-        }));
+    describe("extracting tokens", []() {
+        it("moves sub-rules that don't contain symbols into a separate 'lexical' grammar", [&]() {
+            pair<Grammar, Grammar> result = perform(Grammar({
+                { "rule1", seq({
+                    character('a'),
+                    character('b'),
+                    seq({
+                        sym("rule2"),
+                        sym("rule3") }),
+                    seq({
+                        character('a'),
+                        character('b') }) }) }
+            }));
 
-        AssertThat(result.first, Equals(Grammar({
-            { "rule1", seq({
-                aux_sym("token1"),
-                seq({
-                    sym("rule2"),
-                    sym("rule3") }),
-                aux_sym("token1") }) }
-        })));
+            AssertThat(result.first, Equals(Grammar({
+                { "rule1", seq({
+                    aux_sym("token1"),
+                    seq({
+                        sym("rule2"),
+                        sym("rule3") }),
+                    aux_sym("token1") }) }
+            })));
 
-        AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>(), {
-            { "token1", rules::seq({
-                rules::character('a'),
-                rules::character('b') }) },
-        })));
-    });
+            AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>(), {
+                { "token1", rules::seq({
+                    rules::character('a'),
+                    rules::character('b') }) },
+            })));
+        });
 
-    it("moves entire rules into the lexical grammar when possible, preserving their names", [&]() {
-        auto result = perform(Grammar({
-            { "rule1", sym("rule2") },
-            { "rule2", seq({
-                character('a'),
-                character('b') }) }
-        }));
+        it("moves entire rules into the lexical grammar when possible, preserving their names", [&]() {
+            auto result = perform(Grammar({
+                { "rule1", sym("rule2") },
+                { "rule2", seq({
+                    character('a'),
+                    character('b') }) }
+            }));
 
-        AssertThat(result.first, Equals(Grammar({
-            { "rule1", sym("rule2") }
-        })));
+            AssertThat(result.first, Equals(Grammar({
+                { "rule1", sym("rule2") }
+            })));
 
-        AssertThat(result.second, Equals(Grammar("", {
-            { "rule2", seq({
-                character('a'),
-                character('b') }) },
-        })));
-    });
+            AssertThat(result.second, Equals(Grammar("", {
+                { "rule2", seq({
+                    character('a'),
+                    character('b') }) },
+            })));
+        });
 
-    it("replaces repeat rules with pairs of recursive rules", [&]() {
-        Grammar result = perform(Grammar({
-            { "rule1", seq({
-                sym("x"),
-                repeat(seq({ sym("a"), sym("b") })),
-                sym("y")
-            }) },
-        })).first;
+        it("moves parts of auxiliary rules into auxiliary lexical rules", []() {
+            auto result = perform(Grammar("rule1", map<const string, const rule_ptr>(), {
+                { "rule1", sym("rule2") },
+                { "rule2", seq({
+                    character('a'),
+                    character('b') }) }
+            }));
 
-        AssertThat(result, Equals(Grammar("rule1", {
-            { "rule1", seq({
-                sym("x"),
-                aux_sym("repeat_helper1"),
-                sym("y")
-            }) },
-        }, {
-            { "repeat_helper1", seq({
-                seq({ sym("a"), sym("b") }),
-                choice({
-                    aux_sym("repeat_helper1"),
-                    blank()
-                }),
-            }) }
-        })));
-    });
+            AssertThat(result.first, Equals(Grammar("rule1", map<const string, const rule_ptr>(), {
+                { "rule1", sym("rule2") }
+            })));
+
+            AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>(), {
+                { "rule2", seq({
+                    character('a'),
+                    character('b') }) },
+            })));
+        });
 
-    it("does not extract blanks into tokens", [&]() {
-        pair<Grammar, Grammar> result = perform(Grammar({
-            { "rule1", choice({ sym("rule2"), blank() }) },
-        }));
+        it("does not extract blanks into tokens", [&]() {
+            pair<Grammar, Grammar> result = perform(Grammar({
+                { "rule1", choice({ sym("rule2"), blank() }) },
+            }));
 
-        AssertThat(result.first, Equals(Grammar("rule1", {
-            { "rule1", choice({ sym("rule2"), blank() }) },
-        })));
+            AssertThat(result.first, Equals(Grammar("rule1", {
+                { "rule1", choice({ sym("rule2"), blank() }) },
+            })));
 
-        AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>())));
-    });
+            AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>())));
+        });
+    });
 
-    it("does not replace repeat rules that can be moved into the lexical grammar", [&]() {
-        pair<Grammar, Grammar> result = perform(Grammar({
-            { "rule1", seq({
-                sym("x"),
-                repeat(seq({ str("a"), str("b") })),
-                sym("y")
-            }) },
-        }));
+    describe("expanding repeats", []() {
+        it("replaces repeat rules with pairs of recursive rules", [&]() {
+            Grammar result = perform(Grammar({
+                { "rule1", seq({
+                    sym("x"),
+                    repeat(seq({ sym("a"), sym("b") })),
+                    sym("y")
+                }) },
+            })).first;
 
-        AssertThat(result.first, Equals(Grammar("rule1", {
-            { "rule1", seq({
-                sym("x"),
-                aux_sym("token1"),
-                sym("y")
-            }) },
-        })));
+            AssertThat(result, Equals(Grammar("rule1", {
+                { "rule1", seq({
+                    sym("x"),
+                    aux_sym("repeat_helper1"),
+                    sym("y")
+                }) },
+            }, {
+                { "repeat_helper1", seq({
+                    seq({ sym("a"), sym("b") }),
+                    choice({
+                        aux_sym("repeat_helper1"),
+                        blank(),
+                    }),
+                }) }
+            })));
+        });
 
-        AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>(), {
-            { "token1", repeat(seq({ str("a"), str("b") })) },
-        })));
-    });
+        it("does not replace repeat rules that can be moved into the lexical grammar", [&]() {
+            pair<Grammar, Grammar> result = perform(Grammar({
+                { "rule1", seq({
+                    sym("x"),
+                    repeat(seq({ str("a"), str("b") })),
+                    sym("y")
+                }) },
+            }));
+
+            AssertThat(result.first, Equals(Grammar("rule1", {
+                { "rule1", seq({
+                    sym("x"),
+                    aux_sym("token1"),
+                    sym("y")
+                }) },
+            })));
+
+            AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>(), {
+                { "token1", repeat(seq({ str("a"), str("b") })) },
+            })));
+        });
+    });
 });
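Note: `perform` itself is untouched by this commit; the specs above exercise the composition of the two passes edited below. As orientation, a minimal sketch of how `perform` plausibly chains them — an assumption, since its body is not part of this diff — given that `expand_repeats` returns a `Grammar` and `extract_tokens` returns a `pair<Grammar, Grammar>`:

    // Hypothetical composition; perform's real body is not shown in this diff.
    pair<Grammar, Grammar> perform(const Grammar &input_grammar) {
        // Rewrite repeat(...) rules into recursive helper rules, then split
        // the result into a syntactic grammar and a lexical grammar.
        return extract_tokens(expand_repeats(input_grammar));
    }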
@@ -46,13 +46,16 @@ namespace tree_sitter {
         };
 
         Grammar expand_repeats(const Grammar &grammar) {
-            map<const string, const rule_ptr> result;
+            map<const string, const rule_ptr> rules;
+            map<const string, const rule_ptr> aux_rules(grammar.aux_rules);
             RepeatExpander visitor;
 
             for (auto pair : grammar.rules)
-                result.insert({ pair.first, visitor.apply(pair.second) });
+                rules.insert({ pair.first, visitor.apply(pair.second) });
+
+            aux_rules.insert(visitor.aux_rules.begin(), visitor.aux_rules.end());
 
-            return Grammar(grammar.start_rule_name, result, visitor.aux_rules);
+            return Grammar(grammar.start_rule_name, rules, aux_rules);
         }
     }
 }
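The substantive change in `expand_repeats`: `aux_rules` now starts as a copy of the input grammar's auxiliary rules and the generated helpers are merged into it, where the old code returned `visitor.aux_rules` alone and so discarded any user-supplied auxiliary rules. The rewrite `RepeatExpander` applies is the one pinned down by the "replaces repeat rules" spec; in the same rule DSL the tests use:

    // Sketch of the expansion. A repeat inside a rule becomes a
    // reference to a generated helper rule...
    rule_ptr rule1 = seq({ sym("x"), aux_sym("repeat_helper1"), sym("y") });

    // ...and the helper matches one repetition, then either recurses
    // or matches nothing, ending the loop.
    rule_ptr repeat_helper1 = seq({
        seq({ sym("a"), sym("b") }),
        choice({ aux_sym("repeat_helper1"), blank() }),
    });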
@@ -62,13 +62,14 @@ namespace tree_sitter {
         pair<Grammar, Grammar> extract_tokens(const Grammar &input_grammar) {
             TokenExtractor extractor;
             map<const string, const rule_ptr> rules;
-            map<const string, const rule_ptr> aux_rules;
             map<const string, const rule_ptr> tokens;
+            map<const string, const rule_ptr> aux_rules;
+            map<const string, const rule_ptr> aux_tokens;
 
             for (auto pair : input_grammar.rules) {
                 string name = pair.first;
                 rule_ptr rule = pair.second;
-                auto new_rule = extractor.initial_apply(rule);
+                rule_ptr new_rule = extractor.initial_apply(rule);
                 if (new_rule.get())
                     rules.insert({ name, new_rule });
                 else
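The four maps now separate the two output grammars along both axes: `rules`/`aux_rules` collect the syntactic grammar's named and auxiliary rules, while `tokens`/`aux_tokens` collect the lexical grammar's. Previously there was no `aux_tokens`, so an auxiliary rule that turned out to be a token was dropped into `tokens` and lost its auxiliary status (see the next hunk).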
@@ -78,16 +79,18 @@ namespace tree_sitter {
             for (auto pair : input_grammar.aux_rules) {
                 string name = pair.first;
                 rule_ptr rule = pair.second;
-                auto new_rule = extractor.initial_apply(rule);
+                rule_ptr new_rule = extractor.initial_apply(rule);
                 if (new_rule.get())
                     aux_rules.insert({ name, new_rule });
                 else
-                    tokens.insert({ name, rule });
+                    aux_tokens.insert({ name, rule });
             }
 
+            aux_tokens.insert(extractor.tokens.begin(), extractor.tokens.end());
+
             return {
-                Grammar(input_grammar.start_rule_name, rules),
-                Grammar("", tokens, extractor.tokens)
+                Grammar(input_grammar.start_rule_name, rules, aux_rules),
+                Grammar("", tokens, aux_tokens)
             };
         }
     }
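With the declarations above, extraction now round-trips user-supplied auxiliary rules: the syntactic grammar keeps its surviving `aux_rules`, and the lexical grammar's auxiliary side collects both user auxiliary rules that were wholly tokens and the `token1`-style rules the extractor generates. A usage sketch mirroring the new "moves parts of auxiliary rules" spec (assuming a repeat-free grammar, so `extract_tokens` alone exhibits the behavior `perform` exposes):

    // "rule2" contains no symbol references, so it moves, name intact,
    // into the lexical grammar's auxiliary rules.
    Grammar input("rule1", map<const string, const rule_ptr>(), {
        { "rule1", sym("rule2") },
        { "rule2", seq({ character('a'), character('b') }) },
    });

    pair<Grammar, Grammar> result = extract_tokens(input);
    // result.first:  aux rule "rule1" -> sym("rule2")
    // result.second: aux rule "rule2" -> seq(character('a'), character('b'))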