Allow users to put their own auxiliary rules in grammars

commit a77ca1ee08
parent 812f27f43a
Author: Max Brunsfeld
Date:   2014-02-13 13:09:00 -08:00

3 changed files with 138 additions and 109 deletions
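In short: a grammar author's own auxiliary rules now survive preparation. Previously, `extract_tokens` rebuilt the syntactic grammar from its named rules alone (discarding the auxiliary rules it had just computed), and `expand_repeats` replaced the auxiliary map with only its generated repeat helpers; both passes now thread the user-supplied `aux_rules` through, and a new spec ("moves parts of auxiliary rules into auxiliary lexical rules") covers the behavior.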


@@ -7,112 +7,135 @@ using namespace rules;
 using prepare_grammar::perform;

 describe("preparing a grammar", []() {
-    it("moves sub-rules that don't contain symbols into a separate 'lexical' grammar", [&]() {
-        pair<Grammar, Grammar> result = perform(Grammar({
-            { "rule1", seq({
-                character('a'),
-                character('b'),
-                seq({
-                    sym("rule2"),
-                    sym("rule3") }),
-                seq({
-                    character('a'),
-                    character('b') }) }) }
-        }));
-
-        AssertThat(result.first, Equals(Grammar({
-            { "rule1", seq({
-                aux_sym("token1"),
-                seq({
-                    sym("rule2"),
-                    sym("rule3") }),
-                aux_sym("token1") }) }
-        })));
-
-        AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>(), {
-            { "token1", rules::seq({
-                rules::character('a'),
-                rules::character('b') }) },
-        })));
-    });
-
-    it("moves entire rules into the lexical grammar when possible, preserving their names", [&]() {
-        auto result = perform(Grammar({
-            { "rule1", sym("rule2") },
-            { "rule2", seq({
-                character('a'),
-                character('b') }) }
-        }));
-
-        AssertThat(result.first, Equals(Grammar({
-            { "rule1", sym("rule2") }
-        })));
-
-        AssertThat(result.second, Equals(Grammar("", {
-            { "rule2", seq({
-                character('a'),
-                character('b') }) },
-        })));
-    });
-
-    it("replaces repeat rules with pairs of recursive rules", [&]() {
-        Grammar result = perform(Grammar({
-            { "rule1", seq({
-                sym("x"),
-                repeat(seq({ sym("a"), sym("b") })),
-                sym("y")
-            }) },
-        })).first;
-
-        AssertThat(result, Equals(Grammar("rule1", {
-            { "rule1", seq({
-                sym("x"),
-                aux_sym("repeat_helper1"),
-                sym("y")
-            }) },
-        }, {
-            { "repeat_helper1", seq({
-                seq({ sym("a"), sym("b") }),
-                choice({
-                    aux_sym("repeat_helper1"),
-                    blank()
-                }),
-            }) }
-        })));
-    });
-
-    it("does not replace repeat rules that can be moved into the lexical grammar", [&]() {
-        pair<Grammar, Grammar> result = perform(Grammar({
-            { "rule1", seq({
-                sym("x"),
-                repeat(seq({ str("a"), str("b") })),
-                sym("y")
-            }) },
-        }));
-
-        AssertThat(result.first, Equals(Grammar("rule1", {
-            { "rule1", seq({
-                sym("x"),
-                aux_sym("token1"),
-                sym("y")
-            }) },
-        })));
-
-        AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>(), {
-            { "token1", repeat(seq({ str("a"), str("b") })) },
-        })));
-    });
-
-    it("does not extract blanks into tokens", [&]() {
-        pair<Grammar, Grammar> result = perform(Grammar({
-            { "rule1", choice({ sym("rule2"), blank() }) },
-        }));
-
-        AssertThat(result.first, Equals(Grammar("rule1", {
-            { "rule1", choice({ sym("rule2"), blank() }) },
-        })));
-
-        AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>())));
-    });
+    describe("extracting tokens", []() {
+        it("moves sub-rules that don't contain symbols into a separate 'lexical' grammar", [&]() {
+            pair<Grammar, Grammar> result = perform(Grammar({
+                { "rule1", seq({
+                    character('a'),
+                    character('b'),
+                    seq({
+                        sym("rule2"),
+                        sym("rule3") }),
+                    seq({
+                        character('a'),
+                        character('b') }) }) }
+            }));
+
+            AssertThat(result.first, Equals(Grammar({
+                { "rule1", seq({
+                    aux_sym("token1"),
+                    seq({
+                        sym("rule2"),
+                        sym("rule3") }),
+                    aux_sym("token1") }) }
+            })));
+
+            AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>(), {
+                { "token1", rules::seq({
+                    rules::character('a'),
+                    rules::character('b') }) },
+            })));
+        });
+
+        it("moves entire rules into the lexical grammar when possible, preserving their names", [&]() {
+            auto result = perform(Grammar({
+                { "rule1", sym("rule2") },
+                { "rule2", seq({
+                    character('a'),
+                    character('b') }) }
+            }));
+
+            AssertThat(result.first, Equals(Grammar({
+                { "rule1", sym("rule2") }
+            })));
+
+            AssertThat(result.second, Equals(Grammar("", {
+                { "rule2", seq({
+                    character('a'),
+                    character('b') }) },
+            })));
+        });
+
+        it("moves parts of auxiliary rules into auxiliary lexical rules", []() {
+            auto result = perform(Grammar("rule1", map<const string, const rule_ptr>(), {
+                { "rule1", sym("rule2") },
+                { "rule2", seq({
+                    character('a'),
+                    character('b') }) }
+            }));
+
+            AssertThat(result.first, Equals(Grammar("rule1", map<const string, const rule_ptr>(), {
+                { "rule1", sym("rule2") }
+            })));
+
+            AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>(), {
+                { "rule2", seq({
+                    character('a'),
+                    character('b') }) },
+            })));
+        });
+
+        it("does not extract blanks into tokens", [&]() {
+            pair<Grammar, Grammar> result = perform(Grammar({
+                { "rule1", choice({ sym("rule2"), blank() }) },
+            }));
+
+            AssertThat(result.first, Equals(Grammar("rule1", {
+                { "rule1", choice({ sym("rule2"), blank() }) },
+            })));
+
+            AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>())));
+        });
+    });
+
+    describe("expanding repeats", []() {
+        it("replaces repeat rules with pairs of recursive rules", [&]() {
+            Grammar result = perform(Grammar({
+                { "rule1", seq({
+                    sym("x"),
+                    repeat(seq({ sym("a"), sym("b") })),
+                    sym("y")
+                }) },
+            })).first;
+
+            AssertThat(result, Equals(Grammar("rule1", {
+                { "rule1", seq({
+                    sym("x"),
+                    aux_sym("repeat_helper1"),
+                    sym("y")
+                }) },
+            }, {
+                { "repeat_helper1", seq({
+                    seq({ sym("a"), sym("b") }),
+                    choice({
+                        aux_sym("repeat_helper1"),
+                        blank(),
+                    }),
+                }) }
+            })));
+        });
+
+        it("does not replace repeat rules that can be moved into the lexical grammar", [&]() {
+            pair<Grammar, Grammar> result = perform(Grammar({
+                { "rule1", seq({
+                    sym("x"),
+                    repeat(seq({ str("a"), str("b") })),
+                    sym("y")
+                }) },
+            }));
+
+            AssertThat(result.first, Equals(Grammar("rule1", {
+                { "rule1", seq({
+                    sym("x"),
+                    aux_sym("token1"),
+                    sym("y")
+                }) },
+            })));
+
+            AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>(), {
+                { "token1", repeat(seq({ str("a"), str("b") })) },
+            })));
+        });
+    });
 });
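Read as grammar productions, the "replaces repeat rules" spec pins down the rewrite that `expand_repeats` performs: a `repeat(...)` inside a syntactic rule becomes a reference to a fresh right-recursive auxiliary rule, with `blank()` as the empty alternative:

    rule1          -> x repeat_helper1 y
    repeat_helper1 -> (a b) (repeat_helper1 | blank)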


@@ -46,13 +46,16 @@ namespace tree_sitter {
         };

         Grammar expand_repeats(const Grammar &grammar) {
-            map<const string, const rule_ptr> result;
+            map<const string, const rule_ptr> rules;
+            map<const string, const rule_ptr> aux_rules(grammar.aux_rules);
             RepeatExpander visitor;
             for (auto pair : grammar.rules)
-                result.insert({ pair.first, visitor.apply(pair.second) });
-            return Grammar(grammar.start_rule_name, result, visitor.aux_rules);
+                rules.insert({ pair.first, visitor.apply(pair.second) });
+            aux_rules.insert(visitor.aux_rules.begin(), visitor.aux_rules.end());
+            return Grammar(grammar.start_rule_name, rules, aux_rules);
         }
     }
 }
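One detail of the new `expand_repeats` worth calling out: `aux_rules` is copy-constructed from the user's `grammar.aux_rules` before the visitor's generated helpers are merged in, and `std::map::insert` never overwrites an existing key, so a user-supplied rule would take precedence over a generated helper of the same name. A standalone toy sketch of just that merge (plain strings stand in for `rule_ptr`s, and the name collision is hypothetical):

    #include <iostream>
    #include <map>
    #include <string>

    int main() {
        // Seeded from the user's grammar.aux_rules, as in the constructor above.
        std::map<std::string, std::string> aux_rules{
            { "user_helper", "seq(character('a'), character('b'))" } };

        // Helpers produced by the RepeatExpander visitor.
        std::map<std::string, std::string> generated{
            { "repeat_helper1", "seq(inner, choice(repeat_helper1, blank))" },
            { "user_helper", "would collide; insert() keeps the user's entry" } };

        // Range insert skips keys that are already present.
        aux_rules.insert(generated.begin(), generated.end());

        for (const auto &entry : aux_rules)
            std::cout << entry.first << " -> " << entry.second << "\n";
        // repeat_helper1 -> seq(inner, choice(repeat_helper1, blank))
        // user_helper -> seq(character('a'), character('b'))
    }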


@@ -62,13 +62,14 @@ namespace tree_sitter {
         pair<Grammar, Grammar> extract_tokens(const Grammar &input_grammar) {
             TokenExtractor extractor;
             map<const string, const rule_ptr> rules;
-            map<const string, const rule_ptr> aux_rules;
             map<const string, const rule_ptr> tokens;
+            map<const string, const rule_ptr> aux_rules;
+            map<const string, const rule_ptr> aux_tokens;

             for (auto pair : input_grammar.rules) {
                 string name = pair.first;
                 rule_ptr rule = pair.second;
-                auto new_rule = extractor.initial_apply(rule);
+                rule_ptr new_rule = extractor.initial_apply(rule);
                 if (new_rule.get())
                     rules.insert({ name, new_rule });
                 else
@@ -78,16 +79,18 @@ namespace tree_sitter {
             for (auto pair : input_grammar.aux_rules) {
                 string name = pair.first;
                 rule_ptr rule = pair.second;
-                auto new_rule = extractor.initial_apply(rule);
+                rule_ptr new_rule = extractor.initial_apply(rule);
                 if (new_rule.get())
                     aux_rules.insert({ name, new_rule });
                 else
-                    tokens.insert({ name, rule });
+                    aux_tokens.insert({ name, rule });
             }

+            aux_tokens.insert(extractor.tokens.begin(), extractor.tokens.end());
+
             return {
-                Grammar(input_grammar.start_rule_name, rules),
-                Grammar("", tokens, extractor.tokens)
+                Grammar(input_grammar.start_rule_name, rules, aux_rules),
+                Grammar("", tokens, aux_tokens)
             };
         }
     }
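Taken together, the two hunks make `extract_tokens` partition named and auxiliary rules symmetrically: anything that still references other symbols after extraction stays in the syntactic grammar (`rules` / `aux_rules`), anything purely lexical moves to the token grammar (`tokens` / `aux_tokens`), and the tokens the extractor manufactures along the way are merged into `aux_tokens`. A compilable toy model of that split, with strings standing in for rules and a made-up predicate in place of the real check done via `extractor.initial_apply()`:

    #include <map>
    #include <string>

    struct SplitGrammars {
        std::map<std::string, std::string> rules, aux_rules;   // syntactic grammar
        std::map<std::string, std::string> tokens, aux_tokens; // lexical grammar
    };

    // Stand-in for "does this rule still reference other symbols?"
    static bool contains_symbols(const std::string &rule) {
        return rule.find("sym(") != std::string::npos;
    }

    SplitGrammars extract(const std::map<std::string, std::string> &rules,
                          const std::map<std::string, std::string> &aux_rules) {
        SplitGrammars result;
        for (const auto &entry : rules)
            (contains_symbols(entry.second) ? result.rules : result.tokens).insert(entry);
        // New in this commit: auxiliary rules get the same treatment, landing in
        // aux_rules or aux_tokens instead of leaking into the named tokens.
        for (const auto &entry : aux_rules)
            (contains_symbols(entry.second) ? result.aux_rules : result.aux_tokens).insert(entry);
        return result;
    }

With that shape, `extract({{ "rule1", "seq(sym(rule2))" }}, {{ "helper", "seq(a, b)" }})` keeps `rule1` syntactic and moves `helper` into `aux_tokens`, mirroring the new "moves parts of auxiliary rules into auxiliary lexical rules" spec above.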