Fix bug in FIRST set function

This commit is contained in:
Max Brunsfeld 2014-02-13 18:52:17 -08:00
parent 2ee9455174
commit bf07522026
3 changed files with 62 additions and 36 deletions

View file

@ -10,39 +10,60 @@ using namespace rules;
START_TEST
describe("computing FIRST sets", []() {
Grammar grammar({
{ "A", choice({
seq({
sym("B"),
sym("x"),
sym("B") }),
sym("B") }) },
{ "B", choice({
seq({
sym("y"),
sym("z"),
sym("y") }),
sym("y") }) },
{ "C", seq({
choice({
sym("x"),
blank() }),
sym("y") }) }
});
describe("for a rule starting with a non-terminal B", [&]() {
it("includes FIRST(B)", [&]() {
auto terminals = first_set(grammar.rules.find("A")->second, grammar);
AssertThat(terminals, Equals(set<Symbol>({
const Grammar null_grammar({{ "something", blank() }});
describe("for a sequence AB", [&]() {
it("ignores B when A cannot be blank", [&]() {
auto rule = seq({ sym("x"), sym("y") });
AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
Symbol("x"),
})));
});
it("includes FIRST(B) when A can be blank", [&]() {
auto rule = seq({
choice({
sym("x"),
blank() }),
sym("y") });
AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
Symbol("x"),
Symbol("y")
})));
});
});
describe("for a sequence xy", [&]() {
it("includes FIRST(y) when x can be blank", [&]() {
auto terminals = first_set(grammar.rules.find("C")->second, grammar);
AssertThat(terminals, Equals(set<Symbol>({
it("includes FIRST(A's right hand side) when A is a non-terminal", [&]() {
auto rule = choice({
seq({
sym("A"),
sym("x"),
sym("A") }),
sym("A") });
Grammar grammar({
{ "A", choice({
seq({
sym("y"),
sym("z"),
sym("y") }),
sym("y") }) }
});
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
Symbol("y")
})));
});
it("includes FIRST(B) when A is a non-terminal and its expansion can be blank", [&]() {
Grammar grammar({{ "A", choice({ sym("x"), blank() }) }});
auto rule = seq({
sym("A"),
sym("y") });
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
Symbol("x"),
Symbol("y")
})));

View file

@ -32,11 +32,16 @@ namespace tree_sitter {
value = set_union(apply(rule->left, grammar), apply(rule->right, grammar));
}
// Decides whether `rule` should be treated as possibly blank when computing
// the FIRST set of a sequence.
//
// Returns true if rule_can_be_blank(rule) holds directly, or if `rule` is a
// Symbol that has a definition in `grammar` and that definition satisfies
// rule_can_be_blank. Returns false otherwise, including for symbols that have
// no definition in `grammar`.
//
// NOTE(review): the symbol's definition is handed to rule_can_be_blank, not
// back to this function, so only one level of symbol expansion is performed
// here -- confirm whether rule_can_be_blank itself follows nested symbols.
bool can_be_blank(const rule_ptr &rule) {
if (rule_can_be_blank(rule)) return true;
// The cast yields a null pointer when `rule` is not a Symbol; the get() check
// below guards the dereferences that follow it.
auto symbol = std::dynamic_pointer_cast<const Symbol>(rule);
return (symbol.get() && grammar.has_definition(*symbol) && rule_can_be_blank(grammar.rule(*symbol)));
}
void visit(const Seq *rule) {
if (rule_can_be_blank(rule->left)) {
value = set_union(apply(rule->left, grammar), apply(rule->right, grammar));
} else {
value = apply(rule->left, grammar);
value = apply(rule->left, grammar);
if (can_be_blank(rule->left)) {
value = set_union(value, apply(rule->right, grammar));
}
}

View file

@ -8,8 +8,8 @@ namespace tree_sitter {
using rules::Symbol;
namespace build_tables {
static bool contains(ParseItemSet items, ParseItem item) {
return (std::find(items.begin(), items.end(), item) != items.end());
// Returns true when `item` compares equal to some element of `items`, using a
// std::find scan over the whole set. Both arguments are taken by const
// reference, so neither is copied for the lookup.
static bool contains(const ParseItemSet &items, const ParseItem &item) {
// The size check short-circuits before std::find is called on an empty set;
// std::find over an empty range would return end() anyway, so the result is
// the same either way.
return items.size() > 0 && (std::find(items.begin(), items.end(), item) != items.end());
}
static void add_item(ParseItemSet &item_set, const ParseItem &item, const Grammar &grammar) {