# Patch summary: when computing FIRST sets for a sequence, treat the left-hand
# element as nullable not only when the rule itself can be blank, but also when
# it is a symbol whose definition in the grammar can be blank.
#
# NOTE(review): this patch text was recovered from a copy in which all line
# breaks were collapsed and template argument lists were missing. Line structure
# follows the hunk headers' line counts. Leading indentation (4 spaces) is a
# reconstruction; if context lines fail to match, apply with
# `git apply --ignore-whitespace`. The template arguments `set<Symbol>` and
# `std::dynamic_pointer_cast<const rules::Symbol>` are restored on the
# assumption that they were stripped -- confirm against the repository.
#
# File 1 (spec): replaces the shared three-rule grammar fixture with per-test
# rules and adds cases for a sequence whose first element (a) cannot be blank,
# (b) can be blank, (c) is a non-terminal, (d) is a non-terminal whose
# definition can be blank.
diff --git a/spec/compiler/build_tables/first_set_spec.cpp b/spec/compiler/build_tables/first_set_spec.cpp
index 3776c2f4..dfd84aab 100644
--- a/spec/compiler/build_tables/first_set_spec.cpp
+++ b/spec/compiler/build_tables/first_set_spec.cpp
@@ -10,39 +10,60 @@ using namespace rules;
 START_TEST
 
 describe("computing FIRST sets", []() {
-    Grammar grammar({
-        { "A", choice({
-            seq({
-                sym("B"),
-                sym("x"),
-                sym("B") }),
-            sym("B") }) },
-        { "B", choice({
-            seq({
-                sym("y"),
-                sym("z"),
-                sym("y") }),
-            sym("y") }) },
-        { "C", seq({
-            choice({
-                sym("x"),
-                blank() }),
-            sym("y") }) }
-    });
-
-    describe("for a rule starting with a non-terminal B", [&]() {
-        it("includes FIRST(B)", [&]() {
-            auto terminals = first_set(grammar.rules.find("A")->second, grammar);
-            AssertThat(terminals, Equals(set<Symbol>({
+    const Grammar null_grammar({{ "something", blank() }});
+
+    describe("for a sequence AB", [&]() {
+        it("ignores B when A cannot be blank", [&]() {
+            auto rule = seq({ sym("x"), sym("y") });
+
+            AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
+                Symbol("x"),
+            })));
+        });
+
+        it("includes FIRST(B) when A can be blank", [&]() {
+            auto rule = seq({
+                choice({
+                    sym("x"),
+                    blank() }),
+                sym("y") });
+
+            AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
+                Symbol("x"),
                 Symbol("y")
             })));
         });
-    });
-
-    describe("for a sequence xy", [&]() {
-        it("includes FIRST(y) when x can be blank", [&]() {
-            auto terminals = first_set(grammar.rules.find("C")->second, grammar);
-            AssertThat(terminals, Equals(set<Symbol>({
+
+        it("includes FIRST(A's right hand side) when A is a non-terminal", [&]() {
+            auto rule = choice({
+                seq({
+                    sym("A"),
+                    sym("x"),
+                    sym("A") }),
+                sym("A") });
+
+            Grammar grammar({
+                { "A", choice({
+                    seq({
+                        sym("y"),
+                        sym("z"),
+                        sym("y") }),
+                    sym("y") }) }
+            });
+
+            AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
+                Symbol("y")
+            })));
+        });
+
+        it("includes FIRST(B) when A is a non-terminal and its expansion can be blank", [&]() {
+            Grammar grammar({{ "A", choice({ sym("x"), blank() }) }});
+
+            auto rule = seq({
+                sym("A"),
+                sym("y") });
+
+            AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
                 Symbol("x"),
                 Symbol("y")
             })));
# File 2 (implementation): adds can_be_blank(), which returns true when
# rule_can_be_blank(rule) holds, or when the rule casts to a Symbol that the
# grammar defines and whose defining rule satisfies rule_can_be_blank(). The Seq
# visitor now always starts from FIRST(left) and unions in FIRST(right) only
# when can_be_blank(left) is true.
# NOTE(review): the symbol's definition is checked with rule_can_be_blank(), not
# with can_be_blank() itself, so a definition that is nullable only through a
# further symbol may not be detected -- confirm whether that case matters.
diff --git a/src/compiler/build_tables/first_set.cpp b/src/compiler/build_tables/first_set.cpp
index 440725ab..850786e2 100644
--- a/src/compiler/build_tables/first_set.cpp
+++ b/src/compiler/build_tables/first_set.cpp
@@ -32,11 +32,16 @@ namespace tree_sitter {
                 value = set_union(apply(rule->left, grammar), apply(rule->right, grammar));
             }
 
+            bool can_be_blank(const rule_ptr &rule) {
+                if (rule_can_be_blank(rule)) return true;
+                auto symbol = std::dynamic_pointer_cast<const rules::Symbol>(rule);
+                return (symbol.get() && grammar.has_definition(*symbol) && rule_can_be_blank(grammar.rule(*symbol)));
+            }
+
             void visit(const Seq *rule) {
-                if (rule_can_be_blank(rule->left)) {
-                    value = set_union(apply(rule->left, grammar), apply(rule->right, grammar));
-                } else {
-                    value = apply(rule->left, grammar);
+                value = apply(rule->left, grammar);
+                if (can_be_blank(rule->left)) {
+                    value = set_union(value, apply(rule->right, grammar));
                 }
             }
 
# File 3 (item set closure): contains() now takes both arguments by const
# reference instead of by value, and checks items.size() > 0 before searching.
# NOTE(review): std::find over an empty range already returns end(), so the
# size() guard looks redundant -- keep only if ParseItemSet needs it.
diff --git a/src/compiler/build_tables/item_set_closure.cpp b/src/compiler/build_tables/item_set_closure.cpp
index 847fd72f..ca2e1170 100644
--- a/src/compiler/build_tables/item_set_closure.cpp
+++ b/src/compiler/build_tables/item_set_closure.cpp
@@ -8,8 +8,8 @@ namespace tree_sitter {
     using rules::Symbol;
 
     namespace build_tables {
-        static bool contains(ParseItemSet items, ParseItem item) {
-            return (std::find(items.begin(), items.end(), item) != items.end());
+        static bool contains(const ParseItemSet &items, const ParseItem &item) {
+            return items.size() > 0 && (std::find(items.begin(), items.end(), item) != items.end());
         }
 
         static void add_item(ParseItemSet &item_set, const ParseItem &item, const Grammar &grammar) {