diff --git a/spec/compiler/build_tables/next_symbols_spec.cpp b/spec/compiler/build_tables/next_symbols_spec.cpp new file mode 100644 index 00000000..df932471 --- /dev/null +++ b/spec/compiler/build_tables/next_symbols_spec.cpp @@ -0,0 +1,38 @@ +#include "spec_helper.h" +#include "build_tables/next_symbols.h" +#include "grammar.h" +#include "rules.h" + +using std::set; +using namespace build_tables; +using namespace rules; + +START_TEST + +describe("computing FIRST sets", []() { + Grammar grammar({ + { "A", choice({ + seq({ + sym("B"), + sym("x"), + sym("B") }), + sym("B") }) }, + { "B", choice({ + seq({ + sym("y"), + sym("z"), + sym("y") }), + sym("y") }) }, + }); + + describe("for a rule", [&]() { + it("searches the tree for terminals", [&]() { + auto terminals = next_terminals(grammar.rules.find("A")->second, grammar); + AssertThat(terminals, Equals(set({ + Symbol("y") + }))); + }); + }); +}); + +END_TEST \ No newline at end of file diff --git a/spec/spec_helper.h b/spec/spec_helper.h index 1dec0ba0..8fce663e 100644 --- a/spec/spec_helper.h +++ b/spec/spec_helper.h @@ -4,6 +4,7 @@ #include "bandit/bandit.h" #include #include +#include #include #include "grammar.h" @@ -26,7 +27,19 @@ namespace std { } return stream << ">"; } - + + template + inline ostream& operator<<(ostream &stream, const set &set) { + stream << string("#"; + } + template inline ostream& operator<<(ostream &stream, const unordered_map &map) { stream << string("# - set next_symbols(const Item &item, const Grammar &grammar) { + class FirstSetVisitor : Visitor { + set value; + const Grammar grammar; + + FirstSetVisitor(const Grammar &grammar) : grammar(grammar) {} + + set set_union(const set &left, const set &right) { + set result = left; + result.insert(right.begin(), right.end()); + return result; + } + + void visit(const Symbol *rule) { + if (grammar.has_definition(*rule)) { + value = apply(grammar.rule(rule->name), grammar); + } else { + value = set({ *rule }); + } + } + + void visit(const Choice *rule) { + value = set_union(apply(rule->left, grammar), apply(rule->right, grammar)); + } + + void visit(const Seq *rule) { + value = apply(rule->left, grammar); + } + + public: + static set apply(const rule_ptr rule, const Grammar &grammar) { + FirstSetVisitor visitor(grammar); + rule->accept(visitor); + return visitor.value; + } + }; + + template + set next_symbols(const rules::rule_ptr &rule, const Grammar &grammar) { set result; - for (auto pair : rule_transitions(item.rule)) { + for (auto pair : rule_transitions(rule)) { auto symbol = dynamic_pointer_cast(pair.first); - if (symbol && (grammar.has_definition(*symbol) == isNonTerminal)) + if (symbol && (grammar.has_definition(*symbol) == !isTerminal)) result.insert(*symbol); } return result; } + set next_terminals(const rules::rule_ptr &rule, const Grammar &grammar) { + return FirstSetVisitor::apply(rule, grammar); + } + + set next_non_terminals(const rules::rule_ptr &rule, const Grammar &grammar) { + return next_symbols(rule, grammar); + } + set next_terminals(const Item &item, const Grammar &grammar) { - return next_symbols(item, grammar); + return next_terminals(item.rule, grammar); } set next_non_terminals(const Item &item, const Grammar &grammar) { - return next_symbols(item, grammar); + return next_non_terminals(item.rule, grammar); } set next_terminals(const ItemSet &item_set, const Grammar &grammar) { diff --git a/src/compiler/build_tables/next_symbols.h b/src/compiler/build_tables/next_symbols.h index 719954e1..6818b525 100644 --- a/src/compiler/build_tables/next_symbols.h +++ b/src/compiler/build_tables/next_symbols.h @@ -9,6 +9,7 @@ namespace tree_sitter { class Grammar; namespace build_tables { + std::set next_terminals(const rules::rule_ptr &rule, const Grammar &grammar); std::set next_terminals(const ItemSet &item_set, const Grammar &grammar); std::set next_terminals(const Item &item, const Grammar &grammar); std::set next_non_terminals(const Item &item, const Grammar &grammar); diff --git a/tree_sitter.xcodeproj/project.pbxproj b/tree_sitter.xcodeproj/project.pbxproj index 4e504d37..56187eb1 100644 --- a/tree_sitter.xcodeproj/project.pbxproj +++ b/tree_sitter.xcodeproj/project.pbxproj @@ -18,6 +18,7 @@ 1251209B1830145300C9B56A /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; }; 125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; }; 12BC470518822B27005AC502 /* parse_config.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12BC470318822A17005AC502 /* parse_config.cpp */; }; + 12BC470718830BC5005AC502 /* next_symbols_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12BC470618830BC5005AC502 /* next_symbols_spec.cpp */; }; 12D136A4183678A2005F3369 /* repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A2183678A2005F3369 /* repeat.cpp */; }; 12EDCF8A187B498C005A7A07 /* tree_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCF89187B498C005A7A07 /* tree_spec.cpp */; }; 12EDCF8D187C6282005A7A07 /* document.c in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCF8C187C6282005A7A07 /* document.c */; }; @@ -95,6 +96,7 @@ 125120A218307FFD00C9B56A /* arithmetic.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = arithmetic.h; path = spec/fixtures/grammars/arithmetic.h; sourceTree = SOURCE_ROOT; }; 125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arithmetic.cpp; path = spec/fixtures/grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; }; 12BC470318822A17005AC502 /* parse_config.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_config.cpp; sourceTree = ""; }; + 12BC470618830BC5005AC502 /* next_symbols_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = next_symbols_spec.cpp; sourceTree = ""; }; 12C344421822F27700B07BE3 /* transition_map.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = transition_map.h; path = ../build_tables/transition_map.h; sourceTree = ""; }; 12D1369E18342088005F3369 /* todo.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = todo.md; sourceTree = ""; }; 12D136A0183570F5005F3369 /* pattern_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pattern_spec.cpp; path = spec/compiler/rules/pattern_spec.cpp; sourceTree = SOURCE_ROOT; }; @@ -229,6 +231,7 @@ children = ( 12EDCFB6188205BA005A7A07 /* rule_transitions_spec.cpp */, 12EDCFB7188205BA005A7A07 /* perform_spec.cpp */, + 12BC470618830BC5005AC502 /* next_symbols_spec.cpp */, ); name = build_tables; path = compiler/build_tables; @@ -497,6 +500,7 @@ 12EDCFBC188205BF005A7A07 /* rule_transitions_spec.cpp in Sources */, 12130605182C348F00FCF928 /* character.cpp in Sources */, 12EDCFB418820519005A7A07 /* compile.cpp in Sources */, + 12BC470718830BC5005AC502 /* next_symbols_spec.cpp in Sources */, 1213060B182C389100FCF928 /* symbol.cpp in Sources */, 12EDCFB118820395005A7A07 /* item_set.cpp in Sources */, 1251209B1830145300C9B56A /* rule.cpp in Sources */,