diff --git a/spec/compiler/build_tables/follow_sets_spec.cpp b/spec/compiler/build_tables/follow_sets_spec.cpp new file mode 100644 index 00000000..be538084 --- /dev/null +++ b/spec/compiler/build_tables/follow_sets_spec.cpp @@ -0,0 +1,51 @@ +#include "spec_helper.h" +#include "prepared_grammar.h" +#include "build_tables/follow_sets.h" + +using std::set; +using namespace build_tables; +using namespace rules; + +START_TEST + +describe("computing FOLLOW sets", []() { + const PreparedGrammar grammar("", { + { "A", sym("a") }, + { "B", sym("b") }, + }, {}); + + it("all of the starting non-terminals for the item, and their following terminals", [&]() { + ParseItem item(Symbol("C"), choice({ + seq({ sym("A"), choice({ sym("x"), sym("y") }) }), + seq({ sym("B"), sym("z") }), + }), {}, Symbol("w")); + + AssertThat(follow_sets(item, grammar), Equals(map>({ + { Symbol("A"), set({ Symbol("x"), Symbol("y") }) }, + { Symbol("B"), set({ Symbol("z") }) }, + }))); + }); + + it("does not include terminals at the beginning of the item", [&]() { + ParseItem item(Symbol("C"), choice({ + seq({ sym("A"), choice({ sym("x"), sym("y") }) }), + seq({ sym("x"), sym("y") }), + }), {}, Symbol("w")); + + AssertThat(follow_sets(item, grammar), Equals(map>({ + { Symbol("A"), set({ Symbol("x"), Symbol("y") }) }, + }))); + }); + + it("includes the item's lookahead terminal if the rule after the non-terminal might be blank", [&]() { + ParseItem item(Symbol("C"), choice({ + seq({ sym("A"), choice({ sym("x"), blank() }) }), + }), {}, Symbol("w")); + + AssertThat(follow_sets(item, grammar), Equals(map>({ + { Symbol("A"), set({ Symbol("x"), Symbol("w") }) }, + }))); + }); +}); + +END_TEST \ No newline at end of file diff --git a/src/compiler/build_tables/first_set.h b/src/compiler/build_tables/first_set.h index f755e836..860bf572 100644 --- a/src/compiler/build_tables/first_set.h +++ b/src/compiler/build_tables/first_set.h @@ -8,7 +8,19 @@ namespace tree_sitter { class PreparedGrammar; namespace 
build_tables { + + /* + * Returns the set of terminal symbols that can appear at + * the beginning of a string derivable from a given rule, + * in a given grammar. + */ std::set first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar); + + /* + * Returns the set of terminal symbols that can appear at + * the beginning of any item in the given set. + */ +// std::set first_set(const ParseItemSet &item_set, const PreparedGrammar &grammar); } } diff --git a/src/compiler/build_tables/follow_sets.cpp b/src/compiler/build_tables/follow_sets.cpp index 8f52a4f9..1e3f48b3 100644 --- a/src/compiler/build_tables/follow_sets.cpp +++ b/src/compiler/build_tables/follow_sets.cpp @@ -18,10 +18,10 @@ namespace tree_sitter { Symbol symbol = pair.first; rule_ptr next_rule = pair.second; if (grammar.has_definition(symbol)) { - set following_non_terminals = first_set(next_rule, grammar); + set following_terminals = first_set(next_rule, grammar); if (rule_can_be_blank(next_rule, grammar)) - following_non_terminals.insert(item.lookahead_sym); - result.insert({ symbol, following_non_terminals }); + following_terminals.insert(item.lookahead_sym); + result.insert({ symbol, following_terminals }); } } diff --git a/src/compiler/build_tables/follow_sets.h b/src/compiler/build_tables/follow_sets.h index cc633438..d715be3b 100644 --- a/src/compiler/build_tables/follow_sets.h +++ b/src/compiler/build_tables/follow_sets.h @@ -10,7 +10,15 @@ namespace tree_sitter { class PreparedGrammar; namespace build_tables { + + /* + * Returns a map of non-terminal symbols to sets of terminal symbols. + * The keys are the non-terminals which may appear first in the given + * item. The values are the sets of terminals which can appear immediately + * after the corresponding non-terminals. 
+ */ std::map> follow_sets(const ParseItem &item, const PreparedGrammar &grammar); + } } diff --git a/src/compiler/build_tables/item_set_closure.cpp b/src/compiler/build_tables/item_set_closure.cpp index de417ac8..5fc39333 100644 --- a/src/compiler/build_tables/item_set_closure.cpp +++ b/src/compiler/build_tables/item_set_closure.cpp @@ -17,7 +17,7 @@ namespace tree_sitter { static void add_item(ParseItemSet &item_set, const ParseItem &item, const PreparedGrammar &grammar) { if (!contains(item_set, item)) { item_set.insert(item); - for (auto pair : follow_sets(item, grammar)) { + for (auto &pair : follow_sets(item, grammar)) { Symbol non_terminal = pair.first; set terminals = pair.second; for (auto &terminal : terminals) { diff --git a/src/compiler/build_tables/merge_transitions.h b/src/compiler/build_tables/merge_transitions.h index 0d6c3e61..26d8bdf8 100644 --- a/src/compiler/build_tables/merge_transitions.h +++ b/src/compiler/build_tables/merge_transitions.h @@ -7,6 +7,13 @@ namespace tree_sitter { namespace build_tables { + + /* + * Merges two transition maps with symbol keys. If both maps + * contain values for the same symbol, the new value for that + * symbol will be computed by merging the two previous values + * using the given function. + */ template std::map merge_sym_transitions(const std::map &left, @@ -30,6 +37,12 @@ namespace tree_sitter { return result; } + /* + * Merges two transition maps with character set keys. If the + * two maps contain values for overlapping character sets, the + * new value for the two sets' intersection will be computed by + * merging the two previous values using the given function. 
+ */ template std::map merge_char_transitions(const std::map &left, diff --git a/tree_sitter.xcodeproj/project.pbxproj b/tree_sitter.xcodeproj/project.pbxproj index 6899a455..adcf7e25 100644 --- a/tree_sitter.xcodeproj/project.pbxproj +++ b/tree_sitter.xcodeproj/project.pbxproj @@ -14,6 +14,7 @@ 12130614182C3A1700FCF928 /* seq.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130612182C3A1700FCF928 /* seq.cpp */; }; 12130617182C3D2900FCF928 /* string.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130615182C3D2900FCF928 /* string.cpp */; }; 1214930E181E200B008E9BDA /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492E9181E200B008E9BDA /* main.cpp */; }; + 122587B118BDD79600A68B84 /* follow_sets_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 122587B018BDD79600A68B84 /* follow_sets_spec.cpp */; }; 1225CC6418765693000D4723 /* prepare_grammar_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */; }; 1236A7C518B287DC00593ABB /* character_range.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1236A7C318B287DC00593ABB /* character_range.cpp */; }; 1236A7D218B554C800593ABB /* prepared_grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1236A7D018B554C800593ABB /* prepared_grammar.cpp */; }; @@ -95,6 +96,7 @@ 121492E9181E200B008E9BDA /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = main.cpp; path = spec/main.cpp; sourceTree = SOURCE_ROOT; }; 121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/compiler/rules/rules_spec.cpp; sourceTree = SOURCE_ROOT; }; 121D8B3018795CC0003CF44B /* parser.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = parser.h; sourceTree = ""; }; + 122587B018BDD79600A68B84 /* follow_sets_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.cpp.cpp; path = follow_sets_spec.cpp; sourceTree = ""; }; 1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = prepare_grammar_spec.cpp; sourceTree = ""; }; 1236A7C318B287DC00593ABB /* character_range.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = character_range.cpp; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; 1236A7C918B2A79F00593ABB /* rule.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rule.h; sourceTree = ""; }; @@ -246,6 +248,7 @@ isa = PBXGroup; children = ( 12BC470618830BC5005AC502 /* first_set_spec.cpp */, + 122587B018BDD79600A68B84 /* follow_sets_spec.cpp */, 12AB4660188CB3A300DE79DF /* item_set_closure_spec.cpp */, 12EDCFB7188205BA005A7A07 /* perform_spec.cpp */, 127528B418AACB70006B682B /* rule_can_be_blank_spec.cpp */, @@ -511,6 +514,7 @@ 12EDCFC018820880005A7A07 /* item_set_closure.cpp in Sources */, 12E75AA218930931001B8F10 /* expand_repeats.cpp in Sources */, 12EDCFBD188205BF005A7A07 /* perform_spec.cpp in Sources */, + 122587B118BDD79600A68B84 /* follow_sets_spec.cpp in Sources */, 12EDCFC61882153D005A7A07 /* first_set.cpp in Sources */, 12130611182C3A1100FCF928 /* blank.cpp in Sources */, 12AB465F188BD03E00DE79DF /* follow_sets.cpp in Sources */,