Backfill unit tests for follow sets
Also, add some comment docs for some compiler functions
This commit is contained in:
parent
df223d566e
commit
df05c75525
7 changed files with 92 additions and 4 deletions
51
spec/compiler/build_tables/follow_sets_spec.cpp
Normal file
51
spec/compiler/build_tables/follow_sets_spec.cpp
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
#include "spec_helper.h"
|
||||
#include "prepared_grammar.h"
|
||||
#include "build_tables/follow_sets.h"
|
||||
|
||||
using std::set;
|
||||
using namespace build_tables;
|
||||
using namespace rules;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("computing FOLLOW sets", []() {
|
||||
const PreparedGrammar grammar("", {
|
||||
{ "A", sym("a") },
|
||||
{ "B", sym("b") },
|
||||
}, {});
|
||||
|
||||
it("all of the starting non-terminals for the item, and their following terminals", [&]() {
|
||||
ParseItem item(Symbol("C"), choice({
|
||||
seq({ sym("A"), choice({ sym("x"), sym("y") }) }),
|
||||
seq({ sym("B"), sym("z") }),
|
||||
}), {}, Symbol("w"));
|
||||
|
||||
AssertThat(follow_sets(item, grammar), Equals(map<Symbol, set<Symbol>>({
|
||||
{ Symbol("A"), set<Symbol>({ Symbol("x"), Symbol("y") }) },
|
||||
{ Symbol("B"), set<Symbol>({ Symbol("z") }) },
|
||||
})));
|
||||
});
|
||||
|
||||
it("does not include terminals at the beginning of the item", [&]() {
|
||||
ParseItem item(Symbol("C"), choice({
|
||||
seq({ sym("A"), choice({ sym("x"), sym("y") }) }),
|
||||
seq({ sym("x"), sym("y") }),
|
||||
}), {}, Symbol("w"));
|
||||
|
||||
AssertThat(follow_sets(item, grammar), Equals(map<Symbol, set<Symbol>>({
|
||||
{ Symbol("A"), set<Symbol>({ Symbol("x"), Symbol("y") }) },
|
||||
})));
|
||||
});
|
||||
|
||||
it("includes the item's lookahead terminal if the rule after the non-terminal might be blank", [&]() {
|
||||
ParseItem item(Symbol("C"), choice({
|
||||
seq({ sym("A"), choice({ sym("x"), blank() }) }),
|
||||
}), {}, Symbol("w"));
|
||||
|
||||
AssertThat(follow_sets(item, grammar), Equals(map<Symbol, set<Symbol>>({
|
||||
{ Symbol("A"), set<Symbol>({ Symbol("x"), Symbol("w") }) },
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -8,7 +8,19 @@ namespace tree_sitter {
|
|||
class PreparedGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
/*
|
||||
* Returns the set of terminal symbols that can appear at
|
||||
* the beginning of a string derivable from a given rule,
|
||||
* in a given grammar.
|
||||
*/
|
||||
std::set<rules::Symbol> first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
|
||||
|
||||
/*
|
||||
* Returns the set of terminal symbols that can appear at
|
||||
* the beginning of any item in the given set.
|
||||
*/
|
||||
// std::set<rules::Symbol> first_set(const ParseItemSet &item_set, const PreparedGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -18,10 +18,10 @@ namespace tree_sitter {
|
|||
Symbol symbol = pair.first;
|
||||
rule_ptr next_rule = pair.second;
|
||||
if (grammar.has_definition(symbol)) {
|
||||
set<Symbol> following_non_terminals = first_set(next_rule, grammar);
|
||||
set<Symbol> following_terminals = first_set(next_rule, grammar);
|
||||
if (rule_can_be_blank(next_rule, grammar))
|
||||
following_non_terminals.insert(item.lookahead_sym);
|
||||
result.insert({ symbol, following_non_terminals });
|
||||
following_terminals.insert(item.lookahead_sym);
|
||||
result.insert({ symbol, following_terminals });
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,15 @@ namespace tree_sitter {
|
|||
class PreparedGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
/*
|
||||
* Returns a map of non-terminal symbols to sets of terminal symbols.
|
||||
* The keys are the non-terminals which may appear first in the given
|
||||
* item. The values are the sets of terminals which can appear immediately
|
||||
* after the corresponding non-terminals.
|
||||
*/
|
||||
std::map<rules::Symbol, std::set<rules::Symbol>> follow_sets(const ParseItem &item, const PreparedGrammar &grammar);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ namespace tree_sitter {
|
|||
static void add_item(ParseItemSet &item_set, const ParseItem &item, const PreparedGrammar &grammar) {
|
||||
if (!contains(item_set, item)) {
|
||||
item_set.insert(item);
|
||||
for (auto pair : follow_sets(item, grammar)) {
|
||||
for (auto &pair : follow_sets(item, grammar)) {
|
||||
Symbol non_terminal = pair.first;
|
||||
set<Symbol> terminals = pair.second;
|
||||
for (auto &terminal : terminals) {
|
||||
|
|
|
|||
|
|
@ -7,6 +7,13 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
/*
|
||||
* Merges two transition maps with symbol keys. If both maps
|
||||
* contain values for the same symbol, the new value for that
|
||||
* symbol will be computed by merging the two previous values
|
||||
* using the given function.
|
||||
*/
|
||||
template<typename T>
|
||||
std::map<rules::Symbol, T>
|
||||
merge_sym_transitions(const std::map<rules::Symbol, T> &left,
|
||||
|
|
@ -30,6 +37,12 @@ namespace tree_sitter {
|
|||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Merges two transition maps with character set keys. If the
|
||||
* two maps contain values for overlapping character sets, the
|
||||
* new value for the two sets' intersection will be computed by
|
||||
* merging the two previous values using the given function.
|
||||
*/
|
||||
template<typename T>
|
||||
std::map<rules::CharacterSet, T>
|
||||
merge_char_transitions(const std::map<rules::CharacterSet, T> &left,
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
12130614182C3A1700FCF928 /* seq.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130612182C3A1700FCF928 /* seq.cpp */; };
|
||||
12130617182C3D2900FCF928 /* string.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130615182C3D2900FCF928 /* string.cpp */; };
|
||||
1214930E181E200B008E9BDA /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492E9181E200B008E9BDA /* main.cpp */; };
|
||||
122587B118BDD79600A68B84 /* follow_sets_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 122587B018BDD79600A68B84 /* follow_sets_spec.cpp */; };
|
||||
1225CC6418765693000D4723 /* prepare_grammar_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */; };
|
||||
1236A7C518B287DC00593ABB /* character_range.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1236A7C318B287DC00593ABB /* character_range.cpp */; };
|
||||
1236A7D218B554C800593ABB /* prepared_grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1236A7D018B554C800593ABB /* prepared_grammar.cpp */; };
|
||||
|
|
@ -95,6 +96,7 @@
|
|||
121492E9181E200B008E9BDA /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = main.cpp; path = spec/main.cpp; sourceTree = SOURCE_ROOT; };
|
||||
121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/compiler/rules/rules_spec.cpp; sourceTree = SOURCE_ROOT; };
|
||||
121D8B3018795CC0003CF44B /* parser.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = parser.h; sourceTree = "<group>"; };
|
||||
122587B018BDD79600A68B84 /* follow_sets_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = follow_sets_spec.cpp; sourceTree = "<group>"; };
|
||||
1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = prepare_grammar_spec.cpp; sourceTree = "<group>"; };
|
||||
1236A7C318B287DC00593ABB /* character_range.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = character_range.cpp; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.cpp; };
|
||||
1236A7C918B2A79F00593ABB /* rule.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rule.h; sourceTree = "<group>"; };
|
||||
|
|
@ -246,6 +248,7 @@
|
|||
isa = PBXGroup;
|
||||
children = (
|
||||
12BC470618830BC5005AC502 /* first_set_spec.cpp */,
|
||||
122587B018BDD79600A68B84 /* follow_sets_spec.cpp */,
|
||||
12AB4660188CB3A300DE79DF /* item_set_closure_spec.cpp */,
|
||||
12EDCFB7188205BA005A7A07 /* perform_spec.cpp */,
|
||||
127528B418AACB70006B682B /* rule_can_be_blank_spec.cpp */,
|
||||
|
|
@ -511,6 +514,7 @@
|
|||
12EDCFC018820880005A7A07 /* item_set_closure.cpp in Sources */,
|
||||
12E75AA218930931001B8F10 /* expand_repeats.cpp in Sources */,
|
||||
12EDCFBD188205BF005A7A07 /* perform_spec.cpp in Sources */,
|
||||
122587B118BDD79600A68B84 /* follow_sets_spec.cpp in Sources */,
|
||||
12EDCFC61882153D005A7A07 /* first_set.cpp in Sources */,
|
||||
12130611182C3A1100FCF928 /* blank.cpp in Sources */,
|
||||
12AB465F188BD03E00DE79DF /* follow_sets.cpp in Sources */,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue