Backfill unit tests for follow sets

Also, add some comment docs for some compiler functions
This commit is contained in:
Max Brunsfeld 2014-02-26 00:39:31 -08:00
parent df223d566e
commit df05c75525
7 changed files with 92 additions and 4 deletions

View file

@ -0,0 +1,51 @@
#include "spec_helper.h"
#include "prepared_grammar.h"
#include "build_tables/follow_sets.h"
using std::set;
using namespace build_tables;
using namespace rules;
START_TEST
// Specs for follow_sets(item, grammar): given a parse item, it returns a map
// from each non-terminal that may appear first in the item to the set of
// terminals that can appear immediately after that non-terminal.
describe("computing FOLLOW sets", []() {
// Grammar shared by all examples. "A" and "B" are the symbols given
// definitions here; the other symbols used below (x, y, z, w) are not.
// NOTE(review): the meaning of the first ("") and last ({}) constructor
// arguments is not visible from this spec — confirm against PreparedGrammar.
const PreparedGrammar grammar("", {
{ "A", sym("a") },
{ "B", sym("b") },
}, {});
it("all of the starting non-terminals for the item, and their following terminals", [&]() {
// Item for "C" whose rule is (A (x | y)) | (B z), with lookahead symbol "w".
ParseItem item(Symbol("C"), choice({
seq({ sym("A"), choice({ sym("x"), sym("y") }) }),
seq({ sym("B"), sym("z") }),
}), {}, Symbol("w"));
// Both leading non-terminals get an entry, each mapped to what follows it
// in its own alternative. The lookahead "w" is absent from both sets.
AssertThat(follow_sets(item, grammar), Equals(map<Symbol, set<Symbol>>({
{ Symbol("A"), set<Symbol>({ Symbol("x"), Symbol("y") }) },
{ Symbol("B"), set<Symbol>({ Symbol("z") }) },
})));
});
it("does not include terminals at the beginning of the item", [&]() {
// The second alternative starts with "x", which has no definition in the
// grammar above, so it must not appear as a key in the result.
ParseItem item(Symbol("C"), choice({
seq({ sym("A"), choice({ sym("x"), sym("y") }) }),
seq({ sym("x"), sym("y") }),
}), {}, Symbol("w"));
AssertThat(follow_sets(item, grammar), Equals(map<Symbol, set<Symbol>>({
{ Symbol("A"), set<Symbol>({ Symbol("x"), Symbol("y") }) },
})));
});
it("includes the item's lookahead terminal if the rule after the non-terminal might be blank", [&]() {
// What follows "A" is (x | blank), so the remainder may be empty; in that
// case the item's lookahead symbol "w" is expected in A's set alongside "x".
ParseItem item(Symbol("C"), choice({
seq({ sym("A"), choice({ sym("x"), blank() }) }),
}), {}, Symbol("w"));
AssertThat(follow_sets(item, grammar), Equals(map<Symbol, set<Symbol>>({
{ Symbol("A"), set<Symbol>({ Symbol("x"), Symbol("w") }) },
})));
});
});
END_TEST

View file

@ -8,7 +8,19 @@ namespace tree_sitter {
class PreparedGrammar;
namespace build_tables {
/*
* Returns the set of terminal symbols that can appear at
* the beginning of a string derivable from a given rule,
* in a given grammar.
*/
std::set<rules::Symbol> first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
/*
* Returns the set of terminal symbols that can appear at
* the beginning of any item in the given set.
*/
// std::set<rules::Symbol> first_set(const ParseItemSet &item_set, const PreparedGrammar &grammar);
}
}

View file

@ -18,10 +18,10 @@ namespace tree_sitter {
Symbol symbol = pair.first;
rule_ptr next_rule = pair.second;
if (grammar.has_definition(symbol)) {
set<Symbol> following_non_terminals = first_set(next_rule, grammar);
set<Symbol> following_terminals = first_set(next_rule, grammar);
if (rule_can_be_blank(next_rule, grammar))
following_non_terminals.insert(item.lookahead_sym);
result.insert({ symbol, following_non_terminals });
following_terminals.insert(item.lookahead_sym);
result.insert({ symbol, following_terminals });
}
}

View file

@ -10,7 +10,15 @@ namespace tree_sitter {
class PreparedGrammar;
namespace build_tables {
/*
* Returns a map of non-terminal symbols to sets of terminal symbols.
* The keys are the non-terminals which may appear first in the given
* item. The values are the sets of terminals which can appear immediately
* after the corresponding non-terminals.
*/
std::map<rules::Symbol, std::set<rules::Symbol>> follow_sets(const ParseItem &item, const PreparedGrammar &grammar);
}
}

View file

@ -17,7 +17,7 @@ namespace tree_sitter {
static void add_item(ParseItemSet &item_set, const ParseItem &item, const PreparedGrammar &grammar) {
if (!contains(item_set, item)) {
item_set.insert(item);
for (auto pair : follow_sets(item, grammar)) {
for (auto &pair : follow_sets(item, grammar)) {
Symbol non_terminal = pair.first;
set<Symbol> terminals = pair.second;
for (auto &terminal : terminals) {

View file

@ -7,6 +7,13 @@
namespace tree_sitter {
namespace build_tables {
/*
* Merges two transition maps with symbol keys. If both maps
* contain values for the same symbol, the new value for that
* symbol will be computed by merging the two previous values
* using the given function.
*/
template<typename T>
std::map<rules::Symbol, T>
merge_sym_transitions(const std::map<rules::Symbol, T> &left,
@ -30,6 +37,12 @@ namespace tree_sitter {
return result;
}
/*
* Merges two transition maps with character set keys. If the
* two maps contain values for overlapping character sets, the
* new value for the two sets' intersection will be computed by
* merging the two previous values using the given function.
*/
template<typename T>
std::map<rules::CharacterSet, T>
merge_char_transitions(const std::map<rules::CharacterSet, T> &left,

View file

@ -14,6 +14,7 @@
12130614182C3A1700FCF928 /* seq.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130612182C3A1700FCF928 /* seq.cpp */; };
12130617182C3D2900FCF928 /* string.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130615182C3D2900FCF928 /* string.cpp */; };
1214930E181E200B008E9BDA /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492E9181E200B008E9BDA /* main.cpp */; };
122587B118BDD79600A68B84 /* follow_sets_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 122587B018BDD79600A68B84 /* follow_sets_spec.cpp */; };
1225CC6418765693000D4723 /* prepare_grammar_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */; };
1236A7C518B287DC00593ABB /* character_range.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1236A7C318B287DC00593ABB /* character_range.cpp */; };
1236A7D218B554C800593ABB /* prepared_grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1236A7D018B554C800593ABB /* prepared_grammar.cpp */; };
@ -95,6 +96,7 @@
121492E9181E200B008E9BDA /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = main.cpp; path = spec/main.cpp; sourceTree = SOURCE_ROOT; };
121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/compiler/rules/rules_spec.cpp; sourceTree = SOURCE_ROOT; };
121D8B3018795CC0003CF44B /* parser.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = parser.h; sourceTree = "<group>"; };
122587B018BDD79600A68B84 /* follow_sets_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = follow_sets_spec.cpp; sourceTree = "<group>"; };
1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = prepare_grammar_spec.cpp; sourceTree = "<group>"; };
1236A7C318B287DC00593ABB /* character_range.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = character_range.cpp; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.cpp; };
1236A7C918B2A79F00593ABB /* rule.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rule.h; sourceTree = "<group>"; };
@ -246,6 +248,7 @@
isa = PBXGroup;
children = (
12BC470618830BC5005AC502 /* first_set_spec.cpp */,
122587B018BDD79600A68B84 /* follow_sets_spec.cpp */,
12AB4660188CB3A300DE79DF /* item_set_closure_spec.cpp */,
12EDCFB7188205BA005A7A07 /* perform_spec.cpp */,
127528B418AACB70006B682B /* rule_can_be_blank_spec.cpp */,
@ -511,6 +514,7 @@
12EDCFC018820880005A7A07 /* item_set_closure.cpp in Sources */,
12E75AA218930931001B8F10 /* expand_repeats.cpp in Sources */,
12EDCFBD188205BF005A7A07 /* perform_spec.cpp in Sources */,
122587B118BDD79600A68B84 /* follow_sets_spec.cpp in Sources */,
12EDCFC61882153D005A7A07 /* first_set.cpp in Sources */,
12130611182C3A1100FCF928 /* blank.cpp in Sources */,
12AB465F188BD03E00DE79DF /* follow_sets.cpp in Sources */,