Compute FIRST sets correctly

This commit is contained in:
Max Brunsfeld 2014-01-13 12:57:48 -08:00
parent 4cacdcba70
commit 29f73afbc5
5 changed files with 108 additions and 9 deletions

View file

@ -0,0 +1,38 @@
#include "spec_helper.h"
#include "build_tables/next_symbols.h"
#include "grammar.h"
#include "rules.h"
using std::set;
using namespace build_tables;
using namespace rules;
START_TEST
describe("computing FIRST sets", []() {
    // Fixture grammar, in production notation:
    //   A -> B x B | B
    //   B -> y z y | y
    // Only "A" and "B" are given definitions; "x", "y" and "z" are not.
    Grammar grammar({
        { "A", choice({
            seq({ sym("B"), sym("x"), sym("B") }),
            sym("B") }) },
        { "B", choice({
            seq({ sym("y"), sym("z"), sym("y") }),
            sym("y") }) },
    });

    describe("for a rule", [&]() {
        it("searches the tree for terminals", [&]() {
            // Every alternative of A begins with B, and every alternative
            // of B begins with "y", so "y" is the only symbol expected.
            const set<Symbol> expected({ Symbol("y") });
            const auto &rule_a = grammar.rules.find("A")->second;

            AssertThat(next_terminals(rule_a, grammar), Equals(expected));
        });
    });
});
END_TEST

View file

@ -4,6 +4,7 @@
#include "bandit/bandit.h"
#include <iostream>
#include <unordered_set>
#include <set>
#include <unordered_map>
#include "grammar.h"
@ -26,7 +27,19 @@ namespace std {
}
return stream << ">";
}
// Streams a std::set as "#<set: a, b, c>", with the elements in the set's
// iteration order separated by ", ". An empty set prints as "#<set: >".
// Each element is written with its own operator<<.
template<typename T>
inline ostream& operator<<(ostream &stream, const set<T> &set) {
    stream << string("#<set: ");
    bool started = false;
    // Iterate by const reference: printing must not copy the elements
    // (and must not require T to be copyable).
    for (const auto &item : set) {
        if (started) stream << string(", ");
        stream << item;
        started = true;
    }
    return stream << ">";
}
template<typename TKey, typename TValue>
inline ostream& operator<<(ostream &stream, const unordered_map<TKey, TValue> &map) {
stream << string("#<map: ");

View file

@ -6,28 +6,71 @@
using std::set;
using std::vector;
using std::dynamic_pointer_cast;
using namespace tree_sitter::rules;
namespace tree_sitter {
class Grammar;
namespace build_tables {
template<bool isNonTerminal>
set<rules::Symbol> next_symbols(const Item &item, const Grammar &grammar) {
class FirstSetVisitor : Visitor {
set<Symbol> value;
const Grammar grammar;
FirstSetVisitor(const Grammar &grammar) : grammar(grammar) {}
set<Symbol> set_union(const set<Symbol> &left, const set<Symbol> &right) {
set<Symbol> result = left;
result.insert(right.begin(), right.end());
return result;
}
void visit(const Symbol *rule) {
if (grammar.has_definition(*rule)) {
value = apply(grammar.rule(rule->name), grammar);
} else {
value = set<Symbol>({ *rule });
}
}
void visit(const Choice *rule) {
value = set_union(apply(rule->left, grammar), apply(rule->right, grammar));
}
void visit(const Seq *rule) {
value = apply(rule->left, grammar);
}
public:
static set<Symbol> apply(const rule_ptr rule, const Grammar &grammar) {
FirstSetVisitor visitor(grammar);
rule->accept(visitor);
return visitor.value;
}
};
// Collects the symbols that label the transitions out of `rule`, filtered by
// kind: with isTerminal == true only symbols that have no definition in
// `grammar` are kept, with isTerminal == false only symbols that do.
// Transitions whose key is not a Symbol are ignored.
template<bool isTerminal>
set<rules::Symbol> next_symbols(const rules::rule_ptr &rule, const Grammar &grammar) {
    set<rules::Symbol> result;
    for (const auto &pair : rule_transitions(rule)) {
        // The cast yields null for transition keys that are not Symbols.
        auto symbol = dynamic_pointer_cast<const rules::Symbol>(pair.first);
        if (symbol && (grammar.has_definition(*symbol) == !isTerminal))
            result.insert(*symbol);
    }
    return result;
}
// Returns the FIRST set of `rule` as computed by FirstSetVisitor, resolving
// defined symbols through `grammar`.
set<rules::Symbol> next_terminals(const rules::rule_ptr &rule, const Grammar &grammar) {
    set<rules::Symbol> first_set = FirstSetVisitor::apply(rule, grammar);
    return first_set;
}
// Returns the symbols on the transitions out of `rule` that have a
// definition in `grammar`.
set<rules::Symbol> next_non_terminals(const rules::rule_ptr &rule, const Grammar &grammar) {
    constexpr bool want_terminals = false;
    return next_symbols<want_terminals>(rule, grammar);
}
// Item overload: forwards to the rule-based next_terminals using the item's
// rule.
set<rules::Symbol> next_terminals(const Item &item, const Grammar &grammar) {
    return next_terminals(item.rule, grammar);
}
// Item overload: forwards to the rule-based next_non_terminals using the
// item's rule.
set<rules::Symbol> next_non_terminals(const Item &item, const Grammar &grammar) {
    return next_non_terminals(item.rule, grammar);
}
set<rules::Symbol> next_terminals(const ItemSet &item_set, const Grammar &grammar) {

View file

@ -9,6 +9,7 @@ namespace tree_sitter {
class Grammar;
namespace build_tables {
std::set<rules::Symbol> next_terminals(const rules::rule_ptr &rule, const Grammar &grammar);
std::set<rules::Symbol> next_terminals(const ItemSet &item_set, const Grammar &grammar);
std::set<rules::Symbol> next_terminals(const Item &item, const Grammar &grammar);
std::set<rules::Symbol> next_non_terminals(const Item &item, const Grammar &grammar);

View file

@ -18,6 +18,7 @@
1251209B1830145300C9B56A /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; };
125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; };
12BC470518822B27005AC502 /* parse_config.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12BC470318822A17005AC502 /* parse_config.cpp */; };
12BC470718830BC5005AC502 /* next_symbols_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12BC470618830BC5005AC502 /* next_symbols_spec.cpp */; };
12D136A4183678A2005F3369 /* repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A2183678A2005F3369 /* repeat.cpp */; };
12EDCF8A187B498C005A7A07 /* tree_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCF89187B498C005A7A07 /* tree_spec.cpp */; };
12EDCF8D187C6282005A7A07 /* document.c in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCF8C187C6282005A7A07 /* document.c */; };
@ -95,6 +96,7 @@
125120A218307FFD00C9B56A /* arithmetic.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = arithmetic.h; path = spec/fixtures/grammars/arithmetic.h; sourceTree = SOURCE_ROOT; };
125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arithmetic.cpp; path = spec/fixtures/grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; };
12BC470318822A17005AC502 /* parse_config.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_config.cpp; sourceTree = "<group>"; };
12BC470618830BC5005AC502 /* next_symbols_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = next_symbols_spec.cpp; sourceTree = "<group>"; };
12C344421822F27700B07BE3 /* transition_map.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = transition_map.h; path = ../build_tables/transition_map.h; sourceTree = "<group>"; };
12D1369E18342088005F3369 /* todo.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = todo.md; sourceTree = "<group>"; };
12D136A0183570F5005F3369 /* pattern_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pattern_spec.cpp; path = spec/compiler/rules/pattern_spec.cpp; sourceTree = SOURCE_ROOT; };
@ -229,6 +231,7 @@
children = (
12EDCFB6188205BA005A7A07 /* rule_transitions_spec.cpp */,
12EDCFB7188205BA005A7A07 /* perform_spec.cpp */,
12BC470618830BC5005AC502 /* next_symbols_spec.cpp */,
);
name = build_tables;
path = compiler/build_tables;
@ -497,6 +500,7 @@
12EDCFBC188205BF005A7A07 /* rule_transitions_spec.cpp in Sources */,
12130605182C348F00FCF928 /* character.cpp in Sources */,
12EDCFB418820519005A7A07 /* compile.cpp in Sources */,
12BC470718830BC5005AC502 /* next_symbols_spec.cpp in Sources */,
1213060B182C389100FCF928 /* symbol.cpp in Sources */,
12EDCFB118820395005A7A07 /* item_set.cpp in Sources */,
1251209B1830145300C9B56A /* rule.cpp in Sources */,