Compute FIRST sets correctly
This commit is contained in:
parent
4cacdcba70
commit
29f73afbc5
5 changed files with 108 additions and 9 deletions
38
spec/compiler/build_tables/next_symbols_spec.cpp
Normal file
38
spec/compiler/build_tables/next_symbols_spec.cpp
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
#include "spec_helper.h"
|
||||
#include "build_tables/next_symbols.h"
|
||||
#include "grammar.h"
|
||||
#include "rules.h"
|
||||
|
||||
using std::set;
|
||||
using namespace build_tables;
|
||||
using namespace rules;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("computing FIRST sets", []() {
|
||||
Grammar grammar({
|
||||
{ "A", choice({
|
||||
seq({
|
||||
sym("B"),
|
||||
sym("x"),
|
||||
sym("B") }),
|
||||
sym("B") }) },
|
||||
{ "B", choice({
|
||||
seq({
|
||||
sym("y"),
|
||||
sym("z"),
|
||||
sym("y") }),
|
||||
sym("y") }) },
|
||||
});
|
||||
|
||||
describe("for a rule", [&]() {
|
||||
it("searches the tree for terminals", [&]() {
|
||||
auto terminals = next_terminals(grammar.rules.find("A")->second, grammar);
|
||||
AssertThat(terminals, Equals(set<Symbol>({
|
||||
Symbol("y")
|
||||
})));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -4,6 +4,7 @@
|
|||
#include "bandit/bandit.h"
|
||||
#include <iostream>
|
||||
#include <unordered_set>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include "grammar.h"
|
||||
|
||||
|
|
@ -26,7 +27,19 @@ namespace std {
|
|||
}
|
||||
return stream << ">";
|
||||
}
|
||||
|
||||
|
||||
// Writes a std::set to `stream` in the form "#<set: a, b, c>".
//
// Elements are printed with their own operator<< in the set's iteration
// order and separated by ", ". An empty set prints as "#<set: >".
// Returns `stream` so calls can be chained.
template<typename T>
inline ostream& operator<<(ostream &stream, const set<T> &items) {
    stream << string("#<set: ");
    bool started = false;
    // Iterate by const reference: printing must not copy each element.
    for (const auto &item : items) {
        if (started) stream << string(", ");
        stream << item;
        started = true;
    }
    return stream << ">";
}
|
||||
|
||||
template<typename TKey, typename TValue>
|
||||
inline ostream& operator<<(ostream &stream, const unordered_map<TKey, TValue> &map) {
|
||||
stream << string("#<map: ");
|
||||
|
|
|
|||
|
|
@ -6,28 +6,71 @@
|
|||
using std::set;
|
||||
using std::vector;
|
||||
using std::dynamic_pointer_cast;
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
|
||||
namespace build_tables {
|
||||
template<bool isNonTerminal>
|
||||
set<rules::Symbol> next_symbols(const Item &item, const Grammar &grammar) {
|
||||
class FirstSetVisitor : Visitor {
|
||||
set<Symbol> value;
|
||||
const Grammar grammar;
|
||||
|
||||
FirstSetVisitor(const Grammar &grammar) : grammar(grammar) {}
|
||||
|
||||
set<Symbol> set_union(const set<Symbol> &left, const set<Symbol> &right) {
|
||||
set<Symbol> result = left;
|
||||
result.insert(right.begin(), right.end());
|
||||
return result;
|
||||
}
|
||||
|
||||
void visit(const Symbol *rule) {
|
||||
if (grammar.has_definition(*rule)) {
|
||||
value = apply(grammar.rule(rule->name), grammar);
|
||||
} else {
|
||||
value = set<Symbol>({ *rule });
|
||||
}
|
||||
}
|
||||
|
||||
void visit(const Choice *rule) {
|
||||
value = set_union(apply(rule->left, grammar), apply(rule->right, grammar));
|
||||
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
value = apply(rule->left, grammar);
|
||||
}
|
||||
|
||||
public:
|
||||
static set<Symbol> apply(const rule_ptr rule, const Grammar &grammar) {
|
||||
FirstSetVisitor visitor(grammar);
|
||||
rule->accept(visitor);
|
||||
return visitor.value;
|
||||
}
|
||||
};
|
||||
|
||||
template<bool isTerminal>
|
||||
set<rules::Symbol> next_symbols(const rules::rule_ptr &rule, const Grammar &grammar) {
|
||||
set<rules::Symbol> result;
|
||||
for (auto pair : rule_transitions(item.rule)) {
|
||||
for (auto pair : rule_transitions(rule)) {
|
||||
auto symbol = dynamic_pointer_cast<const rules::Symbol>(pair.first);
|
||||
if (symbol && (grammar.has_definition(*symbol) == isNonTerminal))
|
||||
if (symbol && (grammar.has_definition(*symbol) == !isTerminal))
|
||||
result.insert(*symbol);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns the result of FirstSetVisitor::apply for `rule` in `grammar`.
set<rules::Symbol> next_terminals(const rules::rule_ptr &rule, const Grammar &grammar) {
    set<rules::Symbol> first_set = FirstSetVisitor::apply(rule, grammar);
    return first_set;
}
|
||||
|
||||
// Returns the symbols among the transitions of `rule` that have a
// definition in `grammar` (next_symbols with isTerminal == false).
set<rules::Symbol> next_non_terminals(const rules::rule_ptr &rule, const Grammar &grammar) {
    set<rules::Symbol> defined_symbols = next_symbols<false>(rule, grammar);
    return defined_symbols;
}
|
||||
|
||||
// Returns the terminals for an item by delegating to the rule_ptr overload
// of next_terminals with the item's rule.
set<rules::Symbol> next_terminals(const Item &item, const Grammar &grammar) {
    return next_terminals(item.rule, grammar);
}
|
||||
|
||||
// Returns the non-terminals for an item by delegating to the rule_ptr
// overload of next_non_terminals with the item's rule.
set<rules::Symbol> next_non_terminals(const Item &item, const Grammar &grammar) {
    return next_non_terminals(item.rule, grammar);
}
|
||||
|
||||
set<rules::Symbol> next_terminals(const ItemSet &item_set, const Grammar &grammar) {
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ namespace tree_sitter {
|
|||
class Grammar;
|
||||
|
||||
namespace build_tables {
|
||||
std::set<rules::Symbol> next_terminals(const rules::rule_ptr &rule, const Grammar &grammar);
|
||||
std::set<rules::Symbol> next_terminals(const ItemSet &item_set, const Grammar &grammar);
|
||||
std::set<rules::Symbol> next_terminals(const Item &item, const Grammar &grammar);
|
||||
std::set<rules::Symbol> next_non_terminals(const Item &item, const Grammar &grammar);
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
1251209B1830145300C9B56A /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; };
|
||||
125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; };
|
||||
12BC470518822B27005AC502 /* parse_config.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12BC470318822A17005AC502 /* parse_config.cpp */; };
|
||||
12BC470718830BC5005AC502 /* next_symbols_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12BC470618830BC5005AC502 /* next_symbols_spec.cpp */; };
|
||||
12D136A4183678A2005F3369 /* repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A2183678A2005F3369 /* repeat.cpp */; };
|
||||
12EDCF8A187B498C005A7A07 /* tree_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCF89187B498C005A7A07 /* tree_spec.cpp */; };
|
||||
12EDCF8D187C6282005A7A07 /* document.c in Sources */ = {isa = PBXBuildFile; fileRef = 12EDCF8C187C6282005A7A07 /* document.c */; };
|
||||
|
|
@ -95,6 +96,7 @@
|
|||
125120A218307FFD00C9B56A /* arithmetic.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = arithmetic.h; path = spec/fixtures/grammars/arithmetic.h; sourceTree = SOURCE_ROOT; };
|
||||
125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arithmetic.cpp; path = spec/fixtures/grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; };
|
||||
12BC470318822A17005AC502 /* parse_config.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_config.cpp; sourceTree = "<group>"; };
|
||||
12BC470618830BC5005AC502 /* next_symbols_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = next_symbols_spec.cpp; sourceTree = "<group>"; };
|
||||
12C344421822F27700B07BE3 /* transition_map.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = transition_map.h; path = ../build_tables/transition_map.h; sourceTree = "<group>"; };
|
||||
12D1369E18342088005F3369 /* todo.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = todo.md; sourceTree = "<group>"; };
|
||||
12D136A0183570F5005F3369 /* pattern_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pattern_spec.cpp; path = spec/compiler/rules/pattern_spec.cpp; sourceTree = SOURCE_ROOT; };
|
||||
|
|
@ -229,6 +231,7 @@
|
|||
children = (
|
||||
12EDCFB6188205BA005A7A07 /* rule_transitions_spec.cpp */,
|
||||
12EDCFB7188205BA005A7A07 /* perform_spec.cpp */,
|
||||
12BC470618830BC5005AC502 /* next_symbols_spec.cpp */,
|
||||
);
|
||||
name = build_tables;
|
||||
path = compiler/build_tables;
|
||||
|
|
@ -497,6 +500,7 @@
|
|||
12EDCFBC188205BF005A7A07 /* rule_transitions_spec.cpp in Sources */,
|
||||
12130605182C348F00FCF928 /* character.cpp in Sources */,
|
||||
12EDCFB418820519005A7A07 /* compile.cpp in Sources */,
|
||||
12BC470718830BC5005AC502 /* next_symbols_spec.cpp in Sources */,
|
||||
1213060B182C389100FCF928 /* symbol.cpp in Sources */,
|
||||
12EDCFB118820395005A7A07 /* item_set.cpp in Sources */,
|
||||
1251209B1830145300C9B56A /* rule.cpp in Sources */,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue