Make separate PreparedGrammar subclass of Grammar for internal use
This commit is contained in:
parent
1d56578a81
commit
e87380a8b7
28 changed files with 214 additions and 145 deletions
|
|
@ -53,18 +53,11 @@ namespace tree_sitter {
|
|||
|
||||
namespace tree_sitter {
|
||||
class Grammar {
|
||||
typedef std::map<const std::string, const rules::rule_ptr> rule_map;
|
||||
public:
|
||||
Grammar(std::string start_rule_name, const rule_map &rules);
|
||||
Grammar(std::string start_rule_name, const rule_map &rules, const rule_map &aux_rules);
|
||||
|
||||
Grammar(std::string start_rule_name, const std::map<const std::string, const rules::rule_ptr> &rules);
|
||||
bool operator==(const Grammar &other) const;
|
||||
bool has_definition(const rules::Symbol &symbol) const;
|
||||
const rules::rule_ptr rule(const rules::Symbol &symbol) const;
|
||||
|
||||
const std::string start_rule_name;
|
||||
const rule_map rules;
|
||||
const rule_map aux_rules;
|
||||
const std::map<const std::string, const rules::rule_ptr> rules;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const Grammar &grammar);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "spec_helper.h"
|
||||
#include "prepared_grammar.h"
|
||||
#include "build_tables/first_set.h"
|
||||
|
||||
using std::set;
|
||||
|
|
@ -8,7 +9,7 @@ using namespace rules;
|
|||
START_TEST
|
||||
|
||||
describe("computing FIRST sets", []() {
|
||||
const Grammar null_grammar("", {{ "something", blank() }});
|
||||
const PreparedGrammar null_grammar("", {}, {});
|
||||
|
||||
describe("for a sequence AB", [&]() {
|
||||
it("ignores B when A cannot be blank", [&]() {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "spec_helper.h"
|
||||
#include "prepared_grammar.h"
|
||||
#include "build_tables/item_set_closure.h"
|
||||
#include "build_tables/item_set_transitions.h"
|
||||
|
||||
|
|
@ -8,7 +9,7 @@ using namespace rules;
|
|||
START_TEST
|
||||
|
||||
describe("computing closures of item sets", []() {
|
||||
Grammar grammar("E", {
|
||||
PreparedGrammar grammar("E", {
|
||||
{ "E", choice({
|
||||
seq({
|
||||
sym("T"),
|
||||
|
|
@ -24,7 +25,7 @@ describe("computing closures of item sets", []() {
|
|||
{ "F", choice({
|
||||
sym("v"),
|
||||
sym("n") }) }
|
||||
});
|
||||
}, {});
|
||||
|
||||
it("computes the item set closure", [&]() {
|
||||
ParseItemSet item_set = item_set_closure(ParseItemSet({
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "spec_helper.h"
|
||||
#include <functional>
|
||||
#include "prepared_grammar.h"
|
||||
#include "build_tables/perform.h"
|
||||
#include <functional>
|
||||
|
||||
using build_tables::perform;
|
||||
using namespace rules;
|
||||
|
|
@ -19,7 +20,7 @@ static set<Symbol> keys(const map<Symbol, parse_actions> &map) {
|
|||
START_TEST
|
||||
|
||||
describe("building parse and lex tables", []() {
|
||||
Grammar grammar("expression", {
|
||||
PreparedGrammar grammar("expression", {
|
||||
{ "expression", choice({
|
||||
seq({
|
||||
sym("term"),
|
||||
|
|
@ -34,15 +35,15 @@ describe("building parse and lex tables", []() {
|
|||
sym("expression"),
|
||||
sym("right-paren")
|
||||
}) }) }
|
||||
});
|
||||
}, {});
|
||||
|
||||
Grammar lex_grammar("", {
|
||||
PreparedGrammar lex_grammar("", {
|
||||
{ "plus", str("+") },
|
||||
{ "variable", pattern("\\w+") },
|
||||
{ "number", pattern("\\d+") },
|
||||
{ "left-paren", str("(") },
|
||||
{ "right-paren", str(")") }
|
||||
});
|
||||
}, {});
|
||||
|
||||
ParseTable table;
|
||||
LexTable lex_table;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "spec_helper.h"
|
||||
#include "prepared_grammar.h"
|
||||
#include "prepare_grammar/perform.h"
|
||||
#include "rules/symbol.h"
|
||||
|
||||
|
|
@ -10,7 +11,7 @@ using prepare_grammar::perform;
|
|||
describe("preparing a grammar", []() {
|
||||
describe("extracting tokens", []() {
|
||||
it("moves sub-rules that don't contain symbols into a separate 'lexical' grammar", [&]() {
|
||||
pair<Grammar, Grammar> result = perform(Grammar("rule1", {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = perform(Grammar("rule1", {
|
||||
{ "rule1", seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }),
|
||||
|
|
@ -22,16 +23,16 @@ describe("preparing a grammar", []() {
|
|||
character({ 'b' }) }) }) }
|
||||
}));
|
||||
|
||||
AssertThat(result.first, Equals(Grammar("rule1", {
|
||||
AssertThat(result.first, Equals(PreparedGrammar("rule1", {
|
||||
{ "rule1", seq({
|
||||
make_shared<Symbol>("token1", SymbolTypeAuxiliary),
|
||||
seq({
|
||||
sym("rule2"),
|
||||
sym("rule3") }),
|
||||
make_shared<Symbol>("token1", SymbolTypeAuxiliary) }) }
|
||||
})));
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>(), {
|
||||
AssertThat(result.second, Equals(PreparedGrammar("", {}, {
|
||||
{ "token1", rules::seq({
|
||||
rules::character({ 'a' }),
|
||||
rules::character({ 'b' }) }) },
|
||||
|
|
@ -46,52 +47,33 @@ describe("preparing a grammar", []() {
|
|||
character({ 'b' }) }) }
|
||||
}));
|
||||
|
||||
AssertThat(result.first, Equals(Grammar("rule1", {
|
||||
AssertThat(result.first, Equals(PreparedGrammar("rule1", {
|
||||
{ "rule1", sym("rule2") }
|
||||
})));
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(Grammar("", {
|
||||
AssertThat(result.second, Equals(PreparedGrammar("", {
|
||||
{ "rule2", seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }) }) },
|
||||
})));
|
||||
});
|
||||
|
||||
it("moves parts of auxiliary rules into auxiliary lexical rules", []() {
|
||||
auto result = perform(Grammar("rule1", map<const string, const rule_ptr>(), {
|
||||
{ "rule1", sym("rule2") },
|
||||
{ "rule2", seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }) }) }
|
||||
}));
|
||||
|
||||
AssertThat(result.first, Equals(Grammar("rule1", map<const string, const rule_ptr>(), {
|
||||
{ "rule1", sym("rule2") }
|
||||
})));
|
||||
|
||||
AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>(), {
|
||||
{ "rule2", seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }) }) },
|
||||
})));
|
||||
}, {})));
|
||||
});
|
||||
|
||||
it("does not extract blanks into tokens", [&]() {
|
||||
pair<Grammar, Grammar> result = perform(Grammar("rule1", {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = perform(Grammar("rule1", {
|
||||
{ "rule1", choice({ sym("rule2"), blank() }) },
|
||||
}));
|
||||
|
||||
AssertThat(result.first, Equals(Grammar("rule1", {
|
||||
AssertThat(result.first, Equals(PreparedGrammar("rule1", {
|
||||
{ "rule1", choice({ sym("rule2"), blank() }) },
|
||||
})));
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>())));
|
||||
AssertThat(result.second, Equals(PreparedGrammar("", {}, {})));
|
||||
});
|
||||
});
|
||||
|
||||
describe("expanding repeats", []() {
|
||||
it("replaces repeat rules with pairs of recursive rules", [&]() {
|
||||
Grammar result = perform(Grammar("rule1", {
|
||||
PreparedGrammar result = perform(Grammar("rule1", {
|
||||
{ "rule1", seq({
|
||||
sym("x"),
|
||||
repeat(seq({ sym("a"), sym("b") })),
|
||||
|
|
@ -99,7 +81,7 @@ describe("preparing a grammar", []() {
|
|||
}) },
|
||||
})).first;
|
||||
|
||||
AssertThat(result, Equals(Grammar("rule1", {
|
||||
AssertThat(result, Equals(PreparedGrammar("rule1", {
|
||||
{ "rule1", seq({
|
||||
sym("x"),
|
||||
make_shared<Symbol>("repeat_helper1", SymbolTypeAuxiliary),
|
||||
|
|
@ -117,7 +99,7 @@ describe("preparing a grammar", []() {
|
|||
});
|
||||
|
||||
it("does not replace repeat rules that can be moved into the lexical grammar", [&]() {
|
||||
pair<Grammar, Grammar> result = perform(Grammar("rule1", {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = perform(Grammar("rule1", {
|
||||
{ "rule1", seq({
|
||||
sym("x"),
|
||||
repeat(seq({ str("a"), str("b") })),
|
||||
|
|
@ -125,15 +107,15 @@ describe("preparing a grammar", []() {
|
|||
}) },
|
||||
}));
|
||||
|
||||
AssertThat(result.first, Equals(Grammar("rule1", {
|
||||
AssertThat(result.first, Equals(PreparedGrammar("rule1", {
|
||||
{ "rule1", seq({
|
||||
sym("x"),
|
||||
make_shared<Symbol>("token1", SymbolTypeAuxiliary),
|
||||
sym("y")
|
||||
}) },
|
||||
})));
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(Grammar("", map<const string, const rule_ptr>(), {
|
||||
AssertThat(result.second, Equals(PreparedGrammar("", {}, {
|
||||
{ "token1", repeat(seq({ str("a"), str("b") })) },
|
||||
})));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include "first_set.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "prepared_grammar.h"
|
||||
#include "rule_can_be_blank.h"
|
||||
#include "rules/visitor.h"
|
||||
#include "rules/seq.h"
|
||||
|
|
@ -12,9 +13,9 @@ namespace tree_sitter {
|
|||
namespace build_tables {
|
||||
class FirstSetVisitor : Visitor {
|
||||
set<Symbol> value;
|
||||
const Grammar grammar;
|
||||
const PreparedGrammar grammar;
|
||||
|
||||
FirstSetVisitor(const Grammar &grammar) : grammar(grammar) {}
|
||||
FirstSetVisitor(const PreparedGrammar &grammar) : grammar(grammar) {}
|
||||
|
||||
set<Symbol> set_union(const set<Symbol> &left, const set<Symbol> &right) {
|
||||
set<Symbol> result = left;
|
||||
|
|
@ -42,14 +43,14 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
public:
|
||||
static set<Symbol> apply(const rule_ptr rule, const Grammar &grammar) {
|
||||
static set<Symbol> apply(const rule_ptr rule, const PreparedGrammar &grammar) {
|
||||
FirstSetVisitor visitor(grammar);
|
||||
rule->accept(visitor);
|
||||
return visitor.value;
|
||||
}
|
||||
};
|
||||
|
||||
set<Symbol> first_set(const rule_ptr &rule, const Grammar &grammar) {
|
||||
set<Symbol> first_set(const rule_ptr &rule, const PreparedGrammar &grammar) {
|
||||
return FirstSetVisitor::apply(rule, grammar);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,10 +5,10 @@
|
|||
#include <set>
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
std::set<rules::Symbol> first_set(const rules::rule_ptr &rule, const Grammar &grammar);
|
||||
std::set<rules::Symbol> first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#include "first_set.h"
|
||||
#include "rule_transitions.h"
|
||||
#include "rule_can_be_blank.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::set;
|
||||
|
|
@ -10,10 +10,8 @@ namespace tree_sitter {
|
|||
using rules::Symbol;
|
||||
using rules::rule_ptr;
|
||||
|
||||
class Grammar;
|
||||
|
||||
namespace build_tables {
|
||||
map<Symbol, set<Symbol>> follow_sets(const ParseItem &item, const Grammar &grammar) {
|
||||
map<Symbol, set<Symbol>> follow_sets(const ParseItem &item, const PreparedGrammar &grammar) {
|
||||
map<Symbol, set<Symbol>> result;
|
||||
|
||||
for (auto &pair : sym_transitions(item.rule)) {
|
||||
|
|
|
|||
|
|
@ -7,10 +7,10 @@
|
|||
#include <map>
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
std::map<rules::Symbol, std::set<rules::Symbol>> follow_sets(const ParseItem &item, const Grammar &grammar);
|
||||
std::map<rules::Symbol, std::set<rules::Symbol>> follow_sets(const ParseItem &item, const PreparedGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
#include "./follow_sets.h"
|
||||
#include "item.h"
|
||||
#include "prepared_grammar.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -13,7 +14,7 @@ namespace tree_sitter {
|
|||
return items.size() > 0 && (std::find(items.begin(), items.end(), item) != items.end());
|
||||
}
|
||||
|
||||
static void add_item(ParseItemSet &item_set, const ParseItem &item, const Grammar &grammar) {
|
||||
static void add_item(ParseItemSet &item_set, const ParseItem &item, const PreparedGrammar &grammar) {
|
||||
if (!contains(item_set, item)) {
|
||||
item_set.insert(item);
|
||||
for (auto pair : follow_sets(item, grammar)) {
|
||||
|
|
@ -27,7 +28,7 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
const ParseItemSet item_set_closure(const ParseItemSet &item_set, const Grammar &grammar) {
|
||||
const ParseItemSet item_set_closure(const ParseItemSet &item_set, const PreparedGrammar &grammar) {
|
||||
ParseItemSet result;
|
||||
for (ParseItem item : item_set)
|
||||
add_item(result, item, grammar);
|
||||
|
|
|
|||
|
|
@ -4,10 +4,10 @@
|
|||
#include "item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
const ParseItemSet item_set_closure(const ParseItemSet &item_set, const Grammar &grammar);
|
||||
const ParseItemSet item_set_closure(const ParseItemSet &item_set, const PreparedGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ namespace tree_sitter {
|
|||
return result;
|
||||
}
|
||||
|
||||
map<Symbol, ParseItemSet> sym_transitions(const ParseItem &item, const Grammar &grammar) {
|
||||
map<Symbol, ParseItemSet> sym_transitions(const ParseItem &item, const PreparedGrammar &grammar) {
|
||||
map<Symbol, ParseItemSet> result;
|
||||
for (auto transition : sym_transitions(item.rule)) {
|
||||
Symbol rule = transition.first;
|
||||
|
|
@ -38,7 +38,7 @@ namespace tree_sitter {
|
|||
return result;
|
||||
}
|
||||
|
||||
map<CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set, const Grammar &grammar) {
|
||||
map<CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar) {
|
||||
map<CharacterSet, LexItemSet> result;
|
||||
for (const LexItem &item : item_set) {
|
||||
map<CharacterSet, LexItemSet> item_transitions = char_transitions(item);
|
||||
|
|
@ -49,7 +49,7 @@ namespace tree_sitter {
|
|||
return result;
|
||||
}
|
||||
|
||||
map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set, const Grammar &grammar) {
|
||||
map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar) {
|
||||
map<Symbol, ParseItemSet> result;
|
||||
for (const ParseItem &item : item_set) {
|
||||
map<Symbol, ParseItemSet> item_transitions = sym_transitions(item, grammar);
|
||||
|
|
|
|||
|
|
@ -5,14 +5,15 @@
|
|||
#include <map>
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
namespace rules {
|
||||
class CharacterSet;
|
||||
class Symbol;
|
||||
}
|
||||
|
||||
|
||||
namespace build_tables {
|
||||
std::map<rules::CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set, const Grammar &grammar);
|
||||
std::map<rules::Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set, const Grammar &grammar);
|
||||
std::map<rules::CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar);
|
||||
std::map<rules::Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "./perform.h"
|
||||
#include "prepared_grammar.h"
|
||||
#include "item.h"
|
||||
#include "item_set_closure.h"
|
||||
#include "item_set_transitions.h"
|
||||
|
|
@ -18,8 +19,8 @@ namespace tree_sitter {
|
|||
static Symbol END_OF_INPUT("end", rules::SymbolTypeAuxiliary);
|
||||
|
||||
class TableBuilder {
|
||||
const Grammar grammar;
|
||||
const Grammar lex_grammar;
|
||||
const PreparedGrammar grammar;
|
||||
const PreparedGrammar lex_grammar;
|
||||
map<const ParseItemSet, size_t> parse_state_indices;
|
||||
map<const LexItemSet, size_t> lex_state_indices;
|
||||
ParseTable parse_table;
|
||||
|
|
@ -126,7 +127,7 @@ namespace tree_sitter {
|
|||
|
||||
public:
|
||||
|
||||
TableBuilder(const Grammar &grammar, const Grammar &lex_grammar) :
|
||||
TableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) :
|
||||
grammar(grammar),
|
||||
lex_grammar(lex_grammar) {};
|
||||
|
||||
|
|
@ -138,7 +139,7 @@ namespace tree_sitter {
|
|||
}
|
||||
};
|
||||
|
||||
pair<ParseTable, LexTable> perform(const Grammar &grammar, const Grammar &lex_grammar) {
|
||||
pair<ParseTable, LexTable> perform(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) {
|
||||
return TableBuilder(grammar, lex_grammar).build();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,10 +5,10 @@
|
|||
#include "lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
std::pair<ParseTable, LexTable> perform(const Grammar &grammar, const Grammar &lex_grammar);
|
||||
std::pair<ParseTable, LexTable> perform(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include "rule_can_be_blank.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "prepared_grammar.h"
|
||||
#include "rules/symbol.h"
|
||||
#include "rules/visitor.h"
|
||||
#include "rules/seq.h"
|
||||
|
|
@ -41,7 +42,7 @@ namespace tree_sitter {
|
|||
return visitor.value;
|
||||
}
|
||||
|
||||
bool rule_can_be_blank(const rule_ptr &rule, const Grammar &grammar) {
|
||||
bool rule_can_be_blank(const rule_ptr &rule, const PreparedGrammar &grammar) {
|
||||
if (rule_can_be_blank(rule)) return true;
|
||||
auto symbol = std::dynamic_pointer_cast<const Symbol>(rule);
|
||||
return (symbol.get() && grammar.has_definition(*symbol) && rule_can_be_blank(grammar.rule(*symbol), grammar));
|
||||
|
|
|
|||
|
|
@ -4,9 +4,11 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule);
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule, const Grammar &grammar);
|
||||
bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include "prepare_grammar/perform.h"
|
||||
#include "build_tables/perform.h"
|
||||
#include "generate_code/c_code.h"
|
||||
#include "prepared_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
std::string compile(const Grammar &grammar, std::string name) {
|
||||
|
|
|
|||
|
|
@ -1,63 +1,31 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
#include "rules/symbol.h"
|
||||
#include "rules/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::vector;
|
||||
using std::string;
|
||||
using std::pair;
|
||||
using std::initializer_list;
|
||||
using std::map;
|
||||
using std::ostream;
|
||||
using rules::rule_ptr;
|
||||
using rules::Symbol;
|
||||
|
||||
Grammar::Grammar(std::string start_rule_name,
|
||||
const std::map<const std::string, const rule_ptr> &rules) :
|
||||
Grammar::Grammar(std::string start_rule_name, const map<const string, const rule_ptr> &rules) :
|
||||
start_rule_name(start_rule_name),
|
||||
rules(rules) {}
|
||||
|
||||
Grammar::Grammar(std::string start_rule_name,
|
||||
const map<const string, const rule_ptr> &rules,
|
||||
const map<const string, const rule_ptr> &aux_rules) :
|
||||
start_rule_name(start_rule_name),
|
||||
rules(rules),
|
||||
aux_rules(aux_rules) {}
|
||||
|
||||
const rule_ptr Grammar::rule(const Symbol &symbol) const {
|
||||
auto map = symbol.is_auxiliary() ? aux_rules : rules;
|
||||
auto iter = map.find(symbol.name);
|
||||
if (iter != map.end())
|
||||
return iter->second;
|
||||
else
|
||||
return rule_ptr();
|
||||
}
|
||||
|
||||
bool Grammar::operator==(const Grammar &other) const {
|
||||
if (other.start_rule_name != start_rule_name) return false;
|
||||
if (other.rules.size() != rules.size()) return false;
|
||||
if (other.aux_rules.size() != aux_rules.size()) return false;
|
||||
|
||||
for (auto pair : rules) {
|
||||
auto other_pair = other.rules.find(pair.first);
|
||||
if (other_pair == other.rules.end()) return false;
|
||||
if (!other_pair->second->operator==(*pair.second)) return false;
|
||||
}
|
||||
for (auto pair : aux_rules) {
|
||||
auto other_pair = other.aux_rules.find(pair.first);
|
||||
if (other_pair == other.aux_rules.end()) return false;
|
||||
if (!other_pair->second->operator==(*pair.second)) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Grammar::has_definition(const Symbol &symbol) const {
|
||||
return rule(symbol).get() != nullptr;
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const Grammar &grammar) {
|
||||
stream << string("#<grammar");
|
||||
|
||||
stream << string(" rules: {");
|
||||
bool started = false;
|
||||
for (auto pair : grammar.rules) {
|
||||
|
|
@ -67,19 +35,6 @@ namespace tree_sitter {
|
|||
stream << pair.second;
|
||||
started = true;
|
||||
}
|
||||
stream << string("}");
|
||||
|
||||
stream << string(" aux_rules: {");
|
||||
started = false;
|
||||
for (auto pair : grammar.aux_rules) {
|
||||
if (started) stream << string(", ");
|
||||
stream << pair.first;
|
||||
stream << string(" => ");
|
||||
stream << pair.second;
|
||||
started = true;
|
||||
}
|
||||
stream << string("}");
|
||||
|
||||
return stream << string(">");
|
||||
return stream << string("}>");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,12 @@
|
|||
#include "expand_repeats.h"
|
||||
#include <map>
|
||||
#include "prepared_grammar.h"
|
||||
#include "rules/visitor.h"
|
||||
#include "rules/seq.h"
|
||||
#include "rules/symbol.h"
|
||||
#include "rules/choice.h"
|
||||
#include "rules/blank.h"
|
||||
#include "rules/repeat.h"
|
||||
#include <map>
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
|
|
@ -51,7 +52,7 @@ namespace tree_sitter {
|
|||
}
|
||||
};
|
||||
|
||||
Grammar expand_repeats(const Grammar &grammar) {
|
||||
PreparedGrammar expand_repeats(const PreparedGrammar &grammar) {
|
||||
map<const string, const rule_ptr> rules;
|
||||
map<const string, const rule_ptr> aux_rules(grammar.aux_rules);
|
||||
RepeatExpander visitor;
|
||||
|
|
@ -61,7 +62,7 @@ namespace tree_sitter {
|
|||
|
||||
aux_rules.insert(visitor.aux_rules.begin(), visitor.aux_rules.end());
|
||||
|
||||
return Grammar(grammar.start_rule_name, rules, aux_rules);
|
||||
return PreparedGrammar(grammar.start_rule_name, rules, aux_rules);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,8 +4,10 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
Grammar expand_repeats(const Grammar &);
|
||||
PreparedGrammar expand_repeats(const PreparedGrammar &);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,14 @@
|
|||
#include "extract_tokens.h"
|
||||
#include "search_for_symbols.h"
|
||||
#include <map>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "prepared_grammar.h"
|
||||
#include "rules/visitor.h"
|
||||
#include "rules/seq.h"
|
||||
#include "rules/choice.h"
|
||||
#include "rules/repeat.h"
|
||||
#include "rules/blank.h"
|
||||
#include "rules/symbol.h"
|
||||
#include <map>
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::pair;
|
||||
|
|
@ -67,7 +68,7 @@ namespace tree_sitter {
|
|||
}
|
||||
};
|
||||
|
||||
pair<Grammar, Grammar> extract_tokens(const Grammar &input_grammar) {
|
||||
pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &input_grammar) {
|
||||
TokenExtractor extractor;
|
||||
map<const string, const rule_ptr> rules;
|
||||
map<const string, const rule_ptr> tokens;
|
||||
|
|
@ -97,8 +98,8 @@ namespace tree_sitter {
|
|||
aux_tokens.insert(extractor.tokens.begin(), extractor.tokens.end());
|
||||
|
||||
return {
|
||||
Grammar(input_grammar.start_rule_name, rules, aux_rules),
|
||||
Grammar("", tokens, aux_tokens)
|
||||
PreparedGrammar(input_grammar.start_rule_name, rules, aux_rules),
|
||||
PreparedGrammar("", tokens, aux_tokens)
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,10 +4,10 @@
|
|||
#include <utility>
|
||||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::pair<Grammar, Grammar> extract_tokens(const Grammar &);
|
||||
std::pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
#include "prepared_grammar.h"
|
||||
#include "./perform.h"
|
||||
#include "./extract_tokens.h"
|
||||
#include "./expand_repeats.h"
|
||||
|
|
@ -6,8 +7,9 @@ namespace tree_sitter {
|
|||
using std::pair;
|
||||
|
||||
namespace prepare_grammar {
|
||||
pair<Grammar, Grammar> perform(const Grammar &input_grammar) {
|
||||
auto grammars = prepare_grammar::extract_tokens(input_grammar);
|
||||
pair<PreparedGrammar, PreparedGrammar> perform(const Grammar &input_grammar) {
|
||||
PreparedGrammar grammar(input_grammar);
|
||||
auto grammars = prepare_grammar::extract_tokens(grammar);
|
||||
auto rule_grammar = expand_repeats(grammars.first);
|
||||
auto lex_grammar = grammars.second;
|
||||
return { rule_grammar, lex_grammar };
|
||||
|
|
|
|||
|
|
@ -5,9 +5,10 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
class PreparedGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
std::pair<Grammar, Grammar> perform(const Grammar &);
|
||||
std::pair<PreparedGrammar, PreparedGrammar> perform(const Grammar &);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
88
src/compiler/prepared_grammar.cpp
Normal file
88
src/compiler/prepared_grammar.cpp
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
#include "prepared_grammar.h"
|
||||
#include "rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::initializer_list;
|
||||
using std::pair;
|
||||
using std::map;
|
||||
using std::ostream;
|
||||
using rules::rule_ptr;
|
||||
using rules::Symbol;
|
||||
|
||||
// Builds a prepared grammar from two explicit rule tables.
// The start rule name and `rules` are handed to the Grammar base class;
// `aux_rules` is copied into this class's own auxiliary table.
PreparedGrammar::PreparedGrammar(
    string start_rule_name,
    const map<const string, const rule_ptr> &rules,
    const map<const string, const rule_ptr> &aux_rules)
    : Grammar(start_rule_name, rules), aux_rules(aux_rules) {}
|
||||
|
||||
// Brace-list overload: lets callers spell both rule tables inline as
// `{ {"name", rule}, ... }`. The first list is forwarded to the Grammar
// base class, the second initialises the auxiliary table.
PreparedGrammar::PreparedGrammar(
    string start_rule_name,
    const initializer_list<pair<const string, const rule_ptr>> &rules,
    const initializer_list<pair<const string, const rule_ptr>> &aux_rules)
    : Grammar(start_rule_name, rules), aux_rules(aux_rules) {}
|
||||
|
||||
// Wraps a plain Grammar: the base sub-object is copy-constructed from
// `grammar` and the auxiliary rule table starts out empty.
PreparedGrammar::PreparedGrammar(const Grammar &grammar)
    : Grammar(grammar), aux_rules() {}
|
||||
|
||||
// Looks up the rule that defines `symbol`.
//
// Symbols for which symbol.is_auxiliary() is true are resolved against
// `aux_rules`; every other symbol is resolved against `rules`. The lookup
// key is symbol.name.
//
// Returns the stored rule pointer, or a default-constructed rule_ptr when
// the selected table has no entry under that name.
const rule_ptr PreparedGrammar::rule(const Symbol &symbol) const {
    // Bind by reference: both operands are lvalues of the same map type, so
    // the conditional expression is itself an lvalue and no table is copied.
    // (The previous `auto map = ...` duplicated the whole table per lookup.)
    const auto &table = symbol.is_auxiliary() ? aux_rules : rules;

    const auto entry = table.find(symbol.name);
    if (entry == table.end())
        return rule_ptr();
    return entry->second;
}
|
||||
|
||||
bool PreparedGrammar::operator==(const PreparedGrammar &other) const {
|
||||
if (other.start_rule_name != start_rule_name) return false;
|
||||
if (other.rules.size() != rules.size()) return false;
|
||||
if (other.aux_rules.size() != aux_rules.size()) return false;
|
||||
|
||||
for (auto pair : rules) {
|
||||
auto other_pair = other.rules.find(pair.first);
|
||||
if (other_pair == other.rules.end()) return false;
|
||||
if (!other_pair->second->operator==(*pair.second)) return false;
|
||||
}
|
||||
for (auto pair : aux_rules) {
|
||||
auto other_pair = other.aux_rules.find(pair.first);
|
||||
if (other_pair == other.aux_rules.end()) return false;
|
||||
if (!other_pair->second->operator==(*pair.second)) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PreparedGrammar::has_definition(const Symbol &symbol) const {
|
||||
return rule(symbol).get() != nullptr;
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const PreparedGrammar &grammar) {
|
||||
stream << string("#<grammar");
|
||||
|
||||
stream << string(" rules: {");
|
||||
bool started = false;
|
||||
for (auto pair : grammar.rules) {
|
||||
if (started) stream << string(", ");
|
||||
stream << pair.first;
|
||||
stream << string(" => ");
|
||||
stream << pair.second;
|
||||
started = true;
|
||||
}
|
||||
stream << string("}");
|
||||
|
||||
stream << string(" aux_rules: {");
|
||||
started = false;
|
||||
for (auto pair : grammar.aux_rules) {
|
||||
if (started) stream << string(", ");
|
||||
stream << pair.first;
|
||||
stream << string(" => ");
|
||||
stream << pair.second;
|
||||
started = true;
|
||||
}
|
||||
stream << string("}");
|
||||
|
||||
return stream << string(">");
|
||||
}
|
||||
}
|
||||
29
src/compiler/prepared_grammar.h
Normal file
29
src/compiler/prepared_grammar.h
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
#ifndef __tree_sitter__prepared_grammar__
|
||||
#define __tree_sitter__prepared_grammar__
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar : public Grammar {
|
||||
public:
|
||||
PreparedGrammar(std::string start_rule_name,
|
||||
const std::map<const std::string, const rules::rule_ptr> &rules,
|
||||
const std::map<const std::string, const rules::rule_ptr> &aux_rules);
|
||||
PreparedGrammar(std::string start_rule_name,
|
||||
const std::initializer_list<std::pair<const std::string, const rules::rule_ptr>> &rules,
|
||||
const std::initializer_list<std::pair<const std::string, const rules::rule_ptr>> &aux_rules);
|
||||
PreparedGrammar(const Grammar &grammar);
|
||||
|
||||
bool operator==(const PreparedGrammar &other) const;
|
||||
bool has_definition(const rules::Symbol &symbol) const;
|
||||
const rules::rule_ptr rule(const rules::Symbol &symbol) const;
|
||||
|
||||
const std::map<const std::string, const rules::rule_ptr> aux_rules;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const PreparedGrammar &grammar);
|
||||
|
||||
std::string compile(const Grammar &grammar, std::string name);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -16,6 +16,7 @@
|
|||
1214930E181E200B008E9BDA /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492E9181E200B008E9BDA /* main.cpp */; };
|
||||
1225CC6418765693000D4723 /* prepare_grammar_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1225CC6318765693000D4723 /* prepare_grammar_spec.cpp */; };
|
||||
1236A7C518B287DC00593ABB /* character_range.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1236A7C318B287DC00593ABB /* character_range.cpp */; };
|
||||
1236A7D218B554C800593ABB /* prepared_grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1236A7D018B554C800593ABB /* prepared_grammar.cpp */; };
|
||||
1251209B1830145300C9B56A /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; };
|
||||
12661BF418A1505A00A259FB /* character_set_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12661BF318A1505A00A259FB /* character_set_spec.cpp */; };
|
||||
127528B318AACAAA006B682B /* rule_can_be_blank.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 127528B118AACAAA006B682B /* rule_can_be_blank.cpp */; };
|
||||
|
|
@ -102,6 +103,8 @@
|
|||
1236A7CA18B2ABB900593ABB /* equals_pointer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = equals_pointer.h; sourceTree = "<group>"; };
|
||||
1236A7CC18B2B4F000593ABB /* Makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = Makefile; sourceTree = "<group>"; };
|
||||
1236A7CE18B3CC4800593ABB /* .travis.yml */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .travis.yml; sourceTree = "<group>"; };
|
||||
1236A7D018B554C800593ABB /* prepared_grammar.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = prepared_grammar.cpp; sourceTree = "<group>"; };
|
||||
1236A7D118B554C800593ABB /* prepared_grammar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = prepared_grammar.h; sourceTree = "<group>"; };
|
||||
1251209A1830145300C9B56A /* rule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule.cpp; sourceTree = "<group>"; };
|
||||
125120A3183083BD00C9B56A /* arithmetic.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; lineEnding = 0; path = arithmetic.hpp; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.cpp; };
|
||||
12661BF318A1505A00A259FB /* character_set_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = character_set_spec.cpp; path = spec/compiler/rules/character_set_spec.cpp; sourceTree = SOURCE_ROOT; };
|
||||
|
|
@ -384,6 +387,8 @@
|
|||
12EDCF9D18820116005A7A07 /* parse_table.cpp */,
|
||||
12EDCF9E18820116005A7A07 /* parse_table.h */,
|
||||
12ED72A5186FC6D90089229B /* prepare_grammar */,
|
||||
1236A7D018B554C800593ABB /* prepared_grammar.cpp */,
|
||||
1236A7D118B554C800593ABB /* prepared_grammar.h */,
|
||||
12130602182C344400FCF928 /* rules */,
|
||||
);
|
||||
path = compiler;
|
||||
|
|
@ -535,6 +540,7 @@
|
|||
1225CC6418765693000D4723 /* prepare_grammar_spec.cpp in Sources */,
|
||||
12EDCF9A1881FCD9005A7A07 /* search_for_symbols.cpp in Sources */,
|
||||
12EDCFB21882039A005A7A07 /* perform.cpp in Sources */,
|
||||
1236A7D218B554C800593ABB /* prepared_grammar.cpp in Sources */,
|
||||
12FD40E718639B910041A84E /* visitor.cpp in Sources */,
|
||||
12EDCF991881FCD9005A7A07 /* perform.cpp in Sources */,
|
||||
12EDCFBC188205BF005A7A07 /* rule_transitions_spec.cpp in Sources */,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue