Store grammars' rules in vectors, not maps
This way, we can keep track of the order in which the rules were given, for resolving reduce/reduce conflicts.
This commit is contained in:
parent
671f1a1ddc
commit
48baf056b4
18 changed files with 525 additions and 533 deletions
|
|
@ -5,7 +5,7 @@ namespace tree_sitter {
|
|||
using namespace tree_sitter::rules;
|
||||
|
||||
Grammar arithmetic() {
|
||||
return Grammar("expression", {
|
||||
return Grammar({
|
||||
{ "expression", choice({
|
||||
seq({
|
||||
sym("term"),
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
Grammar javascript() {
|
||||
return Grammar("program", {
|
||||
return Grammar({
|
||||
{ "program", repeat(sym("statement")) },
|
||||
{ "terminator", choice({ str(";"), str("\n") }) },
|
||||
{ "statement", choice({
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
Grammar json() {
|
||||
return Grammar("value", {
|
||||
return Grammar({
|
||||
{ "value", choice({
|
||||
sym("object"),
|
||||
sym("array"),
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -387,8 +387,8 @@ PARSE_TABLE() {
|
|||
STATE(7);
|
||||
SET_LEX_STATE(1);
|
||||
SHIFT(ts_sym_comma, 8)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper2, 49)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper1, 49)
|
||||
END_STATE();
|
||||
|
||||
STATE(8);
|
||||
|
|
@ -419,13 +419,13 @@ PARSE_TABLE() {
|
|||
STATE(11);
|
||||
SET_LEX_STATE(1);
|
||||
SHIFT(ts_sym_comma, 8)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper2, 12)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper1, 12)
|
||||
END_STATE();
|
||||
|
||||
STATE(12);
|
||||
SET_LEX_STATE(4);
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 5, COLLAPSE({1, 0, 1, 0, 1}))
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 5, COLLAPSE({1, 0, 1, 0, 1}))
|
||||
END_STATE();
|
||||
|
||||
STATE(13);
|
||||
|
|
@ -457,8 +457,8 @@ PARSE_TABLE() {
|
|||
STATE(16);
|
||||
SET_LEX_STATE(1);
|
||||
SHIFT(ts_sym_comma, 8)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper2, 17)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper1, 17)
|
||||
END_STATE();
|
||||
|
||||
STATE(17);
|
||||
|
|
@ -497,8 +497,8 @@ PARSE_TABLE() {
|
|||
STATE(21);
|
||||
SET_LEX_STATE(5);
|
||||
SHIFT(ts_sym_comma, 22)
|
||||
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper1, 40)
|
||||
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper2, 40)
|
||||
END_STATE();
|
||||
|
||||
STATE(22);
|
||||
|
|
@ -519,13 +519,13 @@ PARSE_TABLE() {
|
|||
STATE(23);
|
||||
SET_LEX_STATE(5);
|
||||
SHIFT(ts_sym_comma, 22)
|
||||
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper1, 24)
|
||||
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper2, 24)
|
||||
END_STATE();
|
||||
|
||||
STATE(24);
|
||||
SET_LEX_STATE(7);
|
||||
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper1, 3, COLLAPSE({1, 0, 1}))
|
||||
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper2, 3, COLLAPSE({1, 0, 1}))
|
||||
END_STATE();
|
||||
|
||||
STATE(25);
|
||||
|
|
@ -557,8 +557,8 @@ PARSE_TABLE() {
|
|||
STATE(28);
|
||||
SET_LEX_STATE(1);
|
||||
SHIFT(ts_sym_comma, 8)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper2, 29)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper1, 29)
|
||||
END_STATE();
|
||||
|
||||
STATE(29);
|
||||
|
|
@ -581,8 +581,8 @@ PARSE_TABLE() {
|
|||
STATE(32);
|
||||
SET_LEX_STATE(1);
|
||||
SHIFT(ts_sym_comma, 8)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper2, 33)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper1, 33)
|
||||
END_STATE();
|
||||
|
||||
STATE(33);
|
||||
|
|
@ -615,8 +615,8 @@ PARSE_TABLE() {
|
|||
STATE(36);
|
||||
SET_LEX_STATE(5);
|
||||
SHIFT(ts_sym_comma, 22)
|
||||
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper1, 37)
|
||||
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper2, 37)
|
||||
END_STATE();
|
||||
|
||||
STATE(37);
|
||||
|
|
@ -662,8 +662,8 @@ PARSE_TABLE() {
|
|||
STATE(44);
|
||||
SET_LEX_STATE(1);
|
||||
SHIFT(ts_sym_comma, 8)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper2, 45)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper1, 45)
|
||||
END_STATE();
|
||||
|
||||
STATE(45);
|
||||
|
|
@ -680,13 +680,13 @@ PARSE_TABLE() {
|
|||
STATE(47);
|
||||
SET_LEX_STATE(1);
|
||||
SHIFT(ts_sym_comma, 8)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper2, 48)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper1, 48)
|
||||
END_STATE();
|
||||
|
||||
STATE(48);
|
||||
SET_LEX_STATE(4);
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 3, COLLAPSE({1, 0, 1}))
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 3, COLLAPSE({1, 0, 1}))
|
||||
END_STATE();
|
||||
|
||||
STATE(49);
|
||||
|
|
@ -707,8 +707,8 @@ PARSE_TABLE() {
|
|||
STATE(52);
|
||||
SET_LEX_STATE(1);
|
||||
SHIFT(ts_sym_comma, 8)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper2, 53)
|
||||
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper1, 53)
|
||||
END_STATE();
|
||||
|
||||
STATE(53);
|
||||
|
|
@ -740,8 +740,8 @@ PARSE_TABLE() {
|
|||
STATE(56);
|
||||
SET_LEX_STATE(5);
|
||||
SHIFT(ts_sym_comma, 22)
|
||||
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper1, 57)
|
||||
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
|
||||
SHIFT(ts_aux_sym_repeat_helper2, 57)
|
||||
END_STATE();
|
||||
|
||||
STATE(57);
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#define TREE_SITTER_COMPILER_H_
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
|
|
@ -26,10 +25,11 @@ namespace tree_sitter {
|
|||
|
||||
class Grammar {
|
||||
public:
|
||||
Grammar(std::string start_rule_name, const std::map<const std::string, const rules::rule_ptr> &rules);
|
||||
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules);
|
||||
bool operator==(const Grammar &other) const;
|
||||
const std::string start_rule_name;
|
||||
const std::map<const std::string, const rules::rule_ptr> rules;
|
||||
std::string start_rule_name() const;
|
||||
const rules::rule_ptr rule(const std::string &name) const;
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const Grammar &grammar);
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ static set<Symbol> keys(const map<Symbol, parse_actions> &map) {
|
|||
START_TEST
|
||||
|
||||
describe("building parse and lex tables", []() {
|
||||
PreparedGrammar grammar("expression", {
|
||||
PreparedGrammar grammar({
|
||||
{ "expression", choice({
|
||||
seq({
|
||||
sym("term"),
|
||||
|
|
@ -37,7 +37,7 @@ describe("building parse and lex tables", []() {
|
|||
}) }) }
|
||||
}, {});
|
||||
|
||||
PreparedGrammar lex_grammar("", {
|
||||
PreparedGrammar lex_grammar({
|
||||
{ "plus", str("+") },
|
||||
{ "variable", pattern("\\w+") },
|
||||
{ "number", pattern("\\d+") },
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ using namespace rules;
|
|||
START_TEST
|
||||
|
||||
describe("computing FIRST sets", []() {
|
||||
const PreparedGrammar null_grammar("", {}, {});
|
||||
const PreparedGrammar null_grammar({}, {});
|
||||
|
||||
describe("for a sequence AB", [&]() {
|
||||
it("ignores B when A cannot be blank", [&]() {
|
||||
|
|
@ -41,7 +41,7 @@ describe("computing FIRST sets", []() {
|
|||
sym("A") }),
|
||||
sym("A") });
|
||||
|
||||
Grammar grammar("A", {
|
||||
Grammar grammar({
|
||||
{ "A", choice({
|
||||
seq({
|
||||
sym("y"),
|
||||
|
|
@ -56,7 +56,7 @@ describe("computing FIRST sets", []() {
|
|||
});
|
||||
|
||||
it("includes FIRST(B) when A is a non-terminal and its expansion can be blank", [&]() {
|
||||
Grammar grammar("A", {{ "A", choice({ sym("x"), blank() }) }});
|
||||
Grammar grammar({{ "A", choice({ sym("x"), blank() }) }});
|
||||
|
||||
auto rule = seq({
|
||||
sym("A"),
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ using namespace rules;
|
|||
START_TEST
|
||||
|
||||
describe("computing FOLLOW sets", []() {
|
||||
const PreparedGrammar grammar("", {
|
||||
const PreparedGrammar grammar({
|
||||
{ "A", sym("a") },
|
||||
{ "B", sym("b") },
|
||||
}, {});
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ using namespace rules;
|
|||
START_TEST
|
||||
|
||||
describe("computing closures of item sets", []() {
|
||||
PreparedGrammar grammar("E", {
|
||||
PreparedGrammar grammar({
|
||||
{ "E", choice({
|
||||
seq({
|
||||
sym("T"),
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ using prepare_grammar::prepare_grammar;
|
|||
describe("preparing a grammar", []() {
|
||||
describe("extracting tokens", []() {
|
||||
it("moves strings and patterns into a separate 'lexical' grammar", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = prepare_grammar(Grammar("rule1", {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = prepare_grammar(Grammar({
|
||||
{ "rule1", seq({
|
||||
str("ab"),
|
||||
seq({
|
||||
|
|
@ -20,7 +20,7 @@ describe("preparing a grammar", []() {
|
|||
str("ab") }) }
|
||||
}));
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar("rule1", {
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
{ "rule1", seq({
|
||||
make_shared<Symbol>("token1", SymbolTypeAuxiliary),
|
||||
seq({
|
||||
|
|
@ -29,42 +29,42 @@ describe("preparing a grammar", []() {
|
|||
make_shared<Symbol>("token1", SymbolTypeAuxiliary) }) }
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar("", {}, {
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
{ "token1", str("ab") },
|
||||
})));
|
||||
});
|
||||
|
||||
it("moves entire rules into the lexical grammar when possible, preserving their names", [&]() {
|
||||
auto result = prepare_grammar(Grammar("rule1", {
|
||||
auto result = prepare_grammar(Grammar({
|
||||
{ "rule1", sym("rule2") },
|
||||
{ "rule2", pattern("a|b") }
|
||||
}));
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar("rule1", {
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
{ "rule1", sym("rule2") }
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar("", {
|
||||
AssertThat(result.second, Equals(PreparedGrammar({
|
||||
{ "rule2", pattern("a|b") },
|
||||
}, {})));
|
||||
});
|
||||
|
||||
it("does not extract blanks into tokens", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = prepare_grammar(Grammar("rule1", {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = prepare_grammar(Grammar({
|
||||
{ "rule1", choice({ sym("rule2"), blank() }) },
|
||||
}));
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar("rule1", {
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
{ "rule1", choice({ sym("rule2"), blank() }) },
|
||||
}, {})));
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar("", {}, {})));
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {})));
|
||||
});
|
||||
});
|
||||
|
||||
describe("expanding repeats", []() {
|
||||
it("replaces repeat rules with pairs of recursive rules", [&]() {
|
||||
PreparedGrammar result = prepare_grammar(Grammar("rule1", {
|
||||
PreparedGrammar result = prepare_grammar(Grammar({
|
||||
{ "rule1", seq({
|
||||
sym("x"),
|
||||
repeat(seq({ sym("a"), sym("b") })),
|
||||
|
|
@ -72,7 +72,7 @@ describe("preparing a grammar", []() {
|
|||
}) },
|
||||
})).first;
|
||||
|
||||
AssertThat(result, Equals(PreparedGrammar("rule1", {
|
||||
AssertThat(result, Equals(PreparedGrammar({
|
||||
{ "rule1", seq({
|
||||
sym("x"),
|
||||
make_shared<Symbol>("repeat_helper1", SymbolTypeAuxiliary),
|
||||
|
|
|
|||
|
|
@ -11,7 +11,9 @@ var x = {};
|
|||
==========================================
|
||||
parses if statements
|
||||
==========================================
|
||||
if (1) { var x = 2; }
|
||||
if (1) {
|
||||
var x = 2;
|
||||
}
|
||||
---
|
||||
(program
|
||||
(statement (if
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@ namespace tree_sitter {
|
|||
lex_grammar(lex_grammar) {}
|
||||
|
||||
pair<ParseTable, LexTable> build() {
|
||||
auto start_symbol = make_shared<Symbol>(grammar.start_rule_name);
|
||||
auto start_symbol = make_shared<Symbol>(grammar.start_rule_name());
|
||||
ParseItem item(rules::START, start_symbol, {}, rules::END_OF_INPUT);
|
||||
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
|
||||
add_parse_state(item_set);
|
||||
|
|
|
|||
|
|
@ -3,26 +3,28 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::map;
|
||||
using std::ostream;
|
||||
using rules::rule_ptr;
|
||||
|
||||
Grammar::Grammar(std::string start_rule_name, const map<const string, const rule_ptr> &rules) :
|
||||
start_rule_name(start_rule_name),
|
||||
Grammar::Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules) :
|
||||
rules(rules) {}
|
||||
|
||||
bool Grammar::operator==(const Grammar &other) const {
|
||||
if (other.start_rule_name != start_rule_name) return false;
|
||||
if (other.rules.size() != rules.size()) return false;
|
||||
|
||||
for (auto pair : rules) {
|
||||
auto other_pair = other.rules.find(pair.first);
|
||||
if (other_pair == other.rules.end()) return false;
|
||||
if (!other_pair->second->operator==(*pair.second)) return false;
|
||||
for (size_t i = 0; i < rules.size(); i++) {
|
||||
auto &pair = rules[i];
|
||||
auto &other_pair = other.rules[i];
|
||||
if (other_pair.first != pair.first) return false;
|
||||
if (!other_pair.second->operator==(*pair.second)) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
string Grammar::start_rule_name() const {
|
||||
return rules.front().first;
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream &stream, const Grammar &grammar) {
|
||||
stream << string("#<grammar");
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
|
@ -11,8 +11,9 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::pair;
|
||||
using std::to_string;
|
||||
using std::map;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
using rules::Blank;
|
||||
|
|
@ -33,7 +34,7 @@ namespace tree_sitter {
|
|||
void visit(const Repeat *rule) {
|
||||
rule_ptr inner_rule = apply(rule->content);
|
||||
string helper_rule_name = string("repeat_helper") + to_string(aux_rules.size() + 1);
|
||||
aux_rules.insert({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) });
|
||||
aux_rules.push_back({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) });
|
||||
value = make_shared<Symbol>(helper_rule_name, rules::SymbolTypeAuxiliary);
|
||||
}
|
||||
|
||||
|
|
@ -50,19 +51,19 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
public:
|
||||
map<const string, const rule_ptr> aux_rules;
|
||||
vector<pair<string, const rules::rule_ptr>> aux_rules;
|
||||
};
|
||||
|
||||
PreparedGrammar expand_repeats(const PreparedGrammar &grammar) {
|
||||
map<const string, const rule_ptr> rules, aux_rules(grammar.aux_rules);
|
||||
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);
|
||||
ExpandRepeats expander;
|
||||
|
||||
for (auto &pair : grammar.rules)
|
||||
rules.insert({ pair.first, expander.apply(pair.second) });
|
||||
rules.push_back({ pair.first, expander.apply(pair.second) });
|
||||
|
||||
aux_rules.insert(expander.aux_rules.begin(), expander.aux_rules.end());
|
||||
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end());
|
||||
|
||||
return PreparedGrammar(grammar.start_rule_name, rules, aux_rules);
|
||||
return PreparedGrammar(rules, aux_rules);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
|
@ -16,7 +16,7 @@ namespace tree_sitter {
|
|||
using std::pair;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::map;
|
||||
using std::vector;
|
||||
using std::make_shared;
|
||||
using rules::rule_ptr;
|
||||
|
||||
|
|
@ -41,7 +41,7 @@ namespace tree_sitter {
|
|||
if (*pair.second == *rule)
|
||||
return pair.first;
|
||||
string name = "token" + to_string(tokens.size() + 1);
|
||||
tokens.insert({ name, rule });
|
||||
tokens.push_back({ name, rule });
|
||||
return name;
|
||||
}
|
||||
|
||||
|
|
@ -67,36 +67,36 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
public:
|
||||
map<const string, const rule_ptr> tokens;
|
||||
vector<pair<string, const rules::rule_ptr>> tokens;
|
||||
};
|
||||
|
||||
pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &input_grammar) {
|
||||
map<const string, const rule_ptr> rules, tokens, aux_rules, aux_tokens;
|
||||
vector<pair<string, rules::rule_ptr>> rules, tokens, aux_rules, aux_tokens;
|
||||
TokenExtractor extractor;
|
||||
|
||||
for (auto &pair : input_grammar.rules) {
|
||||
string name = pair.first;
|
||||
rule_ptr rule = pair.second;
|
||||
if (IsToken().apply(rule))
|
||||
tokens.insert({ name, rule });
|
||||
tokens.push_back({ name, rule });
|
||||
else
|
||||
rules.insert({ name, extractor.apply(rule) });
|
||||
rules.push_back({ name, extractor.apply(rule) });
|
||||
}
|
||||
|
||||
for (auto &pair : input_grammar.aux_rules) {
|
||||
string name = pair.first;
|
||||
rule_ptr rule = pair.second;
|
||||
if (IsToken().apply(rule))
|
||||
aux_tokens.insert({ name, rule });
|
||||
aux_tokens.push_back({ name, rule });
|
||||
else
|
||||
aux_rules.insert({ name, extractor.apply(rule) });
|
||||
aux_rules.push_back({ name, extractor.apply(rule) });
|
||||
}
|
||||
|
||||
aux_tokens.insert(extractor.tokens.begin(), extractor.tokens.end());
|
||||
aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end());
|
||||
|
||||
return {
|
||||
PreparedGrammar(input_grammar.start_rule_name, rules, aux_rules),
|
||||
PreparedGrammar("", tokens, aux_tokens)
|
||||
PreparedGrammar(rules, aux_rules),
|
||||
PreparedGrammar(tokens, aux_tokens)
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,28 +1,19 @@
|
|||
#include "compiler/prepared_grammar.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
using std::initializer_list;
|
||||
using std::pair;
|
||||
using std::map;
|
||||
using std::ostream;
|
||||
using rules::rule_ptr;
|
||||
using rules::Symbol;
|
||||
|
||||
PreparedGrammar::PreparedGrammar(std::string start_rule_name,
|
||||
const map<const string, const rule_ptr> &rules,
|
||||
const map<const string, const rule_ptr> &aux_rules) :
|
||||
Grammar(start_rule_name, rules),
|
||||
aux_rules(aux_rules) {}
|
||||
|
||||
PreparedGrammar::PreparedGrammar(std::string start_rule_name,
|
||||
const initializer_list<pair<const string, const rule_ptr>> &rules,
|
||||
const initializer_list<pair<const string, const rule_ptr>> &aux_rules) :
|
||||
Grammar(start_rule_name, rules),
|
||||
PreparedGrammar::PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules) :
|
||||
Grammar(rules),
|
||||
aux_rules(aux_rules) {}
|
||||
|
||||
PreparedGrammar::PreparedGrammar(const Grammar &grammar) :
|
||||
|
|
@ -30,21 +21,21 @@ namespace tree_sitter {
|
|||
aux_rules({}) {}
|
||||
|
||||
const rule_ptr PreparedGrammar::rule(const Symbol &symbol) const {
|
||||
auto map = symbol.is_auxiliary() ? aux_rules : rules;
|
||||
auto iter = map.find(symbol.name);
|
||||
if (iter != map.end())
|
||||
return iter->second;
|
||||
else
|
||||
return rule_ptr();
|
||||
auto rule_set = symbol.is_auxiliary() ? aux_rules : rules;
|
||||
for (auto &pair : rule_set)
|
||||
if (pair.first == symbol.name)
|
||||
return pair.second;
|
||||
return rule_ptr();
|
||||
}
|
||||
|
||||
bool PreparedGrammar::operator==(const PreparedGrammar &other) const {
|
||||
if (!Grammar::operator==(other)) return false;
|
||||
if (other.aux_rules.size() != aux_rules.size()) return false;
|
||||
for (auto pair : aux_rules) {
|
||||
auto other_pair = other.aux_rules.find(pair.first);
|
||||
if (other_pair == other.aux_rules.end()) return false;
|
||||
if (!other_pair->second->operator==(*pair.second)) return false;
|
||||
for (size_t i = 0; i < aux_rules.size(); i++) {
|
||||
auto &pair = aux_rules[i];
|
||||
auto &other_pair = other.aux_rules[i];
|
||||
if (other_pair.first != pair.first) return false;
|
||||
if (!other_pair.second->operator==(*pair.second)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef COMPILER_PREPARED_GRAMMAR_H_
|
||||
#define COMPILER_PREPARED_GRAMMAR_H_
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
|
@ -10,19 +10,15 @@
|
|||
namespace tree_sitter {
|
||||
class PreparedGrammar : public Grammar {
|
||||
public:
|
||||
PreparedGrammar(std::string start_rule_name,
|
||||
const std::map<const std::string, const rules::rule_ptr> &rules,
|
||||
const std::map<const std::string, const rules::rule_ptr> &aux_rules);
|
||||
PreparedGrammar(std::string start_rule_name,
|
||||
const std::initializer_list<std::pair<const std::string, const rules::rule_ptr>> &rules,
|
||||
const std::initializer_list<std::pair<const std::string, const rules::rule_ptr>> &aux_rules);
|
||||
PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
|
||||
PreparedGrammar(const Grammar &grammar);
|
||||
|
||||
bool operator==(const PreparedGrammar &other) const;
|
||||
bool has_definition(const rules::Symbol &symbol) const;
|
||||
const rules::rule_ptr rule(const rules::Symbol &symbol) const;
|
||||
|
||||
const std::map<const std::string, const rules::rule_ptr> aux_rules;
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const PreparedGrammar &grammar);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue