Store grammars' rules in vectors, not maps

This way, we can keep track of the order in which the rules
were given, for resolving reduce/reduce conflicts.
This commit is contained in:
Max Brunsfeld 2014-03-24 13:05:04 -07:00
parent 671f1a1ddc
commit 48baf056b4
18 changed files with 525 additions and 533 deletions

View file

@ -5,7 +5,7 @@ namespace tree_sitter {
using namespace tree_sitter::rules;
Grammar arithmetic() {
return Grammar("expression", {
return Grammar({
{ "expression", choice({
seq({
sym("term"),

View file

@ -15,7 +15,7 @@ namespace tree_sitter {
}
Grammar javascript() {
return Grammar("program", {
return Grammar({
{ "program", repeat(sym("statement")) },
{ "terminator", choice({ str(";"), str("\n") }) },
{ "statement", choice({

View file

@ -15,7 +15,7 @@ namespace tree_sitter {
}
Grammar json() {
return Grammar("value", {
return Grammar({
{ "value", choice({
sym("object"),
sym("array"),

File diff suppressed because it is too large Load diff

View file

@ -387,8 +387,8 @@ PARSE_TABLE() {
STATE(7);
SET_LEX_STATE(1);
SHIFT(ts_sym_comma, 8)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper2, 49)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper1, 49)
END_STATE();
STATE(8);
@ -419,13 +419,13 @@ PARSE_TABLE() {
STATE(11);
SET_LEX_STATE(1);
SHIFT(ts_sym_comma, 8)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper2, 12)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper1, 12)
END_STATE();
STATE(12);
SET_LEX_STATE(4);
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 5, COLLAPSE({1, 0, 1, 0, 1}))
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 5, COLLAPSE({1, 0, 1, 0, 1}))
END_STATE();
STATE(13);
@ -457,8 +457,8 @@ PARSE_TABLE() {
STATE(16);
SET_LEX_STATE(1);
SHIFT(ts_sym_comma, 8)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper2, 17)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper1, 17)
END_STATE();
STATE(17);
@ -497,8 +497,8 @@ PARSE_TABLE() {
STATE(21);
SET_LEX_STATE(5);
SHIFT(ts_sym_comma, 22)
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper1, 40)
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper2, 40)
END_STATE();
STATE(22);
@ -519,13 +519,13 @@ PARSE_TABLE() {
STATE(23);
SET_LEX_STATE(5);
SHIFT(ts_sym_comma, 22)
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper1, 24)
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper2, 24)
END_STATE();
STATE(24);
SET_LEX_STATE(7);
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper1, 3, COLLAPSE({1, 0, 1}))
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper2, 3, COLLAPSE({1, 0, 1}))
END_STATE();
STATE(25);
@ -557,8 +557,8 @@ PARSE_TABLE() {
STATE(28);
SET_LEX_STATE(1);
SHIFT(ts_sym_comma, 8)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper2, 29)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper1, 29)
END_STATE();
STATE(29);
@ -581,8 +581,8 @@ PARSE_TABLE() {
STATE(32);
SET_LEX_STATE(1);
SHIFT(ts_sym_comma, 8)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper2, 33)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper1, 33)
END_STATE();
STATE(33);
@ -615,8 +615,8 @@ PARSE_TABLE() {
STATE(36);
SET_LEX_STATE(5);
SHIFT(ts_sym_comma, 22)
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper1, 37)
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper2, 37)
END_STATE();
STATE(37);
@ -662,8 +662,8 @@ PARSE_TABLE() {
STATE(44);
SET_LEX_STATE(1);
SHIFT(ts_sym_comma, 8)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper2, 45)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper1, 45)
END_STATE();
STATE(45);
@ -680,13 +680,13 @@ PARSE_TABLE() {
STATE(47);
SET_LEX_STATE(1);
SHIFT(ts_sym_comma, 8)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper2, 48)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper1, 48)
END_STATE();
STATE(48);
SET_LEX_STATE(4);
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 3, COLLAPSE({1, 0, 1}))
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 3, COLLAPSE({1, 0, 1}))
END_STATE();
STATE(49);
@ -707,8 +707,8 @@ PARSE_TABLE() {
STATE(52);
SET_LEX_STATE(1);
SHIFT(ts_sym_comma, 8)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper2, 53)
REDUCE(ts_sym_right_brace, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper1, 53)
END_STATE();
STATE(53);
@ -740,8 +740,8 @@ PARSE_TABLE() {
STATE(56);
SET_LEX_STATE(5);
SHIFT(ts_sym_comma, 22)
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper1, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper1, 57)
REDUCE(ts_sym_right_bracket, ts_aux_sym_repeat_helper2, 0, COLLAPSE({}))
SHIFT(ts_aux_sym_repeat_helper2, 57)
END_STATE();
STATE(57);

View file

@ -2,7 +2,6 @@
#define TREE_SITTER_COMPILER_H_
#include <vector>
#include <map>
#include <string>
#include <memory>
@ -26,10 +25,11 @@ namespace tree_sitter {
class Grammar {
public:
Grammar(std::string start_rule_name, const std::map<const std::string, const rules::rule_ptr> &rules);
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules);
bool operator==(const Grammar &other) const;
const std::string start_rule_name;
const std::map<const std::string, const rules::rule_ptr> rules;
std::string start_rule_name() const;
const rules::rule_ptr rule(const std::string &name) const;
const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
};
std::ostream& operator<<(std::ostream &stream, const Grammar &grammar);

View file

@ -20,7 +20,7 @@ static set<Symbol> keys(const map<Symbol, parse_actions> &map) {
START_TEST
describe("building parse and lex tables", []() {
PreparedGrammar grammar("expression", {
PreparedGrammar grammar({
{ "expression", choice({
seq({
sym("term"),
@ -37,7 +37,7 @@ describe("building parse and lex tables", []() {
}) }) }
}, {});
PreparedGrammar lex_grammar("", {
PreparedGrammar lex_grammar({
{ "plus", str("+") },
{ "variable", pattern("\\w+") },
{ "number", pattern("\\d+") },

View file

@ -9,7 +9,7 @@ using namespace rules;
START_TEST
describe("computing FIRST sets", []() {
const PreparedGrammar null_grammar("", {}, {});
const PreparedGrammar null_grammar({}, {});
describe("for a sequence AB", [&]() {
it("ignores B when A cannot be blank", [&]() {
@ -41,7 +41,7 @@ describe("computing FIRST sets", []() {
sym("A") }),
sym("A") });
Grammar grammar("A", {
Grammar grammar({
{ "A", choice({
seq({
sym("y"),
@ -56,7 +56,7 @@ describe("computing FIRST sets", []() {
});
it("includes FIRST(B) when A is a non-terminal and its expansion can be blank", [&]() {
Grammar grammar("A", {{ "A", choice({ sym("x"), blank() }) }});
Grammar grammar({{ "A", choice({ sym("x"), blank() }) }});
auto rule = seq({
sym("A"),

View file

@ -9,7 +9,7 @@ using namespace rules;
START_TEST
describe("computing FOLLOW sets", []() {
const PreparedGrammar grammar("", {
const PreparedGrammar grammar({
{ "A", sym("a") },
{ "B", sym("b") },
}, {});

View file

@ -9,7 +9,7 @@ using namespace rules;
START_TEST
describe("computing closures of item sets", []() {
PreparedGrammar grammar("E", {
PreparedGrammar grammar({
{ "E", choice({
seq({
sym("T"),

View file

@ -11,7 +11,7 @@ using prepare_grammar::prepare_grammar;
describe("preparing a grammar", []() {
describe("extracting tokens", []() {
it("moves strings and patterns into a separate 'lexical' grammar", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = prepare_grammar(Grammar("rule1", {
pair<PreparedGrammar, PreparedGrammar> result = prepare_grammar(Grammar({
{ "rule1", seq({
str("ab"),
seq({
@ -20,7 +20,7 @@ describe("preparing a grammar", []() {
str("ab") }) }
}));
AssertThat(result.first, Equals(PreparedGrammar("rule1", {
AssertThat(result.first, Equals(PreparedGrammar({
{ "rule1", seq({
make_shared<Symbol>("token1", SymbolTypeAuxiliary),
seq({
@ -29,42 +29,42 @@ describe("preparing a grammar", []() {
make_shared<Symbol>("token1", SymbolTypeAuxiliary) }) }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar("", {}, {
AssertThat(result.second, Equals(PreparedGrammar({}, {
{ "token1", str("ab") },
})));
});
it("moves entire rules into the lexical grammar when possible, preserving their names", [&]() {
auto result = prepare_grammar(Grammar("rule1", {
auto result = prepare_grammar(Grammar({
{ "rule1", sym("rule2") },
{ "rule2", pattern("a|b") }
}));
AssertThat(result.first, Equals(PreparedGrammar("rule1", {
AssertThat(result.first, Equals(PreparedGrammar({
{ "rule1", sym("rule2") }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar("", {
AssertThat(result.second, Equals(PreparedGrammar({
{ "rule2", pattern("a|b") },
}, {})));
});
it("does not extract blanks into tokens", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = prepare_grammar(Grammar("rule1", {
pair<PreparedGrammar, PreparedGrammar> result = prepare_grammar(Grammar({
{ "rule1", choice({ sym("rule2"), blank() }) },
}));
AssertThat(result.first, Equals(PreparedGrammar("rule1", {
AssertThat(result.first, Equals(PreparedGrammar({
{ "rule1", choice({ sym("rule2"), blank() }) },
}, {})));
AssertThat(result.second, Equals(PreparedGrammar("", {}, {})));
AssertThat(result.second, Equals(PreparedGrammar({}, {})));
});
});
describe("expanding repeats", []() {
it("replaces repeat rules with pairs of recursive rules", [&]() {
PreparedGrammar result = prepare_grammar(Grammar("rule1", {
PreparedGrammar result = prepare_grammar(Grammar({
{ "rule1", seq({
sym("x"),
repeat(seq({ sym("a"), sym("b") })),
@ -72,7 +72,7 @@ describe("preparing a grammar", []() {
}) },
})).first;
AssertThat(result, Equals(PreparedGrammar("rule1", {
AssertThat(result, Equals(PreparedGrammar({
{ "rule1", seq({
sym("x"),
make_shared<Symbol>("repeat_helper1", SymbolTypeAuxiliary),

View file

@ -11,7 +11,9 @@ var x = {};
==========================================
parses if statements
==========================================
if (1) { var x = 2; }
if (1) {
var x = 2;
}
---
(program
(statement (if

View file

@ -153,7 +153,7 @@ namespace tree_sitter {
lex_grammar(lex_grammar) {}
pair<ParseTable, LexTable> build() {
auto start_symbol = make_shared<Symbol>(grammar.start_rule_name);
auto start_symbol = make_shared<Symbol>(grammar.start_rule_name());
ParseItem item(rules::START, start_symbol, {}, rules::END_OF_INPUT);
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
add_parse_state(item_set);

View file

@ -3,26 +3,28 @@
namespace tree_sitter {
using std::string;
using std::map;
using std::ostream;
using rules::rule_ptr;
Grammar::Grammar(std::string start_rule_name, const map<const string, const rule_ptr> &rules) :
start_rule_name(start_rule_name),
Grammar::Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules) :
rules(rules) {}
bool Grammar::operator==(const Grammar &other) const {
if (other.start_rule_name != start_rule_name) return false;
if (other.rules.size() != rules.size()) return false;
for (auto pair : rules) {
auto other_pair = other.rules.find(pair.first);
if (other_pair == other.rules.end()) return false;
if (!other_pair->second->operator==(*pair.second)) return false;
for (size_t i = 0; i < rules.size(); i++) {
auto &pair = rules[i];
auto &other_pair = other.rules[i];
if (other_pair.first != pair.first) return false;
if (!other_pair.second->operator==(*pair.second)) return false;
}
return true;
}
// Returns the name of the grammar's start rule, which is taken to be the
// first entry of `rules`. A grammar with no rules has no start rule, so an
// empty name is returned instead of calling front() on an empty vector,
// which would be undefined behavior.
string Grammar::start_rule_name() const {
    if (rules.empty()) return string();
    return rules.front().first;
}
ostream& operator<<(ostream &stream, const Grammar &grammar) {
stream << string("#<grammar");

View file

@ -1,5 +1,5 @@
#include "compiler/prepare_grammar/expand_repeats.h"
#include <map>
#include <vector>
#include <string>
#include "compiler/prepared_grammar.h"
#include "compiler/rules/visitor.h"
@ -11,8 +11,9 @@
namespace tree_sitter {
using std::string;
using std::vector;
using std::pair;
using std::to_string;
using std::map;
using std::make_shared;
using rules::rule_ptr;
using rules::Blank;
@ -33,7 +34,7 @@ namespace tree_sitter {
void visit(const Repeat *rule) {
rule_ptr inner_rule = apply(rule->content);
string helper_rule_name = string("repeat_helper") + to_string(aux_rules.size() + 1);
aux_rules.insert({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) });
aux_rules.push_back({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) });
value = make_shared<Symbol>(helper_rule_name, rules::SymbolTypeAuxiliary);
}
@ -50,19 +51,19 @@ namespace tree_sitter {
}
public:
map<const string, const rule_ptr> aux_rules;
vector<pair<string, const rules::rule_ptr>> aux_rules;
};
PreparedGrammar expand_repeats(const PreparedGrammar &grammar) {
map<const string, const rule_ptr> rules, aux_rules(grammar.aux_rules);
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);
ExpandRepeats expander;
for (auto &pair : grammar.rules)
rules.insert({ pair.first, expander.apply(pair.second) });
rules.push_back({ pair.first, expander.apply(pair.second) });
aux_rules.insert(expander.aux_rules.begin(), expander.aux_rules.end());
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end());
return PreparedGrammar(grammar.start_rule_name, rules, aux_rules);
return PreparedGrammar(rules, aux_rules);
}
}
}

View file

@ -1,5 +1,5 @@
#include "compiler/prepare_grammar/extract_tokens.h"
#include <map>
#include <vector>
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/prepared_grammar.h"
@ -16,7 +16,7 @@ namespace tree_sitter {
using std::pair;
using std::string;
using std::to_string;
using std::map;
using std::vector;
using std::make_shared;
using rules::rule_ptr;
@ -41,7 +41,7 @@ namespace tree_sitter {
if (*pair.second == *rule)
return pair.first;
string name = "token" + to_string(tokens.size() + 1);
tokens.insert({ name, rule });
tokens.push_back({ name, rule });
return name;
}
@ -67,36 +67,36 @@ namespace tree_sitter {
}
public:
map<const string, const rule_ptr> tokens;
vector<pair<string, const rules::rule_ptr>> tokens;
};
pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &input_grammar) {
map<const string, const rule_ptr> rules, tokens, aux_rules, aux_tokens;
vector<pair<string, rules::rule_ptr>> rules, tokens, aux_rules, aux_tokens;
TokenExtractor extractor;
for (auto &pair : input_grammar.rules) {
string name = pair.first;
rule_ptr rule = pair.second;
if (IsToken().apply(rule))
tokens.insert({ name, rule });
tokens.push_back({ name, rule });
else
rules.insert({ name, extractor.apply(rule) });
rules.push_back({ name, extractor.apply(rule) });
}
for (auto &pair : input_grammar.aux_rules) {
string name = pair.first;
rule_ptr rule = pair.second;
if (IsToken().apply(rule))
aux_tokens.insert({ name, rule });
aux_tokens.push_back({ name, rule });
else
aux_rules.insert({ name, extractor.apply(rule) });
aux_rules.push_back({ name, extractor.apply(rule) });
}
aux_tokens.insert(extractor.tokens.begin(), extractor.tokens.end());
aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end());
return {
PreparedGrammar(input_grammar.start_rule_name, rules, aux_rules),
PreparedGrammar("", tokens, aux_tokens)
PreparedGrammar(rules, aux_rules),
PreparedGrammar(tokens, aux_tokens)
};
}
}

View file

@ -1,28 +1,19 @@
#include "compiler/prepared_grammar.h"
#include <map>
#include <vector>
#include <string>
#include <utility>
#include "compiler/rules/symbol.h"
namespace tree_sitter {
using std::string;
using std::initializer_list;
using std::pair;
using std::map;
using std::ostream;
using rules::rule_ptr;
using rules::Symbol;
PreparedGrammar::PreparedGrammar(std::string start_rule_name,
const map<const string, const rule_ptr> &rules,
const map<const string, const rule_ptr> &aux_rules) :
Grammar(start_rule_name, rules),
aux_rules(aux_rules) {}
PreparedGrammar::PreparedGrammar(std::string start_rule_name,
const initializer_list<pair<const string, const rule_ptr>> &rules,
const initializer_list<pair<const string, const rule_ptr>> &aux_rules) :
Grammar(start_rule_name, rules),
PreparedGrammar::PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules) :
Grammar(rules),
aux_rules(aux_rules) {}
PreparedGrammar::PreparedGrammar(const Grammar &grammar) :
@ -30,21 +21,21 @@ namespace tree_sitter {
aux_rules({}) {}
const rule_ptr PreparedGrammar::rule(const Symbol &symbol) const {
auto map = symbol.is_auxiliary() ? aux_rules : rules;
auto iter = map.find(symbol.name);
if (iter != map.end())
return iter->second;
else
return rule_ptr();
auto rule_set = symbol.is_auxiliary() ? aux_rules : rules;
for (auto &pair : rule_set)
if (pair.first == symbol.name)
return pair.second;
return rule_ptr();
}
bool PreparedGrammar::operator==(const PreparedGrammar &other) const {
if (!Grammar::operator==(other)) return false;
if (other.aux_rules.size() != aux_rules.size()) return false;
for (auto pair : aux_rules) {
auto other_pair = other.aux_rules.find(pair.first);
if (other_pair == other.aux_rules.end()) return false;
if (!other_pair->second->operator==(*pair.second)) return false;
for (size_t i = 0; i < aux_rules.size(); i++) {
auto &pair = aux_rules[i];
auto &other_pair = other.aux_rules[i];
if (other_pair.first != pair.first) return false;
if (!other_pair.second->operator==(*pair.second)) return false;
}
return true;
}

View file

@ -1,7 +1,7 @@
#ifndef COMPILER_PREPARED_GRAMMAR_H_
#define COMPILER_PREPARED_GRAMMAR_H_
#include <map>
#include <vector>
#include <string>
#include <utility>
#include "tree_sitter/compiler.h"
@ -10,19 +10,15 @@
namespace tree_sitter {
class PreparedGrammar : public Grammar {
public:
PreparedGrammar(std::string start_rule_name,
const std::map<const std::string, const rules::rule_ptr> &rules,
const std::map<const std::string, const rules::rule_ptr> &aux_rules);
PreparedGrammar(std::string start_rule_name,
const std::initializer_list<std::pair<const std::string, const rules::rule_ptr>> &rules,
const std::initializer_list<std::pair<const std::string, const rules::rule_ptr>> &aux_rules);
PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
PreparedGrammar(const Grammar &grammar);
bool operator==(const PreparedGrammar &other) const;
bool has_definition(const rules::Symbol &symbol) const;
const rules::rule_ptr rule(const rules::Symbol &symbol) const;
const std::map<const std::string, const rules::rule_ptr> aux_rules;
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
};
std::ostream& operator<<(std::ostream &stream, const PreparedGrammar &grammar);