Make separate types for syntax and lexical grammars

This way, the separator characters can be added as a field to
lexical grammars only.
This commit is contained in:
Max Brunsfeld 2014-06-25 13:27:16 -07:00
parent d5674d33c4
commit 7df35f9b8d
49 changed files with 467 additions and 395 deletions

View file

@ -191,5 +191,7 @@ namespace tree_sitter_examples {
{ "null", keyword("null") },
{ "true", keyword("true") },
{ "false", keyword("false") },
}).ubiquitous_tokens({ "comment" });
})
.ubiquitous_tokens({ "comment" })
.separators({ ' ', '\t', '\r' });
}

View file

@ -30,6 +30,7 @@ namespace tree_sitter {
protected:
const std::vector<std::pair<std::string, rules::rule_ptr>> rules_;
std::vector<std::string> ubiquitous_tokens_;
std::vector<char> separators_;
public:
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules);
@ -37,9 +38,11 @@ namespace tree_sitter {
std::string start_rule_name() const;
const rules::rule_ptr rule(const std::string &name) const;
const std::vector<std::string> & ubiquitous_tokens() const;
const Grammar & ubiquitous_tokens(const std::vector<std::string> &ubiquitous_tokens);
const std::vector<std::pair<std::string, rules::rule_ptr>> & rules() const;
const std::vector<std::string> & ubiquitous_tokens() const;
Grammar & ubiquitous_tokens(const std::vector<std::string> &ubiquitous_tokens);
const std::vector<char> & separators() const;
Grammar & separators(const std::vector<char> &separators);
};
struct Conflict {

View file

@ -10,16 +10,16 @@ using namespace build_tables;
START_TEST
describe("building parse tables", []() {
auto parse_grammar = PreparedGrammar({
SyntaxGrammar parse_grammar({
{ "rule0", choice({ i_sym(1), i_sym(2) }) },
{ "rule1", i_token(0) },
{ "rule2", i_token(1) },
}, {}).ubiquitous_tokens({ Symbol(2, SymbolOptionToken) });
}, {}, { Symbol(2, SymbolOptionToken) });
PreparedGrammar lex_grammar({
LexicalGrammar lex_grammar({
{ "token0", pattern("[a-c]") },
{ "token1", pattern("[b-d]") },
}, {});
}, {}, {});
it("first looks for the start rule and its item set closure", [&]() {
auto result = build_parse_table(parse_grammar, lex_grammar);

View file

@ -1,6 +1,7 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/parse_conflict_manager.h"
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/prepared_grammar.h"
using namespace rules;
using namespace build_tables;
@ -10,16 +11,16 @@ START_TEST
describe("resolving parse conflicts", []() {
bool update;
PreparedGrammar parse_grammar({
SyntaxGrammar parse_grammar({
{ "rule1", seq({ sym("rule2"), sym("token2") }) },
{ "rule2", sym("token1") },
}, {});
}, {}, {});
PreparedGrammar lex_grammar({
LexicalGrammar lex_grammar({
{ "token1", pattern("[a-c]") },
{ "token2", pattern("[b-d]") },
{ "token3", keyword("stuff") },
}, {});
}, {}, {});
describe("lexical conflicts", [&]() {
Symbol sym1(0, SymbolOptionToken);

View file

@ -10,7 +10,7 @@ using namespace rules;
START_TEST
describe("computing FIRST sets", []() {
const PreparedGrammar null_grammar({}, {});
const SyntaxGrammar null_grammar;
describe("for a sequence AB", [&]() {
it("ignores B when A cannot be blank", [&]() {
@ -41,12 +41,12 @@ describe("computing FIRST sets", []() {
i_token(1) }),
i_sym(0) });
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", seq({
i_token(2),
i_token(3),
i_token(4) }) }
}, {});
}, {}, {});
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
Symbol(0, SymbolOptionToken),
@ -59,11 +59,11 @@ describe("computing FIRST sets", []() {
i_sym(0),
i_token(1) });
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", choice({
i_token(0),
blank() }) }
}, {});
}, {}, {});
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
Symbol(0, SymbolOptionToken),
@ -74,12 +74,12 @@ describe("computing FIRST sets", []() {
describe("when there are left-recursive rules", [&]() {
it("terminates", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", choice({
seq({ i_sym(0), i_token(10) }),
i_token(11),
}) },
}, {});
}, {}, {});
auto rule = i_sym(0);

View file

@ -9,14 +9,14 @@ using namespace rules;
START_TEST
describe("computing closures of item sets", []() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "E", seq({
i_sym(1),
i_token(11) }) },
{ "T", seq({
i_token(12),
i_token(13) }) },
}, {});
}, {}, {});
it("adds items at the beginnings of referenced rules", [&]() {
ParseItemSet item_set = item_set_closure(ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0),

View file

@ -8,15 +8,13 @@ using namespace build_tables;
START_TEST
describe("lexical item set transitions", []() {
PreparedGrammar grammar({}, {});
describe("when two items in the set have transitions on the same character", [&]() {
it("merges the transitions by computing the union of the two item sets", [&]() {
LexItemSet set1({
LexItem(Symbol(1), character({ {'a', 'f'} })),
LexItem(Symbol(2), character({ {'e', 'x'} })) });
AssertThat(char_transitions(set1, grammar), Equals(map<CharacterSet, LexItemSet>({
AssertThat(char_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
{ CharacterSet({ {'a', 'd'} }), LexItemSet({
LexItem(Symbol(1), blank()) }) },
{ CharacterSet({ {'e', 'f'} }), LexItemSet({
@ -30,10 +28,10 @@ describe("lexical item set transitions", []() {
});
describe("syntactic item set transitions", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "A", blank() },
{ "B", i_token(21) },
}, {});
}, {}, {});
it("computes the closure of the new item sets", [&]() {
ParseItemSet set1({

View file

@ -56,14 +56,14 @@ describe("checking if rules can be blank", [&]() {
});
describe("checking recursively (by expanding non-terminals)", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "A", choice({
seq({ i_sym(0), i_token(11) }),
blank() }) },
{ "B", choice({
seq({ i_sym(1), i_token(12) }),
i_token(13) }) },
}, {});
}, {}, {});
it("terminates for left-recursive rules that can be blank", [&]() {
rule = i_sym(0);

View file

@ -1,26 +1,11 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/rules/metadata.h"
#include "compiler/helpers/containers.h"
using namespace rules;
using namespace build_tables;
template<typename K>
class rule_map : public map<K, rule_ptr> {
public:
bool operator==(const map<K, rule_ptr> &other) const {
if (this->size() != other.size()) return false;
for (const auto &pair : *this) {
auto other_pair = other.find(pair.first);
if (other_pair == other.end()) return false;
if (!pair.second->operator==(*other_pair->second)) return false;
}
return true;
}
rule_map(const initializer_list<pair<const K, rule_ptr>> &list) : map<K, rule_ptr>(list) {}
};
START_TEST
describe("rule transitions", []() {

View file

@ -0,0 +1,52 @@
#ifndef HELPERS_CONTAINERS_H_
#define HELPERS_CONTAINERS_H_
#include <map>
#include <vector>
#include <string>
#include <initializer_list>
#include "tree_sitter/compiler.h"
#include "compiler/rules/rule.h"
using std::map;
using std::vector;
using std::string;
using std::initializer_list;
using std::pair;
using tree_sitter::rules::rule_ptr;
/*
 * A map from keys of type K to rules, intended to be handed to
 * spec matchers such as Equals(...).
 *
 * Equality against a plain map<K, rule_ptr> holds when both maps
 * have the same number of entries and every key of this map is
 * present in the other with a rule that compares equal. Rules are
 * compared by invoking operator== on the pointed-to rule, not by
 * comparing the pointers themselves.
 */
template<typename K>
class rule_map : public map<K, rule_ptr> {
public:
    rule_map(const initializer_list<pair<const K, rule_ptr>> &list) : map<K, rule_ptr>(list) {}

    bool operator==(const map<K, rule_ptr> &other) const {
        if (other.size() != this->size())
            return false;

        for (auto entry = this->begin(); entry != this->end(); ++entry) {
            // Look the key up rather than walking both maps in step,
            // so only the key's ordering (not its operator==) is needed.
            auto match = other.find(entry->first);
            if (match == other.end())
                return false;

            const bool rules_equal = entry->second->operator==(*match->second);
            if (!rules_equal)
                return false;
        }

        return true;
    }
};
/*
 * An ordered list of (name, rule) entries, intended to be handed to
 * spec matchers such as Equals(...).
 *
 * Equality against a plain vector<pair<string, rule_ptr>> holds when
 * both lists have the same length and, at every index, the names are
 * identical and the rules compare equal. Rules are compared by
 * invoking operator== on the pointed-to rule, not by comparing the
 * pointers themselves.
 */
class rule_list : public vector<pair<string, rule_ptr>> {
public:
    bool operator==(const vector<pair<string, rule_ptr>> &other) const {
        if (this->size() != other.size()) return false;
        for (size_t i = 0; i < this->size(); i++) {
            // Bind by reference so each entry is inspected in place
            // instead of being copied on every iteration.
            const auto &entry = this->operator[](i);
            const auto &other_entry = other[i];

            // The name is part of the entry's identity: without this
            // check, a list with the right rules under the wrong names
            // would be reported as equal.
            if (entry.first != other_entry.first)
                return false;
            if (!entry.second->operator==(*other_entry.second))
                return false;
        }
        return true;
    }

    rule_list(const initializer_list<pair<string, rule_ptr>> &list) :
        vector<pair<string, rule_ptr>>(list) {}
};
#endif // HELPERS_CONTAINERS_H_

View file

@ -1,6 +1,7 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/prepare_grammar/expand_repeats.h"
#include "compiler/helpers/containers.h"
START_TEST
@ -9,29 +10,33 @@ using prepare_grammar::expand_repeats;
describe("expanding repeat rules in a grammar", []() {
it("replaces repeat rules with pairs of recursive rules", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", repeat(i_token(0)) },
}, {});
}, {}, {});
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", i_aux_sym(0) },
}, {
{ "rule0_repeat0", choice({
seq({
i_token(0),
i_aux_sym(0) }),
blank() }) },
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", choice({ seq({ i_token(0), i_aux_sym(0) }), blank() }) },
})));
});
it("replaces repeats inside of sequences", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", seq({ i_token(10), repeat(i_token(11)) }) },
}, {});
}, {}, {});
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", seq({ i_token(10), i_aux_sym(0) }) },
}, {
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", choice({
seq({ i_token(11), i_aux_sym(0) }),
blank() }) },
@ -39,13 +44,17 @@ describe("expanding repeat rules in a grammar", []() {
});
it("replaces repeats inside of choices", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
}, {});
}, {}, {});
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", choice({ i_token(10), i_aux_sym(0) }) },
}, {
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", choice({
seq({ i_token(11), i_aux_sym(0) }),
blank() }) },
@ -53,13 +62,17 @@ describe("expanding repeat rules in a grammar", []() {
});
it("can replace multiple repeats in the same rule", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", seq({ repeat(i_token(10)), repeat(i_token(11)) }) },
}, {});
}, {}, {});
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", seq({ i_aux_sym(0), i_aux_sym(1) }) },
}, {
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", choice({
seq({
i_token(10),
@ -74,15 +87,19 @@ describe("expanding repeat rules in a grammar", []() {
});
it("can replace repeats in multiple rules", [&]() {
PreparedGrammar grammar({
SyntaxGrammar grammar({
{ "rule0", repeat(i_token(10)) },
{ "rule1", repeat(i_token(11)) },
}, {});
}, {}, {});
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
auto match = expand_repeats(grammar);
AssertThat(match.rules, Equals(rule_list({
{ "rule0", i_aux_sym(0) },
{ "rule1", i_aux_sym(1) },
}, {
})));
AssertThat(match.aux_rules, Equals(rule_list({
{ "rule0_repeat0", choice({
seq({ i_token(10), i_aux_sym(0) }),
blank() }) },

View file

@ -1,5 +1,6 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/helpers/containers.h"
#include "compiler/prepare_grammar/expand_tokens.h"
START_TEST
@ -9,50 +10,50 @@ using prepare_grammar::expand_tokens;
describe("expanding token rules", []() {
it("replaces regex patterns with their expansion", [&]() {
PreparedGrammar grammar({
LexicalGrammar grammar({
{ "rule_A", seq({
i_sym(10),
pattern("x*"),
i_sym(11) }) },
}, {});
}, {}, {});
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({
i_sym(10),
repeat(character({ 'x' })),
i_sym(11) }) },
}, {})));
})));
});
it("replaces string rules with a sequence of characters", [&]() {
PreparedGrammar grammar({
LexicalGrammar grammar({
{ "rule_A", seq({
i_sym(10),
str("xyz"),
i_sym(11) }) },
}, {});
}, {}, {});
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({
i_sym(10),
seq({ character({ 'x' }), character({ 'y' }), character({ 'z' }) }),
i_sym(11) }) },
}, {})));
})));
});
it("returns an error when the grammar contains an invalid regex", [&]() {
PreparedGrammar grammar({
LexicalGrammar grammar({
{ "rule_A", seq({
pattern("("),
str("xyz"),
pattern("[") }) },
}, {});
}, {}, {});
auto result = expand_tokens(grammar);

View file

@ -1,160 +1,172 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/prepared_grammar.h"
#include "compiler/prepare_grammar/extract_tokens.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepared_grammar.h"
#include "compiler/helpers/containers.h"
START_TEST
using namespace rules;
using prepare_grammar::extract_tokens;
using prepare_grammar::InternedGrammar;
describe("extracting tokens from a grammar", []() {
it("moves string rules into the lexical grammar", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
}, {}));
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
{
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
})));
AssertThat(result.first.aux_rules, IsEmpty())
AssertThat(result.second.rules, IsEmpty())
AssertThat(result.second.aux_rules, Equals(rule_list({
{ "'ab'", str("ab") },
})));
});
it("moves pattern rules into the lexical grammar", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
}, {}));
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
{
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
})));
AssertThat(result.first.aux_rules, IsEmpty())
AssertThat(result.second.rules, IsEmpty())
AssertThat(result.second.aux_rules, Equals(rule_list({
{ "/a+/", pattern("a+") },
})));
});
it("moves other rules marked as tokens into the lexical grammar", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
{ "rule_A", seq({
token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
i_sym(0) }) }
}, {}));
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
{
{ "rule_A", seq({
token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
i_sym(0) }) }
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
})));
AssertThat(result.first.aux_rules, IsEmpty())
AssertThat(result.second.rules, IsEmpty())
AssertThat(result.second.aux_rules, Equals(rule_list({
{ "(seq /./ (choice 'a' 'b'))", token(seq({ pattern("."), choice({ str("a"), str("b") }) })) },
})));
});
it("does not extract blanks", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
{ "rule_A", choice({ i_sym(0), blank() }) },
}, {}));
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
{
{ "rule_A", choice({ i_sym(0), blank() }) },
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", choice({ i_sym(0), blank() }) },
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({}, {})));
})));
AssertThat(result.first.aux_rules, IsEmpty())
AssertThat(result.second.rules, IsEmpty())
AssertThat(result.second.aux_rules, IsEmpty())
});
it("does not create duplicate tokens in the lexical grammar", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
}, {}));
pair<SyntaxGrammar, LexicalGrammar> result = extract_tokens(InternedGrammar{
{
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
})));
AssertThat(result.first.aux_rules, IsEmpty())
AssertThat(result.second.rules, IsEmpty())
AssertThat(result.second.aux_rules, Equals(rule_list({
{ "'ab'", str("ab") },
})));
});
it("extracts tokens from the grammar's auxiliary rules", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({}, {
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
}));
AssertThat(result.first, Equals(PreparedGrammar({}, {
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
{ "'ab'", str("ab") },
})));
})))
});
describe("when an entire rule can be extracted", [&]() {
it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() {
auto result = extract_tokens(PreparedGrammar({
{ "rule_A", i_sym(1) },
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
}, {}));
auto result = extract_tokens(InternedGrammar{
{
{ "rule_A", i_sym(1) },
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_A", i_token(0) }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({
})));
AssertThat(result.first.aux_rules, IsEmpty());
AssertThat(result.second.rules, Equals(rule_list({
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
}, {})));
})));
AssertThat(result.second.aux_rules, IsEmpty());
});
it("updates symbols whose indices need to change due to deleted rules", [&]() {
auto result = extract_tokens(PreparedGrammar({
{ "rule_A", str("ab") },
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
}, {}));
auto result = extract_tokens(InternedGrammar{
{
{ "rule_A", str("ab") },
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
},
{},
{}
});
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "rule_B", i_token(0) },
{ "rule_C", i_sym(0) },
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({
})));
AssertThat(result.first.aux_rules, IsEmpty());
AssertThat(result.second.rules, Equals(rule_list({
{ "rule_A", str("ab") },
}, {})));
})));
AssertThat(result.second.aux_rules, IsEmpty());
});
it("updates the grammar's ubiquitous_tokens", [&]() {
auto result = extract_tokens(PreparedGrammar({
{ "rule_A", str("ab") },
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
}, {}).ubiquitous_tokens({ Symbol(0) }));
auto result = extract_tokens(InternedGrammar{
{
{ "rule_A", str("ab") },
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
},
{ Symbol(0) },
{}
});
AssertThat(result.first.ubiquitous_tokens(), Equals(vector<Symbol>({
AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
{ Symbol(0, SymbolOptionToken) }
})));
});
it("extracts entire auxiliary rules", [&]() {
auto result = extract_tokens(PreparedGrammar({}, {
{ "rule_A", str("ab") },
{ "rule_B", i_aux_sym(0) },
{ "rule_C", i_aux_sym(1) },
}));
AssertThat(result.first, Equals(PreparedGrammar({}, {
{ "rule_B", i_aux_token(0) },
{ "rule_C", i_aux_sym(0) },
})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
{ "rule_A", str("ab") },
})));
});
});
});

View file

@ -3,6 +3,7 @@
#include "compiler/prepare_grammar/intern_symbols.h"
#include "compiler/rules/named_symbol.h"
#include "compiler/rules/symbol.h"
#include "compiler/helpers/containers.h"
START_TEST
@ -20,11 +21,11 @@ describe("interning symbols in a grammar", []() {
auto result = intern_symbols(grammar);
AssertThat(result.second, Equals((GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
AssertThat(result.first.rules, Equals(rule_list({
{ "x", choice({ i_sym(1), i_sym(2) }) },
{ "y", i_sym(2) },
{ "z", str("stuff") },
}, {})));
})));
});
describe("when there are symbols that reference undefined rules", [&]() {
@ -49,10 +50,20 @@ describe("interning symbols in a grammar", []() {
auto result = intern_symbols(grammar);
AssertThat(result.second, Equals((GrammarError *)nullptr));
AssertThat(result.first.ubiquitous_tokens(), Equals(vector<Symbol>({
AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
Symbol(2)
})));
});
it("preserves the grammar's separator character set", [&]() {
auto grammar = Grammar({
{ "z", str("stuff") }
}).separators({ 'x', 'y' });
auto result = intern_symbols(grammar);
AssertThat(result.first.separators, Equals(vector<char>({ 'x', 'y' })))
});
});
END_TEST

View file

@ -25,7 +25,7 @@ namespace tree_sitter {
namespace build_tables {
class LexTableBuilder {
const PreparedGrammar lex_grammar;
const LexicalGrammar lex_grammar;
ParseTable *parse_table;
LexConflictManager conflict_manager;
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
@ -65,7 +65,7 @@ namespace tree_sitter {
}
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
auto transitions = char_transitions(item_set, lex_grammar);
auto transitions = char_transitions(item_set);
for (const auto &transition : transitions) {
CharacterSet rule = transition.first;
LexItemSet new_item_set = transition.second;
@ -114,7 +114,7 @@ namespace tree_sitter {
}
public:
LexTableBuilder(ParseTable *parse_table, const PreparedGrammar &lex_grammar) :
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) :
lex_grammar(lex_grammar),
parse_table(parse_table),
conflict_manager(LexConflictManager(lex_grammar)) {}
@ -129,7 +129,7 @@ namespace tree_sitter {
}
};
LexTable build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar) {
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar) {
return LexTableBuilder(parse_table, lex_grammar).build();
}
}

View file

@ -5,12 +5,11 @@
#include "compiler/lex_table.h"
namespace tree_sitter {
class PreparedGrammar;
class LexicalGrammar;
class ParseTable;
namespace build_tables {
LexTable
build_lex_table(ParseTable *parse_table, const PreparedGrammar &lex_grammar);
LexTable build_lex_table(ParseTable *parse_table, const LexicalGrammar &lex_grammar);
}
}

View file

@ -23,7 +23,7 @@ namespace tree_sitter {
namespace build_tables {
class ParseTableBuilder {
const PreparedGrammar grammar;
const SyntaxGrammar grammar;
ParseConflictManager conflict_manager;
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
ParseTable parse_table;
@ -59,7 +59,7 @@ namespace tree_sitter {
}
void add_ubiquitous_token_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const Symbol &symbol : grammar.ubiquitous_tokens()) {
for (const Symbol &symbol : grammar.ubiquitous_tokens) {
auto &actions = parse_table.states[state_id].actions;
if (actions.find(symbol) == actions.end())
parse_table.add_action(state_id, symbol, ParseAction::Shift(state_id, { 0 }));
@ -99,7 +99,7 @@ namespace tree_sitter {
}
public:
ParseTableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) :
ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) :
grammar(grammar),
conflict_manager(ParseConflictManager(grammar, lex_grammar)) {}
@ -111,7 +111,7 @@ namespace tree_sitter {
};
pair<ParseTable, vector<Conflict>>
build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) {
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
return ParseTableBuilder(grammar, lex_grammar).build();
}
}

View file

@ -7,11 +7,12 @@
#include "compiler/parse_table.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace build_tables {
std::pair<ParseTable, std::vector<Conflict>>
build_parse_table(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar);
build_parse_table(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar);
}
}

View file

@ -1,6 +1,7 @@
#include "compiler/build_tables/build_tables.h"
#include "compiler/build_tables/build_parse_table.h"
#include "compiler/build_tables/build_lex_table.h"
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::tuple;
@ -9,8 +10,8 @@ namespace tree_sitter {
namespace build_tables {
tuple<ParseTable, LexTable, vector<Conflict>>
build_tables(const PreparedGrammar &grammar,
const PreparedGrammar &lex_grammar) {
build_tables(const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar) {
auto parse_table_result = build_parse_table(grammar, lex_grammar);
ParseTable parse_table = parse_table_result.first;
vector<Conflict> conflicts = parse_table_result.second;

View file

@ -8,12 +8,13 @@
#include "compiler/lex_table.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace build_tables {
std::tuple<ParseTable, LexTable, std::vector<Conflict>>
build_tables(const PreparedGrammar &grammar,
const PreparedGrammar &lex_grammar);
build_tables(const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar);
}
}

View file

@ -14,11 +14,11 @@ namespace tree_sitter {
namespace build_tables {
class FirstSet : public rules::RuleFn<set<Symbol>> {
const PreparedGrammar *grammar;
const SyntaxGrammar *grammar;
set<Symbol> visited_symbols;
public:
explicit FirstSet(const PreparedGrammar *grammar) : grammar(grammar) {}
explicit FirstSet(const SyntaxGrammar *grammar) : grammar(grammar) {}
set<Symbol> apply_to(const Symbol *rule) {
auto insertion_result = visited_symbols.insert(*rule);
@ -54,7 +54,7 @@ namespace tree_sitter {
}
};
set<Symbol> first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar) {
set<Symbol> first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
return FirstSet(&grammar).apply(rule);
}
}

View file

@ -6,17 +6,17 @@
#include "compiler/rules/symbol.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
namespace build_tables {
/*
* Returns the set of terminal symbols that can appear at
* the beginning of a string derivable from a given rule,
* in a given gramamr.
* in a given grammar.
*/
std::set<rules::Symbol>
first_set(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
first_set(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
}
}

View file

@ -19,7 +19,7 @@ namespace tree_sitter {
namespace build_tables {
const ParseItemSet item_set_closure(const ParseItem &starting_item,
const set<Symbol> &starting_lookahead_symbols,
const PreparedGrammar &grammar) {
const SyntaxGrammar &grammar) {
ParseItemSet result;
vector<pair<ParseItem, set<Symbol>>> items_to_process = {{starting_item, starting_lookahead_symbols}};

View file

@ -6,12 +6,12 @@
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
namespace build_tables {
const ParseItemSet item_set_closure(const ParseItem &item,
const std::set<rules::Symbol> &lookahead_symbols,
const PreparedGrammar &grammar);
const SyntaxGrammar &grammar);
}
}

View file

@ -4,6 +4,7 @@
#include "compiler/build_tables/rule_transitions.h"
#include "compiler/build_tables/merge_transitions.h"
#include "compiler/rules/symbol.h"
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::map;
@ -13,7 +14,7 @@ namespace tree_sitter {
namespace build_tables {
map<Symbol, ParseItemSet>
sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar) {
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar) {
map<Symbol, ParseItemSet> result;
for (const auto &pair : item_set) {
const ParseItem &item = pair.first;
@ -31,7 +32,7 @@ namespace tree_sitter {
}
map<CharacterSet, LexItemSet>
char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar) {
char_transitions(const LexItemSet &item_set) {
map<CharacterSet, LexItemSet> result;
for (const LexItem &item : item_set) {
for (auto &transition : char_transitions(item.rule)) {

View file

@ -6,7 +6,7 @@
#include "compiler/build_tables/parse_item.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
namespace rules {
class CharacterSet;
class Symbol;
@ -14,10 +14,10 @@ namespace tree_sitter {
namespace build_tables {
std::map<rules::Symbol, ParseItemSet>
sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar);
sym_transitions(const ParseItemSet &item_set, const SyntaxGrammar &grammar);
std::map<rules::CharacterSet, LexItemSet>
char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar);
char_transitions(const LexItemSet &item_set);
}
}

View file

@ -4,6 +4,7 @@
#include <string>
#include <set>
#include "compiler/util/string_helpers.h"
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace build_tables {
@ -13,7 +14,7 @@ namespace tree_sitter {
using std::set;
using std::vector;
LexConflictManager::LexConflictManager(const PreparedGrammar &grammar) :
LexConflictManager::LexConflictManager(const LexicalGrammar &grammar) :
grammar(grammar) {}
bool LexConflictManager::resolve_lex_action(const LexAction &old_action,

View file

@ -8,10 +8,10 @@
namespace tree_sitter {
namespace build_tables {
class LexConflictManager {
const PreparedGrammar grammar;
const LexicalGrammar grammar;
public:
explicit LexConflictManager(const PreparedGrammar &grammar);
explicit LexConflictManager(const LexicalGrammar &grammar);
bool resolve_lex_action(const LexAction &old_action,
const LexAction &new_action);
};

View file

@ -4,6 +4,7 @@
#include <string>
#include <set>
#include "compiler/util/string_helpers.h"
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
namespace build_tables {
@ -13,8 +14,8 @@ namespace tree_sitter {
using std::set;
using std::vector;
ParseConflictManager::ParseConflictManager(const PreparedGrammar &parse_grammar,
const PreparedGrammar &lex_grammar) :
ParseConflictManager::ParseConflictManager(const SyntaxGrammar &parse_grammar,
const LexicalGrammar &lex_grammar) :
parse_grammar(parse_grammar),
lex_grammar(lex_grammar) {}
@ -87,7 +88,7 @@ namespace tree_sitter {
return precedences + ")";
}
string message_for_action(const ParseAction &action, const PreparedGrammar &parse_grammar) {
string message_for_action(const ParseAction &action, const SyntaxGrammar &parse_grammar) {
switch (action.type) {
case ParseActionTypeShift:
return "shift " + precedence_string(action);

View file

@ -13,13 +13,13 @@
namespace tree_sitter {
namespace build_tables {
class ParseConflictManager {
const PreparedGrammar parse_grammar;
const PreparedGrammar lex_grammar;
const SyntaxGrammar parse_grammar;
const LexicalGrammar lex_grammar;
std::set<Conflict> conflicts_;
public:
ParseConflictManager(const PreparedGrammar &parse_grammar,
const PreparedGrammar &lex_grammar);
ParseConflictManager(const SyntaxGrammar &parse_grammar,
const LexicalGrammar &lex_grammar);
bool resolve_parse_action(const rules::Symbol &symbol,
const ParseAction &old_action,
const ParseAction &new_action);

View file

@ -39,13 +39,13 @@ namespace tree_sitter {
};
class CanBeBlankRecursive : public CanBeBlank {
const PreparedGrammar *grammar;
const SyntaxGrammar *grammar;
set<rules::Symbol> visited_symbols;
using CanBeBlank::visit;
public:
using CanBeBlank::apply_to;
explicit CanBeBlankRecursive(const PreparedGrammar *grammar) : grammar(grammar) {}
explicit CanBeBlankRecursive(const SyntaxGrammar *grammar) : grammar(grammar) {}
bool apply_to(const rules::Symbol *rule) {
if (visited_symbols.find(*rule) == visited_symbols.end()) {
@ -61,7 +61,7 @@ namespace tree_sitter {
return CanBeBlank().apply(rule);
}
bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar) {
// Grammar-aware overload: runs a CanBeBlankRecursive visitor, constructed
// with a pointer to `grammar`, over `rule` and returns the visitor's verdict.
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar) {
    CanBeBlankRecursive blank_checker(&grammar);
    return blank_checker.apply(rule);
}
}

View file

@ -4,11 +4,11 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
namespace build_tables {
bool rule_can_be_blank(const rules::rule_ptr &rule);
bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar);
bool rule_can_be_blank(const rules::rule_ptr &rule, const SyntaxGrammar &grammar);
}
}

View file

@ -14,8 +14,8 @@ namespace tree_sitter {
tuple<string, vector<Conflict>, const GrammarError *>
compile(const Grammar &grammar, std::string name) {
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
const PreparedGrammar &syntax_grammar = get<0>(prepare_grammar_result);
const PreparedGrammar &lexical_grammar = get<1>(prepare_grammar_result);
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
const GrammarError *error = get<2>(prepare_grammar_result);
if (error)

View file

@ -27,16 +27,16 @@ namespace tree_sitter {
const string name;
const ParseTable parse_table;
const LexTable lex_table;
const PreparedGrammar syntax_grammar;
const PreparedGrammar lexical_grammar;
const SyntaxGrammar syntax_grammar;
const LexicalGrammar lexical_grammar;
map<string, string> sanitized_names;
public:
CCodeGenerator(string name,
const ParseTable &parse_table,
const LexTable &lex_table,
const PreparedGrammar &syntax_grammar,
const PreparedGrammar &lexical_grammar) :
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar) :
indent_level(0),
name(name),
parse_table(parse_table),
@ -107,7 +107,7 @@ namespace tree_sitter {
void ubiquitous_symbols_list() {
line("UBIQUITOUS_SYMBOLS = {");
indent([&]() {
for (auto &symbol : syntax_grammar.ubiquitous_tokens())
for (auto &symbol : syntax_grammar.ubiquitous_tokens)
line("[" + symbol_id(symbol) + "] = 1,");
});
line("};");
@ -118,7 +118,7 @@ namespace tree_sitter {
line("HIDDEN_SYMBOLS = {");
indent([&]() {
for (auto &symbol : parse_table.symbols)
if (!symbol.is_built_in() && (symbol.is_auxiliary() || grammar_for_symbol(symbol).rule_name(symbol)[0] == '_'))
if (!symbol.is_built_in() && (symbol.is_auxiliary() || rule_name(symbol)[0] == '_'))
line("[" + symbol_id(symbol) + "] = 1,");
});
line("};");
@ -178,8 +178,10 @@ namespace tree_sitter {
line();
}
const PreparedGrammar & grammar_for_symbol(const rules::Symbol &symbol) {
return symbol.is_token() ? lexical_grammar : syntax_grammar;
// Returns the name of the rule that `symbol` refers to. Token symbols are
// looked up in the lexical grammar, every other symbol in the syntax grammar.
string rule_name(const rules::Symbol &symbol) {
    if (symbol.is_token())
        return lexical_grammar.rule_name(symbol);
    return syntax_grammar.rule_name(symbol);
}
string symbol_id(const rules::Symbol &symbol) {
@ -188,7 +190,7 @@ namespace tree_sitter {
"ts_builtin_sym_error" :
"ts_builtin_sym_end";
} else {
string name = sanitize_name(grammar_for_symbol(symbol).rule_name(symbol));
string name = sanitize_name(rule_name(symbol));
if (symbol.is_auxiliary())
return "ts_aux_sym_" + name;
else
@ -238,9 +240,9 @@ namespace tree_sitter {
if (symbol.is_built_in()) {
return (symbol == rules::ERROR()) ? "error" : "end";
} else if (symbol.is_token() && symbol.is_auxiliary()) {
return grammar_for_symbol(symbol).rule_name(symbol);
return rule_name(symbol);
} else {
return grammar_for_symbol(symbol).rule_name(symbol);
return rule_name(symbol);
}
}
@ -397,8 +399,8 @@ namespace tree_sitter {
string c_code(string name,
const ParseTable &parse_table,
const LexTable &lex_table,
const PreparedGrammar &syntax_grammar,
const PreparedGrammar &lexical_grammar) {
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar) {
return CCodeGenerator(name, parse_table, lex_table, syntax_grammar, lexical_grammar).code();
}
}

View file

@ -7,14 +7,15 @@
#include "compiler/lex_table.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace generate_code {
std::string c_code(std::string name,
const ParseTable &parse_table,
const LexTable &lex_table,
const PreparedGrammar &syntax_grammar,
const PreparedGrammar &lexical_grammar);
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar);
}
}

View file

@ -62,11 +62,20 @@ namespace tree_sitter {
return ubiquitous_tokens_;
}
const Grammar & Grammar::ubiquitous_tokens(const vector<string> &ubiquitous_tokens) {
Grammar & Grammar::ubiquitous_tokens(const vector<string> &ubiquitous_tokens) {
ubiquitous_tokens_ = ubiquitous_tokens;
return *this;
}
// Read accessor: returns the separator characters currently stored on this
// grammar, by const reference.
const vector<char> & Grammar::separators() const {
return separators_;
}
// Write accessor: replaces the stored separator characters with a copy of
// `separators` and returns *this so that further setters can be chained.
Grammar & Grammar::separators(const vector<char> &separators) {
separators_ = separators;
return *this;
}
const vector<pair<string, rule_ptr>> & Grammar::rules() const {
return rules_;
}

View file

@ -50,17 +50,16 @@ namespace tree_sitter {
vector<pair<string, rules::rule_ptr>> aux_rules;
};
PreparedGrammar expand_repeats(const PreparedGrammar &grammar) {
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules());
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);
for (auto &pair : grammar.rules()) {
for (auto &pair : grammar.rules) {
ExpandRepeats expander(pair.first, aux_rules.size());
rules.push_back({ pair.first, expander.apply(pair.second) });
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end());
}
return PreparedGrammar(rules, aux_rules).
ubiquitous_tokens(grammar.ubiquitous_tokens());
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
}
}
}

View file

@ -4,10 +4,10 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
namespace prepare_grammar {
PreparedGrammar expand_repeats(const PreparedGrammar &);
SyntaxGrammar expand_repeats(const SyntaxGrammar &);
}
}

View file

@ -43,28 +43,29 @@ namespace tree_sitter {
ExpandTokens() : error(nullptr) {}
};
pair<PreparedGrammar, const GrammarError *>
expand_tokens(const PreparedGrammar &grammar) {
pair<LexicalGrammar, const GrammarError *>
expand_tokens(const LexicalGrammar &grammar) {
vector<pair<string, rule_ptr>> rules, aux_rules;
ExpandTokens expander;
for (auto &pair : grammar.rules()) {
for (auto &pair : grammar.rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { PreparedGrammar(), expander.error };
return { LexicalGrammar({}, {}, {}), expander.error };
rules.push_back({ pair.first, rule });
}
for (auto &pair : grammar.aux_rules()) {
for (auto &pair : grammar.aux_rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { PreparedGrammar(), expander.error };
return { LexicalGrammar({}, {}, {}), expander.error };
aux_rules.push_back({ pair.first, rule });
}
return {
PreparedGrammar(rules, aux_rules).ubiquitous_tokens(grammar.ubiquitous_tokens()),
nullptr };
LexicalGrammar(rules, aux_rules, grammar.separators),
nullptr,
};
}
}
}

View file

@ -5,11 +5,11 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
class PreparedGrammar;
class LexicalGrammar;
namespace prepare_grammar {
std::pair<PreparedGrammar, const GrammarError *>
expand_tokens(const PreparedGrammar &);
std::pair<LexicalGrammar, const GrammarError *>
expand_tokens(const LexicalGrammar &);
}
}

View file

@ -9,6 +9,7 @@
#include "compiler/rules/string.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/pattern.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepare_grammar/token_description.h"
namespace tree_sitter {
@ -93,15 +94,15 @@ namespace tree_sitter {
vector<pair<string, rule_ptr>> tokens;
};
pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &input_grammar) {
pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &input_grammar) {
vector<pair<string, rule_ptr>> rules, tokens, aux_rules, aux_tokens;
vector<Symbol> ubiquitous_tokens;
TokenExtractor extractor;
map<Symbol, Symbol> symbol_replacements;
for (size_t i = 0; i < input_grammar.rules().size(); i++) {
auto pair = input_grammar.rules()[i];
for (size_t i = 0; i < input_grammar.rules.size(); i++) {
auto pair = input_grammar.rules[i];
if (IsToken().apply(pair.second)) {
tokens.push_back(pair);
symbol_replacements.insert({
@ -113,32 +114,17 @@ namespace tree_sitter {
}
}
for (size_t i = 0; i < input_grammar.aux_rules().size(); i++) {
auto pair = input_grammar.aux_rules()[i];
if (IsToken().apply(pair.second)) {
aux_tokens.push_back(pair);
symbol_replacements.insert({
Symbol(i, rules::SymbolOptionAuxiliary),
Symbol(aux_tokens.size() - 1, rules::SymbolOption(rules::SymbolOptionAuxiliary|rules::SymbolOptionToken))
});
} else {
aux_rules.push_back({ pair.first, extractor.apply(pair.second) });
}
}
aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end());
SymbolInliner inliner(symbol_replacements);
for (auto &pair : rules)
pair.second = inliner.apply(pair.second);
for (auto &pair : aux_rules)
pair.second = inliner.apply(pair.second);
for (auto &symbol : input_grammar.ubiquitous_tokens())
for (auto &symbol : input_grammar.ubiquitous_tokens)
ubiquitous_tokens.push_back(inliner.replace_symbol(symbol));
return {
PreparedGrammar(rules, aux_rules).ubiquitous_tokens(ubiquitous_tokens),
PreparedGrammar(tokens, aux_tokens)
SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),
LexicalGrammar(tokens, aux_tokens, {}),
};
}
}

View file

@ -2,12 +2,14 @@
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_
#include <utility>
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
class PreparedGrammar;
class SyntaxGrammar;
class LexicalGrammar;
namespace prepare_grammar {
std::pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &);
std::pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &);
}
}

View file

@ -2,6 +2,7 @@
#include <memory>
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepared_grammar.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/named_symbol.h"
@ -37,15 +38,16 @@ namespace tree_sitter {
string missing_rule_name;
};
pair<PreparedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
pair<InternedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
InternedGrammar grammar;
return {
PreparedGrammar({}, {}),
grammar,
new GrammarError(GrammarErrorTypeUndefinedSymbol,
"Undefined rule '" + rule_name + "'")
};
}
pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
InternSymbols interner(grammar);
vector<pair<string, rule_ptr>> rules;
@ -64,10 +66,12 @@ namespace tree_sitter {
ubiquitous_tokens.push_back(*token);
}
return {
PreparedGrammar(rules, {}).ubiquitous_tokens(ubiquitous_tokens),
nullptr
};
InternedGrammar result;
result.rules = rules;
result.ubiquitous_tokens = ubiquitous_tokens;
result.separators = grammar.separators();
return { result, nullptr };
}
}
}

View file

@ -4,13 +4,13 @@
#include <utility>
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
class Grammar;
class PreparedGrammar;
namespace prepare_grammar {
std::pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &);
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
}
}

View file

@ -0,0 +1,21 @@
#ifndef COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
#define COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
#include <utility>
#include <vector>
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace prepare_grammar {
// Plain data holder passed from intern_symbols() to extract_tokens().
// All members are public and default-constructed empty.
class InternedGrammar {
public:
// (name, rule) pairs, in the order intern_symbols() produced them.
std::vector<std::pair<std::string, rules::rule_ptr>> rules;
// Ubiquitous tokens of the input grammar, expressed as Symbols.
std::vector<rules::Symbol> ubiquitous_tokens;
// Separator characters copied from Grammar::separators().
std::vector<char> separators;
};
}
}
#endif // COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_

View file

@ -4,29 +4,31 @@
#include "compiler/prepare_grammar/expand_repeats.h"
#include "compiler/prepare_grammar/expand_tokens.h"
#include "compiler/prepare_grammar/intern_symbols.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
using std::tuple;
using std::make_tuple;
namespace prepare_grammar {
tuple<PreparedGrammar, PreparedGrammar, const GrammarError *>
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
prepare_grammar(const Grammar &input_grammar) {
auto result = intern_symbols(input_grammar);
const PreparedGrammar &grammar = result.first;
const InternedGrammar &grammar = result.first;
const GrammarError *error = result.second;
if (error)
return make_tuple(PreparedGrammar(), PreparedGrammar(), error);
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
auto grammars = extract_tokens(grammar);
const PreparedGrammar &rule_grammar = expand_repeats(grammars.first);
const SyntaxGrammar &rule_grammar = expand_repeats(grammars.first);
auto expand_tokens_result = expand_tokens(grammars.second);
const PreparedGrammar &lex_grammar = expand_tokens_result.first;
const LexicalGrammar &lex_grammar = expand_tokens_result.first;
error = expand_tokens_result.second;
if (error)
return make_tuple(PreparedGrammar(), PreparedGrammar(), error);
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
return make_tuple(rule_grammar, lex_grammar, nullptr);
}

View file

@ -2,14 +2,14 @@
#define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_
#include <utility>
#include "compiler/prepared_grammar.h"
namespace tree_sitter {
class Grammar;
class GrammarError;
class PreparedGrammar;
namespace prepare_grammar {
std::tuple<PreparedGrammar, PreparedGrammar, const GrammarError *>
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *>
prepare_grammar(const Grammar &);
}
}

View file

@ -7,98 +7,41 @@
namespace tree_sitter {
using std::string;
using std::pair;
using std::ostream;
using std::vector;
using rules::rule_ptr;
using rules::Symbol;
PreparedGrammar::PreparedGrammar() :
rules_({}),
aux_rules_({}),
ubiquitous_tokens_({}) {}
PreparedGrammar::PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules) :
rules_(rules),
aux_rules_(aux_rules),
ubiquitous_tokens_({}) {}
const rule_ptr & PreparedGrammar::rule(const Symbol &symbol) const {
const rules::rule_ptr & PreparedGrammar::rule(const rules::Symbol &symbol) const {
return symbol.is_auxiliary() ?
aux_rules_[symbol.index].second :
rules_[symbol.index].second;
aux_rules[symbol.index].second :
rules[symbol.index].second;
}
const string & PreparedGrammar::rule_name(const Symbol &symbol) const {
const string & PreparedGrammar::rule_name(const rules::Symbol &symbol) const {
return symbol.is_auxiliary() ?
aux_rules_[symbol.index].first :
rules_[symbol.index].first;
aux_rules[symbol.index].first :
rules[symbol.index].first;
}
bool PreparedGrammar::operator==(const PreparedGrammar &other) const {
if (other.rules_.size() != rules_.size()) return false;
// Default constructors: every vector member (rules, aux_rules and the
// subclass-specific ubiquitous_tokens / separators) is left empty.
PreparedGrammar::PreparedGrammar() {}
SyntaxGrammar::SyntaxGrammar() {}
LexicalGrammar::LexicalGrammar() {}
for (size_t i = 0; i < rules_.size(); i++) {
auto &pair = rules_[i];
auto &other_pair = other.rules_[i];
if (other_pair.first != pair.first) return false;
if (!other_pair.second->operator==(*pair.second)) return false;
}
// Copies the given named rules and auxiliary rules into the members of the
// same name.
PreparedGrammar::PreparedGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
rules(rules),
aux_rules(aux_rules) {}
if (other.aux_rules_.size() != aux_rules_.size()) return false;
for (size_t i = 0; i < aux_rules_
.size(); i++) {
auto &pair = aux_rules_[i];
auto &other_pair = other.aux_rules_[i];
if (other_pair.first != pair.first) return false;
if (!other_pair.second->operator==(*pair.second)) return false;
}
// Forwards `rules` and `aux_rules` to the PreparedGrammar base constructor
// and stores a copy of `ubiquitous_tokens`.
SyntaxGrammar::SyntaxGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules,
const vector<rules::Symbol> &ubiquitous_tokens) :
PreparedGrammar(rules, aux_rules),
ubiquitous_tokens(ubiquitous_tokens) {}
return true;
}
const vector<pair<string, rule_ptr>> & PreparedGrammar::rules() const {
return rules_;
}
const vector<pair<string, rule_ptr>> & PreparedGrammar::aux_rules() const {
return aux_rules_;
}
const vector<Symbol> & PreparedGrammar::ubiquitous_tokens() const {
return ubiquitous_tokens_;
}
const PreparedGrammar & PreparedGrammar::ubiquitous_tokens(const vector<Symbol> &ubiquitous_tokens) {
ubiquitous_tokens_ = ubiquitous_tokens;
return *this;
}
ostream& operator<<(ostream &stream, const PreparedGrammar &grammar) {
stream << string("#<grammar");
stream << string(" rules: {");
bool started = false;
for (auto pair : grammar.rules()) {
if (started) stream << string(", ");
stream << pair.first;
stream << string(" => ");
stream << pair.second;
started = true;
}
stream << string("}");
stream << string(" aux_rules: {");
started = false;
for (auto pair : grammar.aux_rules()) {
if (started) stream << string(", ");
stream << pair.first;
stream << string(" => ");
stream << pair.second;
started = true;
}
stream << string("}");
return stream << string(">");
}
// Forwards `rules` and `aux_rules` to the PreparedGrammar base constructor
// and stores a copy of `separators`.
LexicalGrammar::LexicalGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules,
const vector<char> &separators) :
PreparedGrammar(rules, aux_rules),
separators(separators) {}
}

View file

@ -9,25 +9,40 @@
namespace tree_sitter {
class PreparedGrammar {
const std::vector<std::pair<std::string, rules::rule_ptr>> rules_;
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules_;
std::vector<rules::Symbol> ubiquitous_tokens_;
public:
PreparedGrammar();
PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
PreparedGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
bool operator==(const PreparedGrammar &other) const;
const std::string & rule_name(const rules::Symbol &symbol) const;
const rules::rule_ptr & rule(const rules::Symbol &symbol) const;
const std::vector<rules::Symbol> & ubiquitous_tokens() const;
const PreparedGrammar & ubiquitous_tokens(const std::vector<rules::Symbol> &ubiquitous_tokens);
const std::vector<std::pair<std::string, rules::rule_ptr>> & rules() const;
const std::vector<std::pair<std::string, rules::rule_ptr>> & aux_rules() const;
};
std::ostream& operator<<(std::ostream &stream, const PreparedGrammar &grammar);
// A PreparedGrammar that additionally carries the list of ubiquitous token
// symbols.
class SyntaxGrammar : public PreparedGrammar {
public:
// Creates a grammar with no rules and no ubiquitous tokens.
SyntaxGrammar();
// Creates a grammar from named rules, auxiliary rules and ubiquitous tokens.
SyntaxGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
const std::vector<rules::Symbol> &ubiquitous_tokens);
// Public, non-const field; read directly by callers (no accessor).
std::vector<rules::Symbol> ubiquitous_tokens;
};
// A PreparedGrammar that additionally carries a list of separator characters.
class LexicalGrammar : public PreparedGrammar {
public:
// Creates a grammar with no rules and no separators.
LexicalGrammar();
// Creates a grammar from named rules, auxiliary rules and separator characters.
LexicalGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
const std::vector<char> &separators);
// Public, non-const field; read directly by callers (no accessor).
std::vector<char> separators;
};
}
#endif // COMPILER_PREPARED_GRAMMAR_H_