Add expected_conflicts field to grammar
This commit is contained in:
parent
36d9c3be14
commit
c9a482bbf3
17 changed files with 151 additions and 67 deletions
|
|
@ -38,6 +38,7 @@ std::ostream &operator<<(std::ostream &stream, const rules::rule_ptr &rule);
|
|||
class Grammar {
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> rules_;
|
||||
std::set<rules::rule_ptr> ubiquitous_tokens_;
|
||||
std::set<std::set<std::string>> expected_conflicts_;
|
||||
|
||||
public:
|
||||
explicit Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &);
|
||||
|
|
@ -47,6 +48,8 @@ class Grammar {
|
|||
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules() const;
|
||||
const std::set<rules::rule_ptr> &ubiquitous_tokens() const;
|
||||
Grammar &ubiquitous_tokens(const std::set<rules::rule_ptr> &);
|
||||
const std::set<std::set<std::string>> &expected_conflicts() const;
|
||||
Grammar &expected_conflicts(const std::set<std::set<std::string>> &);
|
||||
};
|
||||
|
||||
enum GrammarErrorType {
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ describe("build_parse_table", []() {
|
|||
{ "rule0", choice({ i_sym(1), i_sym(2) }) },
|
||||
{ "rule1", i_token(0) },
|
||||
{ "rule2", i_token(1) },
|
||||
}, {}, { Symbol(2, SymbolOptionToken) });
|
||||
}, {}, { Symbol(2, SymbolOptionToken) }, set<set<Symbol>>());
|
||||
|
||||
LexicalGrammar lex_grammar({
|
||||
{ "token0", pattern("[a-c]") },
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ describe("sym_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
|
|||
SyntaxGrammar grammar({
|
||||
{ "A", blank() },
|
||||
{ "B", i_token(21) },
|
||||
}, {}, set<Symbol>());
|
||||
}, {}, set<Symbol>(), set<set<Symbol>>());
|
||||
|
||||
it("computes the closure of the new item sets", [&]() {
|
||||
ParseItemSet set1({
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ describe("ParseConflictManager", []() {
|
|||
{ "reduced_rule", i_token(0) },
|
||||
{ "other_rule1", i_token(0) },
|
||||
{ "other_rule2", i_token(0) },
|
||||
}, {}, { Symbol(2, SymbolOptionToken) });
|
||||
}, {}, { Symbol(2, SymbolOptionToken) }, set<set<Symbol>>());
|
||||
|
||||
LexicalGrammar lexical_grammar({
|
||||
{ "other_token", pattern("[a-b]") },
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ describe("rule_can_be_blank", [&]() {
|
|||
{ "B", choice({
|
||||
seq({ i_sym(1), i_token(12) }),
|
||||
i_token(13) }) },
|
||||
}, {}, set<Symbol>());
|
||||
}, {}, set<Symbol>(), set<set<Symbol>>());
|
||||
|
||||
it("terminates for left-recursive rules that can be blank", [&]() {
|
||||
rule = i_sym(0);
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ describe("expand_repeats", []() {
|
|||
it("replaces repeat rules with pairs of recursive rules", [&]() {
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", repeat(i_token(0)) },
|
||||
}, {}, set<Symbol>());
|
||||
}, {}, set<Symbol>(), set<set<Symbol>>());
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
|
|
@ -32,7 +32,7 @@ describe("expand_repeats", []() {
|
|||
{ "rule0", seq({
|
||||
i_token(10),
|
||||
repeat(i_token(11)) }) },
|
||||
}, {}, set<Symbol>());
|
||||
}, {}, set<Symbol>(), set<set<Symbol>>());
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
|
|
@ -52,7 +52,7 @@ describe("expand_repeats", []() {
|
|||
it("replaces repeats inside of choices", [&]() {
|
||||
SyntaxGrammar grammar({
|
||||
{ "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
|
||||
}, {}, set<Symbol>());
|
||||
}, {}, set<Symbol>(), set<set<Symbol>>());
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
|
|
@ -73,7 +73,7 @@ describe("expand_repeats", []() {
|
|||
seq({ i_token(1), repeat(i_token(4)) }),
|
||||
seq({ i_token(2), repeat(i_token(4)) }) }) },
|
||||
{ "rule1", seq({ i_token(3), repeat(i_token(4)) }) },
|
||||
}, {}, set<Symbol>());
|
||||
}, {}, set<Symbol>(), set<set<Symbol>>());
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ describe("expand_repeats", []() {
|
|||
{ "rule0", seq({
|
||||
repeat(i_token(10)),
|
||||
repeat(i_token(11)) }) },
|
||||
}, {}, set<Symbol>());
|
||||
}, {}, set<Symbol>(), set<set<Symbol>>());
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
|
|
@ -120,7 +120,7 @@ describe("expand_repeats", []() {
|
|||
SyntaxGrammar grammar({
|
||||
{ "rule0", repeat(i_token(10)) },
|
||||
{ "rule1", repeat(i_token(11)) },
|
||||
}, {}, set<Symbol>());
|
||||
}, {}, set<Symbol>(), set<set<Symbol>>());
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include "compiler/helpers/containers.h"
|
||||
|
||||
|
|
@ -8,13 +9,16 @@ START_TEST
|
|||
|
||||
using namespace rules;
|
||||
using prepare_grammar::extract_tokens;
|
||||
using prepare_grammar::InternedGrammar;
|
||||
|
||||
describe("extract_tokens", []() {
|
||||
const set<rules::rule_ptr> no_ubiquitous_tokens;
|
||||
const set<set<rules::Symbol>> no_expected_conflicts;
|
||||
|
||||
it("moves string rules into the lexical grammar", [&]() {
|
||||
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> result =
|
||||
extract_tokens(Grammar({
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
|
||||
}));
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
|
||||
}, no_ubiquitous_tokens, no_expected_conflicts});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
|
|
@ -28,9 +32,9 @@ describe("extract_tokens", []() {
|
|||
});
|
||||
|
||||
it("moves pattern rules into the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(Grammar({
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
|
||||
}));
|
||||
}, no_ubiquitous_tokens, no_expected_conflicts});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
|
|
@ -44,11 +48,11 @@ describe("extract_tokens", []() {
|
|||
});
|
||||
|
||||
it("moves other rules marked as tokens into the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(Grammar({
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", seq({
|
||||
token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
|
||||
i_sym(0) }) }
|
||||
}));
|
||||
}, no_ubiquitous_tokens, no_expected_conflicts});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
|
|
@ -62,9 +66,9 @@ describe("extract_tokens", []() {
|
|||
});
|
||||
|
||||
it("does not move blank rules", [&]() {
|
||||
auto result = extract_tokens(Grammar({
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", choice({ i_sym(0), blank() }) },
|
||||
}));
|
||||
}, no_ubiquitous_tokens, no_expected_conflicts});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", choice({ i_sym(0), blank() }) },
|
||||
|
|
@ -76,9 +80,9 @@ describe("extract_tokens", []() {
|
|||
});
|
||||
|
||||
it("does not create duplicate tokens in the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(Grammar({
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
|
||||
}));
|
||||
}, no_ubiquitous_tokens, no_expected_conflicts});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) }
|
||||
|
|
@ -91,13 +95,31 @@ describe("extract_tokens", []() {
|
|||
})))
|
||||
});
|
||||
|
||||
// Spec: extract_tokens must rewrite the symbols stored in expected_conflicts
// so that they still refer to the same rules after tokens have been moved out
// of the syntax grammar.
it("updates the grammar's expected conflict symbols", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
// rule_A is a bare string; the size assertions below expect it to end up in
// the lexical grammar, leaving rule_B and rule_C in the syntax grammar.
{ "rule_A", str("ok") },
|
||||
{ "rule_B", repeat(i_sym(0)) },
|
||||
{ "rule_C", repeat(seq({ i_sym(0), i_sym(0) })) },
|
||||
},
|
||||
// Second aggregate member: ubiquitous tokens.
{ str(" ") },
|
||||
// Third aggregate member: one expected conflict, given with the original
// (pre-extraction) symbol indices 1 and 2.
{ { Symbol(1), Symbol(2) } }
|
||||
});
|
||||
|
||||
// Two rules remain in the syntax grammar, one rule moved to the lexical one.
AssertThat(get<0>(result).rules.size(), Equals<size_t>(2));
|
||||
AssertThat(get<1>(result).rules.size(), Equals<size_t>(1));
|
||||
// The conflict set is expected with indices shifted down by one (1,2 -> 0,1).
AssertThat(get<0>(result).expected_conflicts, Equals(set<set<Symbol>>({
|
||||
{ Symbol(0), Symbol(1) },
|
||||
})));
|
||||
});
|
||||
|
||||
describe("when an entire rule can be extracted", [&]() {
|
||||
it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() {
|
||||
auto result = extract_tokens(Grammar({
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", i_sym(1) },
|
||||
{ "rule_B", pattern("a|b") },
|
||||
{ "rule_C", token(seq({ str("a"), str("b") })) },
|
||||
}));
|
||||
}, no_ubiquitous_tokens, no_expected_conflicts});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_A", i_token(0) }
|
||||
|
|
@ -112,11 +134,11 @@ describe("extract_tokens", []() {
|
|||
});
|
||||
|
||||
it("updates symbols whose indices need to change due to deleted rules", [&]() {
|
||||
auto result = extract_tokens(Grammar({
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", i_sym(0) },
|
||||
{ "rule_C", i_sym(1) },
|
||||
}));
|
||||
}, no_ubiquitous_tokens, no_expected_conflicts});
|
||||
|
||||
AssertThat(get<0>(result).rules, Equals(rule_list({
|
||||
{ "rule_B", i_token(0) },
|
||||
|
|
@ -134,12 +156,12 @@ describe("extract_tokens", []() {
|
|||
describe("handling ubiquitous tokens", [&]() {
|
||||
describe("ubiquitous tokens that are not symbols", [&]() {
|
||||
it("adds them to the lexical grammar's separators", [&]() {
|
||||
auto result = extract_tokens(Grammar({
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", str("x") },
|
||||
}).ubiquitous_tokens({
|
||||
}, {
|
||||
pattern("\\s+"),
|
||||
str("y"),
|
||||
}));
|
||||
}, no_expected_conflicts});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
|
||||
|
|
@ -154,13 +176,13 @@ describe("extract_tokens", []() {
|
|||
|
||||
describe("ubiquitous tokens that point to moved rules", [&]() {
|
||||
it("updates them according to the new symbol numbers", [&]() {
|
||||
auto result = extract_tokens(Grammar( {
|
||||
auto result = extract_tokens(InternedGrammar{ {
|
||||
{ "rule_A", seq({ str("w"), i_sym(1) }) },
|
||||
{ "rule_B", str("x") },
|
||||
{ "rule_C", str("y") },
|
||||
}).ubiquitous_tokens({
|
||||
}, {
|
||||
i_sym(2),
|
||||
}));
|
||||
}, no_expected_conflicts});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
|
||||
|
|
@ -174,10 +196,10 @@ describe("extract_tokens", []() {
|
|||
|
||||
describe("ubiquitous tokens that are visible", [&]() {
|
||||
it("preserves them in the syntactic grammar", [&]() {
|
||||
auto result = extract_tokens(Grammar({
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", str("ab") },
|
||||
{ "rule_B", str("bc") },
|
||||
}).ubiquitous_tokens({ i_sym(1) }));
|
||||
}, { i_sym(1) }, no_expected_conflicts});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
|
||||
|
|
@ -191,10 +213,10 @@ describe("extract_tokens", []() {
|
|||
|
||||
describe("ubiquitous tokens that are used in other grammar rules", [&]() {
|
||||
it("preserves them in the syntactic grammar", [&]() {
|
||||
auto result = extract_tokens(Grammar({
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", seq({ i_sym(1), str("ab") }) },
|
||||
{ "_rule_B", str("bc") },
|
||||
}).ubiquitous_tokens({ i_sym(1) }));
|
||||
}, { i_sym(1) }, no_expected_conflicts});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
|
||||
|
|
@ -208,10 +230,10 @@ describe("extract_tokens", []() {
|
|||
|
||||
describe("ubiquitous tokens that are non-token symbols", [&]() {
|
||||
it("returns an error", [&]() {
|
||||
auto result = extract_tokens(Grammar({
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", seq({ str("x"), i_sym(1) }), },
|
||||
{ "rule_B", seq({ str("y"), str("z") }) },
|
||||
}).ubiquitous_tokens({ i_sym(1) }));
|
||||
}, { i_sym(1) }, no_expected_conflicts});
|
||||
|
||||
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), EqualsPointer(
|
||||
|
|
@ -220,12 +242,12 @@ describe("extract_tokens", []() {
|
|||
});
|
||||
});
|
||||
|
||||
describe("ubiquitous tokens that are non-token symbols", [&]() {
|
||||
describe("ubiquitous tokens that are not symbols", [&]() {
|
||||
it("returns an error", [&]() {
|
||||
auto result = extract_tokens(Grammar({
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{ "rule_A", str("x") },
|
||||
{ "rule_B", str("y") },
|
||||
}).ubiquitous_tokens({ choice({ i_sym(1), blank() }) }));
|
||||
}, { choice({ i_sym(1), blank() }) }, no_expected_conflicts});
|
||||
|
||||
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), EqualsPointer(
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ describe("intern_symbols", []() {
|
|||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.first.rules(), Equals(rule_list({
|
||||
AssertThat(result.first.rules, Equals(rule_list({
|
||||
{ "x", choice({ i_sym(1), i_sym(2) }) },
|
||||
{ "y", i_sym(2) },
|
||||
{ "z", str("stuff") },
|
||||
|
|
@ -49,8 +49,8 @@ describe("intern_symbols", []() {
|
|||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.first.ubiquitous_tokens().size(), Equals<size_t>(1));
|
||||
AssertThat(*result.first.ubiquitous_tokens().begin(), EqualsPointer(i_sym(2)));
|
||||
AssertThat(result.first.ubiquitous_tokens.size(), Equals<size_t>(1));
|
||||
AssertThat(*result.first.ubiquitous_tokens.begin(), EqualsPointer(i_sym(2)));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -69,6 +69,15 @@ Grammar &Grammar::ubiquitous_tokens(const set<rule_ptr> &ubiquitous_tokens) {
|
|||
return *this;
|
||||
}
|
||||
|
||||
// Read accessor: returns a const reference to the stored expected_conflicts_
// member, a set of sets of strings (intern_symbols treats each string as a
// rule name).
const set<set<string>> &Grammar::expected_conflicts() const {
|
||||
return expected_conflicts_;
|
||||
}
|
||||
|
||||
// Write accessor: replaces expected_conflicts_ with a copy of the argument.
// Returns *this so the call can be chained onto a Grammar expression, in the
// same way as the ubiquitous_tokens setter.
Grammar &Grammar::expected_conflicts(const set<set<string>> &expected_conflicts) {
|
||||
expected_conflicts_ = expected_conflicts;
|
||||
return *this;
|
||||
}
|
||||
|
||||
const vector<pair<string, rule_ptr>> &Grammar::rules() const { return rules_; }
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -78,7 +78,8 @@ SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
|
|||
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(),
|
||||
expander.aux_rules.end());
|
||||
|
||||
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
|
||||
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens,
|
||||
grammar.expected_conflicts);
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ static tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> ubiq_token_err
|
|||
}
|
||||
|
||||
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
|
||||
const Grammar &grammar) {
|
||||
const InternedGrammar &grammar) {
|
||||
vector<pair<string, rule_ptr>> rules, tokens;
|
||||
vector<rule_ptr> separators;
|
||||
set<Symbol> ubiquitous_tokens;
|
||||
|
|
@ -109,7 +109,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
|
|||
TokenExtractor extractor;
|
||||
|
||||
size_t i = 0;
|
||||
for (auto &pair : grammar.rules()) {
|
||||
for (auto &pair : grammar.rules) {
|
||||
if (is_token(pair.second)) {
|
||||
tokens.push_back(pair);
|
||||
symbol_replacer.replacements.insert(
|
||||
|
|
@ -123,7 +123,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
|
|||
for (auto &pair : rules)
|
||||
pair.second = symbol_replacer.apply(pair.second);
|
||||
|
||||
for (auto &rule : grammar.ubiquitous_tokens()) {
|
||||
for (auto &rule : grammar.ubiquitous_tokens) {
|
||||
if (is_token(rule)) {
|
||||
separators.push_back(rule);
|
||||
} else {
|
||||
|
|
@ -139,7 +139,15 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
|
|||
}
|
||||
}
|
||||
|
||||
return make_tuple(SyntaxGrammar(rules, {}, ubiquitous_tokens),
|
||||
set<set<rules::Symbol>> expected_conflicts;
|
||||
for (auto &symbol_set : grammar.expected_conflicts) {
|
||||
set<Symbol> new_symbol_set;
|
||||
for (const Symbol &symbol : symbol_set)
|
||||
new_symbol_set.insert(symbol_replacer.replace_symbol(symbol));
|
||||
expected_conflicts.insert(new_symbol_set);
|
||||
}
|
||||
|
||||
return make_tuple(SyntaxGrammar(rules, {}, ubiquitous_tokens, expected_conflicts),
|
||||
LexicalGrammar(tokens, extractor.tokens, separators),
|
||||
nullptr);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
@ -13,7 +14,7 @@ class LexicalGrammar;
|
|||
namespace prepare_grammar {
|
||||
|
||||
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
|
||||
const Grammar &);
|
||||
const InternedGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -22,8 +22,10 @@ class InternSymbols : public rules::IdentityRuleFn {
|
|||
|
||||
// Replaces a named symbol with the result of symbol_for_rule_name(name).
// If the lookup yields an empty pointer, the name is stored in
// missing_rule_name (callers inspect it after apply()). The braced form of
// the `if` below then returns rules::blank() instead of the empty pointer.
rule_ptr apply_to(const rules::NamedSymbol *rule) {
|
||||
auto result = symbol_for_rule_name(rule->name);
|
||||
if (!result.get())
|
||||
if (!result.get()) {
|
||||
missing_rule_name = rule->name;
|
||||
return rules::blank();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -40,31 +42,40 @@ class InternSymbols : public rules::IdentityRuleFn {
|
|||
string missing_rule_name;
|
||||
};
|
||||
|
||||
pair<Grammar, const GrammarError *> missing_rule_error(string rule_name) {
|
||||
return { Grammar({}), new GrammarError(GrammarErrorTypeUndefinedSymbol,
|
||||
"Undefined rule '" + rule_name + "'") };
|
||||
// Builds the error reported for a reference to a rule that does not exist:
// a GrammarError of type GrammarErrorTypeUndefinedSymbol whose message is
// "Undefined rule '<rule_name>'".
// The object is allocated with `new`.
// NOTE(review): who releases it is not visible here - confirm ownership.
const GrammarError * missing_rule_error(string rule_name) {
|
||||
return new GrammarError(GrammarErrorTypeUndefinedSymbol,
|
||||
"Undefined rule '" + rule_name + "'");
|
||||
}
|
||||
|
||||
// Runs InternSymbols over every rule and ubiquitous token of `grammar` and
// resolves the names in grammar.expected_conflicts() to symbols.
// Returns the interned grammar plus an error pointer, which is nullptr on
// success.
// This listing appears to interleave the pre-change lines (the
// pair<Grammar, ...> signature and the local `rules` / `ubiquitous_tokens`
// containers) with their replacements. The notes below describe the
// InternedGrammar-returning version.
pair<Grammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
|
||||
pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
|
||||
InternedGrammar result;
|
||||
InternSymbols interner(grammar);
|
||||
vector<pair<string, rule_ptr>> rules;
|
||||
|
||||
// Intern each rule body. A non-empty missing_rule_name after apply() means an
// undefined rule was referenced: return what has been built so far together
// with an undefined-rule error.
for (auto &pair : grammar.rules()) {
|
||||
auto new_rule = interner.apply(pair.second);
|
||||
if (!interner.missing_rule_name.empty())
|
||||
return missing_rule_error(interner.missing_rule_name);
|
||||
rules.push_back({ pair.first, new_rule });
|
||||
return {result, missing_rule_error(interner.missing_rule_name)};
|
||||
result.rules.push_back({ pair.first, new_rule });
|
||||
}
|
||||
|
||||
set<rules::rule_ptr> ubiquitous_tokens;
|
||||
// Same treatment for the ubiquitous tokens, including the early error return.
for (auto &rule : grammar.ubiquitous_tokens()) {
|
||||
auto new_rule = interner.apply(rule);
|
||||
if (!interner.missing_rule_name.empty())
|
||||
return missing_rule_error(interner.missing_rule_name);
|
||||
ubiquitous_tokens.insert(new_rule);
|
||||
return {result, missing_rule_error(interner.missing_rule_name)};
|
||||
result.ubiquitous_tokens.insert(new_rule);
|
||||
}
|
||||
|
||||
return { Grammar(rules).ubiquitous_tokens(ubiquitous_tokens), nullptr };
|
||||
// Translate each expected-conflict entry from rule names to symbols.
// Unlike the two loops above, a name with no matching rule is skipped
// without producing an error. The entry is inserted even if it ends up
// empty.
// NOTE(review): confirm that silently dropping unknown names is intended;
// a misspelled name in expected_conflicts goes unreported here.
for (auto &names : grammar.expected_conflicts()) {
|
||||
set<rules::Symbol> entry;
|
||||
for (auto &name : names) {
|
||||
auto symbol = interner.symbol_for_rule_name(name);
|
||||
if (symbol.get())
|
||||
entry.insert(*symbol);
|
||||
}
|
||||
result.expected_conflicts.insert(entry);
|
||||
}
|
||||
|
||||
return { result, nullptr };
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include <utility>
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
@ -11,7 +12,7 @@ class Grammar;
|
|||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<Grammar, const GrammarError *> intern_symbols(const Grammar &);
|
||||
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
24
src/compiler/prepare_grammar/interned_grammar.h
Normal file
24
src/compiler/prepare_grammar/interned_grammar.h
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
|
||||
#define COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
|
||||
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
// Plain aggregate holding a grammar after symbol interning. intern_symbols
// produces it and extract_tokens takes it as input. It has no constructors,
// so the specs build it with brace initialization in member order.
struct InternedGrammar {
|
||||
// (name, rule) pairs, in grammar order.
std::vector<std::pair<std::string, rules::rule_ptr>> rules;
|
||||
// Interned ubiquitous-token rules.
std::set<rules::rule_ptr> ubiquitous_tokens;
|
||||
// Each inner set is one expected conflict, expressed as symbols rather than
// the rule-name strings stored on Grammar.
std::set<std::set<rules::Symbol>> expected_conflicts;
|
||||
};
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
|
||||
|
|
@ -19,8 +19,10 @@ SyntaxGrammar::SyntaxGrammar(const vector<pair<string, rules::rule_ptr>> &rules,
|
|||
|
||||
// Member-wise constructor: each argument is copied into the member of the
// same name. This listing appears to show the earlier three-argument form
// next to the four-argument form that adds expected_conflicts. The added
// parameter has no default in the header declaration, so call sites must
// pass it.
SyntaxGrammar::SyntaxGrammar(const vector<pair<string, rules::rule_ptr>> &rules,
|
||||
const vector<pair<string, rules::rule_ptr>> &aux_rules,
|
||||
const set<rules::Symbol> &ubiquitous_tokens)
|
||||
: rules(rules), aux_rules(aux_rules), ubiquitous_tokens(ubiquitous_tokens) {}
|
||||
const set<rules::Symbol> &ubiquitous_tokens,
|
||||
const set<set<rules::Symbol>> &expected_conflicts)
|
||||
: rules(rules), aux_rules(aux_rules),
|
||||
ubiquitous_tokens(ubiquitous_tokens), expected_conflicts(expected_conflicts) {}
|
||||
|
||||
const rules::rule_ptr &SyntaxGrammar::rule(const rules::Symbol &symbol) const {
|
||||
return symbol.is_auxiliary() ? aux_rules[symbol.index].second
|
||||
|
|
|
|||
|
|
@ -19,14 +19,16 @@ class SyntaxGrammar {
|
|||
SyntaxGrammar(
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
|
||||
const std::set<rules::Symbol> &ubiquitous_tokens);
|
||||
const std::set<rules::Symbol> &ubiquitous_tokens,
|
||||
const std::set<std::set<rules::Symbol>> &expected_conflicts);
|
||||
|
||||
const std::string &rule_name(const rules::Symbol &symbol) const;
|
||||
const rules::rule_ptr &rule(const rules::Symbol &symbol) const;
|
||||
|
||||
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
|
||||
std::set<rules::Symbol> ubiquitous_tokens;
|
||||
std::set<std::set<rules::Symbol>> expected_conflicts;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue