Add expected_conflicts field to grammar

This commit is contained in:
Max Brunsfeld 2015-06-26 16:14:08 -05:00
parent 36d9c3be14
commit c9a482bbf3
17 changed files with 151 additions and 67 deletions

View file

@ -38,6 +38,7 @@ std::ostream &operator<<(std::ostream &stream, const rules::rule_ptr &rule);
class Grammar {
const std::vector<std::pair<std::string, rules::rule_ptr>> rules_;
std::set<rules::rule_ptr> ubiquitous_tokens_;
std::set<std::set<std::string>> expected_conflicts_;
public:
explicit Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &);
@ -47,6 +48,8 @@ class Grammar {
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules() const;
const std::set<rules::rule_ptr> &ubiquitous_tokens() const;
Grammar &ubiquitous_tokens(const std::set<rules::rule_ptr> &);
const std::set<std::set<std::string>> &expected_conflicts() const;
Grammar &expected_conflicts(const std::set<std::set<std::string>> &);
};
enum GrammarErrorType {

View file

@ -15,7 +15,7 @@ describe("build_parse_table", []() {
{ "rule0", choice({ i_sym(1), i_sym(2) }) },
{ "rule1", i_token(0) },
{ "rule2", i_token(1) },
}, {}, { Symbol(2, SymbolOptionToken) });
}, {}, { Symbol(2, SymbolOptionToken) }, set<set<Symbol>>());
LexicalGrammar lex_grammar({
{ "token0", pattern("[a-c]") },

View file

@ -45,7 +45,7 @@ describe("sym_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
SyntaxGrammar grammar({
{ "A", blank() },
{ "B", i_token(21) },
}, {}, set<Symbol>());
}, {}, set<Symbol>(), set<set<Symbol>>());
it("computes the closure of the new item sets", [&]() {
ParseItemSet set1({

View file

@ -16,7 +16,7 @@ describe("ParseConflictManager", []() {
{ "reduced_rule", i_token(0) },
{ "other_rule1", i_token(0) },
{ "other_rule2", i_token(0) },
}, {}, { Symbol(2, SymbolOptionToken) });
}, {}, { Symbol(2, SymbolOptionToken) }, set<set<Symbol>>());
LexicalGrammar lexical_grammar({
{ "other_token", pattern("[a-b]") },

View file

@ -63,7 +63,7 @@ describe("rule_can_be_blank", [&]() {
{ "B", choice({
seq({ i_sym(1), i_token(12) }),
i_token(13) }) },
}, {}, set<Symbol>());
}, {}, set<Symbol>(), set<set<Symbol>>());
it("terminates for left-recursive rules that can be blank", [&]() {
rule = i_sym(0);

View file

@ -12,7 +12,7 @@ describe("expand_repeats", []() {
it("replaces repeat rules with pairs of recursive rules", [&]() {
SyntaxGrammar grammar({
{ "rule0", repeat(i_token(0)) },
}, {}, set<Symbol>());
}, {}, set<Symbol>(), set<set<Symbol>>());
auto match = expand_repeats(grammar);
@ -32,7 +32,7 @@ describe("expand_repeats", []() {
{ "rule0", seq({
i_token(10),
repeat(i_token(11)) }) },
}, {}, set<Symbol>());
}, {}, set<Symbol>(), set<set<Symbol>>());
auto match = expand_repeats(grammar);
@ -52,7 +52,7 @@ describe("expand_repeats", []() {
it("replaces repeats inside of choices", [&]() {
SyntaxGrammar grammar({
{ "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
}, {}, set<Symbol>());
}, {}, set<Symbol>(), set<set<Symbol>>());
auto match = expand_repeats(grammar);
@ -73,7 +73,7 @@ describe("expand_repeats", []() {
seq({ i_token(1), repeat(i_token(4)) }),
seq({ i_token(2), repeat(i_token(4)) }) }) },
{ "rule1", seq({ i_token(3), repeat(i_token(4)) }) },
}, {}, set<Symbol>());
}, {}, set<Symbol>(), set<set<Symbol>>());
auto match = expand_repeats(grammar);
@ -96,7 +96,7 @@ describe("expand_repeats", []() {
{ "rule0", seq({
repeat(i_token(10)),
repeat(i_token(11)) }) },
}, {}, set<Symbol>());
}, {}, set<Symbol>(), set<set<Symbol>>());
auto match = expand_repeats(grammar);
@ -120,7 +120,7 @@ describe("expand_repeats", []() {
SyntaxGrammar grammar({
{ "rule0", repeat(i_token(10)) },
{ "rule1", repeat(i_token(11)) },
}, {}, set<Symbol>());
}, {}, set<Symbol>(), set<set<Symbol>>());
auto match = expand_repeats(grammar);

View file

@ -1,6 +1,7 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/lexical_grammar.h"
#include "compiler/syntax_grammar.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepare_grammar/extract_tokens.h"
#include "compiler/helpers/containers.h"
@ -8,13 +9,16 @@ START_TEST
using namespace rules;
using prepare_grammar::extract_tokens;
using prepare_grammar::InternedGrammar;
describe("extract_tokens", []() {
const set<rules::rule_ptr> no_ubiquitous_tokens;
const set<set<rules::Symbol>> no_expected_conflicts;
it("moves string rules into the lexical grammar", [&]() {
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> result =
extract_tokens(Grammar({
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
}));
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
}, no_ubiquitous_tokens, no_expected_conflicts});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
@ -28,9 +32,9 @@ describe("extract_tokens", []() {
});
it("moves pattern rules into the lexical grammar", [&]() {
auto result = extract_tokens(Grammar({
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
}));
}, no_ubiquitous_tokens, no_expected_conflicts});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
@ -44,11 +48,11 @@ describe("extract_tokens", []() {
});
it("moves other rules marked as tokens into the lexical grammar", [&]() {
auto result = extract_tokens(Grammar({
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", seq({
token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
i_sym(0) }) }
}));
}, no_ubiquitous_tokens, no_expected_conflicts});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
@ -62,9 +66,9 @@ describe("extract_tokens", []() {
});
it("does not move blank rules", [&]() {
auto result = extract_tokens(Grammar({
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", choice({ i_sym(0), blank() }) },
}));
}, no_ubiquitous_tokens, no_expected_conflicts});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_A", choice({ i_sym(0), blank() }) },
@ -76,9 +80,9 @@ describe("extract_tokens", []() {
});
it("does not create duplicate tokens in the lexical grammar", [&]() {
auto result = extract_tokens(Grammar({
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
}));
}, no_ubiquitous_tokens, no_expected_conflicts});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_A", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) }
@ -91,13 +95,31 @@ describe("extract_tokens", []() {
})))
});
it("updates the grammar's expected conflict symbols", [&]() {
auto result = extract_tokens(InternedGrammar{
{
{ "rule_A", str("ok") },
{ "rule_B", repeat(i_sym(0)) },
{ "rule_C", repeat(seq({ i_sym(0), i_sym(0) })) },
},
{ str(" ") },
{ { Symbol(1), Symbol(2) } }
});
AssertThat(get<0>(result).rules.size(), Equals<size_t>(2));
AssertThat(get<1>(result).rules.size(), Equals<size_t>(1));
AssertThat(get<0>(result).expected_conflicts, Equals(set<set<Symbol>>({
{ Symbol(0), Symbol(1) },
})));
});
describe("when an entire rule can be extracted", [&]() {
it("moves the rule the lexical grammar when possible and updates referencing symbols", [&]() {
auto result = extract_tokens(Grammar({
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", i_sym(1) },
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
}));
}, no_ubiquitous_tokens, no_expected_conflicts});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_A", i_token(0) }
@ -112,11 +134,11 @@ describe("extract_tokens", []() {
});
it("updates symbols whose indices need to change due to deleted rules", [&]() {
auto result = extract_tokens(Grammar({
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", str("ab") },
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
}));
}, no_ubiquitous_tokens, no_expected_conflicts});
AssertThat(get<0>(result).rules, Equals(rule_list({
{ "rule_B", i_token(0) },
@ -134,12 +156,12 @@ describe("extract_tokens", []() {
describe("handling ubiquitous tokens", [&]() {
describe("ubiquitous tokens that are not symbols", [&]() {
it("adds them to the lexical grammar's separators", [&]() {
auto result = extract_tokens(Grammar({
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", str("x") },
}).ubiquitous_tokens({
}, {
pattern("\\s+"),
str("y"),
}));
}, no_expected_conflicts});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
@ -154,13 +176,13 @@ describe("extract_tokens", []() {
describe("ubiquitous tokens that point to moved rules", [&]() {
it("updates them according to the new symbol numbers", [&]() {
auto result = extract_tokens(Grammar( {
auto result = extract_tokens(InternedGrammar{ {
{ "rule_A", seq({ str("w"), i_sym(1) }) },
{ "rule_B", str("x") },
{ "rule_C", str("y") },
}).ubiquitous_tokens({
}, {
i_sym(2),
}));
}, no_expected_conflicts});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
@ -174,10 +196,10 @@ describe("extract_tokens", []() {
describe("ubiquitous tokens that are visible", [&]() {
it("preserves them in the syntactic grammar", [&]() {
auto result = extract_tokens(Grammar({
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", str("ab") },
{ "rule_B", str("bc") },
}).ubiquitous_tokens({ i_sym(1) }));
}, { i_sym(1) }, no_expected_conflicts});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
@ -191,10 +213,10 @@ describe("extract_tokens", []() {
describe("ubiquitous tokens that are used in other grammar rules", [&]() {
it("preserves them in the syntactic grammar", [&]() {
auto result = extract_tokens(Grammar({
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", seq({ i_sym(1), str("ab") }) },
{ "_rule_B", str("bc") },
}).ubiquitous_tokens({ i_sym(1) }));
}, { i_sym(1) }, no_expected_conflicts});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
@ -208,10 +230,10 @@ describe("extract_tokens", []() {
describe("ubiquitous tokens that are non-token symbols", [&]() {
it("returns an error", [&]() {
auto result = extract_tokens(Grammar({
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", seq({ str("x"), i_sym(1) }), },
{ "rule_B", seq({ str("y"), str("z") }) },
}).ubiquitous_tokens({ i_sym(1) }));
}, { i_sym(1) }, no_expected_conflicts});
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), EqualsPointer(
@ -220,12 +242,12 @@ describe("extract_tokens", []() {
});
});
describe("ubiquitous tokens that are non-token symbols", [&]() {
describe("ubiquitous tokens that are not symbols", [&]() {
it("returns an error", [&]() {
auto result = extract_tokens(Grammar({
auto result = extract_tokens(InternedGrammar{{
{ "rule_A", str("x") },
{ "rule_B", str("y") },
}).ubiquitous_tokens({ choice({ i_sym(1), blank() }) }));
}, { choice({ i_sym(1), blank() }) }, no_expected_conflicts});
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), EqualsPointer(

View file

@ -20,7 +20,7 @@ describe("intern_symbols", []() {
auto result = intern_symbols(grammar);
AssertThat(result.second, Equals((GrammarError *)nullptr));
AssertThat(result.first.rules(), Equals(rule_list({
AssertThat(result.first.rules, Equals(rule_list({
{ "x", choice({ i_sym(1), i_sym(2) }) },
{ "y", i_sym(2) },
{ "z", str("stuff") },
@ -49,8 +49,8 @@ describe("intern_symbols", []() {
auto result = intern_symbols(grammar);
AssertThat(result.second, Equals((GrammarError *)nullptr));
AssertThat(result.first.ubiquitous_tokens().size(), Equals<size_t>(1));
AssertThat(*result.first.ubiquitous_tokens().begin(), EqualsPointer(i_sym(2)));
AssertThat(result.first.ubiquitous_tokens.size(), Equals<size_t>(1));
AssertThat(*result.first.ubiquitous_tokens.begin(), EqualsPointer(i_sym(2)));
});
});

View file

@ -69,6 +69,15 @@ Grammar &Grammar::ubiquitous_tokens(const set<rule_ptr> &ubiquitous_tokens) {
return *this;
}
// Read accessor for the sets of rule names whose parse conflicts are expected.
// Written as a one-liner to match the style of Grammar::rules() below.
const set<set<string>> &Grammar::expected_conflicts() const { return expected_conflicts_; }

// Builder-style setter: stores the expected-conflict sets and returns *this
// so calls can be chained (same pattern as Grammar::ubiquitous_tokens above).
Grammar &Grammar::expected_conflicts(const set<set<string>> &expected_conflicts) {
  this->expected_conflicts_ = expected_conflicts;
  return *this;
}

const vector<pair<string, rule_ptr>> &Grammar::rules() const { return rules_; }
} // namespace tree_sitter

View file

@ -78,7 +78,8 @@ SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(),
expander.aux_rules.end());
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens,
grammar.expected_conflicts);
}
} // namespace prepare_grammar

View file

@ -100,7 +100,7 @@ static tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> ubiq_token_err
}
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
const Grammar &grammar) {
const InternedGrammar &grammar) {
vector<pair<string, rule_ptr>> rules, tokens;
vector<rule_ptr> separators;
set<Symbol> ubiquitous_tokens;
@ -109,7 +109,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
TokenExtractor extractor;
size_t i = 0;
for (auto &pair : grammar.rules()) {
for (auto &pair : grammar.rules) {
if (is_token(pair.second)) {
tokens.push_back(pair);
symbol_replacer.replacements.insert(
@ -123,7 +123,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
for (auto &pair : rules)
pair.second = symbol_replacer.apply(pair.second);
for (auto &rule : grammar.ubiquitous_tokens()) {
for (auto &rule : grammar.ubiquitous_tokens) {
if (is_token(rule)) {
separators.push_back(rule);
} else {
@ -139,7 +139,15 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
}
}
return make_tuple(SyntaxGrammar(rules, {}, ubiquitous_tokens),
set<set<rules::Symbol>> expected_conflicts;
for (auto &symbol_set : grammar.expected_conflicts) {
set<Symbol> new_symbol_set;
for (const Symbol &symbol : symbol_set)
new_symbol_set.insert(symbol_replacer.replace_symbol(symbol));
expected_conflicts.insert(new_symbol_set);
}
return make_tuple(SyntaxGrammar(rules, {}, ubiquitous_tokens, expected_conflicts),
LexicalGrammar(tokens, extractor.tokens, separators),
nullptr);
}

View file

@ -3,6 +3,7 @@
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
@ -13,7 +14,7 @@ class LexicalGrammar;
namespace prepare_grammar {
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
const Grammar &);
const InternedGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -22,8 +22,10 @@ class InternSymbols : public rules::IdentityRuleFn {
rule_ptr apply_to(const rules::NamedSymbol *rule) {
auto result = symbol_for_rule_name(rule->name);
if (!result.get())
if (!result.get()) {
missing_rule_name = rule->name;
return rules::blank();
}
return result;
}
@ -40,31 +42,40 @@ class InternSymbols : public rules::IdentityRuleFn {
string missing_rule_name;
};
pair<Grammar, const GrammarError *> missing_rule_error(string rule_name) {
return { Grammar({}), new GrammarError(GrammarErrorTypeUndefinedSymbol,
"Undefined rule '" + rule_name + "'") };
// Builds the error reported when a rule body references a rule name that is
// not defined anywhere in the grammar.
// NOTE(review): heap-allocates the GrammarError with raw `new`; presumably the
// caller takes ownership of the returned pointer — confirm against call sites.
const GrammarError * missing_rule_error(string rule_name) {
return new GrammarError(GrammarErrorTypeUndefinedSymbol,
"Undefined rule '" + rule_name + "'");
}
pair<Grammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
InternedGrammar result;
InternSymbols interner(grammar);
vector<pair<string, rule_ptr>> rules;
for (auto &pair : grammar.rules()) {
auto new_rule = interner.apply(pair.second);
if (!interner.missing_rule_name.empty())
return missing_rule_error(interner.missing_rule_name);
rules.push_back({ pair.first, new_rule });
return {result, missing_rule_error(interner.missing_rule_name)};
result.rules.push_back({ pair.first, new_rule });
}
set<rules::rule_ptr> ubiquitous_tokens;
for (auto &rule : grammar.ubiquitous_tokens()) {
auto new_rule = interner.apply(rule);
if (!interner.missing_rule_name.empty())
return missing_rule_error(interner.missing_rule_name);
ubiquitous_tokens.insert(new_rule);
return {result, missing_rule_error(interner.missing_rule_name)};
result.ubiquitous_tokens.insert(new_rule);
}
return { Grammar(rules).ubiquitous_tokens(ubiquitous_tokens), nullptr };
for (auto &names : grammar.expected_conflicts()) {
set<rules::Symbol> entry;
for (auto &name : names) {
auto symbol = interner.symbol_for_rule_name(name);
if (symbol.get())
entry.insert(*symbol);
}
result.expected_conflicts.insert(entry);
}
return { result, nullptr };
}
} // namespace prepare_grammar

View file

@ -4,6 +4,7 @@
#include <utility>
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
@ -11,7 +12,7 @@ class Grammar;
namespace prepare_grammar {
std::pair<Grammar, const GrammarError *> intern_symbols(const Grammar &);
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -0,0 +1,24 @@
#ifndef COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
#define COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_
#include <string>
#include <set>
#include <utility>
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {
namespace prepare_grammar {
// Intermediate form of a Grammar produced by intern_symbols(): named rule
// references have been replaced with numeric rules::Symbol values. Consumed
// by extract_tokens() on the way to building the Syntax/Lexical grammars.
struct InternedGrammar {
// Named rules in declaration order; right-hand sides use interned symbols.
std::vector<std::pair<std::string, rules::rule_ptr>> rules;
// Tokens allowed to appear between any two symbols (e.g. whitespace).
std::set<rules::rule_ptr> ubiquitous_tokens;
// Groups of symbols whose parse-table conflicts are expected by the
// grammar author and should not be treated as errors.
std::set<std::set<rules::Symbol>> expected_conflicts;
};
} // namespace prepare_grammar
} // namespace tree_sitter
#endif // COMPILER_PREPARE_GRAMMAR_INTERNED_GRAMMAR_H_

View file

@ -19,8 +19,10 @@ SyntaxGrammar::SyntaxGrammar(const vector<pair<string, rules::rule_ptr>> &rules,
SyntaxGrammar::SyntaxGrammar(const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules,
const set<rules::Symbol> &ubiquitous_tokens)
: rules(rules), aux_rules(aux_rules), ubiquitous_tokens(ubiquitous_tokens) {}
const set<rules::Symbol> &ubiquitous_tokens,
const set<set<rules::Symbol>> &expected_conflicts)
: rules(rules), aux_rules(aux_rules),
ubiquitous_tokens(ubiquitous_tokens), expected_conflicts(expected_conflicts) {}
const rules::rule_ptr &SyntaxGrammar::rule(const rules::Symbol &symbol) const {
return symbol.is_auxiliary() ? aux_rules[symbol.index].second

View file

@ -19,14 +19,16 @@ class SyntaxGrammar {
SyntaxGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
const std::set<rules::Symbol> &ubiquitous_tokens);
const std::set<rules::Symbol> &ubiquitous_tokens,
const std::set<std::set<rules::Symbol>> &expected_conflicts);
const std::string &rule_name(const rules::Symbol &symbol) const;
const rules::rule_ptr &rule(const rules::Symbol &symbol) const;
const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
std::set<rules::Symbol> ubiquitous_tokens;
std::set<std::set<rules::Symbol>> expected_conflicts;
};
} // namespace tree_sitter