Trim whitespace
This commit is contained in:
parent
801f4bd0a8
commit
93df5579b4
15 changed files with 78 additions and 78 deletions
|
|
@ -40,14 +40,14 @@ describe("computing FIRST sets", []() {
|
|||
i_token(0),
|
||||
i_token(1) }),
|
||||
i_sym(0) });
|
||||
|
||||
|
||||
Grammar grammar({
|
||||
{ "rule0", seq({
|
||||
i_token(2),
|
||||
i_token(3),
|
||||
i_token(4) }) }
|
||||
});
|
||||
|
||||
|
||||
AssertThat(first_set(rule, grammar), Equals(set<ISymbol>({
|
||||
ISymbol(0, SymbolOptionToken),
|
||||
ISymbol(2, SymbolOptionToken),
|
||||
|
|
@ -58,7 +58,7 @@ describe("computing FIRST sets", []() {
|
|||
auto rule = seq({
|
||||
i_sym(0),
|
||||
i_token(1) });
|
||||
|
||||
|
||||
Grammar grammar({
|
||||
{ "rule0", choice({
|
||||
i_token(0),
|
||||
|
|
@ -80,7 +80,7 @@ describe("computing FIRST sets", []() {
|
|||
i_token(11),
|
||||
}) },
|
||||
});
|
||||
|
||||
|
||||
auto rule = i_sym(0);
|
||||
|
||||
AssertThat(first_set(rule, grammar), Equals(set<ISymbol>({
|
||||
|
|
|
|||
|
|
@ -17,19 +17,19 @@ namespace tree_sitter {
|
|||
else
|
||||
return CharacterSet(ranges).complement().copy();
|
||||
}
|
||||
|
||||
|
||||
rule_ptr i_sym(size_t index) {
|
||||
return make_shared<rules::ISymbol>(index);
|
||||
}
|
||||
|
||||
|
||||
rule_ptr i_aux_sym(size_t index) {
|
||||
return make_shared<rules::ISymbol>(index, SymbolOptionAuxiliary);
|
||||
}
|
||||
|
||||
|
||||
rule_ptr i_token(size_t index) {
|
||||
return make_shared<rules::ISymbol>(index, SymbolOptionToken);
|
||||
}
|
||||
|
||||
|
||||
rule_ptr i_aux_token(size_t index) {
|
||||
return make_shared<rules::ISymbol>(index, SymbolOption(SymbolOptionAuxiliary|SymbolOptionToken));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ describe("expanding repeat rules in a grammar", []() {
|
|||
PreparedGrammar grammar({
|
||||
{ "rule0", repeat(i_token(0)) },
|
||||
}, {});
|
||||
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
{ "rule0", i_aux_sym(0) },
|
||||
}, {
|
||||
|
|
@ -24,12 +24,12 @@ describe("expanding repeat rules in a grammar", []() {
|
|||
blank() }) },
|
||||
})));
|
||||
});
|
||||
|
||||
|
||||
it("replaces repeats inside of sequences", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
{ "rule0", seq({ i_token(10), repeat(i_token(11)) }) },
|
||||
}, {});
|
||||
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
{ "rule0", seq({ i_token(10), i_aux_sym(0) }) },
|
||||
}, {
|
||||
|
|
@ -38,12 +38,12 @@ describe("expanding repeat rules in a grammar", []() {
|
|||
blank() }) },
|
||||
})));
|
||||
});
|
||||
|
||||
|
||||
it("replaces repeats inside of choices", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
{ "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
|
||||
}, {});
|
||||
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
{ "rule0", choice({ i_token(10), i_aux_sym(0) }) },
|
||||
}, {
|
||||
|
|
@ -52,12 +52,12 @@ describe("expanding repeat rules in a grammar", []() {
|
|||
blank() }) },
|
||||
})));
|
||||
});
|
||||
|
||||
|
||||
it("can replace multiple repeats in the same rule", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
{ "rule0", seq({ repeat(i_token(10)), repeat(i_token(11)) }) },
|
||||
}, {});
|
||||
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
{ "rule0", seq({ i_aux_sym(0), i_aux_sym(1) }) },
|
||||
}, {
|
||||
|
|
@ -73,13 +73,13 @@ describe("expanding repeat rules in a grammar", []() {
|
|||
blank() }) },
|
||||
})));
|
||||
});
|
||||
|
||||
|
||||
it("can replace repeats in multiple rules", [&]() {
|
||||
PreparedGrammar grammar({
|
||||
{ "rule0", repeat(i_token(10)) },
|
||||
{ "rule1", repeat(i_token(11)) },
|
||||
}, {});
|
||||
|
||||
|
||||
AssertThat(expand_repeats(grammar), Equals(PreparedGrammar({
|
||||
{ "rule0", i_aux_sym(0) },
|
||||
{ "rule1", i_aux_sym(1) },
|
||||
|
|
|
|||
|
|
@ -13,39 +13,39 @@ describe("extracting tokens from a grammar", []() {
|
|||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule0", seq({ str("ab"), i_sym(0) }) }
|
||||
}, {}));
|
||||
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
{ "rule0", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
}, {})));
|
||||
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
{ "token0", str("ab") },
|
||||
})));
|
||||
});
|
||||
|
||||
|
||||
it("moves patterns into the lexical grammar", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule0", seq({ pattern("a+"), i_sym(0) }) }
|
||||
}, {}));
|
||||
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
{ "rule0", seq({ i_aux_token(0), i_sym(0) }) }
|
||||
}, {})));
|
||||
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
{ "token0", pattern("a+") },
|
||||
})));
|
||||
});
|
||||
|
||||
|
||||
it("does not extract blanks into tokens", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(Grammar({
|
||||
{ "rule1", choice({ i_sym(0), blank() }) },
|
||||
}));
|
||||
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
{ "rule1", choice({ i_sym(0), blank() }) },
|
||||
}, {})));
|
||||
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {})));
|
||||
});
|
||||
|
||||
|
|
@ -53,43 +53,43 @@ describe("extracting tokens from a grammar", []() {
|
|||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule0", seq({ str("ab"), i_sym(0), str("ab") }) },
|
||||
}, {}));
|
||||
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
{ "rule0", seq({ i_aux_token(0), i_sym(0), i_aux_token(0) }) }
|
||||
}, {})));
|
||||
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({}, {
|
||||
{ "token0", str("ab") },
|
||||
})));
|
||||
});
|
||||
|
||||
|
||||
it("moves entire rules into the lexical grammar when possible, updating referencing symbols", [&]() {
|
||||
auto result = extract_tokens(PreparedGrammar({
|
||||
{ "rule0", i_sym(1) },
|
||||
{ "rule1", pattern("a|b") },
|
||||
}, {}));
|
||||
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
{ "rule0", i_token(0) }
|
||||
}, {})));
|
||||
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({
|
||||
{ "rule1", pattern("a|b") },
|
||||
}, {})));
|
||||
});
|
||||
|
||||
|
||||
it("updates symbols whose indices need to change due to deleted rules", [&]() {
|
||||
auto result = extract_tokens(PreparedGrammar({
|
||||
{ "rule0", str("ab") },
|
||||
{ "rule1", i_sym(0) },
|
||||
{ "rule2", i_sym(1) },
|
||||
}, {}));
|
||||
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
{ "rule1", i_token(0) },
|
||||
{ "rule2", i_sym(0) },
|
||||
}, {})));
|
||||
|
||||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({
|
||||
{ "rule0", str("ab") },
|
||||
}, {})));
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ describe("interning symbols in a grammar", []() {
|
|||
});
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
|
||||
AssertThat((bool)result.second, IsFalse());
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
{ "x", choice({ i_sym(1), i_sym(2) }) },
|
||||
|
|
@ -26,15 +26,15 @@ describe("interning symbols in a grammar", []() {
|
|||
{ "z", str("stuff") },
|
||||
}, {})));
|
||||
});
|
||||
|
||||
|
||||
describe("when there are symbols that reference undefined rules", [&]() {
|
||||
it("returns an error", []() {
|
||||
Grammar grammar({
|
||||
{ "x", sym("y") },
|
||||
});
|
||||
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
|
||||
AssertThat(result.second->message(), Equals("Undefined rule 'y'"));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ describe("parsing regex pattern rules", []() {
|
|||
character({ 'c' })
|
||||
})));
|
||||
});
|
||||
|
||||
|
||||
it("parses wildcard '.' characters", [&]() {
|
||||
Pattern rule(".");
|
||||
AssertThat(
|
||||
|
|
@ -121,7 +121,7 @@ describe("parsing regex pattern rules", []() {
|
|||
character({ '(' }),
|
||||
character({ 'b' })
|
||||
})));
|
||||
|
||||
|
||||
Pattern rule2("a\\.");
|
||||
AssertThat(
|
||||
rule2.to_rule_tree(),
|
||||
|
|
|
|||
|
|
@ -83,11 +83,11 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
private:
|
||||
|
||||
|
||||
const PreparedGrammar & grammar_for_symbol(const rules::ISymbol &symbol) {
|
||||
return symbol.is_token() ? lexical_grammar : syntax_grammar;
|
||||
}
|
||||
|
||||
|
||||
string symbol_id(const rules::ISymbol &symbol) {
|
||||
if (symbol.is_built_in()) {
|
||||
return (symbol == rules::ERROR()) ?
|
||||
|
|
@ -101,7 +101,7 @@ namespace tree_sitter {
|
|||
return "ts_sym_" + name;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
string symbol_name(const rules::ISymbol &symbol) {
|
||||
if (symbol.is_built_in()) {
|
||||
return (symbol == rules::ERROR()) ? "error" : "end";
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
class PreparedGrammar;
|
||||
|
||||
|
||||
namespace generate_code {
|
||||
std::string c_code(std::string name,
|
||||
const ParseTable &parse_table,
|
||||
|
|
|
|||
|
|
@ -6,18 +6,18 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
using std::string;
|
||||
|
||||
|
||||
namespace generate_code {
|
||||
class TokenDescription : public rules::RuleFn<string> {
|
||||
string apply_to(const rules::Pattern *rule) {
|
||||
return "/" + rule->value + "/";
|
||||
}
|
||||
|
||||
|
||||
string apply_to(const rules::String *rule) {
|
||||
return "'" + rule->value + "'";
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
std::string token_description(const rules::rule_ptr &rule) {
|
||||
return TokenDescription().apply(rule);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,20 +29,20 @@ namespace tree_sitter {
|
|||
bool apply_to(const rules::String *rule) { return true; }
|
||||
bool apply_to(const rules::Pattern *rule) { return true; }
|
||||
};
|
||||
|
||||
|
||||
class SymbolInliner : public rules::IdentityRuleFn {
|
||||
map<ISymbol, ISymbol> replacements;
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
|
||||
|
||||
int new_index_for_symbol(const ISymbol &symbol) {
|
||||
int result = symbol.index;
|
||||
for (const auto &pair : replacements)
|
||||
if (pair.first.index < symbol.index &&
|
||||
if (pair.first.index < symbol.index &&
|
||||
pair.first.is_auxiliary() == symbol.is_auxiliary())
|
||||
result--;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
rule_ptr apply_to(const ISymbol *rule) {
|
||||
auto replacement_pair = replacements.find(*rule);
|
||||
if (replacement_pair != replacements.end())
|
||||
|
|
@ -52,7 +52,7 @@ namespace tree_sitter {
|
|||
else
|
||||
return make_shared<ISymbol>(new_index_for_symbol(*rule), rule->options);
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
SymbolInliner(const map<ISymbol, ISymbol> &replacements, size_t rule_count, size_t aux_rule_count) :
|
||||
replacements(replacements)
|
||||
|
|
@ -78,7 +78,7 @@ namespace tree_sitter {
|
|||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
vector<pair<string, rule_ptr>> tokens;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -15,23 +15,23 @@ namespace tree_sitter {
|
|||
using std::exception;
|
||||
|
||||
GrammarError::GrammarError(string rule_name) : rule_name(rule_name) {}
|
||||
|
||||
|
||||
string GrammarError::message() const {
|
||||
return "Undefined rule '" + rule_name + "'";
|
||||
}
|
||||
|
||||
|
||||
namespace prepare_grammar {
|
||||
class InternSymbols : public rules::IdentityRuleFn {
|
||||
const Grammar grammar;
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
|
||||
|
||||
long index_of(string rule_name) {
|
||||
for (size_t i = 0; i < grammar.rules.size(); i++)
|
||||
if (grammar.rules[i].first == rule_name)
|
||||
return i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
rule_ptr apply_to(const rules::Symbol *rule) {
|
||||
long index = index_of(rule->name);
|
||||
if (index == -1)
|
||||
|
|
@ -44,7 +44,7 @@ namespace tree_sitter {
|
|||
|
||||
string missing_rule_name;
|
||||
};
|
||||
|
||||
|
||||
pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
|
||||
InternSymbols interner(grammar);
|
||||
vector<pair<string, rule_ptr>> rules;
|
||||
|
|
@ -58,7 +58,7 @@ namespace tree_sitter {
|
|||
};
|
||||
rules.push_back({ pair.first, new_rule });
|
||||
}
|
||||
|
||||
|
||||
return { PreparedGrammar(rules), nullptr };
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
namespace tree_sitter {
|
||||
class Grammar;
|
||||
class PreparedGrammar;
|
||||
|
||||
|
||||
class GrammarError {
|
||||
std::string rule_name;
|
||||
public:
|
||||
|
|
|
|||
|
|
@ -22,13 +22,13 @@ namespace tree_sitter {
|
|||
|
||||
const rule_ptr & PreparedGrammar::rule(const ISymbol &symbol) const {
|
||||
return symbol.is_auxiliary() ?
|
||||
aux_rules[symbol.index].second :
|
||||
aux_rules[symbol.index].second :
|
||||
rules[symbol.index].second;
|
||||
}
|
||||
|
||||
|
||||
const string & PreparedGrammar::rule_name(const ISymbol &symbol) const {
|
||||
return symbol.is_auxiliary() ?
|
||||
aux_rules[symbol.index].first :
|
||||
aux_rules[symbol.index].first :
|
||||
rules[symbol.index].first;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,16 +7,16 @@ namespace tree_sitter {
|
|||
using std::string;
|
||||
using std::to_string;
|
||||
using std::hash;
|
||||
|
||||
|
||||
namespace rules {
|
||||
ISymbol::ISymbol(int index) :
|
||||
index(index),
|
||||
options(SymbolOption(0)) {}
|
||||
|
||||
|
||||
ISymbol::ISymbol(int index, SymbolOption options) :
|
||||
index(index),
|
||||
options(options) {}
|
||||
|
||||
|
||||
bool ISymbol::operator==(const ISymbol &other) const {
|
||||
return (other.index == index) && (other.options == options);
|
||||
}
|
||||
|
|
@ -25,39 +25,39 @@ namespace tree_sitter {
|
|||
const ISymbol *other = dynamic_cast<const ISymbol *>(&rule);
|
||||
return other && this->operator==(*other);
|
||||
}
|
||||
|
||||
|
||||
size_t ISymbol::hash_code() const {
|
||||
return hash<int>()(index) ^ hash<int16_t>()(options);
|
||||
}
|
||||
|
||||
|
||||
rule_ptr ISymbol::copy() const {
|
||||
return std::make_shared<ISymbol>(*this);
|
||||
}
|
||||
|
||||
|
||||
string ISymbol::to_string() const {
|
||||
string name = (options & SymbolOptionAuxiliary) ? "aux_" : "";
|
||||
name += (options & SymbolOptionToken) ? "token" : "sym";
|
||||
return "#<" + name + std::to_string(index) + ">";
|
||||
}
|
||||
|
||||
|
||||
bool ISymbol::operator<(const ISymbol &other) const {
|
||||
if (options < other.options) return true;
|
||||
if (options > other.options) return false;
|
||||
return (index < other.index);
|
||||
}
|
||||
|
||||
|
||||
bool ISymbol::is_token() const {
|
||||
return options & SymbolOptionToken;
|
||||
}
|
||||
|
||||
|
||||
bool ISymbol::is_built_in() const {
|
||||
return index < 0;
|
||||
}
|
||||
|
||||
|
||||
bool ISymbol::is_auxiliary() const {
|
||||
return options & SymbolOptionAuxiliary;
|
||||
}
|
||||
|
||||
|
||||
void ISymbol::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,25 +9,25 @@ namespace tree_sitter {
|
|||
SymbolOptionToken = 1 << 0,
|
||||
SymbolOptionAuxiliary = 1 << 1,
|
||||
} SymbolOption;
|
||||
|
||||
|
||||
class ISymbol : public Rule {
|
||||
public:
|
||||
explicit ISymbol(int index);
|
||||
ISymbol(int index, SymbolOption options);
|
||||
|
||||
|
||||
bool operator==(const ISymbol &other) const;
|
||||
bool operator==(const Rule &other) const;
|
||||
|
||||
|
||||
size_t hash_code() const;
|
||||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
bool operator<(const ISymbol &other) const;
|
||||
|
||||
|
||||
bool is_token() const;
|
||||
bool is_built_in() const;
|
||||
bool is_auxiliary() const;
|
||||
|
||||
|
||||
int index;
|
||||
SymbolOption options;
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue